[ { "description": "Fine-tuned XLSR-53 large model for speech recognition in Japanese Fine-tuned facebook/wav2vec2-large-xlsr-53 on Japanese using the train and validation splits of Common Voice 6.1, CSS10 and JSUT.", "url": "https://huggingface.co/jonatasgrosman/wav2vec2-large-xlsr-53-japanese", "project_name": "wav2vec2-large-xlsr-53-japanese", "downloads": 2606575, "source": "Hugging Face", "score": 23.399156626670138, "first_commit": "2021-04-16 00:20:03", "latest_commit": "2022-12-14 01:58:09", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "Wav2Vec2ForCTC" }, { "description": "Official Implementation of OCR-free Document Understanding Transformer (Donut) and Synthetic Document Generator (SynthDoG), ECCV 2022", "url": "https://github.com/clovaai/donut", "project_name": "donut", "stargazers_count": 5553, "source": "GitHub", "score": 16.162261893544585, "first_commit": "2022-07-20 23:15:30", "latest_commit": "2023-07-31 15:14:20", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "BERT base Japanese (IPA dictionary)", "url": "https://huggingface.co/tohoku-nlp/bert-base-japanese", "project_name": "bert-base-japanese", "downloads": 1523810, "source": "Hugging Face", "score": 13.642341752732088, "first_commit": "2020-04-28 21:34:23", "latest_commit": "2024-02-22 00:57:00", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM" }, { "description": "xlm-roberta-ner-japanese (Japanese caption : 日本語の固有表現抽出のモデル)", "url": "https://huggingface.co/tsmatz/xlm-roberta-ner-japanese", "project_name": "xlm-roberta-ner-japanese", "downloads": 944826, "source": "Hugging Face", "score": 8.425106523506772, "first_commit": "2022-10-24 02:08:37", "latest_commit": "2024-07-12 00:01:56", "languages": [], "model_or_dataset": "model", "model_size": 0.277, "model_architectures": "RobertaForTokenClassification" }, { "description": "Neologism dictionary based on the language resources on the Web for mecab-ipadic", "url": "https://github.com/neologd/mecab-ipadic-neologd", "project_name": "mecab-ipadic-neologd", "stargazers_count": 2695, "source": "GitHub", "score": 7.672150550745544, "first_commit": "2015-03-09 16:46:28", "latest_commit": "2020-09-14 19:56:40", "languages": [ "Perl" ], "model_or_dataset": "dataset" }, { "description": "無料で使える中品質なテキスト読み上げソフトウェア、VOICEVOXのエディター", "url": "https://github.com/VOICEVOX/voicevox", "project_name": "voicevox", "stargazers_count": 2411, "source": "GitHub", "score": 6.828486582461804, "first_commit": "2021-08-01 02:41:10", "latest_commit": "2024-08-16 01:45:33", "languages": [ "TypeScript", "JavaScript" ], "model_or_dataset": null }, { "description": "a Japanese Input Method Editor designed for multi-platform", "url": "https://github.com/google/mozc", "project_name": "mozc", "stargazers_count": 2333, "source": "GitHub", "score": 6.59677605596134, "first_commit": "2010-05-10 12:05:41", "latest_commit": "2024-08-15 07:37:57", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Engineer Vocabulary List in Japanese/English", "url": "https://github.com/mercari/engineer-vocabulary-list", "project_name": "engineer-vocabulary-list", "stargazers_count": 1766, "source": "GitHub", "score": 4.912418767169508, "first_commit": "2020-09-30 14:16:14", "latest_commit": "2020-11-04 08:32:10", "languages": [], "model_or_dataset": "dataset" }, { "description": "JMMLU Japanese Massive Multitask Language Understanding Benchmark JMMLU is a four-choice question set consisting of Japanese-translated questions of a portion of MMLU (Paper, Github) (Translated questions) and questions based on unique Japanese cultural context (Japanese questions).", "url": "https://huggingface.co/datasets/nlp-waseda/JMMLU", "project_name": "JMMLU", "downloads": 513620, "source": "Hugging Face", "score": 4.539501508077449, "first_commit": "2024-02-09 12:19:13", "latest_commit": "2024-02-27 05:22:30", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Manga OCR Optical character recognition for Japanese text, with the main focus being Japanese manga.", "url": "https://huggingface.co/kha-white/manga-ocr-base", "project_name": "manga-ocr-base", "downloads": 483043, "source": "Hugging Face", "score": 4.263971611562135, "first_commit": "2022-01-15 17:39:06", "latest_commit": "2022-06-22 15:34:05", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "VisionEncoderDecoderModel" }, { "description": "About Optical character recognition for Japanese text, with the main focus being Japanese manga", "url": "https://github.com/kha-white/manga-ocr", "project_name": "manga-ocr", "stargazers_count": 1517, "source": "GitHub", "score": 4.172727471033412, "first_commit": "2022-01-15 18:18:35", "latest_commit": "2024-06-29 11:23:04", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Whisper based Japanese subtitle generator", "url": "https://github.com/Ayanaminn/N46Whisper", "project_name": "N46Whisper", "stargazers_count": 1515, "source": "GitHub", "score": 4.166786175482118, "first_commit": "2022-10-25 16:27:20", "latest_commit": "2024-06-30 15:22:02", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "BERT base Japanese (IPA dictionary, whole word masking enabled)", "url": "https://huggingface.co/tohoku-nlp/bert-base-japanese-whole-word-masking", "project_name": "bert-base-japanese-whole-word-masking", "downloads": 457205, "source": "Hugging Face", "score": 4.031144929213877, "first_commit": "2020-04-28 21:34:35", "latest_commit": "2024-02-22 00:57:37", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM" }, { "description": "無料で使える中品質なテキスト読み上げソフトウェア、VOICEVOXの音声合成エンジン", "url": "https://github.com/VOICEVOX/voicevox_engine", "project_name": "voicevox_engine", "stargazers_count": 1257, "source": "GitHub", "score": 3.4003590493652, "first_commit": "2021-08-02 02:39:41", "latest_commit": "2024-08-15 01:08:26", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "おじさんがLINEやメールで送ってきそうな文を生成する", "url": "https://github.com/greymd/ojichat", "project_name": "ojichat", "stargazers_count": 1249, "source": "GitHub", "score": 3.376593867160024, "first_commit": "2019-05-25 03:44:08", "latest_commit": "2023-04-23 16:02:15", "languages": [ "Go" ], "model_or_dataset": null }, { "description": "Japanese pop-up dictionary extension for Chrome and Firefox.", "url": "https://github.com/FooSoft/yomichan", "project_name": "yomichan", "stargazers_count": 1044, "source": "GitHub", "score": 2.767611073152395, "first_commit": "2016-03-16 20:33:15", "latest_commit": "2023-02-25 12:43:18", "languages": [ "JavaScript", "TypeScript" ], "model_or_dataset": null }, { "description": "Kuromoji is a self-contained and very easy to use Japanese morphological analyzer designed for search", "url": "https://github.com/atilika/kuromoji", "project_name": "kuromoji", "stargazers_count": 941, "source": "GitHub", "score": 2.4616343522607567, "first_commit": "2011-01-20 15:26:44", "latest_commit": "2019-09-23 11:13:04", "languages": [ "Java" ], "model_or_dataset": null }, { "description": "Yet another Japanese morphological analyzer", "url": "https://github.com/taku910/mecab", "project_name": "mecab", "stargazers_count": 908, "source": "GitHub", "score": 2.363602975664407, "first_commit": "2011-10-15 15:16:30", "latest_commit": "2023-05-24 16:04:25", "languages": [ "C++", "Perl", "Java", "C#", "Python", "Ruby", "C" ], "model_or_dataset": null }, { "description": "オープンソースの日本語LLMまとめ", "url": "https://github.com/llm-jp/awesome-japanese-llm", "project_name": "awesome-japanese-llm", "stargazers_count": 879, "source": "GitHub", "score": 2.2774541901706447, "first_commit": "2023-07-09 13:36:38", "latest_commit": "2024-08-10 19:11:35", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Japanese morphological analysis engine written in pure Python", "url": "https://github.com/mocobeta/janome", "project_name": "janome", "stargazers_count": 843, "source": "GitHub", "score": 2.170510870247354, "first_commit": "2015-02-14 18:45:54", "latest_commit": "2023-07-01 20:31:23", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "無料で使える中品質なテキスト読み上げソフトウェア、VOICEVOXのコア", "url": "https://github.com/VOICEVOX/voicevox_core", "project_name": "voicevox_core", "stargazers_count": 839, "source": "GitHub", "score": 2.1586282791447657, "first_commit": "2021-08-31 23:19:33", "latest_commit": "2024-08-14 00:00:48", "languages": [ "Rust", "Java", "Python", "C++", "Kotlin" ], "model_or_dataset": null }, { "description": "JavaScript implementation of Japanese morphological analyzer", "url": "https://github.com/takuyaa/kuromoji.js", "project_name": "kuromoji.js", "stargazers_count": 831, "source": "GitHub", "score": 2.1348630969395903, "first_commit": "2014-12-04 17:31:39", "latest_commit": "2018-11-24 16:05:09", "languages": [ "JavaScript" ], "model_or_dataset": null }, { "description": "Standalone. Small. Language-neutral. BudouX is the successor to Budou, the machine learning powered line break organizer tool.", "url": "https://github.com/google/budoux", "project_name": "budoux", "stargazers_count": 815, "source": "GitHub", "score": 2.0873327325292386, "first_commit": "2021-11-18 09:36:21", "latest_commit": "2024-08-16 15:09:25", "languages": [ "Python", "TypeScript", "Java", "JavaScript" ], "model_or_dataset": null }, { "description": "Self-contained Japanese Morphological Analyzer written in pure Go", "url": "https://github.com/ikawaha/kagome", "project_name": "kagome", "stargazers_count": 803, "source": "GitHub", "score": 2.051684959221475, "first_commit": "2014-06-26 13:52:06", "latest_commit": "2024-08-13 10:35:54", "languages": [ "Go", "JavaScript" ], "model_or_dataset": null }, { "description": "Japanese language library for converting Japanese sentence to Hiragana, Katakana or Romaji with furigana and okurigana modes supported.", "url": "https://github.com/hexenq/kuroshiro", "project_name": "kuroshiro", "stargazers_count": 787, "source": "GitHub", "score": 2.0041545948111232, "first_commit": "2016-01-03 17:16:40", "latest_commit": "2021-06-07 23:02:39", "languages": [ "JavaScript" ], "model_or_dataset": null }, { "description": "Read Japanese manga inside browser with selectable text.", "url": "https://github.com/kha-white/mokuro", "project_name": "mokuro", "stargazers_count": 781, "source": "GitHub", "score": 1.9863307081572417, "first_commit": "2022-04-16 14:44:52", "latest_commit": "2024-07-09 17:10:55", "languages": [ "Python", "JavaScript", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "This is a Japanese sentence-BERT model.", "url": "https://huggingface.co/sonoisa/sentence-bert-base-ja-mean-tokens-v2", "project_name": "sentence-bert-base-ja-mean-tokens-v2", "downloads": 225199, "source": "Hugging Face", "score": 1.9405346937122938, "first_commit": "2021-12-14 11:18:19", "latest_commit": "2024-04-17 11:39:38", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertModel" }, { "description": "Javascript library for detecting and transliterating Hiragana <--> Katakana <--> Romaji", "url": "https://github.com/WaniKani/WanaKana", "project_name": "WanaKana", "stargazers_count": 738, "source": "GitHub", "score": 1.8585928538044219, "first_commit": "2013-08-27 12:57:41", "latest_commit": "2023-11-20 12:22:09", "languages": [ "JavaScript", "TypeScript" ], "model_or_dataset": null }, { "description": "A Japanese NLP Library using spaCy as framework based on Universal Dependencies", "url": "https://github.com/megagonlabs/ginza", "project_name": "ginza", "stargazers_count": 731, "source": "GitHub", "score": 1.837798319374893, "first_commit": "2019-03-11 16:49:15", "latest_commit": "2024-03-31 07:29:06", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Extend GNOME On-Screen Keyboard for Input Methods", "url": "https://github.com/google/shuwa", "project_name": "shuwa", "stargazers_count": 726, "source": "GitHub", "score": 1.8229450804966583, "first_commit": "2021-04-20 14:25:55", "latest_commit": "2022-12-22 19:41:35", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "- Language Understanding with Knowledge-based Embeddings", "url": "https://github.com/studio-ousia/luke", "project_name": "luke", "stargazers_count": 699, "source": "GitHub", "score": 1.74273759055419, "first_commit": "2020-03-31 21:56:47", "latest_commit": "2023-06-16 23:11:54", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": "model" }, { "description": "全国の町丁目レベル(277,191件)の住所データのオープンデータ", "url": "https://github.com/geolonia/japanese-addresses", "project_name": "japanese-addresses", "stargazers_count": 698, "source": "GitHub", "score": 1.739766942778543, "first_commit": "2020-07-12 13:35:51", "latest_commit": "2024-01-15 09:28:19", "languages": [ "JavaScript" ], "model_or_dataset": "dataset" }, { "description": "ChatdollKit enables you to make your 3D model into a chatbot", "url": "https://github.com/uezo/ChatdollKit", "project_name": "ChatdollKit", "stargazers_count": 685, "source": "GitHub", "score": 1.7011485216951323, "first_commit": "2020-03-21 22:01:11", "latest_commit": "2024-07-21 01:41:49", "languages": [ "C#" ], "model_or_dataset": null }, { "description": "オープンソースの住所正規化ライブラリ。", "url": "https://github.com/geolonia/normalize-japanese-addresses", "project_name": "normalize-japanese-addresses", "stargazers_count": 672, "source": "GitHub", "score": 1.6625301006117217, "first_commit": "2020-05-30 10:53:34", "latest_commit": "2024-07-02 10:44:21", "languages": [ "TypeScript", "JavaScript" ], "model_or_dataset": null }, { "description": "rinna/japanese-cloob-vit-b-16", "url": "https://huggingface.co/rinna/japanese-cloob-vit-b-16", "project_name": "japanese-cloob-vit-b-16", "downloads": 188490, "source": "Hugging Face", "score": 1.60974923499559, "first_commit": "2022-04-27 08:29:29", "latest_commit": "2024-07-22 08:09:24", "languages": [], "model_or_dataset": "model", "model_size": 0.197, "model_architectures": "CLOOBModel" }, { "description": "Automatically exported from code.google.com/p/mozc-morse", "url": "https://github.com/google/mozc-devices", "project_name": "mozc-devices", "stargazers_count": 635, "source": "GitHub", "score": 1.5526161329127839, "first_commit": "2012-06-06 06:06:16", "latest_commit": "2024-02-28 20:08:36", "languages": [ "Python", "C", "C++" ], "model_or_dataset": null }, { "description": "Code for producing Japanese pretrained models provided by rinna Co., Ltd.", "url": "https://github.com/rinnakk/japanese-pretrained-models", "project_name": "japanese-pretrained-models", "stargazers_count": 576, "source": "GitHub", "score": 1.3773479141496123, "first_commit": "2021-04-06 13:48:14", "latest_commit": "2022-10-28 16:08:26", "languages": [ "Python" ], "model_or_dataset": "model" }, { "description": "mecab-python. mecab-python. you can find original version here:http://taku910.github.io/mecab/", "url": "https://github.com/SamuraiT/mecab-python3", "project_name": "mecab-python3", "stargazers_count": 515, "source": "GitHub", "score": 1.1961383998351471, "first_commit": "2014-05-31 17:47:48", "latest_commit": "2024-04-15 21:32:53", "languages": [ "Python", "C++" ], "model_or_dataset": null }, { "description": "BERT models for Japanese text.", "url": "https://github.com/cl-tohoku/bert-japanese", "project_name": "bert-japanese", "stargazers_count": 502, "source": "GitHub", "score": 1.1575199787517365, "first_commit": "2019-03-24 22:50:33", "latest_commit": "2024-03-24 00:08:38", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": "model" }, { "description": "BERT with SentencePiece for Japanese text.", "url": "https://github.com/yoheikikuta/bert-japanese", "project_name": "bert-japanese", "stargazers_count": 497, "source": "GitHub", "score": 1.1426667398735018, "first_commit": "2018-12-27 20:05:33", "latest_commit": "2021-02-15 10:20:28", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": "model" }, { "description": "morphological analyzer (word segmentor + PoS Tagger) for Chinese and Japanese written purely in JavaScript.", "url": "https://github.com/rakuten-nlp/rakutenma", "project_name": "rakutenma", "stargazers_count": 471, "source": "GitHub", "score": 1.0654298977066805, "first_commit": "2014-08-05 19:05:12", "latest_commit": "2015-01-29 18:28:18", "languages": [ "JavaScript" ], "model_or_dataset": null }, { "description": "Raw data for Japanese Anime", "url": "https://github.com/bangumi-data/bangumi-data", "project_name": "bangumi-data", "stargazers_count": 471, "source": "GitHub", "score": 1.0654298977066805, "first_commit": "2016-09-05 10:17:46", "latest_commit": "2024-08-12 10:50:17", "languages": [ "JavaScript", "TypeScript" ], "model_or_dataset": null }, { "description": "GLuCoSE (General Luke-based Contrastive Sentence Embedding)-base-Japanese 日本語のREADME/Japanese README GLuCoSE (General LUke-based COntrastive Sentence Embedding, \"glucose\") is a Japanese text embedding model based on LUKE.", "url": "https://huggingface.co/pkshatech/GLuCoSE-base-ja", "project_name": "GLuCoSE-base-ja", "downloads": 119582, "source": "Hugging Face", "score": 0.9888180086137589, "first_commit": "2023-07-16 07:28:46", "latest_commit": "2023-08-25 02:53:22", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LukeModel" }, { "description": "Lightweight converter from Japanese Kana-kanji sentences into Kana-Roman.", "url": "https://github.com/miurahr/pykakasi", "project_name": "pykakasi", "stargazers_count": 407, "source": "GitHub", "score": 0.8753084400652743, "first_commit": "2011-01-26 09:56:25", "latest_commit": "2022-07-22 12:36:37", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "テキストを壱百満天原サロメお嬢様風の口調に変換します", "url": "https://github.com/jiro4989/ojosama", "project_name": "ojosama", "stargazers_count": 380, "source": "GitHub", "score": 0.7951009501228061, "first_commit": "2022-06-16 07:21:02", "latest_commit": "2024-08-07 00:15:14", "languages": [ "Go" ], "model_or_dataset": null }, { "description": "A Cython MeCab wrapper for fast, pythonic Japanese tokenization and morphological analysis.", "url": "https://github.com/polm/fugashi", "project_name": "fugashi", "stargazers_count": 376, "source": "GitHub", "score": 0.7832183590202182, "first_commit": "2019-10-14 13:33:36", "latest_commit": "2024-04-15 21:15:01", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "A Japanese tokenizer based on recurrent neural networks", "url": "https://github.com/taishi-i/nagisa", "project_name": "nagisa", "stargazers_count": 376, "source": "GitHub", "score": 0.7832183590202182, "first_commit": "2018-02-14 23:56:05", "latest_commit": "2024-06-15 01:55:19", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Juman++ (a Morphological Analyzer Toolkit)", "url": "https://github.com/ku-nlp/jumanpp", "project_name": "jumanpp", "stargazers_count": 372, "source": "GitHub", "score": 0.7713357679176304, "first_commit": "2016-10-17 14:48:20", "latest_commit": "2023-03-06 09:27:42", "languages": [ "C++", "Ruby", "Python", "Perl" ], "model_or_dataset": null }, { "description": "A morphological analysis library.", "url": "https://github.com/lindera-morphology/lindera", "project_name": "lindera", "stargazers_count": 369, "source": "GitHub", "score": 0.7624238245906895, "first_commit": "2020-01-22 23:29:22", "latest_commit": "2024-07-15 23:23:30", "languages": [ "Rust" ], "model_or_dataset": null }, { "description": "BERT base Japanese (character tokenization)", "url": "https://huggingface.co/tohoku-nlp/bert-base-japanese-char", "project_name": "bert-base-japanese-char", "downloads": 91576, "source": "Hugging Face", "score": 0.7364554393032353, "first_commit": "2020-04-28 21:34:05", "latest_commit": "2024-02-22 00:57:58", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM" }, { "description": "BERT base Japanese (character-level tokenization with whole word masking, jawiki-20200831)", "url": "https://huggingface.co/tohoku-nlp/bert-base-japanese-char-v2", "project_name": "bert-base-japanese-char-v2", "downloads": 88094, "source": "Hugging Face", "score": 0.705079074726386, "first_commit": "2021-03-05 04:05:08", "latest_commit": "2021-09-23 15:45:24", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM" }, { "description": "NDLOCRのアプリケーション", "url": "https://github.com/ndl-lab/ndlocr_cli", "project_name": "ndlocr_cli", "stargazers_count": 344, "source": "GitHub", "score": 0.6881576301995151, "first_commit": "2022-03-30 11:12:01", "latest_commit": "2024-07-29 09:20:46", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "NLP libary for creating pipeline components", "url": "https://github.com/PKSHATechnology-Research/camphr", "project_name": "camphr", "stargazers_count": 340, "source": "GitHub", "score": 0.6762750390969273, "first_commit": "2019-08-27 07:22:43", "latest_commit": "2021-08-18 15:06:51", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "An input method without morphological analysis.", "url": "https://github.com/codefirst/aquaskk", "project_name": "aquaskk", "stargazers_count": 339, "source": "GitHub", "score": 0.6733043913212803, "first_commit": "2013-05-12 07:03:33", "latest_commit": "2023-07-10 00:35:46", "languages": [ "C++" ], "model_or_dataset": null }, { "description": "Japanese language data on kanji, radicals, media files, fonts and related resources from Kanji alive", "url": "https://github.com/kanjialive/kanji-data-media", "project_name": "kanji-data-media", "stargazers_count": 336, "source": "GitHub", "score": 0.6643924479943394, "first_commit": "2014-08-30 19:06:48", "latest_commit": "2023-11-15 20:28:16", "languages": [], "model_or_dataset": "dataset" }, { "description": "日本語で絵文字入力をするための IME 追加辞書 orange_book Google 日本語入力などで日本語から絵文字への変換を可能にする IME 拡張辞書", "url": "https://github.com/peaceiris/emoji-ime-dictionary", "project_name": "emoji-ime-dictionary", "stargazers_count": 331, "source": "GitHub", "score": 0.6495392091161045, "first_commit": "2018-10-13 21:37:28", "latest_commit": "2023-01-16 12:01:31", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "約100年に渡るアニメ作品リストデータベース", "url": "https://github.com/anilogia/animedb", "project_name": "animedb", "stargazers_count": 321, "source": "GitHub", "score": 0.6198327313596348, "first_commit": "2016-10-11 23:05:11", "latest_commit": "2023-01-04 16:56:30", "languages": [ "JavaScript" ], "model_or_dataset": "dataset" }, { "description": "Japanese GPT2 Generation Model", "url": "https://github.com/tanreinama/gpt2-japanese", "project_name": "gpt2-japanese", "stargazers_count": 313, "source": "GitHub", "score": 0.596067549154459, "first_commit": "2019-12-12 11:07:23", "latest_commit": "2023-09-02 17:23:50", "languages": [ "Python" ], "model_or_dataset": "model" }, { "description": "vibrato: Viterbi-based accelerated tokenizer", "url": "https://github.com/daac-tools/vibrato", "project_name": "vibrato", "stargazers_count": 311, "source": "GitHub", "score": 0.5901262536031652, "first_commit": "2022-07-06 17:50:53", "latest_commit": "2024-05-30 19:17:07", "languages": [ "Rust", "Python" ], "model_or_dataset": null }, { "description": "The purpose of this repository is to make prototypes as case study in the context of proof of concept(PoC) and research and development(R&D) that I have written in my website. The main research topics are Auto-Encoders in relation to the representation learning, the statistical machine learning for energy-based models, adversarial generation net…", "url": "https://github.com/accel-brain/accel-brain-code", "project_name": "accel-brain-code", "stargazers_count": 305, "source": "GitHub", "score": 0.5723023669492833, "first_commit": "2016-01-26 21:16:08", "latest_commit": "2023-12-26 12:01:37", "languages": [ "Python", "C", "Jupyter Notebook", "JavaScript" ], "model_or_dataset": null }, { "description": "This is a Japanese sentence-BERT model.", "url": "https://huggingface.co/sonoisa/sentence-bert-base-ja-mean-tokens", "project_name": "sentence-bert-base-ja-mean-tokens", "downloads": 73041, "source": "Hugging Face", "score": 0.5694362212010232, "first_commit": "2021-07-22 06:11:37", "latest_commit": "2024-04-17 11:40:03", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForMaskedLM" }, { "description": "BERT base Japanese (character-level tokenization with whole word masking, CC-100 and jawiki-20230102)", "url": "https://huggingface.co/tohoku-nlp/bert-base-japanese-char-v3", "project_name": "bert-base-japanese-char-v3", "downloads": 72715, "source": "Hugging Face", "score": 0.5664986293423061, "first_commit": "2023-05-19 00:33:09", "latest_commit": "2023-05-19 00:39:44", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForPreTraining" }, { "description": "Pure-Python Japanese character interconverter for Hiragana, Katakana, Hankaku, and Zenkaku", "url": "https://github.com/ikegami-yukino/jaconv", "project_name": "jaconv", "stargazers_count": 299, "source": "GitHub", "score": 0.5544784802954015, "first_commit": "2014-03-22 20:09:24", "latest_commit": "2024-08-13 14:26:29", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Japanese and Chinese dictionaries for Yomitan.", "url": "https://github.com/marvnc/yomitan-dictionaries", "project_name": "yomitan-dictionaries", "stargazers_count": 295, "source": "GitHub", "score": 0.5425958891928137, "first_commit": "2022-08-07 01:28:52", "latest_commit": "2024-08-04 01:00:08", "languages": [ "JavaScript" ], "model_or_dataset": "dataset" }, { "description": "JGLUE: Japanese General Language Understanding Evaluation", "url": "https://github.com/yahoojapan/JGLUE", "project_name": "JGLUE", "stargazers_count": 294, "source": "GitHub", "score": 0.5396252414171666, "first_commit": "2022-05-27 13:35:39", "latest_commit": "2023-06-24 10:50:30", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "SudachiPy 0.6* and above are developed as Sudachi.rs.", "url": "https://github.com/WorksApplications/sudachi.rs", "project_name": "sudachi.rs", "stargazers_count": 290, "source": "GitHub", "score": 0.5277426503145788, "first_commit": "2019-11-23 21:06:49", "latest_commit": "2024-06-28 10:11:04", "languages": [ "Rust", "Python" ], "model_or_dataset": null }, { "description": "ELYZA-japanese-Llama-2-7b Model Description ELYZA-japanese-Llama-2-7b は、 Llama2をベースとして日本語能力を拡張するために追加事前学習を行ったモデルです。 ", "url": "https://huggingface.co/elyza/ELYZA-japanese-Llama-2-7b-instruct", "project_name": "ELYZA-japanese-Llama-2-7b-instruct", "downloads": 66729, "source": "Hugging Face", "score": 0.5125586757647587, "first_commit": "2023-08-28 12:58:25", "latest_commit": "2023-08-29 03:46:15", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM" }, { "description": "Optical character recognition in manga images. Manga OCR desktop application", "url": "https://github.com/blueaxis/Poricom", "project_name": "Poricom", "stargazers_count": 283, "source": "GitHub", "score": 0.50694811588505, "first_commit": "2021-08-31 16:21:12", "latest_commit": "2023-06-04 18:55:23", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Japanese Stable Diffusion is a Japanese specific latent text-to-image diffusion model capable of generating photo-realistic images given any text input.", "url": "https://github.com/rinnakk/japanese-stable-diffusion", "project_name": "japanese-stable-diffusion", "stargazers_count": 280, "source": "GitHub", "score": 0.49803617255810906, "first_commit": "2022-09-09 10:11:41", "latest_commit": "2023-03-20 08:21:57", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "Japanese to romaji converter in Python", "url": "https://github.com/polm/cutlet", "project_name": "cutlet", "stargazers_count": 278, "source": "GitHub", "score": 0.4920948770068151, "first_commit": "2020-04-16 22:59:42", "latest_commit": "2024-06-28 00:40:28", "languages": [ "Python", "JavaScript" ], "model_or_dataset": null }, { "description": "▽▼ SKK-like Japanese Input Method Editor for Windows", "url": "https://github.com/nathancorvussolis/corvusskk", "project_name": "corvusskk", "stargazers_count": 277, "source": "GitHub", "score": 0.4891242292311681, "first_commit": "2012-06-15 14:12:35", "latest_commit": "2024-08-04 19:22:13", "languages": [ "C++", "C", "Lua" ], "model_or_dataset": null }, { "description": "「大規模言語モデル入門」(技術評論社, 2023)のGitHubリポジトリ", "url": "https://github.com/ghmagazine/llm-book", "project_name": "llm-book", "stargazers_count": 273, "source": "GitHub", "score": 0.4772416381285802, "first_commit": "2023-07-13 16:50:00", "latest_commit": "2024-07-27 21:21:13", "languages": [ "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "Restaurant Search System through Dialogue in Japanese.", "url": "https://github.com/Hironsan/HotPepperGourmetDialogue", "project_name": "HotPepperGourmetDialogue", "stargazers_count": 272, "source": "GitHub", "score": 0.4742709903529333, "first_commit": "2016-02-13 11:37:07", "latest_commit": "2016-05-19 13:05:29", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Tacotron2 implementation of Japanese", "url": "https://github.com/CjangCjengh/tacotron2-japanese", "project_name": "tacotron2-japanese", "stargazers_count": 269, "source": "GitHub", "score": 0.46535904702599235, "first_commit": "2022-07-25 11:42:50", "latest_commit": "2022-09-04 12:57:11", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "Japanese text normalizer for mecab-neologd", "url": "https://github.com/ikegami-yukino/neologdn", "project_name": "neologdn", "stargazers_count": 267, "source": "GitHub", "score": 0.45941775147469843, "first_commit": "2015-07-22 01:15:07", "latest_commit": "2024-05-03 00:56:43", "languages": [ "Python", "C++", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "FuguMT", "url": "https://huggingface.co/staka/fugumt-en-ja", "project_name": "fugumt-en-ja", "downloads": 57811, "source": "Hugging Face", "score": 0.43219841749470045, "first_commit": "2022-05-08 04:23:57", "latest_commit": "2023-08-15 17:45:04", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MarianMTModel" }, { "description": "「BERTによる自然言語処理入門: Transformersを使った実践プログラミング」サポートページ", "url": "https://github.com/stockmarkteam/bert-book", "project_name": "bert-book", "stargazers_count": 250, "source": "GitHub", "score": 0.4089167392886999, "first_commit": "2021-02-04 22:11:11", "latest_commit": "2024-02-13 13:36:21", "languages": [ "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "azooKey: A Japanese Keyboard iOS Application Fully Developed in Swift", "url": "https://github.com/ensan-hcl/azooKey", "project_name": "azooKey", "stargazers_count": 247, "source": "GitHub", "score": 0.40000479596175903, "first_commit": "2020-09-03 16:35:44", "latest_commit": "2024-07-04 11:03:08", "languages": [ "Swift", "Python" ], "model_or_dataset": "dataset" }, { "description": "FuguMT", "url": "https://huggingface.co/staka/fugumt-ja-en", "project_name": "fugumt-ja-en", "downloads": 53710, "source": "Hugging Face", "score": 0.39524423279347726, "first_commit": "2022-05-08 04:32:09", "latest_commit": "2023-08-15 17:40:58", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MarianMTModel" }, { "description": "pdf-translator translates English PDF files into Japanese, preserving the original layout.", "url": "https://github.com/discus0434/pdf-translator", "project_name": "pdf-translator", "stargazers_count": 245, "source": "GitHub", "score": 0.39406350041046506, "first_commit": "2023-04-17 14:55:03", "latest_commit": "2024-05-07 16:25:33", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Code for evaluating Japanese pretrained models provided by NTT Ltd.", "url": "https://github.com/nttcslab/japanese-dialog-transformers", "project_name": "japanese-dialog-transformers", "stargazers_count": 240, "source": "GitHub", "score": 0.3792102615322302, "first_commit": "2021-09-17 22:13:57", "latest_commit": "2023-06-21 09:24:35", "languages": [ "Python" ], "model_or_dataset": "model" }, { "description": "A tool for dividing the Japanese full name into a family name and a given name.", "url": "https://github.com/rskmoi/namedivider-python", "project_name": "namedivider-python", "stargazers_count": 237, "source": "GitHub", "score": 0.3702983182052893, "first_commit": "2020-11-08 00:41:44", "latest_commit": "2024-05-03 15:28:00", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "This repository is for building Windows 64-bit MeCab binary and improving MeCab Python binding.", "url": "https://github.com/ikegami-yukino/mecab", "project_name": "mecab", "stargazers_count": 232, "source": "GitHub", "score": 0.35544507932705444, "first_commit": "2011-10-15 15:16:30", "latest_commit": "2024-05-30 20:11:11", "languages": [ "C++", "Perl", "Java", "C#", "Python", "Ruby", "C" ], "model_or_dataset": null }, { "description": "Visualization Module for Natural Language Processing", "url": "https://github.com/takapy0210/nlplot", "project_name": "nlplot", "stargazers_count": 232, "source": "GitHub", "score": 0.35544507932705444, "first_commit": "2020-05-07 00:10:04", "latest_commit": "2022-09-21 23:19:20", "languages": [ "JavaScript", "Python" ], "model_or_dataset": null }, { "description": "Sentence BERT base Japanese model This repository contains a Sentence BERT base model for Japanese.", "url": "https://huggingface.co/colorfulscoop/sbert-base-ja", "project_name": "sbert-base-ja", "downloads": 48745, "source": "Hugging Face", "score": 0.35050452856485664, "first_commit": "2021-08-01 04:12:28", "latest_commit": "2021-08-08 15:47:42", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertModel" }, { "description": "Using Vim as an input method for X11 apps", "url": "https://github.com/algon-320/vime", "project_name": "vime", "stargazers_count": 228, "source": "GitHub", "score": 0.34356248822446656, "first_commit": "2022-12-03 22:32:24", "latest_commit": "2022-12-03 22:32:24", "languages": [ "Rust" ], "model_or_dataset": null }, { "description": "A lexicon for Sudachi", "url": "https://github.com/WorksApplications/SudachiDict", "project_name": "SudachiDict", "stargazers_count": 225, "source": "GitHub", "score": 0.33465054489752566, "first_commit": "2019-04-01 17:14:39", "latest_commit": "2024-07-19 18:45:35", "languages": [ "Python", "Java" ], "model_or_dataset": null }, { "description": "A free and openly licensed Japanese-to-English dictionary compatible with multiple dictionary clients", "url": "https://github.com/stephenmk/Jitendex", "project_name": "Jitendex", "stargazers_count": 225, "source": "GitHub", "score": 0.33465054489752566, "first_commit": "2023-10-09 17:45:11", "latest_commit": "2024-07-09 16:43:23", "languages": [], "model_or_dataset": "dataset" }, { "description": "Konoha: Simple wrapper of Japanese Tokenizers", "url": "https://github.com/himkt/konoha", "project_name": "konoha", "stargazers_count": 224, "source": "GitHub", "score": 0.3316798971218787, "first_commit": "2018-08-22 13:55:37", "latest_commit": "2024-05-16 00:01:37", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "Vaporetto: Very Accelerated POintwise pREdicTion based TOkenizer", "url": "https://github.com/daac-tools/vaporetto", "project_name": "vaporetto", "stargazers_count": 223, "source": "GitHub", "score": 0.3287092493462317, "first_commit": "2021-08-18 22:59:40", "latest_commit": "2024-07-14 16:33:38", "languages": [ "Rust" ], "model_or_dataset": null }, { "description": "Building AI-based conversational avatars lightning fast", "url": "https://github.com/uezo/aiavatarkit", "project_name": "aiavatarkit", "stargazers_count": 218, "source": "GitHub", "score": 0.3138560104679969, "first_commit": "2023-05-27 23:14:32", "latest_commit": "2024-06-08 17:55:22", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "nlp-waseda/roberta-base-japanese-with-auto-jumanpp Model description", "url": "https://huggingface.co/nlp-waseda/roberta-base-japanese-with-auto-jumanpp", "project_name": "roberta-base-japanese-with-auto-jumanpp", "downloads": 44503, "source": "Hugging Face", "score": 0.3122797903296484, "first_commit": "2022-10-15 05:09:36", "latest_commit": "2022-10-21 10:57:40", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForMaskedLM" }, { "description": "日本語OCR", "url": "https://github.com/tanreinama/OCR_Japanease", "project_name": "OCR_Japanease", "stargazers_count": 216, "source": "GitHub", "score": 0.3079147149167029, "first_commit": "2020-04-08 09:25:03", "latest_commit": "2021-04-30 19:26:24", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Yet another Japanese IME for IBus/Linux", "url": "https://github.com/akaza-im/akaza", "project_name": "akaza", "stargazers_count": 212, "source": "GitHub", "score": 0.29603212381411503, "first_commit": "2020-09-03 01:11:08", "latest_commit": "2023-05-28 23:41:07", "languages": [ "Rust", "Perl", "C" ], "model_or_dataset": null }, { "description": "JTubeSpeech: Corpus of Japanese speech collected from YouTube", "url": "https://github.com/sarulab-speech/jtubespeech", "project_name": "jtubespeech", "stargazers_count": 208, "source": "GitHub", "score": 0.28414953271152715, "first_commit": "2021-07-01 00:11:55", "latest_commit": "2023-03-02 15:50:30", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "llm-book/bert-base-japanese-v3-ner-wikipedia-dataset 「大規模言語モデル入門」の第6章で紹介している固有表現認識のモデルです。 ", "url": "https://huggingface.co/llm-book/bert-base-japanese-v3-ner-wikipedia-dataset", "project_name": "bert-base-japanese-v3-ner-wikipedia-dataset", "downloads": 40333, "source": "Hugging Face", "score": 0.27470384538839, "first_commit": "2023-05-28 08:06:41", "latest_commit": "2023-07-25 13:32:15", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForTokenClassification" }, { "description": "reazonspeech-nemo-v2 reazonspeech-nemo-v2 is an automatic speech recognition model trained on ReazonSpeech v2.0 corpus.", "url": "https://huggingface.co/reazon-research/reazonspeech-nemo-v2", "project_name": "reazonspeech-nemo-v2", "downloads": 39469, "source": "Hugging Face", "score": 0.26691832586099257, "first_commit": "2024-01-30 01:49:12", "latest_commit": "2024-02-14 01:32:45", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "Tutorial to train fastText with Japanese corpus", "url": "https://github.com/icoxfog417/fastTextJapaneseTutorial", "project_name": "fastTextJapaneseTutorial", "stargazers_count": 202, "source": "GitHub", "score": 0.2663256460576453, "first_commit": "2016-09-28 13:06:13", "latest_commit": "2016-09-29 14:21:51", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Japanese postal code data.", "url": "https://github.com/polm/posuto", "project_name": "posuto", "stargazers_count": 200, "source": "GitHub", "score": 0.2603843505063514, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null }, { "description": "Llama-3-ELYZA-JP-8B Model Description Llama-3-ELYZA-JP-8B is a large language model trained by ELYZA, Inc.", "url": "https://huggingface.co/elyza/Llama-3-ELYZA-JP-8B", "project_name": "Llama-3-ELYZA-JP-8B", "downloads": 38663, "source": "Hugging Face", "score": 0.2596554453759436, "first_commit": "2024-06-25 06:32:13", "latest_commit": "2024-06-26 02:56:23", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM" }, { "description": "The Kyoto Text Analysis Toolkit for word segmentation and pronunciation estimation, etc.", "url": "https://github.com/neubig/kytea", "project_name": "kytea", "stargazers_count": 199, "source": "GitHub", "score": 0.2574137027307044, "first_commit": "2011-03-03 14:53:14", "latest_commit": "2020-04-03 09:46:01", "languages": [ "C++", "Perl", "C" ], "model_or_dataset": null }, { "description": "BERT base Japanese (unidic-lite with whole word masking, CC-100 and jawiki-20230102)", "url": "https://huggingface.co/tohoku-nlp/bert-base-japanese-v3", "project_name": "bert-base-japanese-v3", "downloads": 36272, "source": "Hugging Face", "score": 0.23811010140602779, "first_commit": "2023-05-19 00:13:53", "latest_commit": "2023-05-19 00:31:53", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForPreTraining" }, { "description": "Python wrapper for OpenJTalk", "url": "https://github.com/r9y9/pyopenjtalk", "project_name": "pyopenjtalk", "stargazers_count": 192, "source": "GitHub", "score": 0.2366191683011756, "first_commit": "2018-08-07 00:37:37", "latest_commit": "2024-07-13 16:04:09", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "A fast implementation of the Aho-Corasick algorithm using the compact double-array data structure in Rust.", "url": "https://github.com/daac-tools/daachorse", "project_name": "daachorse", "stargazers_count": 192, "source": "GitHub", "score": 0.2366191683011756, "first_commit": "2021-09-02 11:37:02", "latest_commit": "2024-06-06 18:11:39", "languages": [ "Rust" ], "model_or_dataset": null }, { "description": "English-Japanese Dictionary data (Public Domain) EJDict-hand", "url": "https://github.com/kujirahand/EJDict", "project_name": "EJDict", "stargazers_count": 189, "source": "GitHub", "score": 0.2277072249742347, "first_commit": "2016-01-04 23:23:52", "latest_commit": "2024-02-15 09:49:00", "languages": [ "PHP" ], "model_or_dataset": "dataset" }, { "description": "shisa-gamma-7b-v1 For more information see our main Shisa 7B model We applied a version of our fine-tune data set onto Japanese Stable LM Base Gamma 7B and it performed pretty well, just sharing since it might be of interest.", "url": "https://huggingface.co/augmxnt/shisa-gamma-7b-v1", "project_name": "shisa-gamma-7b-v1", "downloads": 34053, "source": "Hugging Face", "score": 0.21811465252721424, "first_commit": "2023-12-23 20:21:44", "latest_commit": "2024-05-19 06:07:36", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM" }, { "description": "Java library for identifying Japanese characters from images", "url": "https://github.com/sakarika/kanjitomo-ocr", "project_name": "kanjitomo-ocr", "stargazers_count": 184, "source": "GitHub", "score": 0.21285398609599984, "first_commit": "2019-12-08 14:09:53", "latest_commit": "2021-05-03 21:38:06", "languages": [ "Java", "JavaScript" ], "model_or_dataset": null }, { "description": "Model Card for Japanese DeBERTa V2 base Model description This is a Japanese DeBERTa V2 base model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.", "url": "https://huggingface.co/ku-nlp/deberta-v2-base-japanese", "project_name": "deberta-v2-base-japanese", "downloads": 32567, "source": "Hugging Face", "score": 0.20472427982152838, "first_commit": "2023-01-05 08:04:14", "latest_commit": "2023-05-12 14:13:03", "languages": [], "model_or_dataset": "model", "model_size": 0.137, "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "Sentence boundary disambiguation tool for Japanese texts (日本語文境界判定器)", "url": "https://github.com/megagonlabs/bunkai", "project_name": "bunkai", "stargazers_count": 181, "source": "GitHub", "score": 0.20394204276905895, "first_commit": "2021-04-21 12:36:10", "latest_commit": "2023-08-10 14:13:25", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "ITAコーパスの文章リスト", "url": "https://github.com/mmorise/ita-corpus", "project_name": "ita-corpus", "stargazers_count": 178, "source": "GitHub", "score": 0.19503009944211802, "first_commit": "2021-06-03 11:22:38", "latest_commit": "2023-12-04 00:50:44", "languages": [], "model_or_dataset": "dataset" }, { "description": "BERT base Japanese (unidic-lite with whole word masking, jawiki-20200831)", "url": "https://huggingface.co/tohoku-nlp/bert-base-japanese-v2", "project_name": "bert-base-japanese-v2", "downloads": 31476, "source": "Hugging Face", "score": 0.19489325921459483, "first_commit": "2021-03-05 03:37:30", "latest_commit": "2021-09-23 15:45:31", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM" }, { "description": "The Japanese analysis plugin for elasticsearch", "url": "https://github.com/worksapplications/elasticsearch-sudachi", "project_name": "elasticsearch-sudachi", "stargazers_count": 177, "source": "GitHub", "score": 0.19205945166647106, "first_commit": "2017-11-09 19:21:47", "latest_commit": "2024-07-04 14:46:30", "languages": [ "Kotlin", "Java", "Python" ], "model_or_dataset": null }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-7b-instruct-v0.1", "project_name": "Swallow-7b-instruct-v0.1", "downloads": 30722, "source": "Hugging Face", "score": 0.18809895166406512, "first_commit": "2024-03-04 08:46:03", "latest_commit": "2024-07-06 15:18:14", "languages": [], "model_or_dataset": "model", "model_size": 6.83, "model_architectures": "LlamaForCausalLM" }, { "description": "Topologically ordered lists of kanji for effective learning", "url": "https://github.com/scriptin/topokanji", "project_name": "topokanji", "stargazers_count": 174, "source": "GitHub", "score": 0.18314750833953014, "first_commit": "2015-05-18 01:55:58", "latest_commit": "2016-01-24 05:18:22", "languages": [ "JavaScript" ], "model_or_dataset": "dataset" }, { "description": "Can neural networks transliterate Romaji into Japanese correctly?", "url": "https://github.com/Kyubyong/neural_japanese_transliterator", "project_name": "neural_japanese_transliterator", "stargazers_count": 173, "source": "GitHub", "score": 0.18017686056388318, "first_commit": "2017-01-01 13:20:54", "latest_commit": "2017-09-17 15:21:27", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Summarize arXiv paper with figures", "url": "https://github.com/rkmt/summarize_arxv", "project_name": "summarize_arxv", "stargazers_count": 173, "source": "GitHub", "score": 0.18017686056388318, "first_commit": "2023-05-21 17:03:22", "latest_commit": "2023-05-23 01:54:10", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "JMTEB:", "url": "https://huggingface.co/datasets/sbintuitions/JMTEB", "project_name": "JMTEB", "downloads": 29443, "source": "Hugging Face", "score": 0.1765738596784849, "first_commit": "2024-02-22 18:15:27", "latest_commit": "2024-06-28 15:18:20", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Maintainers Junfeng Jiang@Aizawa Lab: jiangjf (at) is.s.u-tokyo.ac.jp Jiahao Huang@Aizawa Lab: jiahao-huang (at) g.ecc.u-tokyo.ac.jp", "url": "https://huggingface.co/datasets/Coldog2333/JMedBench", "project_name": "JMedBench", "downloads": 29290, "source": "Hugging Face", "score": 0.1751951739288416, "first_commit": "2024-08-17 11:12:54", "latest_commit": "2024-09-01 12:41:40", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "国会議案データベース:衆議院", "url": "https://github.com/smartnews-smri/house-of-representatives", "project_name": "house-of-representatives", "stargazers_count": 167, "source": "GitHub", "score": 0.16235297391000134, "first_commit": "2022-04-18 14:41:05", "latest_commit": "2024-08-15 14:09:44", "languages": [ "JavaScript", "Python" ], "model_or_dataset": "dataset" }, { "description": "JMdict and JMnedict in JSON format", "url": "https://github.com/scriptin/jmdict-simplified", "project_name": "jmdict-simplified", "stargazers_count": 166, "source": "GitHub", "score": 0.15938232613435438, "first_commit": "2016-02-07 00:30:44", "latest_commit": "2024-08-12 12:21:03", "languages": [ "Kotlin", "JavaScript", "TypeScript" ], "model_or_dataset": "dataset" }, { "description": "ディープラーニングモデルの性能を体系的に最大化するためのプレイブック", "url": "https://github.com/Valkyrja3607/tuning_playbook_ja", "project_name": "tuning_playbook_ja", "stargazers_count": 161, "source": "GitHub", "score": 0.14452908725611952, "first_commit": "2023-01-22 02:19:42", "latest_commit": "2023-01-22 22:10:48", "languages": [], "model_or_dataset": null }, { "description": "A Japanese DistilBERT pretrained model, which was trained on Wikipedia.", "url": "https://github.com/BandaiNamcoResearchInc/DistilBERT-base-jp", "project_name": "DistilBERT-base-jp", "stargazers_count": 160, "source": "GitHub", "score": 0.14155843948047256, "first_commit": "2020-04-22 16:17:15", "latest_commit": "2020-04-22 16:24:26", "languages": [], "model_or_dataset": "model" }, { "description": "japanese-gpt-1b This repository provides a 1.3B-parameter Japanese GPT model.", "url": "https://huggingface.co/rinna/japanese-gpt-1b", "project_name": "japanese-gpt-1b", "downloads": 25320, "source": "Hugging Face", "score": 0.13942143258188813, "first_commit": "2022-01-20 02:30:19", "latest_commit": "2024-07-20 07:52:31", "languages": [], "model_or_dataset": "model", "model_size": 1.33, "model_architectures": "GPT2LMHeadModel" }, { "description": "日本語における不適切表現を収集します。自然言語処理の時のデータクリーニング用等に使えると思います。", "url": "https://github.com/MosasoM/inappropriate-words-ja", "project_name": "inappropriate-words-ja", "stargazers_count": 159, "source": "GitHub", "score": 0.13858779170482557, "first_commit": "2020-08-17 13:38:34", "latest_commit": "2021-12-02 00:33:00", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "JapaneseEmbeddingEval", "url": "https://github.com/oshizo/JapaneseEmbeddingEval", "project_name": "JapaneseEmbeddingEval", "stargazers_count": 157, "source": "GitHub", "score": 0.13264649615353163, "first_commit": "2023-01-07 13:35:18", "latest_commit": "2024-04-06 12:45:12", "languages": [ "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "A free online, self-hostable, multilang Japanese dictionary.", "url": "https://github.com/WeDontPanic/Jotoba", "project_name": "Jotoba", "stargazers_count": 157, "source": "GitHub", "score": 0.13264649615353163, "first_commit": "2021-04-15 11:08:23", "latest_commit": "2024-01-22 17:06:42", "languages": [ "Rust", "JavaScript" ], "model_or_dataset": null }, { "description": "Japanese negative positive classification.日本語文書のネガポジを判定。", "url": "https://github.com/liaoziyang/negapoji", "project_name": "negapoji", "stargazers_count": 151, "source": "GitHub", "score": 0.11482260949964981, "first_commit": "2017-08-17 17:23:49", "latest_commit": "2017-08-20 18:12:51", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Japanese dictation kit using Julius", "url": "https://github.com/julius-speech/dictation-kit", "project_name": "dictation-kit", "stargazers_count": 151, "source": "GitHub", "score": 0.11482260949964981, "first_commit": "2015-10-03 17:08:52", "latest_commit": "2019-04-18 11:41:47", "languages": [ "Python", "Perl" ], "model_or_dataset": null }, { "description": "Japanese word embedding with Sudachi and NWJC", "url": "https://github.com/WorksApplications/chiVe", "project_name": "chiVe", "stargazers_count": 151, "source": "GitHub", "score": 0.11482260949964981, "first_commit": "2019-11-18 15:39:08", "latest_commit": "2024-03-01 17:50:40", "languages": [ "Python" ], "model_or_dataset": "model" }, { "description": "Phishing URL dataset from JPCERT/CC", "url": "https://github.com/JPCERTCC/phishurl-list", "project_name": "phishurl-list", "stargazers_count": 150, "source": "GitHub", "score": 0.11185196172400284, "first_commit": "2022-08-05 15:20:50", "latest_commit": "2024-05-02 04:51:47", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "WRIME: 主観と客観の感情分析データセット", "url": "https://github.com/ids-cv/wrime", "project_name": "wrime", "stargazers_count": 145, "source": "GitHub", "score": 0.09699872284576799, "first_commit": "2020-08-18 14:13:44", "latest_commit": "2023-01-15 13:44:21", "languages": [], "model_or_dataset": "dataset" }, { "description": "Japanese sentiment analyzer implemented in Python.", "url": "https://github.com/Hironsan/asari", "project_name": "asari", "stargazers_count": 143, "source": "GitHub", "score": 0.09105742729447405, "first_commit": "2019-02-06 14:17:47", "latest_commit": "2022-10-19 07:38:31", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Shell command launcher with natural language", "url": "https://github.com/hirokidaichi/wanna", "project_name": "wanna", "stargazers_count": 143, "source": "GitHub", "score": 0.09105742729447405, "first_commit": "2023-03-01 18:10:59", "latest_commit": "2023-04-02 00:25:56", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "A fast converter between Japanese hankaku and zenkaku characters", "url": "https://github.com/studio-ousia/mojimoji", "project_name": "mojimoji", "stargazers_count": 142, "source": "GitHub", "score": 0.08808677951882708, "first_commit": "2013-11-03 01:38:05", "latest_commit": "2024-01-12 19:24:55", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "JP Language Model Evaluation Harness", "url": "https://github.com/Stability-AI/lm-evaluation-harness/tree/jp-stable", "project_name": "jp-stable", "stargazers_count": 142, "source": "GitHub", "score": 0.08808677951882708, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null }, { "description": "日本語に翻訳したStanford Alpacaのデータセットを用いてLLaMAをファインチューニングし作成したLow-Rank AdapterのリンクとGenerateサンプルコード", "url": "https://github.com/kunishou/Japanese-Alpaca-LoRA", "project_name": "Japanese-Alpaca-LoRA", "stargazers_count": 142, "source": "GitHub", "score": 0.08808677951882708, "first_commit": "2023-03-22 23:04:56", "latest_commit": "2023-04-03 05:31:02", "languages": [ "Jupyter Notebook" ], "model_or_dataset": "model" }, { "description": "rinna/japanese-clip-vit-b-16", "url": "https://huggingface.co/rinna/japanese-clip-vit-b-16", "project_name": "japanese-clip-vit-b-16", "downloads": 18300, "source": "Hugging Face", "score": 0.07616408642178402, "first_commit": "2022-04-27 07:52:33", "latest_commit": "2024-07-20 08:42:32", "languages": [], "model_or_dataset": "model", "model_size": 0.197, "model_architectures": "CLIPModel" }, { "description": "A set of metrics for feature selection from text data", "url": "https://github.com/Kensuke-Mitsuzawa/JapaneseTokenizers", "project_name": "JapaneseTokenizers", "stargazers_count": 136, "source": "GitHub", "score": 0.07026289286494525, "first_commit": "2015-09-01 19:27:42", "latest_commit": "2019-03-25 16:52:52", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "bert-finetuned-japanese-sentiment This model is a fine-tuned version of cl-tohoku/bert-base-japanese-v2 on product amazon reviews japanese dataset.", "url": "https://huggingface.co/christian-phu/bert-finetuned-japanese-sentiment", "project_name": "bert-finetuned-japanese-sentiment", "downloads": 17279, "source": "Hugging Face", "score": 0.0669638370728572, "first_commit": "2023-04-06 16:43:51", "latest_commit": "2023-04-07 17:27:53", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification" }, { "description": "OpenCALM-3B Model Description OpenCALM is a suite of decoder-only language models pre-trained on Japanese datasets, developed by", "url": "https://huggingface.co/cyberagent/open-calm-3b", "project_name": "open-calm-3b", "downloads": 17242, "source": "Hugging Face", "score": 0.06663042940791078, "first_commit": "2023-05-15 07:14:36", "latest_commit": "2023-05-18 01:11:50", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTNeoXForCausalLM" }, { "description": "chakki's Aspect-Based Sentiment Analysis dataset", "url": "https://github.com/chakki-works/chABSA-dataset", "project_name": "chABSA-dataset", "stargazers_count": 134, "source": "GitHub", "score": 0.06432159731365131, "first_commit": "2017-12-27 11:51:01", "latest_commit": "2018-09-11 12:28:06", "languages": [ "Python", "Jupyter Notebook", "JavaScript" ], "model_or_dataset": "dataset" }, { "description": "自然言語で書かれた時間情報表現を抽出/規格化するルールベースの解析器", "url": "https://github.com/yagays/ja-timex", "project_name": "ja-timex", "stargazers_count": 132, "source": "GitHub", "score": 0.058380301762357374, "first_commit": "2021-07-18 20:38:04", "latest_commit": "2023-11-04 14:58:31", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Please feel free to open an issue or pull request.", "url": "https://huggingface.co/datasets/shunk031/JGLUE", "project_name": "JGLUE", "downloads": 16266, "source": "Hugging Face", "score": 0.05783567586770257, "first_commit": "2023-02-25 13:33:13", "latest_commit": "2024-05-21 11:23:51", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "A JSON kanji dataset with updated JLPT levels and WaniKani information", "url": "https://github.com/davidluzgouveia/kanji-data", "project_name": "kanji-data", "stargazers_count": 130, "source": "GitHub", "score": 0.05243900621106343, "first_commit": "2019-02-28 05:53:36", "latest_commit": "2019-12-29 12:12:54", "languages": [ "Python", "JavaScript" ], "model_or_dataset": "dataset" }, { "description": "A list of pre-trained BERT models for Japanese with word/subword tokenization + vocabulary construction algorithm information", "url": "https://github.com/himkt/awesome-bert-japanese", "project_name": "awesome-bert-japanese", "stargazers_count": 130, "source": "GitHub", "score": 0.05243900621106343, "first_commit": "2020-07-27 00:10:39", "latest_commit": "2023-03-15 18:51:20", "languages": [], "model_or_dataset": null }, { "description": "Wikipediaを用いた日本語の固有表現抽出データセット", "url": "https://github.com/stockmarkteam/ner-wikipedia-dataset", "project_name": "ner-wikipedia-dataset", "stargazers_count": 127, "source": "GitHub", "score": 0.04352706288412252, "first_commit": "2020-12-15 01:52:19", "latest_commit": "2023-09-02 23:44:38", "languages": [ "JavaScript" ], "model_or_dataset": "dataset" }, { "description": "GIS & Archaeological Simulator. 2023 in development.", "url": "https://github.com/AsPJT/PAX_SAPIENTICA", "project_name": "PAX_SAPIENTICA", "stargazers_count": 127, "source": "GitHub", "score": 0.04352706288412252, "first_commit": "2022-06-24 23:18:02", "latest_commit": "2024-08-13 01:06:14", "languages": [ "C++", "Java", "Python" ], "model_or_dataset": "dataset" }, { "description": "Node.js module for converting Japanese Hiragana and Katakana script to, and from, Romaji using Hepburn romanisation", "url": "https://github.com/lovell/hepburn", "project_name": "hepburn", "stargazers_count": 126, "source": "GitHub", "score": 0.040556415108475546, "first_commit": "2013-06-28 03:06:52", "latest_commit": "2023-09-08 09:11:19", "languages": [ "JavaScript" ], "model_or_dataset": null }, { "description": "日本語T5事前学習済みモデル This is a T5 (Text-to-Text Transfer Transformer) model pretrained on Japanese corpus. ", "url": "https://huggingface.co/sonoisa/t5-base-japanese", "project_name": "t5-base-japanese", "downloads": 13645, "source": "Hugging Face", "score": 0.03421779776433607, "first_commit": "2021-03-28 10:54:32", "latest_commit": "2022-07-31 08:20:41", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5Model" }, { "description": "Kanji usage frequency data collected from various sources", "url": "https://github.com/scriptin/kanji-frequency", "project_name": "kanji-frequency", "stargazers_count": 123, "source": "GitHub", "score": 0.031644471781534636, "first_commit": "2016-01-23 01:19:08", "latest_commit": "2024-07-13 03:51:19", "languages": [ "JavaScript", "TypeScript" ], "model_or_dataset": "dataset" }, { "description": "deep-learning-with-pytorchの日本語版repositoryです。", "url": "https://github.com/Gin5050/deep-learning-with-pytorch-ja", "project_name": "deep-learning-with-pytorch-ja", "stargazers_count": 121, "source": "GitHub", "score": 0.025703176230240692, "first_commit": "2020-12-05 15:15:16", "latest_commit": "2021-05-13 11:02:11", "languages": [ "Jupyter Notebook", "Python", "C++", "Java" ], "model_or_dataset": null }, { "description": "A unified language analyzer for Japanese", "url": "https://github.com/ku-nlp/kwja", "project_name": "kwja", "stargazers_count": 119, "source": "GitHub", "score": 0.019761880678946752, "first_commit": "2022-05-25 16:09:37", "latest_commit": "2024-08-06 23:13:59", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Python 3 library for manipulating Jim Breen's JMdict, KanjiDic2, JMnedict and kanji-radical mappings", "url": "https://github.com/neocl/jamdict", "project_name": "jamdict", "stargazers_count": 117, "source": "GitHub", "score": 0.01382058512765281, "first_commit": "2016-10-25 10:47:58", "latest_commit": "2021-06-06 12:04:03", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "LINE DistilBERT Japanese This is a DistilBERT model pre-trained on 131 GB of Japanese web text.", "url": "https://huggingface.co/line-corporation/line-distilbert-base-japanese", "project_name": "line-distilbert-base-japanese", "downloads": 11371, "source": "Hugging Face", "score": 0.01372674289708867, "first_commit": "2023-03-10 10:23:54", "latest_commit": "2023-12-01 09:50:34", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DistilBertForMaskedLM" }, { "description": "Tanuki-8B-dpo-v1.0 モデルについて Tanuki-8Bは、フルスクラッチで約1.3Tトークン事前学習を行った約8Bパラメータの大規模言語モデルです。", "url": "https://huggingface.co/weblab-GENIAC/Tanuki-8B-dpo-v1.0", "project_name": "Tanuki-8B-dpo-v1.0", "downloads": 11241, "source": "Hugging Face", "score": 0.012555310560790447, "first_commit": "2024-08-12 12:47:52", "latest_commit": "2024-09-02 23:47:02", "languages": [], "model_or_dataset": "model", "model_size": 7.51, "model_architectures": "LlamaForCausalLM" }, { "description": "lists of text corpus and more (mainly Japanese)", "url": "https://github.com/ikegami-yukino/dataset-list", "project_name": "dataset-list", "stargazers_count": 116, "source": "GitHub", "score": 0.01084993735200584, "first_commit": "2015-11-21 22:38:56", "latest_commit": "2024-07-26 00:40:50", "languages": [], "model_or_dataset": null }, { "description": "A comparison tool of Japanese tokenizers", "url": "https://github.com/taishi-i/toiro", "project_name": "toiro", "stargazers_count": 115, "source": "GitHub", "score": 0.007879289576358868, "first_commit": "2020-08-13 19:49:15", "latest_commit": "2023-07-31 23:55:55", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "日本語WikipediaコーパスでBERTのPre-Trainedモデルを生成するためのリポジトリ", "url": "https://github.com/Kosuke-Szk/ja_text_bert", "project_name": "ja_text_bert", "stargazers_count": 115, "source": "GitHub", "score": 0.007879289576358868, "first_commit": "2018-10-31 18:23:04", "latest_commit": "2018-11-08 15:17:20", "languages": [ "Jupyter Notebook", "Python" ], "model_or_dataset": "model" }, { "description": "General-purpose Swich transformer based Japanese language mode", "url": "https://github.com/tanreinama/GPTSAN", "project_name": "GPTSAN", "stargazers_count": 115, "source": "GitHub", "score": 0.007879289576358868, "first_commit": "2022-02-11 14:38:55", "latest_commit": "2023-09-13 12:20:29", "languages": [ "Python" ], "model_or_dataset": "model" }, { "description": "Model Card for Japanese character-level DeBERTa V2 large Model description This is a Japanese DeBERTa V2 large model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.", "url": "https://huggingface.co/ku-nlp/deberta-v2-large-japanese-char-wwm", "project_name": "deberta-v2-large-japanese-char-wwm", "downloads": 10054, "source": "Hugging Face", "score": 0.001859232228590507, "first_commit": "2023-03-09 10:13:05", "latest_commit": "2023-09-15 03:48:28", "languages": [], "model_or_dataset": "model", "model_size": 0.33, "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "License:CreativeML Open RAIL-M Additional Copyright: sazyou_roukaku (TwitterID @sazyou_roukaku) as of May 31, 2023 このモデルは『CreativeML Open RAIL-M』でLicenseそのものに変更はありません。 ", "url": "https://huggingface.co/sazyou-roukaku/BracingEvoMix", "project_name": "BracingEvoMix", "downloads": 9737, "source": "Hugging Face", "score": -0.000997260468382855, "first_commit": "2023-05-31 10:29:16", "latest_commit": "2023-10-01 08:58:54", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "複数の前処理を構成して管理するテキスト前処理ツール", "url": "https://github.com/HojiChar/HojiChar", "project_name": "HojiChar", "stargazers_count": 112, "source": "GitHub", "score": -0.001032653750582044, "first_commit": "2023-01-31 20:37:40", "latest_commit": "2024-08-15 17:25:48", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Emotion analyzer for Japanese text", "url": "https://github.com/ikegami-yukino/pymlask", "project_name": "pymlask", "stargazers_count": 112, "source": "GitHub", "score": -0.001032653750582044, "first_commit": "2017-02-10 21:33:23", "latest_commit": "2024-07-26 01:27:14", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Japanese Stable LM Base Gamma 7B Model Description", "url": "https://huggingface.co/stabilityai/japanese-stablelm-base-gamma-7b", "project_name": "japanese-stablelm-base-gamma-7b", "downloads": 9522, "source": "Hugging Face", "score": -0.0029346293322606875, "first_commit": "2023-10-16 08:15:14", "latest_commit": "2024-01-25 08:05:12", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM" }, { "description": "Llama3 Swallow", "url": "https://huggingface.co/tokyotech-llm/Llama-3-Swallow-8B-Instruct-v0.1", "project_name": "Llama-3-Swallow-8B-Instruct-v0.1", "downloads": 8981, "source": "Hugging Face", "score": -0.007809590054855605, "first_commit": "2024-06-26 04:11:25", "latest_commit": "2024-07-06 15:02:39", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM" }, { "description": "A fast LSTM Language Model for large vocabulary language like Japanese and Chinese", "url": "https://github.com/jiali-ms/JLM", "project_name": "JLM", "stargazers_count": 109, "source": "GitHub", "score": -0.009944597077522956, "first_commit": "2018-01-10 14:12:41", "latest_commit": "2019-06-04 21:35:33", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "日本語T5モデル", "url": "https://github.com/sonoisa/t5-japanese", "project_name": "t5-japanese", "stargazers_count": 109, "source": "GitHub", "score": -0.009944597077522956, "first_commit": "2021-03-28 22:12:19", "latest_commit": "2023-07-20 21:36:10", "languages": [ "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "A Japanese accent dictionary generator", "url": "https://github.com/PKSHATechnology-Research/tdmelodic", "project_name": "tdmelodic", "stargazers_count": 109, "source": "GitHub", "score": -0.009944597077522956, "first_commit": "2020-09-14 18:12:46", "latest_commit": "2024-03-22 01:44:10", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "このドキュメントの日本語版はまだ作成中です。", "url": "https://huggingface.co/bclavie/JaColBERT", "project_name": "JaColBERT", "downloads": 8689, "source": "Hugging Face", "score": -0.010440807302540848, "first_commit": "2023-12-25 22:43:54", "latest_commit": "2024-01-27 15:30:00", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "HF_ColBERT" }, { "description": "hotchpotch/japanese-reranker-cross-encoder-small-v1 日本語で学習させた Reranker (CrossEncoder) シリーズです。 ", "url": "https://huggingface.co/hotchpotch/japanese-reranker-cross-encoder-small-v1", "project_name": "japanese-reranker-cross-encoder-small-v1", "downloads": 8532, "source": "Hugging Face", "score": -0.011855537124070241, "first_commit": "2024-03-28 04:31:45", "latest_commit": "2024-04-01 02:39:19", "languages": [], "model_or_dataset": "model", "model_size": 0.11800000000000001, "model_architectures": "XLMRobertaForSequenceClassification" }, { "description": "transformers-ud-japanese-electra-ginza-510 (sudachitra-wordpiece, mC4 Japanese)", "url": "https://huggingface.co/megagonlabs/transformers-ud-japanese-electra-base-ginza-510", "project_name": "transformers-ud-japanese-electra-base-ginza-510", "downloads": 8432, "source": "Hugging Face", "score": -0.012756638921222722, "first_commit": "2021-12-05 11:31:57", "latest_commit": "2021-12-05 21:12:12", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ElectraModel" }, { "description": "Yet Another Japanese Dependency Structure Analyzer", "url": "https://github.com/taku910/cabocha", "project_name": "cabocha", "stargazers_count": 108, "source": "GitHub", "score": -0.012915244853169926, "first_commit": "2011-07-29 04:08:14", "latest_commit": "2020-07-24 11:31:47", "languages": [ "C++", "Java", "Perl", "Python", "Ruby", "C" ], "model_or_dataset": null }, { "description": "Japanese text8 corpus for word embedding.", "url": "https://github.com/Hironsan/ja.text8", "project_name": "ja.text8", "stargazers_count": 108, "source": "GitHub", "score": -0.012915244853169926, "first_commit": "2017-10-04 13:15:25", "latest_commit": "2017-10-04 13:38:23", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "BERT large Japanese (unidic-lite with whole word masking, CC-100 and jawiki-20230102)", "url": "https://huggingface.co/tohoku-nlp/bert-large-japanese-v2", "project_name": "bert-large-japanese-v2", "downloads": 8143, "source": "Hugging Face", "score": -0.015360823114993389, "first_commit": "2023-05-19 00:40:35", "latest_commit": "2023-05-19 00:47:40", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForPreTraining" }, { "description": "Swallow-MX-8x7b-NVE-v0.1 Our Swallow-MX-8x7b-NVE-v0.1 model has undergone continuous pre-training from the Mixtral-8x7B-Instruct-v0.1, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-MX-8x7b-NVE-v0.1", "project_name": "Swallow-MX-8x7b-NVE-v0.1", "downloads": 8097, "source": "Hugging Face", "score": -0.01577532994168353, "first_commit": "2024-02-22 04:44:42", "latest_commit": "2024-05-03 18:51:12", "languages": [], "model_or_dataset": "model", "model_size": 46.7, "model_architectures": "MixtralForCausalLM" }, { "description": "Japanese Kana Kanji conversion input method library", "url": "https://github.com/ueno/libkkc", "project_name": "libkkc", "stargazers_count": 106, "source": "GitHub", "score": -0.018856540404463867, "first_commit": "2012-08-07 17:45:52", "latest_commit": "2024-09-02 12:08:48", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "JaQuAD: Japanese Question Answering Dataset for Machine Reading Comprehension (2022, Skelter Labs)", "url": "https://github.com/SkelterLabsInc/JaQuAD", "project_name": "JaQuAD", "stargazers_count": 106, "source": "GitHub", "score": -0.018856540404463867, "first_commit": "2022-01-11 16:58:07", "latest_commit": "2022-02-04 11:42:51", "languages": [ "Jupyter Notebook" ], "model_or_dataset": "dataset" }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-7b-hf", "project_name": "Swallow-7b-hf", "downloads": 7650, "source": "Hugging Face", "score": -0.019803254974955116, "first_commit": "2023-11-25 10:09:49", "latest_commit": "2024-06-29 08:56:17", "languages": [], "model_or_dataset": "model", "model_size": 6.83, "model_architectures": "LlamaForCausalLM" }, { "description": "Yet another mecab wrapper for nodejs", "url": "https://github.com/golbin/node-mecab-ya", "project_name": "node-mecab-ya", "stargazers_count": 105, "source": "GitHub", "score": -0.02182718818011084, "first_commit": "2016-04-09 23:34:34", "latest_commit": "2021-06-18 18:30:43", "languages": [ "JavaScript" ], "model_or_dataset": null }, { "description": "luke-japanese-large luke-japanese is the Japanese version of LUKE (Language Understanding with Knowledge-based Embeddings), a pre-trained knowledge-enhanced contextualized representation of words and entities.", "url": "https://huggingface.co/studio-ousia/luke-japanese-large", "project_name": "luke-japanese-large", "downloads": 7114, "source": "Hugging Face", "score": -0.02463316060769241, "first_commit": "2022-11-07 14:25:53", "latest_commit": "2022-11-09 11:18:56", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LukeForMaskedLM" }, { "description": "japanese-gpt2-medium This repository provides a medium-sized Japanese GPT-2 model.", "url": "https://huggingface.co/rinna/japanese-gpt2-medium", "project_name": "japanese-gpt2-medium", "downloads": 6790, "source": "Hugging Face", "score": -0.027552730430466445, "first_commit": "2021-04-05 02:01:26", "latest_commit": "2024-07-20 07:50:47", "languages": [], "model_or_dataset": "model", "model_size": 0.361, "model_architectures": "GPT2LMHeadModel" }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-7b-instruct-hf", "project_name": "Swallow-7b-instruct-hf", "downloads": 6727, "source": "Hugging Face", "score": -0.028120424562672507, "first_commit": "2023-12-07 02:18:36", "latest_commit": "2024-06-29 08:56:26", "languages": [], "model_or_dataset": "model", "model_size": 6.83, "model_architectures": "LlamaForCausalLM" }, { "description": "hotchpotch/japanese-reranker-cross-encoder-xsmall-v1 日本語で学習させた Reranker (CrossEncoder) シリーズです。 ", "url": "https://huggingface.co/hotchpotch/japanese-reranker-cross-encoder-xsmall-v1", "project_name": "japanese-reranker-cross-encoder-xsmall-v1", "downloads": 6610, "source": "Hugging Face", "score": -0.02917471366534091, "first_commit": "2024-03-28 04:29:26", "latest_commit": "2024-06-10 03:57:05", "languages": [], "model_or_dataset": "model", "model_size": 0.107, "model_architectures": "XLMRobertaForSequenceClassification" }, { "description": "An example usage of JParaCrawl pre-trained Neural Machine Translation (NMT) models.", "url": "https://github.com/MorinoseiMorizo/jparacrawl-finetune", "project_name": "jparacrawl-finetune", "stargazers_count": 102, "source": "GitHub", "score": -0.03073913150705175, "first_commit": "2019-11-17 14:46:58", "latest_commit": "2021-04-29 14:27:00", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "A desktop language immersion companion for learners of Japanese", "url": "https://github.com/fauu/Kamite", "project_name": "Kamite", "stargazers_count": 102, "source": "GitHub", "score": -0.03073913150705175, "first_commit": "2022-07-08 19:26:15", "latest_commit": "2024-02-22 23:41:52", "languages": [ "Java", "TypeScript", "JavaScript", "Rust", "C++", "Python" ], "model_or_dataset": null }, { "description": "参議院の公式ウェブサイトから会派、議員、議案、質問主意書のデータを整理しました。", "url": "https://github.com/smartnews-smri/house-of-councillors", "project_name": "house-of-councillors", "stargazers_count": 102, "source": "GitHub", "score": -0.03073913150705175, "first_commit": "2022-06-16 16:06:39", "latest_commit": "2024-08-15 14:22:06", "languages": [ "JavaScript", "Python" ], "model_or_dataset": "dataset" }, { "description": "CJK computer science terms comparison / 中日韓電腦科學術語對照 / 日中韓のコンピュータ科学の用語対照 / 한·중·일 전산학 용어 대조", "url": "https://github.com/dahlia/cjk-compsci-terms", "project_name": "cjk-compsci-terms", "stargazers_count": 101, "source": "GitHub", "score": -0.03370977928269872, "first_commit": "2020-12-19 06:11:14", "latest_commit": "2022-09-16 01:27:59", "languages": [ "Python", "JavaScript" ], "model_or_dataset": "dataset" }, { "description": "Japanese Word Similarity Dataset", "url": "https://github.com/tmu-nlp/JapaneseWordSimilarityDataset", "project_name": "JapaneseWordSimilarityDataset", "stargazers_count": 101, "source": "GitHub", "score": -0.03370977928269872, "first_commit": "2016-01-08 16:25:16", "latest_commit": "2021-12-07 12:22:07", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "Trying to consolidate japanese phonetic, and in particular pitch accent resources into one list", "url": "https://github.com/olety/japanese-pitch-accent-resources", "project_name": "japanese-pitch-accent-resources", "stargazers_count": 101, "source": "GitHub", "score": -0.03370977928269872, "first_commit": "2018-03-03 15:51:09", "latest_commit": "2024-02-11 00:55:27", "languages": [], "model_or_dataset": null }, { "description": "LaBSE Model description Language-agnostic BERT Sentence Encoder (LaBSE) is a BERT-based model trained for sentence embedding for 109 languages.", "url": "https://huggingface.co/setu4993/LaBSE", "project_name": "LaBSE", "downloads": 5692, "source": "Hugging Face", "score": -0.03744682816320068, "first_commit": "2021-01-11 06:06:51", "latest_commit": "2023-10-18 23:23:16", "languages": [], "model_or_dataset": "model", "model_size": 0.47100000000000003, "model_architectures": "BertModel" }, { "description": "japanese-sentiment-analysis This model was trained from scratch on the chABSA dataset.", "url": "https://huggingface.co/jarvisx17/japanese-sentiment-analysis", "project_name": "japanese-sentiment-analysis", "downloads": 5280, "source": "Hugging Face", "score": -0.041159367567468894, "first_commit": "2022-11-15 06:28:39", "latest_commit": "2024-01-20 14:45:14", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForSequenceClassification" }, { "description": "Asynchronous japanese morphological analyser using MeCab.", "url": "https://github.com/hecomi/node-mecab-async", "project_name": "node-mecab-async", "stargazers_count": 98, "source": "GitHub", "score": -0.04262172260963963, "first_commit": "2012-10-29 09:32:49", "latest_commit": "2017-10-29 14:56:11", "languages": [ "JavaScript" ], "model_or_dataset": null }, { "description": "japanese android/cli/web dictionary based on jmdict/kanjidic — 日本語 辞典 和英辞典 漢英字典 和独辞典 和蘭辞典", "url": "https://github.com/obfusk/jiten", "project_name": "jiten", "stargazers_count": 98, "source": "GitHub", "score": -0.04262172260963963, "first_commit": "2020-06-15 02:09:21", "latest_commit": "2023-12-16 23:43:06", "languages": [ "Python", "JavaScript", "C" ], "model_or_dataset": "dataset" }, { "description": "Llama3 Swallow", "url": "https://huggingface.co/tokyotech-llm/Llama-3-Swallow-8B-v0.1", "project_name": "Llama-3-Swallow-8B-v0.1", "downloads": 5054, "source": "Hugging Face", "score": -0.0431958576290335, "first_commit": "2024-05-20 06:36:00", "latest_commit": "2024-07-01 06:24:48", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM" }, { "description": "bert-base-japanese-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/bert-base-japanese-upos", "project_name": "bert-base-japanese-upos", "downloads": 5012, "source": "Hugging Face", "score": -0.043574320383837545, "first_commit": "2021-08-26 23:02:50", "latest_commit": "2022-09-18 19:43:26", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForTokenClassification" }, { "description": "Kotoba-Whisper-v1.1 Kotoba-Whisper-v1.1 is a Japanese ASR model based on kotoba-tech/kotoba-whisper-v1.0, with additional postprocessing stacks integrated as pipeline.", "url": "https://huggingface.co/kotoba-tech/kotoba-whisper-v1.1", "project_name": "kotoba-whisper-v1.1", "downloads": 4957, "source": "Hugging Face", "score": -0.04406992637227141, "first_commit": "2024-04-29 14:53:48", "latest_commit": "2024-05-08 15:34:40", "languages": [], "model_or_dataset": "model", "model_size": 0.756, "model_architectures": "WhisperForConditionalGeneration" }, { "description": "CyberAgentLM2-7B (CALM2-7B)", "url": "https://huggingface.co/cyberagent/calm2-7b", "project_name": "calm2-7b", "downloads": 4909, "source": "Hugging Face", "score": -0.0445024552349046, "first_commit": "2023-11-01 07:24:59", "latest_commit": "2023-11-02 05:46:18", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM" }, { "description": "japanese-roberta-base This repository provides a base-sized Japanese RoBERTa model.", "url": "https://huggingface.co/rinna/japanese-roberta-base", "project_name": "japanese-roberta-base", "downloads": 4639, "source": "Hugging Face", "score": -0.0469354300872163, "first_commit": "2021-06-11 02:56:39", "latest_commit": "2024-07-20 07:44:40", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "RobertaForMaskedLM" }, { "description": "sbert-jsnli-luke-japanese-base-lite This is a sentence-transformers model: It maps sentences & paragraphs to a 768 dimensional dense vector space and can be used for tasks like clustering or semantic search.", "url": "https://huggingface.co/oshizo/sbert-jsnli-luke-japanese-base-lite", "project_name": "sbert-jsnli-luke-japanese-base-lite", "downloads": 4316, "source": "Hugging Face", "score": -0.04984598889201881, "first_commit": "2023-01-10 11:53:15", "latest_commit": "2023-01-10 12:36:12", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LukeModel" }, { "description": "luke-japanese luke-japanese is the Japanese version of LUKE (Language Understanding with Knowledge-based Embeddings), a pre-trained knowledge-enhanced contextualized representation of words and entities.", "url": "https://huggingface.co/studio-ousia/luke-japanese-base", "project_name": "luke-japanese-base", "downloads": 4259, "source": "Hugging Face", "score": -0.05035961691639572, "first_commit": "2022-10-25 06:30:23", "latest_commit": "2022-11-09 15:23:20", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LukeForMaskedLM" }, { "description": "Fugaku-LLM-13B-instruct-gguf Fugaku-LLMさんが公開しているFugaku-LLM-13B-instructのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Fugaku-LLM-13B-instruct-gguf", "project_name": "Fugaku-LLM-13B-instruct-gguf", "downloads": 4256, "source": "Hugging Face", "score": -0.050386649970310296, "first_commit": "2024-05-10 16:43:49", "latest_commit": "2024-05-12 06:06:51", "languages": [], "model_or_dataset": "model", "model_size": 13.4, "model_architectures": null }, { "description": "このモデルはLuke-japanese-large-liteをファインチューニングしたものです。 ", "url": "https://huggingface.co/Mizuiro-sakura/luke-japanese-large-sentiment-analysis-wrime", "project_name": "luke-japanese-large-sentiment-analysis-wrime", "downloads": 4143, "source": "Hugging Face", "score": -0.0514048950010926, "first_commit": "2023-03-13 12:40:08", "latest_commit": "2023-05-15 12:58:08", "languages": [], "model_or_dataset": "model", "model_size": 0.41400000000000003, "model_architectures": "LukeForSequenceClassification" }, { "description": "luke-japanese luke-japanese is the Japanese version of LUKE (Language Understanding with Knowledge-based Embeddings), a pre-trained knowledge-enhanced contextualized representation of words and entities.", "url": "https://huggingface.co/studio-ousia/luke-japanese-base-lite", "project_name": "luke-japanese-base-lite", "downloads": 4137, "source": "Hugging Face", "score": -0.051458961108921744, "first_commit": "2022-10-25 09:27:16", "latest_commit": "2022-11-09 15:22:22", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LukeForMaskedLM" }, { "description": "Ruri: Japanese General Text Embeddings Usage First install the Sentence Transformers library: pip install -U sentence-transformers fugashi sentencepiece unidic-lite Then you can load this model and run inference.", "url": "https://huggingface.co/cl-nagoya/ruri-large", "project_name": "ruri-large", "downloads": 3806, "source": "Hugging Face", "score": -0.054441608057496454, "first_commit": "2024-08-28 17:11:42", "latest_commit": "2024-09-04 08:49:10", "languages": [], "model_or_dataset": "model", "model_size": 0.337, "model_architectures": "BertModel" }, { "description": "Japanese Company Lexicon (JCLdic)", "url": "https://github.com/chakki-works/Japanese-Company-Lexicon", "project_name": "Japanese-Company-Lexicon", "stargazers_count": 94, "source": "GitHub", "score": -0.05450431371222752, "first_commit": "2020-01-16 15:25:09", "latest_commit": "2023-01-21 14:50:18", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "Windows/macOSで使える原神の単語辞書です", "url": "https://github.com/kotofurumiya/genshin-dict", "project_name": "genshin-dict", "stargazers_count": 94, "source": "GitHub", "score": -0.05450431371222752, "first_commit": "2021-05-05 00:02:08", "latest_commit": "2024-07-15 16:47:41", "languages": [ "TypeScript" ], "model_or_dataset": "dataset" }, { "description": "clip-japanese-base This is a Japanese CLIP (Contrastive Language-Image Pre-training) model developed by LY Corporation.", "url": "https://huggingface.co/line-corporation/clip-japanese-base", "project_name": "clip-japanese-base", "downloads": 3590, "source": "Hugging Face", "score": -0.05638798793934581, "first_commit": "2024-04-24 01:36:22", "latest_commit": "2024-05-10 03:07:04", "languages": [], "model_or_dataset": "model", "model_size": 0.197, "model_architectures": "CLYPModel" }, { "description": "KARAKURI LM KARAKURI LM is a pretrained language model that builds upon Llama 2.", "url": "https://huggingface.co/karakuri-ai/karakuri-lm-70b-chat-v0.1", "project_name": "karakuri-lm-70b-chat-v0.1", "downloads": 3558, "source": "Hugging Face", "score": -0.056676340514434605, "first_commit": "2024-01-26 09:08:09", "latest_commit": "2024-05-07 09:00:17", "languages": [], "model_or_dataset": "model", "model_size": 69.2, "model_architectures": "LlamaForCausalLM" }, { "description": "bilingual-gpt-neox-4b Overview This repository provides an English-Japanese bilingual GPT-NeoX model of 3.8 billion parameters.", "url": "https://huggingface.co/rinna/bilingual-gpt-neox-4b", "project_name": "bilingual-gpt-neox-4b", "downloads": 3490, "source": "Hugging Face", "score": -0.057289089736498294, "first_commit": "2023-07-31 02:34:03", "latest_commit": "2024-07-20 08:02:07", "languages": [], "model_or_dataset": "model", "model_size": 3.95, "model_architectures": "GPTNeoXForCausalLM" }, { "description": "OpenCALM-Small Model Description OpenCALM is a suite of decoder-only language models pre-trained on Japanese datasets, developed by", "url": "https://huggingface.co/cyberagent/open-calm-small", "project_name": "open-calm-small", "downloads": 3484, "source": "Hugging Face", "score": -0.05734315584432744, "first_commit": "2023-05-15 06:40:15", "latest_commit": "2023-05-18 01:10:33", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTNeoXForCausalLM" }, { "description": "Japanese Morphological Analyzer written in Rust", "url": "https://github.com/togatoga/kanpyo", "project_name": "kanpyo", "stargazers_count": 93, "source": "GitHub", "score": -0.05747496148787449, "first_commit": "2023-10-12 08:02:23", "latest_commit": "2024-08-20 08:56:37", "languages": [ "Rust" ], "model_or_dataset": null }, { "description": "rinna/youri-7b Overview We conduct continual pre-training of llama2-7b on 40B tokens from a mixture of Japanese and English datasets.", "url": "https://huggingface.co/rinna/youri-7b", "project_name": "youri-7b", "downloads": 3330, "source": "Hugging Face", "score": -0.05873085261194226, "first_commit": "2023-10-30 15:12:17", "latest_commit": "2024-07-22 08:01:22", "languages": [], "model_or_dataset": "model", "model_size": 6.74, "model_architectures": "LlamaForCausalLM" }, { "description": "Llama-3.1-70B-Japanese-Instruct-2407-gguf cyberagentさんが公開しているLlama-3.1-70B-Japanese-Instruct-2407のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Llama-3.1-70B-Japanese-Instruct-2407-gguf", "project_name": "Llama-3.1-70B-Japanese-Instruct-2407-gguf", "downloads": 3245, "source": "Hugging Face", "score": -0.05949678913952187, "first_commit": "2024-07-26 09:05:34", "latest_commit": "2024-07-27 05:59:10", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": null }, { "description": "This dataset was created by automatically translating \"databricks-dolly-15k\" into Japanese.", "url": "https://huggingface.co/datasets/kunishou/databricks-dolly-15k-ja", "project_name": "databricks-dolly-15k-ja", "downloads": 3216, "source": "Hugging Face", "score": -0.059758108660696085, "first_commit": "2023-04-13 08:31:08", "latest_commit": "2024-04-01 17:26:37", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Llama 3 Youko 8B (rinna/llama-3-youko-8b)", "url": "https://huggingface.co/rinna/llama-3-youko-8b", "project_name": "llama-3-youko-8b", "downloads": 3182, "source": "Hugging Face", "score": -0.06006448327172793, "first_commit": "2024-05-01 07:53:46", "latest_commit": "2024-07-25 05:14:42", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM" }, { "description": "50k English-Japanese Parallel Corpus for Machine Translation Benchmark.", "url": "https://github.com/odashi/small_parallel_enja", "project_name": "small_parallel_enja", "stargazers_count": 92, "source": "GitHub", "score": -0.06044560926352146, "first_commit": "2016-10-27 12:27:03", "latest_commit": "2019-09-11 14:00:17", "languages": [], "model_or_dataset": "dataset" }, { "description": "rinna/japanese-wav2vec2-base Overview This is a Japanese wav2vec 2.0 Base model trained by rinna Co.", "url": "https://huggingface.co/rinna/japanese-wav2vec2-base", "project_name": "japanese-wav2vec2-base", "downloads": 3105, "source": "Hugging Face", "score": -0.06075833165553534, "first_commit": "2024-03-06 01:07:56", "latest_commit": "2024-07-22 08:11:46", "languages": [], "model_or_dataset": "model", "model_size": 0.095, "model_architectures": "Wav2Vec2ForPreTraining" }, { "description": "japanese-gpt2-xsmall", "url": "https://huggingface.co/rinna/japanese-gpt2-xsmall", "project_name": "japanese-gpt2-xsmall", "downloads": 3093, "source": "Hugging Face", "score": -0.06086646387119364, "first_commit": "2021-07-26 02:52:54", "latest_commit": "2024-07-20 07:48:11", "languages": [], "model_or_dataset": "model", "model_size": 0.0437, "model_architectures": "GPT2LMHeadModel" }, { "description": "japanese-gpt-neox-3.6b Overview This repository provides a Japanese GPT-NeoX model of 3.6 billion parameters.", "url": "https://huggingface.co/rinna/japanese-gpt-neox-3.6b", "project_name": "japanese-gpt-neox-3.6b", "downloads": 3052, "source": "Hugging Face", "score": -0.06123591560802615, "first_commit": "2023-05-17 02:16:45", "latest_commit": "2024-07-20 07:55:19", "languages": [], "model_or_dataset": "model", "model_size": 3.76, "model_architectures": "GPTNeoXForCausalLM" }, { "description": "OpenCALM-7B Model Description OpenCALM is a suite of decoder-only language models pre-trained on Japanese datasets, developed by", "url": "https://huggingface.co/cyberagent/open-calm-7b", "project_name": "open-calm-7b", "downloads": 3007, "source": "Hugging Face", "score": -0.06164141141674477, "first_commit": "2023-05-15 07:53:34", "latest_commit": "2023-05-18 01:12:08", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTNeoXForCausalLM" }, { "description": "Japanese-StableLM-Base-Beta-7B A cute robot wearing a kimono writes calligraphy with one single brush — Stable Diffusion XL Model Description japanese-stablelm-base-beta-7b is a 7B-parameter decoder-only language model based on Llama-2-7b that has been fine-tuned on a diverse collection of Japanese data, with the intent of maximizing downstream performance on Japanese language tasks.", "url": "https://huggingface.co/stabilityai/japanese-stablelm-base-beta-7b", "project_name": "japanese-stablelm-base-beta-7b", "downloads": 2991, "source": "Hugging Face", "score": -0.061785587704289165, "first_commit": "2023-10-30 07:43:36", "latest_commit": "2023-12-19 06:43:01", "languages": [], "model_or_dataset": "model", "model_size": 6.74, "model_architectures": "LlamaForCausalLM" }, { "description": "rinna/japanese-hubert-base Overview This is a Japanese HuBERT Base model trained by rinna Co.", "url": "https://huggingface.co/rinna/japanese-hubert-base", "project_name": "japanese-hubert-base", "downloads": 2889, "source": "Hugging Face", "score": -0.0627047115373847, "first_commit": "2023-04-28 07:39:44", "latest_commit": "2024-07-20 08:55:38", "languages": [], "model_or_dataset": "model", "model_size": 0.09440000000000001, "model_architectures": "HubertModel" }, { "description": "This is a Japanese sentence-LUKE model.", "url": "https://huggingface.co/sonoisa/sentence-luke-japanese-base-lite", "project_name": "sentence-luke-japanese-base-lite", "downloads": 2852, "source": "Hugging Face", "score": -0.06303811920233111, "first_commit": "2023-03-19 14:44:42", "latest_commit": "2023-03-20 01:32:34", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LukeModel" }, { "description": "natto-py combines the Python programming language with MeCab, the part-of-speech and morphological analyzer for the Japanese language.", "url": "https://github.com/buruzaemon/natto-py", "project_name": "natto-py", "stargazers_count": 91, "source": "GitHub", "score": -0.06341625703916844, "first_commit": "2014-10-24 20:56:40", "latest_commit": "2023-11-04 21:25:21", "languages": [ "Python", "JavaScript", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "A* CCG Parser with a Supertag and Dependency Factored Model", "url": "https://github.com/masashi-y/depccg", "project_name": "depccg", "stargazers_count": 91, "source": "GitHub", "score": -0.06341625703916844, "first_commit": "2016-10-06 14:39:12", "latest_commit": "2023-08-26 16:03:23", "languages": [ "Python", "C" ], "model_or_dataset": null }, { "description": "Dictionary based Sentiment Analysis for Japanese", "url": "https://github.com/ikegami-yukino/oseti", "project_name": "oseti", "stargazers_count": 91, "source": "GitHub", "score": -0.06341625703916844, "first_commit": "2019-02-12 02:03:26", "latest_commit": "2024-01-12 07:14:53", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "stockmark/stockmark-13b Stockmark-13b is a 13 billion parameter LLM pretrained from scratch based on Japanese corpus of about 220B tokens.", "url": "https://huggingface.co/stockmark/stockmark-13b", "project_name": "stockmark-13b", "downloads": 2809, "source": "Hugging Face", "score": -0.06342559297510668, "first_commit": "2023-10-21 06:53:06", "latest_commit": "2024-05-17 06:15:56", "languages": [], "model_or_dataset": "model", "model_size": 13.2, "model_architectures": "LlamaForCausalLM" }, { "description": "Model Card for Japanese DeBERTa V3 base Model description This is a Japanese DeBERTa V3 base model pre-trained on LLM-jp corpus v1.0.", "url": "https://huggingface.co/ku-nlp/deberta-v3-base-japanese", "project_name": "deberta-v3-base-japanese", "downloads": 2789, "source": "Hugging Face", "score": -0.06360581333453717, "first_commit": "2024-04-23 05:08:21", "latest_commit": "2024-04-28 06:08:55", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "japanese-gpt-neox-3.6b-instruction-ppo Overview This repository provides a Japanese GPT-NeoX model of 3.6 billion parameters.", "url": "https://huggingface.co/rinna/japanese-gpt-neox-3.6b-instruction-ppo", "project_name": "japanese-gpt-neox-3.6b-instruction-ppo", "downloads": 2766, "source": "Hugging Face", "score": -0.06381306674788224, "first_commit": "2023-05-30 01:50:48", "latest_commit": "2024-07-20 07:58:49", "languages": [], "model_or_dataset": "model", "model_size": 3.76, "model_architectures": "GPTNeoXForCausalLM" }, { "description": "BERT base Japanese (character tokenization, whole word masking enabled)", "url": "https://huggingface.co/tohoku-nlp/bert-base-japanese-char-whole-word-masking", "project_name": "bert-base-japanese-char-whole-word-masking", "downloads": 2758, "source": "Hugging Face", "score": -0.06388515489165444, "first_commit": "2020-04-28 21:34:13", "latest_commit": "2024-02-22 00:58:18", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM" }, { "description": "[Llama-3-EZO model card]", "url": "https://huggingface.co/AXCXEPT/Llama-3-EZO-8b-Common-it", "project_name": "Llama-3-EZO-8b-Common-it", "downloads": 2750, "source": "Hugging Face", "score": -0.06395724303542664, "first_commit": "2024-07-13 06:42:31", "latest_commit": "2024-08-23 10:52:05", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM" }, { "description": "Model Card for Model ID 実験モデルです /", "url": "https://huggingface.co/mmnga/Llama-3-70B-japanese-suzume-vector-v0.1", "project_name": "Llama-3-70B-japanese-suzume-vector-v0.1", "downloads": 2726, "source": "Hugging Face", "score": -0.06417350746674325, "first_commit": "2024-04-28 04:11:49", "latest_commit": "2024-04-28 07:46:32", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": "LlamaForCausalLM" }, { "description": "Llama-3-ELYZA-JP-8B-GGUF Model Description Llama-3-ELYZA-JP-8B is a large language model trained by ELYZA, Inc.", "url": "https://huggingface.co/elyza/Llama-3-ELYZA-JP-8B-GGUF", "project_name": "Llama-3-ELYZA-JP-8B-GGUF", "downloads": 2715, "source": "Hugging Face", "score": -0.06427262866443001, "first_commit": "2024-06-25 07:29:22", "latest_commit": "2024-06-26 02:56:52", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null }, { "description": "AIBunCho/japanese-novel-gpt-j-6b AI BunChoさんが公開しているjapanese-novel-gpt-j-6bのgguf変換版です。 ", "url": "https://huggingface.co/mmnga/aibuncho-japanese-novel-gpt-j-6b-gguf", "project_name": "aibuncho-japanese-novel-gpt-j-6b-gguf", "downloads": 2700, "source": "Hugging Face", "score": -0.06440779393400288, "first_commit": "2023-09-03 17:32:44", "latest_commit": "2023-09-11 01:10:36", "languages": [], "model_or_dataset": "model", "model_size": 6.05, "model_architectures": null }, { "description": "ELYZA-japanese-Llama-2-7b Model Description ELYZA-japanese-Llama-2-7b は、 Llama2をベースとして日本語能力を拡張するために追加事前学習を行ったモデルです。 ", "url": "https://huggingface.co/elyza/ELYZA-japanese-Llama-2-7b-fast-instruct", "project_name": "ELYZA-japanese-Llama-2-7b-fast-instruct", "downloads": 2653, "source": "Hugging Face", "score": -0.06483131177866455, "first_commit": "2023-08-28 13:36:19", "latest_commit": "2023-08-29 03:47:09", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM" }, { "description": "BERT large Japanese (unidic-lite with whole word masking, jawiki-20200831)", "url": "https://huggingface.co/tohoku-nlp/bert-large-japanese", "project_name": "bert-large-japanese", "downloads": 2652, "source": "Hugging Face", "score": -0.06484032279663607, "first_commit": "2021-03-05 06:17:13", "latest_commit": "2021-09-23 15:45:41", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM" }, { "description": "Llama-3.1-8B-Instruct-gguf meta-llamaさんが公開しているMeta-Llama-3.1-8B-Instructのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Llama-3.1-8B-Instruct-gguf", "project_name": "Llama-3.1-8B-Instruct-gguf", "downloads": 2577, "source": "Hugging Face", "score": -0.06551614914450044, "first_commit": "2024-07-23 16:33:06", "latest_commit": "2024-07-24 21:04:40", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "Pytorch implementation and pre-trained Japanese model for CANINE, the efficient character-level transformer.", "url": "https://github.com/octanove/shiba", "project_name": "shiba", "stargazers_count": 90, "source": "GitHub", "score": -0.0663869048148154, "first_commit": "2021-06-24 20:17:27", "latest_commit": "2023-11-03 10:01:53", "languages": [ "Python" ], "model_or_dataset": "model" }, { "description": "おーぷん2ちゃんねるをクロールして作成した対話コーパス", "url": "https://github.com/1never/open2ch-dialogue-corpus", "project_name": "open2ch-dialogue-corpus", "stargazers_count": 90, "source": "GitHub", "score": -0.0663869048148154, "first_commit": "2019-09-13 11:21:53", "latest_commit": "2021-06-07 00:06:23", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "【告知】chilled_remix及びreversemixは2023年5月21日にVersion変更を行い、v2へ移行いたしました。", "url": "https://huggingface.co/sazyou-roukaku/chilled_remix", "project_name": "chilled_remix", "downloads": 2456, "source": "Hugging Face", "score": -0.06660648231905493, "first_commit": "2023-04-18 12:48:48", "latest_commit": "2023-06-09 23:08:31", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "LLaVA-JP Model Card Model detail Model type: LLaVA-JP is a vision-language model that can converse about input images.", "url": "https://huggingface.co/toshi456/llava-jp-1.3b-v1.0", "project_name": "llava-jp-1.3b-v1.0", "downloads": 2423, "source": "Hugging Face", "score": -0.06690384591211525, "first_commit": "2023-12-04 13:13:03", "latest_commit": "2023-12-18 10:21:11", "languages": [], "model_or_dataset": "model", "model_size": 1.73, "model_architectures": "LlavaGpt2ForCausalLM" }, { "description": "japanese-gpt-neox-3.6b-instruction-sft Overview This repository provides a Japanese GPT-NeoX model of 3.6 billion parameters.", "url": "https://huggingface.co/rinna/japanese-gpt-neox-3.6b-instruction-sft", "project_name": "japanese-gpt-neox-3.6b-instruction-sft", "downloads": 2406, "source": "Hugging Face", "score": -0.06705703321763118, "first_commit": "2023-05-17 02:16:28", "latest_commit": "2024-07-20 07:56:34", "languages": [], "model_or_dataset": "model", "model_size": 3.76, "model_architectures": "GPTNeoXForCausalLM" }, { "description": "Kotoba-Whisper (v2.0)", "url": "https://huggingface.co/kotoba-tech/kotoba-whisper-v2.0", "project_name": "kotoba-whisper-v2.0", "downloads": 2376, "source": "Hugging Face", "score": -0.06732736375677692, "first_commit": "2024-09-17 12:49:47", "latest_commit": "2024-09-20 01:56:01", "languages": [], "model_or_dataset": "model", "model_size": 0.756, "model_architectures": "WhisperForConditionalGeneration" }, { "description": "GLuCoSE v2", "url": "https://huggingface.co/pkshatech/GLuCoSE-base-ja-v2", "project_name": "GLuCoSE-base-ja-v2", "downloads": 2354, "source": "Hugging Face", "score": -0.06752560615215046, "first_commit": "2024-08-22 03:16:48", "latest_commit": "2024-09-18 09:21:54", "languages": [], "model_or_dataset": "model", "model_size": 0.133, "model_architectures": "LukeModel" }, { "description": "wav2vec2-base-asr", "url": "https://huggingface.co/TKU410410103/wav2vec2-base-japanese-asr", "project_name": "wav2vec2-base-japanese-asr", "downloads": 2343, "source": "Hugging Face", "score": -0.06762472734983724, "first_commit": "2024-04-14 10:22:21", "latest_commit": "2024-04-14 14:00:30", "languages": [], "model_or_dataset": "model", "model_size": 0.0945, "model_architectures": "Wav2Vec2ForCTC" }, { "description": "Japanese SimCSE (BERT-base)", "url": "https://huggingface.co/pkshatech/simcse-ja-bert-base-clcmlp", "project_name": "simcse-ja-bert-base-clcmlp", "downloads": 2311, "source": "Hugging Face", "score": -0.06791307992492603, "first_commit": "2022-12-26 02:52:03", "latest_commit": "2023-01-27 06:44:23", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertModel" }, { "description": "japanese-gpt2-small This repository provides a small-sized Japanese GPT-2 model.", "url": "https://huggingface.co/rinna/japanese-gpt2-small", "project_name": "japanese-gpt2-small", "downloads": 2303, "source": "Hugging Face", "score": -0.06798516806869823, "first_commit": "2021-06-15 06:32:27", "latest_commit": "2024-07-20 07:49:31", "languages": [], "model_or_dataset": "model", "model_size": 0.123, "model_architectures": "GPT2LMHeadModel" }, { "description": "ELYZA-japanese-Llama-2-7b Model Description ELYZA-japanese-Llama-2-7b は、 Llama2をベースとして日本語能力を拡張するために追加事前学習を行ったモデルです。 ", "url": "https://huggingface.co/elyza/ELYZA-japanese-Llama-2-7b", "project_name": "ELYZA-japanese-Llama-2-7b", "downloads": 2270, "source": "Hugging Face", "score": -0.06828253166175854, "first_commit": "2023-08-28 12:38:34", "latest_commit": "2023-08-29 03:45:51", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM" }, { "description": "hubert-base-asr", "url": "https://huggingface.co/TKU410410103/hubert-base-japanese-asr", "project_name": "hubert-base-japanese-asr", "downloads": 2231, "source": "Hugging Face", "score": -0.06863396136264802, "first_commit": "2024-04-09 06:01:43", "latest_commit": "2024-04-14 13:20:43", "languages": [], "model_or_dataset": "model", "model_size": 0.0945, "model_architectures": "HubertForCTC" }, { "description": "ELYZA-japanese-Llama-2-7b-fast-instruct-gguf ELYZAさんが公開しているELYZA-japanese-Llama-2-7b-fast-instructのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/ELYZA-japanese-Llama-2-7b-fast-instruct-gguf", "project_name": "ELYZA-japanese-Llama-2-7b-fast-instruct-gguf", "downloads": 2183, "source": "Hugging Face", "score": -0.0690664902252812, "first_commit": "2023-08-29 15:31:01", "latest_commit": "2023-11-16 14:27:48", "languages": [], "model_or_dataset": "model", "model_size": 6.85, "model_architectures": null }, { "description": "Japanese Stable LM Instruct Gamma 7B Model Description", "url": "https://huggingface.co/stabilityai/japanese-stablelm-instruct-gamma-7b", "project_name": "japanese-stablelm-instruct-gamma-7b", "downloads": 2146, "source": "Hugging Face", "score": -0.06939989789022763, "first_commit": "2023-10-16 08:55:06", "latest_commit": "2024-01-24 05:54:38", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM" }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-70b-instruct-hf", "project_name": "Swallow-70b-instruct-hf", "downloads": 2127, "source": "Hugging Face", "score": -0.0695711072316866, "first_commit": "2023-12-11 07:23:47", "latest_commit": "2024-06-29 08:56:31", "languages": [], "model_or_dataset": "model", "model_size": 69.2, "model_architectures": "LlamaForCausalLM" }, { "description": "ELYZA-japanese-Llama-2-13b Model Description ELYZA-japanese-Llama-2-13b は、 Llama 2をベースとして日本語能力を拡張するために追加事前学習を行ったモデルです。", "url": "https://huggingface.co/elyza/ELYZA-japanese-Llama-2-13b-instruct", "project_name": "ELYZA-japanese-Llama-2-13b-instruct", "downloads": 2107, "source": "Hugging Face", "score": -0.06975132759111709, "first_commit": "2023-12-25 16:10:32", "latest_commit": "2023-12-27 01:41:15", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM" }, { "description": "gpt-neox-japanese-2.7b", "url": "https://huggingface.co/abeja/gpt-neox-japanese-2.7b", "project_name": "gpt-neox-japanese-2.7b", "downloads": 2062, "source": "Hugging Face", "score": -0.07015682339983571, "first_commit": "2022-08-29 02:15:44", "latest_commit": "2023-04-10 05:12:30", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTNeoXJapaneseForCausalLM" }, { "description": "Llama-3.1-8B-EZO-1.1-it-gguf HODACHIさんが公開しているLlama-3.1-8B-EZO-1.1-itのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Llama-3.1-8B-EZO-1.1-it-gguf", "project_name": "Llama-3.1-8B-EZO-1.1-it-gguf", "downloads": 1974, "source": "Hugging Face", "score": -0.07094979298132989, "first_commit": "2024-07-31 11:06:36", "latest_commit": "2024-07-31 12:47:45", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-13b-hf", "project_name": "Swallow-13b-hf", "downloads": 1935, "source": "Hugging Face", "score": -0.07130122268221936, "first_commit": "2023-11-16 15:40:49", "latest_commit": "2024-06-29 08:56:21", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM" }, { "description": "nlp-waseda/roberta-large-japanese-seq512 Model description This is a Japanese RoBERTa large model pretrained on Japanese Wikipedia and the Japanese portion of CC-100 with the maximum sequence length of 512.", "url": "https://huggingface.co/nlp-waseda/roberta-large-japanese-seq512", "project_name": "roberta-large-japanese-seq512", "downloads": 1918, "source": "Hugging Face", "score": -0.07145440998773528, "first_commit": "2022-06-13 09:46:45", "latest_commit": "2022-10-21 14:49:40", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForMaskedLM" }, { "description": "BERTによる日本語固有表現抽出のモデル BertForTokenClassificationを用いて、日本語の文から固有表現を抽出します。 ", "url": "https://huggingface.co/jurabi/bert-ner-japanese", "project_name": "bert-ner-japanese", "downloads": 1904, "source": "Hugging Face", "score": -0.07158056423933663, "first_commit": "2022-09-26 07:46:38", "latest_commit": "2022-09-26 12:13:44", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForTokenClassification" }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-7b-NVE-hf", "project_name": "Swallow-7b-NVE-hf", "downloads": 1893, "source": "Hugging Face", "score": -0.0716796854370234, "first_commit": "2023-11-30 09:02:26", "latest_commit": "2024-06-29 08:56:18", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM" }, { "description": "Llama3 Swallow", "url": "https://huggingface.co/tokyotech-llm/Llama-3-Swallow-70B-Instruct-v0.1", "project_name": "Llama-3-Swallow-70B-Instruct-v0.1", "downloads": 1831, "source": "Hugging Face", "score": -0.07223836855125794, "first_commit": "2024-06-28 16:17:32", "latest_commit": "2024-07-19 08:08:59", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": "LlamaForCausalLM" }, { "description": "A Python Module for JUMAN++/KNP", "url": "https://github.com/ku-nlp/pyknp", "project_name": "pyknp", "stargazers_count": 88, "source": "GitHub", "score": -0.07232820036610935, "first_commit": "2015-04-08 15:25:47", "latest_commit": "2024-07-06 15:16:48", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "STAIR captions: large-scale Japanese image caption dataset", "url": "https://github.com/STAIR-Lab-CIT/STAIR-captions", "project_name": "STAIR-captions", "stargazers_count": 88, "source": "GitHub", "score": -0.07232820036610935, "first_commit": "2017-02-21 16:49:14", "latest_commit": "2018-07-04 18:24:35", "languages": [], "model_or_dataset": "dataset" }, { "description": "NMeCab: About Japanese morphological analyzer on .NET", "url": "https://github.com/komutan/NMeCab", "project_name": "NMeCab", "stargazers_count": 88, "source": "GitHub", "score": -0.07232820036610935, "first_commit": "2014-04-24 17:34:29", "latest_commit": "2024-03-31 03:51:55", "languages": [ "C#" ], "model_or_dataset": "dataset" }, { "description": "This dataset contains a diverse set of natural Japanese speech, collected from terrestrial television streams.", "url": "https://huggingface.co/datasets/reazon-research/reazonspeech", "project_name": "reazonspeech", "downloads": 1772, "source": "Hugging Face", "score": -0.0727700186115779, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "bert-base-japanese-v3-jsts 「大規模言語モデル入門」の第5章で紹介している(意味類似度計算)のモデルです。 ", "url": "https://huggingface.co/llm-book/bert-base-japanese-v3-jsts", "project_name": "bert-base-japanese-v3-jsts", "downloads": 1771, "source": "Hugging Face", "score": -0.07277902962954942, "first_commit": "2023-06-11 15:27:32", "latest_commit": "2023-07-29 11:27:18", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification" }, { "description": "Model Card for Japanese character-level DeBERTa V2 base Model description This is a Japanese DeBERTa V2 base model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.", "url": "https://huggingface.co/ku-nlp/deberta-v2-base-japanese-char-wwm", "project_name": "deberta-v2-base-japanese-char-wwm", "downloads": 1749, "source": "Hugging Face", "score": -0.07297727202492298, "first_commit": "2023-01-18 13:55:30", "latest_commit": "2023-03-26 03:32:27", "languages": [], "model_or_dataset": "model", "model_size": 0.122, "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "ELYZA-japanese-Llama-2-7b-fast-gguf ELYZAさんが公開しているELYZA-japanese-Llama-2-7b-fastのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/ELYZA-japanese-Llama-2-7b-fast-gguf", "project_name": "ELYZA-japanese-Llama-2-7b-fast-gguf", "downloads": 1650, "source": "Hugging Face", "score": -0.07386936280410393, "first_commit": "2023-08-29 07:23:20", "latest_commit": "2023-11-16 14:27:36", "languages": [], "model_or_dataset": "model", "model_size": 6.85, "model_architectures": null }, { "description": "OpenCALM-1B Model Description OpenCALM is a suite of decoder-only language models pre-trained on Japanese datasets, developed by", "url": "https://huggingface.co/cyberagent/open-calm-1b", "project_name": "open-calm-1b", "downloads": 1636, "source": "Hugging Face", "score": -0.07399551705570527, "first_commit": "2023-05-15 07:00:18", "latest_commit": "2023-05-18 01:11:30", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTNeoXForCausalLM" }, { "description": "FINGU-AI/FinguAI-Chat-v1 Overview The FINGU-AI/FinguAI-Chat-v1 model offers a specialized curriculum tailored to English, Korean, and Japanese speakers interested in finance, investment, and legal frameworks.", "url": "https://huggingface.co/FINGU-AI/FinguAI-Chat-v1", "project_name": "FinguAI-Chat-v1", "downloads": 1624, "source": "Hugging Face", "score": -0.07410364927136356, "first_commit": "2024-03-21 07:08:05", "latest_commit": "2024-03-22 09:36:44", "languages": [], "model_or_dataset": "model", "model_size": 0.464, "model_architectures": "Qwen2ForCausalLM" }, { "description": "OpenCALM-Large Model Description OpenCALM is a suite of decoder-only language models pre-trained on Japanese datasets, developed by", "url": "https://huggingface.co/cyberagent/open-calm-large", "project_name": "open-calm-large", "downloads": 1609, "source": "Hugging Face", "score": -0.07423881454093645, "first_commit": "2023-05-15 06:50:24", "latest_commit": "2023-05-18 01:11:13", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTNeoXForCausalLM" }, { "description": "日本語向け Llama 3 8B はじめに このリポジトリはLlama 3を日本語化しようとしたモデルのリポジトリです。", "url": "https://huggingface.co/alfredplpl/Llama-3-8B-Instruct-Ja", "project_name": "Llama-3-8B-Instruct-Ja", "downloads": 1606, "source": "Hugging Face", "score": -0.07426584759485101, "first_commit": "2024-04-22 05:14:33", "latest_commit": "2024-05-01 19:16:01", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM" }, { "description": "Gemma-Mling: Multilingual Gemma Update @ 2024.04.15: First release of Gemma-Mling 7B model Original Gemma Model Page:", "url": "https://huggingface.co/beomi/gemma-mling-7b", "project_name": "gemma-mling-7b", "downloads": 1593, "source": "Hugging Face", "score": -0.07438299082848084, "first_commit": "2024-04-15 05:37:05", "latest_commit": "2024-04-18 14:28:20", "languages": [], "model_or_dataset": "model", "model_size": 8.54, "model_architectures": "GemmaForCausalLM" }, { "description": "Kotoba-Whisper-v2.1 Kotoba-Whisper-v2.1 is a Japanese ASR model based on kotoba-tech/kotoba-whisper-v2.0, with additional postprocessing stacks integrated as pipeline.", "url": "https://huggingface.co/kotoba-tech/kotoba-whisper-v2.1", "project_name": "kotoba-whisper-v2.1", "downloads": 1516, "source": "Hugging Face", "score": -0.07507683921228825, "first_commit": "2024-09-17 14:19:45", "latest_commit": "2024-09-20 01:55:12", "languages": [], "model_or_dataset": "model", "model_size": 0.756, "model_architectures": "WhisperForConditionalGeneration" }, { "description": "Japanese Laws This dataset comprises 8.75K law records retrieved from the official Japanese government website e-Gov. ", "url": "https://huggingface.co/datasets/y2lan/japan-law", "project_name": "japan-law", "downloads": 1509, "source": "Hugging Face", "score": -0.07513991633808892, "first_commit": "2023-07-20 06:26:25", "latest_commit": "2023-07-20 06:45:14", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "alpacaデータセットを日本語化したものです", "url": "https://github.com/shi3z/alpaca_ja", "project_name": "alpaca_ja", "stargazers_count": 87, "source": "GitHub", "score": -0.07529884814175632, "first_commit": "2023-04-01 07:03:33", "latest_commit": "2023-05-17 15:43:50", "languages": [], "model_or_dataset": "dataset" }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-13b-instruct-hf", "project_name": "Swallow-13b-instruct-hf", "downloads": 1480, "source": "Hugging Face", "score": -0.07540123585926314, "first_commit": "2023-12-07 03:10:55", "latest_commit": "2024-06-29 08:56:29", "languages": [], "model_or_dataset": "model", "model_size": 13.1, "model_architectures": "LlamaForCausalLM" }, { "description": "Mistral-Nemo-Japanese-Instruct-2408 Model Description", "url": "https://huggingface.co/cyberagent/Mistral-Nemo-Japanese-Instruct-2408", "project_name": "Mistral-Nemo-Japanese-Instruct-2408", "downloads": 1476, "source": "Hugging Face", "score": -0.07543727993114924, "first_commit": "2024-08-30 03:57:43", "latest_commit": "2024-08-30 04:03:41", "languages": [], "model_or_dataset": "model", "model_size": 12.2, "model_architectures": "MistralForCausalLM" }, { "description": "Llama-3-ELYZA-JP-8B-AWQ Model Description Llama-3-ELYZA-JP-8B is a large language model trained by ELYZA, Inc.", "url": "https://huggingface.co/elyza/Llama-3-ELYZA-JP-8B-AWQ", "project_name": "Llama-3-ELYZA-JP-8B-AWQ", "downloads": 1472, "source": "Hugging Face", "score": -0.07547332400303534, "first_commit": "2024-06-25 04:31:31", "latest_commit": "2024-06-26 02:56:39", "languages": [], "model_or_dataset": "model", "model_size": 1.98, "model_architectures": "LlamaForCausalLM" }, { "description": "japanese-large-lm-3.6b-instruction-sft", "url": "https://huggingface.co/line-corporation/japanese-large-lm-3.6b-instruction-sft", "project_name": "japanese-large-lm-3.6b-instruction-sft", "downloads": 1460, "source": "Hugging Face", "score": -0.07558145621869364, "first_commit": "2023-08-14 17:18:09", "latest_commit": "2023-08-24 10:08:28", "languages": [], "model_or_dataset": "model", "model_size": 3.68, "model_architectures": "GPTNeoXForCausalLM" }, { "description": "Tanuki-8x8B-dpo-v1.0 モデルについて Tanuki-8x8Bは、フルスクラッチで約1.7Tトークン事前学習を行った8x8Bパラメータ(総パラメータ約47B、アクティブパラメータ約13B)の大規模言語モデルです。", "url": "https://huggingface.co/weblab-GENIAC/Tanuki-8x8B-dpo-v1.0", "project_name": "Tanuki-8x8B-dpo-v1.0", "downloads": 1457, "source": "Hugging Face", "score": -0.07560848927260821, "first_commit": "2024-08-12 12:47:11", "latest_commit": "2024-09-02 23:47:09", "languages": [], "model_or_dataset": "model", "model_size": 47.0, "model_architectures": "TanukiForCausalLM" }, { "description": "ELYZA-japanese-Llama-2-7b Model Description ELYZA-japanese-Llama-2-7b は、 Llama2をベースとして日本語能力を拡張するために追加事前学習を行ったモデルです。 ", "url": "https://huggingface.co/elyza/ELYZA-japanese-Llama-2-7b-fast", "project_name": "ELYZA-japanese-Llama-2-7b-fast", "downloads": 1428, "source": "Hugging Face", "score": -0.07586980879378243, "first_commit": "2023-08-28 13:17:58", "latest_commit": "2023-08-29 03:46:37", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM" }, { "description": "japanese-large-lm-3.6b", "url": "https://huggingface.co/line-corporation/japanese-large-lm-3.6b", "project_name": "japanese-large-lm-3.6b", "downloads": 1423, "source": "Hugging Face", "score": -0.07591486388364005, "first_commit": "2023-07-21 00:48:05", "latest_commit": "2023-08-17 01:06:17", "languages": [], "model_or_dataset": "model", "model_size": 3.68, "model_architectures": "GPTNeoXForCausalLM" }, { "description": "ELYZA-japanese-Llama-2-13b-fast-instruct Model Description ELYZA-japanese-Llama-2-13b は、 Llama 2をベースとして日本語能力を拡張するために追加事前学習を行ったモデルです。 ", "url": "https://huggingface.co/elyza/ELYZA-japanese-Llama-2-13b-fast-instruct", "project_name": "ELYZA-japanese-Llama-2-13b-fast-instruct", "downloads": 1412, "source": "Hugging Face", "score": -0.07601398508132683, "first_commit": "2023-12-25 18:14:10", "latest_commit": "2023-12-27 01:41:51", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM" }, { "description": "stockmark/stockmark-100b Stockmark-100b is a 100 billion parameter LLM pretrained from scratch based on Japanese and English corpus of about 910 billion tokens.", "url": "https://huggingface.co/stockmark/stockmark-100b", "project_name": "stockmark-100b", "downloads": 1412, "source": "Hugging Face", "score": -0.07601398508132683, "first_commit": "2024-05-13 09:31:40", "latest_commit": "2024-05-15 06:18:10", "languages": [], "model_or_dataset": "model", "model_size": 96.2, "model_architectures": "LlamaForCausalLM" }, { "description": "Tanuki-8B-dpo-v1.0-GGUF 概要 GENIAC 松尾研 LLM開発プロジェクトで開発されたLLMであるweblab-GENIAC/Tanuki-8B-dpo-v1.0のGGUF量子化モデルです。", "url": "https://huggingface.co/team-hatakeyama-phase2/Tanuki-8B-dpo-v1.0-GGUF", "project_name": "Tanuki-8B-dpo-v1.0-GGUF", "downloads": 1390, "source": "Hugging Face", "score": -0.07621222747670038, "first_commit": "2024-08-14 15:05:50", "latest_commit": "2024-08-27 18:00:44", "languages": [], "model_or_dataset": "model", "model_size": 7.51, "model_architectures": null }, { "description": "hotchpotch/japanese-reranker-cross-encoder-large-v1 日本語で学習させた Reranker (CrossEncoder) シリーズです。 ", "url": "https://huggingface.co/hotchpotch/japanese-reranker-cross-encoder-large-v1", "project_name": "japanese-reranker-cross-encoder-large-v1", "downloads": 1388, "source": "Hugging Face", "score": -0.07623024951264343, "first_commit": "2024-03-28 20:53:30", "latest_commit": "2024-04-01 02:39:45", "languages": [], "model_or_dataset": "model", "model_size": 0.337, "model_architectures": "BertForSequenceClassification" }, { "description": "Japanese-LLaMA-3-8B Japanese-LLaMA-3-8Bは基盤モデル、フルモデルです。 ", "url": "https://huggingface.co/owner203/japanese-llama-3-8b", "project_name": "japanese-llama-3-8b", "downloads": 1354, "source": "Hugging Face", "score": -0.07653662412367526, "first_commit": "2024-06-05 02:19:05", "latest_commit": "2024-06-21 06:35:41", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM" }, { "description": "(简体中文|English|日本語) Introduction github repo : https://github.com/FunAudioLLM/SenseVoice SenseVoice is a speech foundation model with multiple speech understanding capabilities, including automatic speech recognition (ASR), spoken language identification (LID), speech emotion recognition (SER), and audio event detection (AED).", "url": "https://huggingface.co/FunAudioLLM/SenseVoiceSmall", "project_name": "SenseVoiceSmall", "downloads": 1321, "source": "Hugging Face", "score": -0.07683398771673558, "first_commit": "2024-07-03 03:56:49", "latest_commit": "2024-07-31 05:47:48", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "Ninja-v1-NSFW-128k-gguf Local-Novel-LLM-projectさんが公開しているNinja-v1-NSFW-128kのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Ninja-v1-NSFW-128k-gguf", "project_name": "Ninja-v1-NSFW-128k-gguf", "downloads": 1289, "source": "Hugging Face", "score": -0.07712234029182438, "first_commit": "2024-05-01 17:45:52", "latest_commit": "2024-05-04 13:25:47", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "recruit-jp/japanese-clip-vit-b-32-roberta-base Overview Developed by: Recruit Co.", "url": "https://huggingface.co/recruit-jp/japanese-clip-vit-b-32-roberta-base", "project_name": "japanese-clip-vit-b-32-roberta-base", "downloads": 1276, "source": "Hugging Face", "score": -0.0772394835254542, "first_commit": "2023-12-20 06:06:12", "latest_commit": "2024-01-22 07:41:59", "languages": [], "model_or_dataset": "model", "model_size": 0.198, "model_architectures": "JapaneseCLIPModel" }, { "description": "Reflection-Llama-3.1-70B-gguf mattshumerさんが公開しているReflection-Llama-3.1-70Bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Reflection-Llama-3.1-70B-gguf", "project_name": "Reflection-Llama-3.1-70B-gguf", "downloads": 1265, "source": "Hugging Face", "score": -0.07733860472314097, "first_commit": "2024-09-06 17:18:27", "latest_commit": "2024-09-07 04:00:27", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": null }, { "description": "Places in japan.", "url": "https://huggingface.co/datasets/JapanDegitalMaterial/Places_in_Japan", "project_name": "Places_in_Japan", "downloads": 1252, "source": "Hugging Face", "score": -0.07745574795677079, "first_commit": "2023-09-23 12:35:06", "latest_commit": "2023-09-23 14:00:16", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "ELYZA-tasks-100: 日本語instructionモデル評価データセット Data Description 本データセットはinstruction-tuningを行ったモデルの評価用データセットです。", "url": "https://huggingface.co/datasets/elyza/ELYZA-tasks-100", "project_name": "ELYZA-tasks-100", "downloads": 1250, "source": "Hugging Face", "score": -0.07747376999271385, "first_commit": "2023-08-28 09:01:44", "latest_commit": "2023-12-27 18:17:36", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Japanese-StableLM-Base-Beta-70B A cute robot wearing a kimono writes calligraphy with one single brush — Stable Diffusion XL Model Description japanese-stablelm-base-beta-70b is a 70B-parameter decoder-only language model based on Llama-2-70b that has been fine-tuned on a diverse collection of Japanese data, with the intent of maximizing downstream performance on Japanese language tasks.", "url": "https://huggingface.co/stabilityai/japanese-stablelm-base-beta-70b", "project_name": "japanese-stablelm-base-beta-70b", "downloads": 1237, "source": "Hugging Face", "score": -0.07759091322634366, "first_commit": "2023-10-30 07:46:28", "latest_commit": "2023-12-19 06:44:53", "languages": [], "model_or_dataset": "model", "model_size": 69.0, "model_architectures": "LlamaForCausalLM" }, { "description": "Japanese-StableLM-Instruct-Beta-70B A cute robot wearing a kimono writes calligraphy with one single brush — Stable Diffusion XL Model Description japanese-stablelm-instruct-beta-70b is a 70B-parameter decoder-only language model based on japanese-stablelm-base-beta-70b and further fine tuned on Databricks Dolly-15k, Anthropic HH, and other public data.", "url": "https://huggingface.co/stabilityai/japanese-stablelm-instruct-beta-70b", "project_name": "japanese-stablelm-instruct-beta-70b", "downloads": 1232, "source": "Hugging Face", "score": -0.07763596831620129, "first_commit": "2023-10-30 07:47:31", "latest_commit": "2023-12-19 06:45:10", "languages": [], "model_or_dataset": "model", "model_size": 69.0, "model_architectures": "LlamaForCausalLM" }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-70b-hf", "project_name": "Swallow-70b-hf", "downloads": 1219, "source": "Hugging Face", "score": -0.07775311154983111, "first_commit": "2023-11-25 02:13:04", "latest_commit": "2024-06-29 08:56:23", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM" }, { "description": "bert-base-japanese-v3-marc_ja 「大規模言語モデル入門」の第5章で紹介している(感情分析)のモデルです。 ", "url": "https://huggingface.co/llm-book/bert-base-japanese-v3-marc_ja", "project_name": "bert-base-japanese-v3-marc_ja", "downloads": 1215, "source": "Hugging Face", "score": -0.07778915562171722, "first_commit": "2023-06-01 14:29:06", "latest_commit": "2023-07-24 06:49:13", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification" }, { "description": "roberta-small-japanese-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-small-japanese-luw-upos", "project_name": "roberta-small-japanese-luw-upos", "downloads": 1201, "source": "Hugging Face", "score": -0.07791530987331856, "first_commit": "2021-11-03 05:51:58", "latest_commit": "2022-09-18 19:45:09", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForTokenClassification" }, { "description": "old? ", "url": "https://huggingface.co/Lasorco/lametta_old", "project_name": "lametta_old", "downloads": 1183, "source": "Hugging Face", "score": -0.078077508196806, "first_commit": "2023-05-21 11:16:50", "latest_commit": "2024-07-23 07:24:33", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "Llama-3.1-70B-Japanese-Instruct-2407 Model Description This is a Japanese continually pre-trained model based on meta-llama/Meta-Llama-3.1-70B-Instruct.", "url": "https://huggingface.co/cyberagent/Llama-3.1-70B-Japanese-Instruct-2407", "project_name": "Llama-3.1-70B-Japanese-Instruct-2407", "downloads": 1181, "source": "Hugging Face", "score": -0.07809553023274905, "first_commit": "2024-07-26 01:30:21", "latest_commit": "2024-07-26 02:30:17", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": "LlamaForCausalLM" }, { "description": "hubert-large-asr", "url": "https://huggingface.co/TKU410410103/hubert-large-japanese-asr", "project_name": "hubert-large-japanese-asr", "downloads": 1166, "source": "Hugging Face", "score": -0.07823069550232194, "first_commit": "2024-04-09 03:01:08", "latest_commit": "2024-04-14 13:21:01", "languages": [], "model_or_dataset": "model", "model_size": 0.316, "model_architectures": "HubertForCTC" }, { "description": "GUIで動作する文書校正ツール GUI tool for textlinting.", "url": "https://github.com/gecko655/proofreading-tool", "project_name": "proofreading-tool", "stargazers_count": 86, "source": "GitHub", "score": -0.07826949591740329, "first_commit": "2021-04-08 12:10:36", "latest_commit": "2024-06-22 20:18:09", "languages": [ "JavaScript" ], "model_or_dataset": null }, { "description": "hotchpotch/japanese-reranker-cross-encoder-base-v1 日本語で学習させた Reranker (CrossEncoder) シリーズです。 ", "url": "https://huggingface.co/hotchpotch/japanese-reranker-cross-encoder-base-v1", "project_name": "japanese-reranker-cross-encoder-base-v1", "downloads": 1161, "source": "Hugging Face", "score": -0.07827575059217956, "first_commit": "2024-03-29 07:07:38", "latest_commit": "2024-04-01 02:39:31", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForSequenceClassification" }, { "description": "uniTKU-hubert-japanese-asr", "url": "https://huggingface.co/TKU410410103/uniTKU-hubert-japanese-asr", "project_name": "uniTKU-hubert-japanese-asr", "downloads": 1160, "source": "Hugging Face", "score": -0.07828476161015108, "first_commit": "2024-04-20 14:59:52", "latest_commit": "2024-04-22 18:37:33", "languages": [], "model_or_dataset": "model", "model_size": 0.0945, "model_architectures": "HubertForCTC" }, { "description": "Vecteus-v1-gguf Local-Novel-LLM-projectさんが公開しているVecteus-v1のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Vecteus-v1-gguf", "project_name": "Vecteus-v1-gguf", "downloads": 1160, "source": "Hugging Face", "score": -0.07828476161015108, "first_commit": "2024-05-01 17:49:42", "latest_commit": "2024-05-01 18:37:01", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "calm3-22b-RP-GGUF 概要 Aratako/calm3-22b-RPの量子化済みGGUF版です。", "url": "https://huggingface.co/Aratako/calm3-22b-RP-GGUF", "project_name": "calm3-22b-RP-GGUF", "downloads": 1104, "source": "Hugging Face", "score": -0.07878937861655647, "first_commit": "2024-08-21 01:13:32", "latest_commit": "2024-08-21 13:26:35", "languages": [], "model_or_dataset": "model", "model_size": 22.5, "model_architectures": null }, { "description": "alabnii/jmedroberta-base-sentencepiece Model description This is a Japanese RoBERTa base model pre-trained on academic articles in medical sciences collected by Japan Science and Technology Agency (JST).", "url": "https://huggingface.co/alabnii/jmedroberta-base-sentencepiece", "project_name": "jmedroberta-base-sentencepiece", "downloads": 1071, "source": "Hugging Face", "score": -0.07908674220961678, "first_commit": "2022-12-22 17:20:33", "latest_commit": "2023-03-21 23:57:37", "languages": [], "model_or_dataset": "model", "model_size": 0.109, "model_architectures": "BertForMaskedLM" }, { "description": "ELYZA-japanese-Llama-2-13b-fast Model Description ELYZA-japanese-Llama-2-13b は、 Llama 2をベースとして日本語能力を拡張するために追加事前学習を行ったモデルです。 ", "url": "https://huggingface.co/elyza/ELYZA-japanese-Llama-2-13b-fast", "project_name": "ELYZA-japanese-Llama-2-13b-fast", "downloads": 1066, "source": "Hugging Face", "score": -0.07913179729947441, "first_commit": "2023-12-25 17:14:45", "latest_commit": "2023-12-27 01:41:31", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM" }, { "description": "ELYZA-japanese-Llama-2-13b Model Description ELYZA-japanese-Llama-2-13b は、 Llama 2をベースとして日本語能力を拡張するために追加事前学習を行ったモデルです。", "url": "https://huggingface.co/elyza/ELYZA-japanese-Llama-2-13b", "project_name": "ELYZA-japanese-Llama-2-13b", "downloads": 1062, "source": "Hugging Face", "score": -0.07916784137136051, "first_commit": "2023-12-25 16:38:08", "latest_commit": "2023-12-27 01:40:43", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM" }, { "description": "nlp-waseda/roberta-base-japanese Model description This is a Japanese RoBERTa base model pretrained on Japanese Wikipedia and the Japanese portion of CC-100.", "url": "https://huggingface.co/nlp-waseda/roberta-base-japanese", "project_name": "roberta-base-japanese", "downloads": 1050, "source": "Hugging Face", "score": -0.07927597358701881, "first_commit": "2021-12-20 05:12:06", "latest_commit": "2022-10-21 14:46:36", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForMaskedLM" }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-7b-plus-hf", "project_name": "Swallow-7b-plus-hf", "downloads": 1040, "source": "Hugging Face", "score": -0.07936608376673406, "first_commit": "2024-02-29 11:28:52", "latest_commit": "2024-06-29 08:56:19", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM" }, { "description": "読み込み方 from datasets import load_dataset dataset = load_dataset(\"YANS-official/ogiri-debug\", split=\"test\") 概要 大喜利生成の動作確認用データセットです。", "url": "https://huggingface.co/datasets/YANS-official/ogiri-debug", "project_name": "ogiri-debug", "downloads": 1040, "source": "Hugging Face", "score": -0.07936608376673406, "first_commit": "2024-08-30 04:18:35", "latest_commit": "2024-08-30 14:52:03", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Fish Speech V1.2 Fish Speech V1.2 is a leading text-to-speech (TTS) model trained on 300k hours of English, Chinese, and Japanese audio data.", "url": "https://huggingface.co/fishaudio/fish-speech-1.2-sft", "project_name": "fish-speech-1.2-sft", "downloads": 1035, "source": "Hugging Face", "score": -0.07941113885659168, "first_commit": "2024-07-18 08:00:29", "latest_commit": "2024-08-02 08:13:06", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "Orion-14B 🌐English | 🇨", "url": "https://huggingface.co/OrionStarAI/Orion-14B-Base", "project_name": "Orion-14B-Base", "downloads": 1029, "source": "Hugging Face", "score": -0.07946520496442083, "first_commit": "2024-01-16 06:07:42", "latest_commit": "2024-03-26 09:21:52", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "OrionForCausalLM" }, { "description": "PLaMo-13B Model Description PLaMo-13B is a LLaMA-based 13B model pre-trained on English and Japanese open datasets, developed by Preferred Networks, Inc. ", "url": "https://huggingface.co/pfnet/plamo-13b", "project_name": "plamo-13b", "downloads": 1025, "source": "Hugging Face", "score": -0.07950124903630693, "first_commit": "2023-09-25 12:47:05", "latest_commit": "2023-10-10 15:24:54", "languages": [], "model_or_dataset": "model", "model_size": 13.1, "model_architectures": "PlamoForCausalLM" }, { "description": "読み込み方 from datasets import load_dataset dataset = load_dataset(\"YANS-official/senryu-debug\", split=\"test\") 概要 大喜利生成の動作確認用データセットです。", "url": "https://huggingface.co/datasets/YANS-official/senryu-debug", "project_name": "senryu-debug", "downloads": 1019, "source": "Hugging Face", "score": -0.07955531514413608, "first_commit": "2024-08-30 05:47:58", "latest_commit": "2024-09-04 10:49:15", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "ELYZA-japanese-Llama-2-7b-instruct-gguf ELYZAさんが公開しているELYZA-japanese-Llama-2-7b-instructのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/ELYZA-japanese-Llama-2-7b-instruct-gguf", "project_name": "ELYZA-japanese-Llama-2-7b-instruct-gguf", "downloads": 1005, "source": "Hugging Face", "score": -0.07968146939573742, "first_commit": "2023-08-29 05:33:45", "latest_commit": "2023-11-16 14:27:23", "languages": [], "model_or_dataset": "model", "model_size": 6.74, "model_architectures": null }, { "description": "ODEX is an Open-Domain EXecution-based NL-to-Code generation data benchmark.", "url": "https://huggingface.co/datasets/neulab/odex", "project_name": "odex", "downloads": 1003, "source": "Hugging Face", "score": -0.07969949143168047, "first_commit": "2023-01-06 14:30:00", "latest_commit": "2023-02-10 18:01:34", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "DataPilot-ArrowPro-7B-KUJIRA-gguf DataPilotさんが公開しているArrowPro-7B-KUJIRAのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/DataPilot-ArrowPro-7B-KUJIRA-gguf", "project_name": "DataPilot-ArrowPro-7B-KUJIRA-gguf", "downloads": 999, "source": "Hugging Face", "score": -0.07973553550356657, "first_commit": "2024-05-09 13:21:27", "latest_commit": "2024-05-11 07:24:16", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "Japanese-StableLM-Base-Alpha-7B \"A parrot able to speak Japanese, ukiyoe, edo period\" — Stable Diffusion XL Model Description japanese-stablelm-base-alpha-7b is a 7B-parameter decoder-only language model pre-trained on a diverse collection of Japanese and English datasets which focus on maximizing Japanese language modeling performance and Japanese downstream task performance.", "url": "https://huggingface.co/stabilityai/japanese-stablelm-base-alpha-7b", "project_name": "japanese-stablelm-base-alpha-7b", "downloads": 992, "source": "Hugging Face", "score": -0.07979861262936724, "first_commit": "2023-08-09 14:30:09", "latest_commit": "2023-08-22 09:36:29", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "JapaneseStableLMAlphaForCausalLM" }, { "description": "Llama-3-Swallow-70B-Instruct-v0.1-gguf tokyotech-llmさんが公開しているLlama-3-Swallow-70B-Instruct-v0.1のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Llama-3-Swallow-70B-Instruct-v0.1-gguf", "project_name": "Llama-3-Swallow-70B-Instruct-v0.1-gguf", "downloads": 986, "source": "Hugging Face", "score": -0.0798526787371964, "first_commit": "2024-07-01 14:21:29", "latest_commit": "2024-07-07 05:04:16", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": null }, { "description": "このモデルは何? ", "url": "https://huggingface.co/Lasorco/lametta", "project_name": "lametta", "downloads": 952, "source": "Hugging Face", "score": -0.08015905334822823, "first_commit": "2023-03-28 14:29:55", "latest_commit": "2023-11-08 07:37:12", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "Tanuki-8x8B-dpo-v1.0-GGUF 概要 GENIAC 松尾研 LLM開発プロジェクトで開発されたLLMであるweblab-GENIAC/Tanuki-8x8B-dpo-v1.0のGGUF量子化モデルです。", "url": "https://huggingface.co/team-hatakeyama-phase2/Tanuki-8x8B-dpo-v1.0-GGUF", "project_name": "Tanuki-8x8B-dpo-v1.0-GGUF", "downloads": 943, "source": "Hugging Face", "score": -0.08024015250997196, "first_commit": "2024-08-14 18:48:45", "latest_commit": "2024-08-29 17:42:37", "languages": [], "model_or_dataset": "model", "model_size": 47.0, "model_architectures": null }, { "description": "Please feel free to open an issue or pull request. ", "url": "https://huggingface.co/datasets/kumapo/JAQKET", "project_name": "JAQKET", "downloads": 933, "source": "Hugging Face", "score": -0.08033026268968721, "first_commit": "2023-06-21 13:04:38", "latest_commit": "2023-10-09 06:44:28", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Tanuki-8B-dpo-v1.0-AWQ 概要 GENIAC 松尾研 LLM開発プロジェクトで開発されたLLMであるweblab-GENIAC/Tanuki-8B-dpo-v1.0のAWQ 4bit量子化モデルです。", "url": "https://huggingface.co/team-hatakeyama-phase2/Tanuki-8B-dpo-v1.0-AWQ", "project_name": "Tanuki-8B-dpo-v1.0-AWQ", "downloads": 929, "source": "Hugging Face", "score": -0.0803663067615733, "first_commit": "2024-08-27 04:50:35", "latest_commit": "2024-09-03 09:29:23", "languages": [], "model_or_dataset": "model", "model_size": 1.47, "model_architectures": "LlamaForCausalLM" }, { "description": "In this study, we introduce a new dataset, WRIME, for emotional intensity estimation.", "url": "https://huggingface.co/datasets/shunk031/wrime", "project_name": "wrime", "downloads": 924, "source": "Hugging Face", "score": -0.08041136185143093, "first_commit": "2023-01-12 10:43:54", "latest_commit": "2023-01-15 12:39:01", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "albert-base-japanese-v1 日本語事前学習済みALBERTモデルです", "url": "https://huggingface.co/ken11/albert-base-japanese-v1", "project_name": "albert-base-japanese-v1", "downloads": 913, "source": "Hugging Face", "score": -0.0805104830491177, "first_commit": "2021-12-19 17:07:14", "latest_commit": "2021-12-22 03:04:30", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "AlbertForMaskedLM" }, { "description": "Llama3 Swallow", "url": "https://huggingface.co/tokyotech-llm/Llama-3-Swallow-70B-v0.1", "project_name": "Llama-3-Swallow-70B-v0.1", "downloads": 898, "source": "Hugging Face", "score": -0.08064564831869057, "first_commit": "2024-06-14 05:56:33", "latest_commit": "2024-07-01 06:24:32", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": "LlamaForCausalLM" }, { "description": "rinna/nekomata-7b Overview We conduct continual pre-training of qwen-7b on 30B tokens from a mixture of Japanese and English datasets.", "url": "https://huggingface.co/rinna/nekomata-7b", "project_name": "nekomata-7b", "downloads": 893, "source": "Hugging Face", "score": -0.0806907034085482, "first_commit": "2023-12-19 06:58:44", "latest_commit": "2024-07-20 08:35:21", "languages": [], "model_or_dataset": "model", "model_size": 7.72, "model_architectures": "QWenLMHeadModel" }, { "description": "t5-base-japanese-web (with Byte-fallback, 32K) Description megagonlabs/t5-base-japanese-web is a T5 (Text-to-Text Transfer Transformer) model pre-trained on Japanese web texts.", "url": "https://huggingface.co/megagonlabs/t5-base-japanese-web", "project_name": "t5-base-japanese-web", "downloads": 887, "source": "Hugging Face", "score": -0.08074476951637735, "first_commit": "2021-08-24 04:41:45", "latest_commit": "2021-09-06 19:32:21", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration" }, { "description": "Model Description llava-calm2-siglip is an experimental Vision Language Model that can answer questions in Japanese about images.", "url": "https://huggingface.co/cyberagent/llava-calm2-siglip", "project_name": "llava-calm2-siglip", "downloads": 854, "source": "Hugging Face", "score": -0.08104213310943767, "first_commit": "2024-06-12 19:35:20", "latest_commit": "2024-06-12 19:40:39", "languages": [], "model_or_dataset": "model", "model_size": 7.46, "model_architectures": "LlavaForConditionalGeneration" }, { "description": "japanese-gpt-neox-3.6b-instruction-sft-v2 Overview", "url": "https://huggingface.co/rinna/japanese-gpt-neox-3.6b-instruction-sft-v2", "project_name": "japanese-gpt-neox-3.6b-instruction-sft-v2", "downloads": 854, "source": "Hugging Face", "score": -0.08104213310943767, "first_commit": "2023-05-30 01:50:25", "latest_commit": "2024-07-20 07:57:35", "languages": [], "model_or_dataset": "model", "model_size": 3.76, "model_architectures": "GPTNeoXForCausalLM" }, { "description": "This is the filtered Japanese subset of XL-Sum followed by PaLM 2 filters 15-gram overlap * code: https://gist.github.com/mkshing/d6371cbfdd50d4f352cee247fd4dd86a number of examples train: 4215 (before: 7113) validation: 758 (before: 889) test: 766 (before: 889)", "url": "https://huggingface.co/datasets/mkshing/xlsum_ja", "project_name": "xlsum_ja", "downloads": 848, "source": "Hugging Face", "score": -0.08109619921726682, "first_commit": "2023-06-16 04:15:41", "latest_commit": "2023-06-20 23:28:48", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Japanese-LLaMA-3-8B-Instruct-v2 Japanese-LLaMA-3-8B-Instruct-v2は指示実行モデル、フルモデルです。 ", "url": "https://huggingface.co/owner203/japanese-llama-3-8b-instruct-v2", "project_name": "japanese-llama-3-8b-instruct-v2", "downloads": 833, "source": "Hugging Face", "score": -0.0812313644868397, "first_commit": "2024-06-10 10:10:19", "latest_commit": "2024-06-21 06:35:31", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM" }, { "description": "Corpus of Annual Reports in Japan", "url": "https://github.com/chakki-works/CoARiJ", "project_name": "CoARiJ", "stargazers_count": 85, "source": "GitHub", "score": -0.08124014369305026, "first_commit": "2019-09-02 14:12:48", "latest_commit": "2020-12-19 14:00:34", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "Tanuki-8x8B-dpo-v1.0-AWQ 概要 GENIAC 松尾研 LLM開発プロジェクトで開発されたLLMであるweblab-GENIAC/Tanuki-8x8B-dpo-v1.0のAWQ 4bit量子化モデルです。", "url": "https://huggingface.co/team-hatakeyama-phase2/Tanuki-8x8B-dpo-v1.0-AWQ", "project_name": "Tanuki-8x8B-dpo-v1.0-AWQ", "downloads": 832, "source": "Hugging Face", "score": -0.08124037550481121, "first_commit": "2024-08-27 09:31:22", "latest_commit": "2024-09-03 09:26:20", "languages": [], "model_or_dataset": "model", "model_size": 6.75, "model_architectures": "TanukiForCausalLM" }, { "description": "Llama-3-ELYZA-JP-8B-gguf elyzaさんが公開しているLlama-3-ELYZA-JP-8Bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Llama-3-ELYZA-JP-8B-gguf", "project_name": "Llama-3-ELYZA-JP-8B-gguf", "downloads": 829, "source": "Hugging Face", "score": -0.08126740855872579, "first_commit": "2024-06-26 16:36:04", "latest_commit": "2024-06-26 17:55:35", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null }, { "description": "HODACHI-Borea-Phi-3.5-mini-Instruct-Jp-gguf HODACHIさんが公開しているBorea-Phi-3.5-mini-Instruct-Jpのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/HODACHI-Borea-Phi-3.5-mini-Instruct-Jp-gguf", "project_name": "HODACHI-Borea-Phi-3.5-mini-Instruct-Jp-gguf", "downloads": 827, "source": "Hugging Face", "score": -0.08128543059466883, "first_commit": "2024-08-21 09:58:41", "latest_commit": "2024-08-21 11:08:38", "languages": [], "model_or_dataset": "model", "model_size": 3.82, "model_architectures": null }, { "description": "llm-jp-13b-instruct-lora-jaster-dolly-oasst-v1.0", "url": "https://huggingface.co/llm-jp/llm-jp-13b-instruct-lora-jaster-dolly-oasst-v1.0", "project_name": "llm-jp-13b-instruct-lora-jaster-dolly-oasst-v1.0", "downloads": 823, "source": "Hugging Face", "score": -0.08132147466655494, "first_commit": "2023-10-18 19:01:48", "latest_commit": "2023-10-20 08:41:17", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "This is a model for named entity recognition of Japanese medical documents.", "url": "https://huggingface.co/sociocom/MedNER-CR-JA", "project_name": "MedNER-CR-JA", "downloads": 811, "source": "Hugging Face", "score": -0.08142960688221323, "first_commit": "2022-08-23 03:30:43", "latest_commit": "2024-07-31 07:44:00", "languages": [], "model_or_dataset": "model", "model_size": 0.11, "model_architectures": "BertForTokenClassification" }, { "description": "Shisa 7B Shisa 7B (shisa-7b-v1)", "url": "https://huggingface.co/augmxnt/shisa-7b-v1", "project_name": "shisa-7b-v1", "downloads": 790, "source": "Hugging Face", "score": -0.08161883825961526, "first_commit": "2023-11-27 17:55:31", "latest_commit": "2023-12-20 18:11:13", "languages": [], "model_or_dataset": "model", "model_size": 7.96, "model_architectures": "MistralForCausalLM" }, { "description": "bert-base-japanese-v3-unsup-simcse-jawiki 「大規模言語モデル入門」の第8章で紹介している教師なしSimCSEのモデルです。 ", "url": "https://huggingface.co/llm-book/bert-base-japanese-v3-unsup-simcse-jawiki", "project_name": "bert-base-japanese-v3-unsup-simcse-jawiki", "downloads": 786, "source": "Hugging Face", "score": -0.08165488233150135, "first_commit": "2023-06-21 10:52:27", "latest_commit": "2023-07-24 07:07:44", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertModel" }, { "description": "OpenCALM-Medium Model Description OpenCALM is a suite of decoder-only language models pre-trained on Japanese datasets, developed by", "url": "https://huggingface.co/cyberagent/open-calm-medium", "project_name": "open-calm-medium", "downloads": 783, "source": "Hugging Face", "score": -0.08168191538541593, "first_commit": "2023-05-15 06:44:47", "latest_commit": "2023-05-18 01:10:54", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTNeoXForCausalLM" }, { "description": "Phi-3-mini-128k-instruct-gguf microsoftさんが公開しているPhi-3-mini-128k-instructのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Phi-3-mini-128k-instruct-gguf", "project_name": "Phi-3-mini-128k-instruct-gguf", "downloads": 782, "source": "Hugging Face", "score": -0.08169092640338746, "first_commit": "2024-04-24 13:50:51", "latest_commit": "2024-04-24 14:24:09", "languages": [], "model_or_dataset": "model", "model_size": 3.82, "model_architectures": null }, { "description": "ELYZA-japanese-Llama-2-13b-fast-instruct-gguf ELYZAさんが公開しているELYZA-japanese-Llama-2-13b-fast-instructのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/ELYZA-japanese-Llama-2-13b-fast-instruct-gguf", "project_name": "ELYZA-japanese-Llama-2-13b-fast-instruct-gguf", "downloads": 761, "source": "Hugging Face", "score": -0.08188015778078947, "first_commit": "2023-12-27 09:46:04", "latest_commit": "2023-12-27 11:39:18", "languages": [], "model_or_dataset": "model", "model_size": 13.1, "model_architectures": null }, { "description": "PLaMo-13B-Instruct Model Description PLaMo-13B-Instruct is an instruct fine-tuned model built upon the 8192 context length version of PLaMo-13B text generation model.", "url": "https://huggingface.co/pfnet/plamo-13b-instruct", "project_name": "plamo-13b-instruct", "downloads": 744, "source": "Hugging Face", "score": -0.0820333450863054, "first_commit": "2023-10-26 02:11:24", "latest_commit": "2024-01-25 07:46:09", "languages": [], "model_or_dataset": "model", "model_size": 13.1, "model_architectures": "PlamoForCausalLM" }, { "description": "shisa-base-7b-v1 shisa-base-7b-v1 takes Mistral 7B and adds an additional 8B tokens of primarily Japanese pre-training.", "url": "https://huggingface.co/augmxnt/shisa-base-7b-v1", "project_name": "shisa-base-7b-v1", "downloads": 740, "source": "Hugging Face", "score": -0.0820693891581915, "first_commit": "2023-11-19 09:44:36", "latest_commit": "2023-12-09 10:34:29", "languages": [], "model_or_dataset": "model", "model_size": 7.96, "model_architectures": "MistralForCausalLM" }, { "description": "alabnii/jmedroberta-base-manbyo-wordpiece Model description This is a Japanese RoBERTa base model pre-trained on academic articles in medical sciences collected by Japan Science and Technology Agency (JST).", "url": "https://huggingface.co/alabnii/jmedroberta-base-manbyo-wordpiece", "project_name": "jmedroberta-base-manbyo-wordpiece", "downloads": 738, "source": "Hugging Face", "score": -0.08208741119413454, "first_commit": "2022-12-22 17:17:03", "latest_commit": "2023-03-08 01:44:36", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM" }, { "description": "DeBERTa V2 base Japanese This is a DeBERTaV2 model pretrained on Japanese texts.", "url": "https://huggingface.co/izumi-lab/deberta-v2-base-japanese", "project_name": "deberta-v2-base-japanese", "downloads": 738, "source": "Hugging Face", "score": -0.08208741119413454, "first_commit": "2023-10-21 13:24:11", "latest_commit": "2024-07-19 03:07:57", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "gpt2-large-japanese This repository provides a large sized Japanese GPT-2 model.", "url": "https://huggingface.co/abeja/gpt2-large-japanese", "project_name": "gpt2-large-japanese", "downloads": 719, "source": "Hugging Face", "score": -0.08225862053559352, "first_commit": "2022-08-29 05:17:36", "latest_commit": "2022-08-29 16:10:11", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel" }, { "description": "Llama-3.1-70B-Instruct-gguf meta-llamaさんが公開しているMeta-Llama-3.1-70B-Instructのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Llama-3.1-70B-Instruct-gguf", "project_name": "Llama-3.1-70B-Instruct-gguf", "downloads": 708, "source": "Hugging Face", "score": -0.0823577417332803, "first_commit": "2024-07-23 17:25:23", "latest_commit": "2024-07-24 21:04:27", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": null }, { "description": "upskyy/gte-korean-base This model is korsts and kornli finetuning model from Alibaba-NLP/gte-multilingual-base.", "url": "https://huggingface.co/upskyy/gte-base-korean", "project_name": "gte-base-korean", "downloads": 708, "source": "Hugging Face", "score": -0.0823577417332803, "first_commit": "2024-08-08 14:34:44", "latest_commit": "2024-08-08 15:29:27", "languages": [], "model_or_dataset": "model", "model_size": 0.305, "model_architectures": "NewModel" }, { "description": "weblab-10b-instruction-sft-GPTQ Original model weblab-10b-instruction-sft which is a Japanese-centric multilingual GPT-NeoX model of 10 billion parameters created by matsuo-lab Takeshi Kojima.", "url": "https://huggingface.co/dahara1/weblab-10b-instruction-sft-GPTQ", "project_name": "weblab-10b-instruction-sft-GPTQ", "downloads": 704, "source": "Hugging Face", "score": -0.08239378580516639, "first_commit": "2023-08-21 05:45:35", "latest_commit": "2023-11-14 00:24:22", "languages": [], "model_or_dataset": "model", "model_size": 1.86, "model_architectures": "GPTNeoXForCausalLM" }, { "description": "オリジナルのサイトと同じものを使用しています。 ", "url": "https://huggingface.co/datasets/llm-book/llm-jp-eval", "project_name": "llm-jp-eval", "downloads": 700, "source": "Hugging Face", "score": -0.08242982987705248, "first_commit": "2024-06-19 10:31:57", "latest_commit": "2024-08-31 12:40:31", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Model Card for Japanese character-level GPT-2 Small Model description This is a Japanese character-level GPT-2 Small (90M parameters) language model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.", "url": "https://huggingface.co/ku-nlp/gpt2-small-japanese-char", "project_name": "gpt2-small-japanese-char", "downloads": 694, "source": "Hugging Face", "score": -0.08248389598488164, "first_commit": "2023-04-18 08:24:55", "latest_commit": "2023-05-08 10:08:13", "languages": [], "model_or_dataset": "model", "model_size": 0.10300000000000001, "model_architectures": "GPT2LMHeadModel" }, { "description": "Ninja-v1-NSFW-gguf Local-Novel-LLM-projectさんが公開しているNinja-v1-NSFWのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Ninja-v1-NSFW-gguf", "project_name": "Ninja-v1-NSFW-gguf", "downloads": 681, "source": "Hugging Face", "score": -0.08260103921851146, "first_commit": "2024-05-03 14:03:23", "latest_commit": "2024-05-04 13:26:52", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "ELYZA-japanese-Llama-2-13b-fast-instruct-GGUF Original Model elyza/ELYZA-japanese-Llama-2-13b-fast-instruct Run with LlamaEdge LlamaEdge version: v0.2.8 and above Prompt template Prompt type: llama-2-chat Prompt string <s>[INST] <<SYS>> {{ system_prompt }} <</SYS>> {{ user_msg_1 }}", "url": "https://huggingface.co/second-state/ELYZA-japanese-Llama-2-13b-fast-instruct-GGUF", "project_name": "ELYZA-japanese-Llama-2-13b-fast-instruct-GGUF", "downloads": 680, "source": "Hugging Face", "score": -0.08261005023648299, "first_commit": "2024-01-06 03:33:53", "latest_commit": "2024-03-20 07:21:25", "languages": [], "model_or_dataset": "model", "model_size": 13.1, "model_architectures": "LlamaForCausalLM" }, { "description": "Model Card for Tanrei/GPTSAN-japanese General-purpose Swich transformer based Japanese language model GPTSAN has some unique features.", "url": "https://huggingface.co/Tanrei/GPTSAN-japanese", "project_name": "GPTSAN-japanese", "downloads": 680, "source": "Hugging Face", "score": -0.08261005023648299, "first_commit": "2023-01-06 05:41:12", "latest_commit": "2023-04-21 19:04:49", "languages": [], "model_or_dataset": "model", "model_size": 2.78, "model_architectures": "GPTSanJapaneseForConditionalGeneration" }, { "description": "bilingual-gpt-neox-4b-instruction-ppo Overview This repository provides an English-Japanese bilingual GPT-NeoX model of 3.8 billion parameters.", "url": "https://huggingface.co/rinna/bilingual-gpt-neox-4b-instruction-ppo", "project_name": "bilingual-gpt-neox-4b-instruction-ppo", "downloads": 677, "source": "Hugging Face", "score": -0.08263708329039755, "first_commit": "2023-08-02 05:56:07", "latest_commit": "2024-07-20 08:05:14", "languages": [], "model_or_dataset": "model", "model_size": 3.95, "model_architectures": "GPTNeoXForCausalLM" }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-7b-NVE-instruct-hf", "project_name": "Swallow-7b-NVE-instruct-hf", "downloads": 669, "source": "Hugging Face", "score": -0.08270917143416975, "first_commit": "2023-12-07 02:08:59", "latest_commit": "2024-07-06 15:18:11", "languages": [], "model_or_dataset": "model", "model_size": 6.74, "model_architectures": "LlamaForCausalLM" }, { "description": "Polyglot-math-4x7b-24b Polyglot-4x7b is a Mixture of Experts approach to a multilingual model.", "url": "https://huggingface.co/macadeliccc/polyglot-math-4x7b", "project_name": "polyglot-math-4x7b", "downloads": 664, "source": "Hugging Face", "score": -0.08275422652402738, "first_commit": "2024-01-13 03:05:44", "latest_commit": "2024-03-04 19:25:12", "languages": [], "model_or_dataset": "model", "model_size": 24.2, "model_architectures": "MixtralForCausalLM" }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-70b-NVE-instruct-hf", "project_name": "Swallow-70b-NVE-instruct-hf", "downloads": 659, "source": "Hugging Face", "score": -0.082799281613885, "first_commit": "2023-12-13 03:56:30", "latest_commit": "2024-07-06 15:18:24", "languages": [], "model_or_dataset": "model", "model_size": 69.0, "model_architectures": "LlamaForCausalLM" }, { "description": "japanese-large-lm-1.7b-instruction-sft This repository provides a 1.7B parameters Japanese language model, fine-tuned and trained by LINE Corporation.", "url": "https://huggingface.co/line-corporation/japanese-large-lm-1.7b-instruction-sft", "project_name": "japanese-large-lm-1.7b-instruction-sft", "downloads": 646, "source": "Hugging Face", "score": -0.08291642484751482, "first_commit": "2023-08-14 17:19:11", "latest_commit": "2023-08-14 17:19:11", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel" }, { "description": "Model card for model ID", "url": "https://huggingface.co/retrieva-jp/t5-large-medium", "project_name": "t5-large-medium", "downloads": 634, "source": "Hugging Face", "score": -0.08302455706317312, "first_commit": "2023-04-26 08:31:45", "latest_commit": "2023-05-10 10:00:45", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration" }, { "description": "gemma-2-2b-it-gguf googleさんが公開しているgemma-2-2b-itのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/gemma-2-2b-it-gguf", "project_name": "gemma-2-2b-it-gguf", "downloads": 629, "source": "Hugging Face", "score": -0.08306961215303074, "first_commit": "2024-08-01 17:22:58", "latest_commit": "2024-08-01 18:29:08", "languages": [], "model_or_dataset": "model", "model_size": 2.61, "model_architectures": null }, { "description": "Llama-3.1-70B-EZO-1.1-it-gguf HODACHIさんが公開しているLlama-3.1-70B-EZO-1.1-itのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Llama-3.1-70B-EZO-1.1-it-gguf", "project_name": "Llama-3.1-70B-EZO-1.1-it-gguf", "downloads": 622, "source": "Hugging Face", "score": -0.08313268927883143, "first_commit": "2024-07-31 12:12:13", "latest_commit": "2024-07-31 21:47:25", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": null }, { "description": "Model Card for Model ID Original model elyza/ELYZA-japanese-Llama-2-7b-fast-instruct which is based on Meta's \"Llama 2\" and has undergone additional pre-training in Japanese, and thier original post-training and speed up tuning.", "url": "https://huggingface.co/dahara1/ELYZA-japanese-Llama-2-7b-fast-instruct-GPTQ", "project_name": "ELYZA-japanese-Llama-2-7b-fast-instruct-GPTQ", "downloads": 621, "source": "Hugging Face", "score": -0.08314170029680294, "first_commit": "2023-08-30 09:18:50", "latest_commit": "2023-11-14 00:10:58", "languages": [], "model_or_dataset": "model", "model_size": 1.24, "model_architectures": "LlamaForCausalLM" }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-70b-NVE-hf", "project_name": "Swallow-70b-NVE-hf", "downloads": 618, "source": "Hugging Face", "score": -0.08316873335071752, "first_commit": "2023-12-07 07:34:35", "latest_commit": "2024-06-29 08:56:25", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM" }, { "description": "hotchpotch/japanese-bge-reranker-v2-m3-v1 日本語で学習させた Reranker (CrossEncoder) シリーズです。 ", "url": "https://huggingface.co/hotchpotch/japanese-bge-reranker-v2-m3-v1", "project_name": "japanese-bge-reranker-v2-m3-v1", "downloads": 617, "source": "Hugging Face", "score": -0.08317774436868905, "first_commit": "2024-03-28 20:45:16", "latest_commit": "2024-04-01 02:40:22", "languages": [], "model_or_dataset": "model", "model_size": 0.5680000000000001, "model_architectures": "XLMRobertaForSequenceClassification" }, { "description": "rinna/nekomata-14b Overview We conduct continual pre-training of qwen-14b on 66B tokens from a mixture of Japanese and English datasets.", "url": "https://huggingface.co/rinna/nekomata-14b", "project_name": "nekomata-14b", "downloads": 613, "source": "Hugging Face", "score": -0.08321378844057514, "first_commit": "2023-12-19 08:09:53", "latest_commit": "2024-07-22 07:58:40", "languages": [], "model_or_dataset": "model", "model_size": 14.2, "model_architectures": "QWenLMHeadModel" }, { "description": "japanese-large-lm-1.7b This repository provides a 1.7B parameters Japanese language model, trained by LINE Corporation.", "url": "https://huggingface.co/line-corporation/japanese-large-lm-1.7b", "project_name": "japanese-large-lm-1.7b", "downloads": 611, "source": "Hugging Face", "score": -0.08323181047651819, "first_commit": "2023-07-21 00:46:33", "latest_commit": "2023-08-17 01:06:37", "languages": [], "model_or_dataset": "model", "model_size": 1.75, "model_architectures": "GPT2LMHeadModel" }, { "description": "llm-jp-13b-instruct-full-dolly_en-dolly_ja-ichikara_003_001-oasst_en-oasst_ja-v1.1", "url": "https://huggingface.co/llm-jp/llm-jp-13b-instruct-full-dolly_en-dolly_ja-ichikara_003_001-oasst_en-oasst_ja-v1.1", "project_name": "llm-jp-13b-instruct-full-dolly_en-dolly_ja-ichikara_003_001-oasst_en-oasst_ja-v1.1", "downloads": 606, "source": "Hugging Face", "score": -0.08327686556637581, "first_commit": "2024-01-29 12:52:31", "latest_commit": "2024-02-07 19:49:25", "languages": [], "model_or_dataset": "model", "model_size": 12.9, "model_architectures": "GPT2LMHeadModel" }, { "description": "Japanese-StableLM-Instruct-Beta-7B A cute robot wearing a kimono writes calligraphy with one single brush — Stable Diffusion XL Model Description japanese-stablelm-instruct-beta-7b is a 7B-parameter decoder-only language model based on", "url": "https://huggingface.co/stabilityai/japanese-stablelm-instruct-beta-7b", "project_name": "japanese-stablelm-instruct-beta-7b", "downloads": 605, "source": "Hugging Face", "score": -0.08328587658434734, "first_commit": "2023-10-30 07:47:09", "latest_commit": "2023-12-19 06:43:49", "languages": [], "model_or_dataset": "model", "model_size": 6.74, "model_architectures": "LlamaForCausalLM" }, { "description": "KARAKURI LM KARAKURI LM is a pretrained language model that builds upon Llama 2.", "url": "https://huggingface.co/karakuri-ai/karakuri-lm-70b-v0.1", "project_name": "karakuri-lm-70b-v0.1", "downloads": 602, "source": "Hugging Face", "score": -0.08331290963826192, "first_commit": "2024-01-26 10:49:53", "latest_commit": "2024-05-07 09:00:06", "languages": [], "model_or_dataset": "model", "model_size": 69.2, "model_architectures": "LlamaForCausalLM" }, { "description": "RetrievaBERT Model The RetrievaBERT is the pre-trained Transformer Encoder using Megatron-LM.", "url": "https://huggingface.co/retrieva-jp/bert-1.3b", "project_name": "bert-1.3b", "downloads": 600, "source": "Hugging Face", "score": -0.08333093167420497, "first_commit": "2024-06-25 06:18:24", "latest_commit": "2024-07-09 05:36:08", "languages": [], "model_or_dataset": "model", "model_size": 1.3, "model_architectures": "RetrievaBertForMaskedLM" }, { "description": "QuantFactory/Mistral-Nemo-Japanese-Instruct-2408-GGUF This is quantized version of cyberagent/Mistral-Nemo-Japanese-Instruct-2408 created using llama.cpp Original Model Card Mistral-Nemo-Japanese-Instruct-2408 Model Description", "url": "https://huggingface.co/QuantFactory/Mistral-Nemo-Japanese-Instruct-2408-GGUF", "project_name": "Mistral-Nemo-Japanese-Instruct-2408-GGUF", "downloads": 597, "source": "Hugging Face", "score": -0.08335796472811954, "first_commit": "2024-09-09 15:17:36", "latest_commit": "2024-09-09 16:27:10", "languages": [], "model_or_dataset": "model", "model_size": 12.2, "model_architectures": null }, { "description": "nlp-waseda/roberta-large-japanese-seq512-with-auto-jumanpp Model description", "url": "https://huggingface.co/nlp-waseda/roberta-large-japanese-seq512-with-auto-jumanpp", "project_name": "roberta-large-japanese-seq512-with-auto-jumanpp", "downloads": 595, "source": "Hugging Face", "score": -0.08337598676406259, "first_commit": "2022-10-15 06:04:06", "latest_commit": "2022-10-21 15:56:38", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForMaskedLM" }, { "description": "Japanese-StableLM-Base-JAVocab-Beta-7B A cute robot wearing a kimono writes calligraphy with one single brush — Stable Diffusion XL Model Description japanese-stablelm-base-ja_vocab-beta-7b is a 7B-parameter decoder-only language model based on Llama-2-7b that has been fine-tuned on a diverse collection of Japanese data, with the intent of maximizing downstream performance on Japanese language tasks.", "url": "https://huggingface.co/stabilityai/japanese-stablelm-base-ja_vocab-beta-7b", "project_name": "japanese-stablelm-base-ja_vocab-beta-7b", "downloads": 593, "source": "Hugging Face", "score": -0.08339400880000564, "first_commit": "2023-10-30 07:49:15", "latest_commit": "2023-12-19 06:45:58", "languages": [], "model_or_dataset": "model", "model_size": 6.88, "model_architectures": "LlamaForCausalLM" }, { "description": "BERT Base Japanese for Irony", "url": "https://huggingface.co/kit-nlp/bert-base-japanese-sentiment-irony", "project_name": "bert-base-japanese-sentiment-irony", "downloads": 589, "source": "Hugging Face", "score": -0.08343005287189174, "first_commit": "2022-11-07 06:29:21", "latest_commit": "2022-11-08 04:23:27", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification" }, { "description": "Stockmark-13b-instruct Stockmark-13b-instruct is an instruction-tuned version of Stockmark-13b, a 13 billion parameter Japanese LLM.", "url": "https://huggingface.co/stockmark/stockmark-13b-instruct", "project_name": "stockmark-13b-instruct", "downloads": 589, "source": "Hugging Face", "score": -0.08343005287189174, "first_commit": "2023-11-08 16:56:34", "latest_commit": "2023-11-08 17:02:17", "languages": [], "model_or_dataset": "model", "model_size": 13.2, "model_architectures": "LlamaForCausalLM" }, { "description": "Japanese-StableLM-Instruct-JAVocab-Beta-7B A cute robot wearing a kimono writes calligraphy with one single brush — Stable Diffusion XL Model Description japanese-stablelm-instruct-ja_vocab-beta-7b is a 7B-parameter decoder-only language model based on japanese-stablelm-ja_vocab-beta-7b and further fine tuned on Databricks Dolly-15k, Anthropic HH, and other public data.", "url": "https://huggingface.co/stabilityai/japanese-stablelm-instruct-ja_vocab-beta-7b", "project_name": "japanese-stablelm-instruct-ja_vocab-beta-7b", "downloads": 576, "source": "Hugging Face", "score": -0.08354719610552157, "first_commit": "2023-10-30 07:49:38", "latest_commit": "2023-12-19 06:46:01", "languages": [], "model_or_dataset": "model", "model_size": 6.88, "model_architectures": "LlamaForCausalLM" }, { "description": "Japanese-StableLM-Instruct-Alpha-7B-v2 \"A parrot able to speak Japanese, ukiyoe, edo period\" — Stable Diffusion XL Model Description japanese-stablelm-instruct-alpha-7b-v2 is a 7B parameter decoder-only language models pre-trained built on top of the Japanese-StableLM-Base-Alpha-7B model and further fine-tuned on various instruction-following datasets.", "url": "https://huggingface.co/stabilityai/japanese-stablelm-instruct-alpha-7b-v2", "project_name": "japanese-stablelm-instruct-alpha-7b-v2", "downloads": 575, "source": "Hugging Face", "score": -0.08355620712349308, "first_commit": "2023-10-06 08:40:24", "latest_commit": "2023-10-06 08:40:24", "languages": [], "model_or_dataset": "model", "model_size": 7.01, "model_architectures": "JapaneseStableLMAlphaForCausalLM" }, { "description": "stockmark/gpt-neox-japanese-1.4b This repository provides a GPT-NeoX based model with 1.4B parameters pre-trained on Japanese corpus of about 20B tokens.", "url": "https://huggingface.co/stockmark/gpt-neox-japanese-1.4b", "project_name": "gpt-neox-japanese-1.4b", "downloads": 573, "source": "Hugging Face", "score": -0.08357422915943613, "first_commit": "2023-08-06 07:37:38", "latest_commit": "2023-09-07 03:44:19", "languages": [], "model_or_dataset": "model", "model_size": 1.44, "model_architectures": "GPTNeoXForCausalLM" }, { "description": "bert-large-japanese-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/bert-large-japanese-upos", "project_name": "bert-large-japanese-upos", "downloads": 564, "source": "Hugging Face", "score": -0.08365532832117986, "first_commit": "2021-08-19 10:39:38", "latest_commit": "2022-09-18 19:43:53", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForTokenClassification" }, { "description": "stockmark-gpt-neox-japanese-1.4b-gguf stockmarkさんが公開しているgpt-neox-japanese-1.4bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/stockmark-gpt-neox-japanese-1.4b-gguf", "project_name": "stockmark-gpt-neox-japanese-1.4b-gguf", "downloads": 553, "source": "Hugging Face", "score": -0.08375444951886664, "first_commit": "2023-08-22 12:45:18", "latest_commit": "2023-09-08 22:00:37", "languages": [], "model_or_dataset": "model", "model_size": 1.41, "model_architectures": null }, { "description": "Sarashina1-13B", "url": "https://huggingface.co/sbintuitions/sarashina1-13b", "project_name": "sarashina1-13b", "downloads": 551, "source": "Hugging Face", "score": -0.08377247155480969, "first_commit": "2024-06-07 11:56:53", "latest_commit": "2024-06-27 06:56:06", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTNeoXForCausalLM" }, { "description": "bert-base-japanese-v3-jnli 「大規模言語モデル入門」の第5章で紹介している(自然言語推論)のモデルです。 ", "url": "https://huggingface.co/llm-book/bert-base-japanese-v3-jnli", "project_name": "bert-base-japanese-v3-jnli", "downloads": 547, "source": "Hugging Face", "score": -0.08380851562669578, "first_commit": "2023-06-12 14:15:16", "latest_commit": "2023-07-24 06:49:14", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification" }, { "description": "Sarashina1-65B", "url": "https://huggingface.co/sbintuitions/sarashina1-65b", "project_name": "sarashina1-65b", "downloads": 545, "source": "Hugging Face", "score": -0.08382653766263883, "first_commit": "2024-06-07 11:57:56", "latest_commit": "2024-06-27 06:56:36", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTNeoXForCausalLM" }, { "description": "Sarashina1-7B This repository provides Japanese language models trained by SB Intuitions.", "url": "https://huggingface.co/sbintuitions/sarashina1-7b", "project_name": "sarashina1-7b", "downloads": 544, "source": "Hugging Face", "score": -0.08383554868061036, "first_commit": "2024-06-07 10:13:21", "latest_commit": "2024-06-27 06:55:38", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTNeoXForCausalLM" }, { "description": "ELYZA-japanese-CodeLlama-7b-instruct-gguf ELYZAさんが公開しているELYZA-japanese-CodeLlama-7b-instructのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/ELYZA-japanese-CodeLlama-7b-instruct-gguf", "project_name": "ELYZA-japanese-CodeLlama-7b-instruct-gguf", "downloads": 541, "source": "Hugging Face", "score": -0.08386258173452493, "first_commit": "2023-11-15 09:48:32", "latest_commit": "2023-11-16 14:28:24", "languages": [], "model_or_dataset": "model", "model_size": 6.74, "model_architectures": null }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-13b-NVE-hf", "project_name": "Swallow-13b-NVE-hf", "downloads": 537, "source": "Hugging Face", "score": -0.08389862580641103, "first_commit": "2024-01-30 11:39:05", "latest_commit": "2024-06-29 08:56:22", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM" }, { "description": "haqishen-Llama-3-8B-Japanese-Instruct-gguf haqishenさんが公開しているLlama-3-8B-Japanese-Instructのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/haqishen-Llama-3-8B-Japanese-Instruct-gguf", "project_name": "haqishen-Llama-3-8B-Japanese-Instruct-gguf", "downloads": 535, "source": "Hugging Face", "score": -0.08391664784235409, "first_commit": "2024-04-23 13:55:17", "latest_commit": "2024-04-23 14:54:23", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null }, { "description": "[Llama-3.1-70B-EZO-1.1-it] Model Card モデル情報 / Model Information このモデルは、Meta AI の Llama 3.1 をベースに、日本語タスクでの性能を向上させるためにファインチューニングを行ったものです。", "url": "https://huggingface.co/AXCXEPT/Llama-3.1-70B-EZO-1.1-it", "project_name": "Llama-3.1-70B-EZO-1.1-it", "downloads": 534, "source": "Hugging Face", "score": -0.0839256588603256, "first_commit": "2024-07-29 01:35:35", "latest_commit": "2024-08-23 10:52:31", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": "LlamaForCausalLM" }, { "description": "ELYZA-japanese-Llama-2-7b-gguf ELYZAさんが公開しているELYZA-japanese-Llama-2-7bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/ELYZA-japanese-Llama-2-7b-gguf", "project_name": "ELYZA-japanese-Llama-2-7b-gguf", "downloads": 532, "source": "Hugging Face", "score": -0.08394368089626865, "first_commit": "2023-08-29 06:32:01", "latest_commit": "2023-11-16 14:27:12", "languages": [], "model_or_dataset": "model", "model_size": 6.74, "model_architectures": null }, { "description": "HODACHI-EZO-Common-T2-2B-gemma-2-it-gguf HODACHIさんが公開しているEZO-Common-T2-2B-gemma-2-itのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/HODACHI-EZO-Common-T2-2B-gemma-2-it-gguf", "project_name": "HODACHI-EZO-Common-T2-2B-gemma-2-it-gguf", "downloads": 530, "source": "Hugging Face", "score": -0.08396170293221171, "first_commit": "2024-08-01 17:32:31", "latest_commit": "2024-08-01 18:38:31", "languages": [], "model_or_dataset": "model", "model_size": 2.61, "model_architectures": null }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", "url": "https://huggingface.co/TheBloke/japanese-stablelm-instruct-beta-70B-GGUF", "project_name": "japanese-stablelm-instruct-beta-70B-GGUF", "downloads": 523, "source": "Hugging Face", "score": -0.08402478005801238, "first_commit": "2023-11-02 15:45:24", "latest_commit": "2023-11-02 18:22:05", "languages": [], "model_or_dataset": "model", "model_size": 69.0, "model_architectures": null }, { "description": "ELYZA-japanese-CodeLlama-7b Model Description ELYZA-japanese-CodeLlama-7b は、 Code Llamaをベースとして日本語能力を拡張するために追加事前学習を行ったモデルです。 ", "url": "https://huggingface.co/elyza/ELYZA-japanese-CodeLlama-7b-instruct", "project_name": "ELYZA-japanese-CodeLlama-7b-instruct", "downloads": 521, "source": "Hugging Face", "score": -0.08404280209395543, "first_commit": "2023-11-07 12:04:07", "latest_commit": "2023-11-17 05:01:00", "languages": [], "model_or_dataset": "model", "model_size": 6.74, "model_architectures": "LlamaForCausalLM" }, { "description": "rinna-llama-3-youko-8b-gguf rinnaさんが公開しているllama-3-youko-8bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/rinna-llama-3-youko-8b-gguf", "project_name": "rinna-llama-3-youko-8b-gguf", "downloads": 516, "source": "Hugging Face", "score": -0.08408785718381305, "first_commit": "2024-05-01 14:17:53", "latest_commit": "2024-05-01 15:11:21", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null }, { "description": "This is a Japanese translated version of HumanEval, an evaluation harness for the HumanEval problem solving dataset described in the paper \"Evaluating Large Language Models Trained on Code\".", "url": "https://huggingface.co/datasets/kogi-jwu/jhumaneval", "project_name": "jhumaneval", "downloads": 516, "source": "Hugging Face", "score": -0.08408785718381305, "first_commit": "2023-10-21 08:20:14", "latest_commit": "2024-01-10 21:52:35", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Model Card for Japanese character-level DeBERTa V2 tiny Model description This is a Japanese DeBERTa V2 tiny model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.", "url": "https://huggingface.co/ku-nlp/deberta-v2-tiny-japanese-char-wwm", "project_name": "deberta-v2-tiny-japanese-char-wwm", "downloads": 515, "source": "Hugging Face", "score": -0.08409686820178458, "first_commit": "2023-01-05 08:48:29", "latest_commit": "2023-03-23 07:31:19", "languages": [], "model_or_dataset": "model", "model_size": 0.0101, "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "🎈 FlexDreamHK FlexDreamHKはリークされたNovelAIモデルの入っていない、あるいはそのリスクを可能な限り低くしたモデルを目指して作成しました。 ", "url": "https://huggingface.co/den2nova/FlexDreamHK", "project_name": "FlexDreamHK", "downloads": 513, "source": "Hugging Face", "score": -0.08411489023772763, "first_commit": "2023-07-06 10:11:45", "latest_commit": "2023-07-29 04:21:29", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "Japanese StableLM-3B-4E1T Base Model Description This is a 3B-parameter decoder-only language model with a focus on maximizing Japanese language modeling performance and Japanese downstream task performance.", "url": "https://huggingface.co/stabilityai/japanese-stablelm-3b-4e1t-base", "project_name": "japanese-stablelm-3b-4e1t-base", "downloads": 511, "source": "Hugging Face", "score": -0.08413291227367067, "first_commit": "2023-10-16 06:04:58", "latest_commit": "2024-04-26 03:20:34", "languages": [], "model_or_dataset": "model", "model_size": 2.8, "model_architectures": "StableLMEpochForCausalLM" }, { "description": "Japanese StableLM-3B-4E1T Instruct Model Description", "url": "https://huggingface.co/stabilityai/japanese-stablelm-3b-4e1t-instruct", "project_name": "japanese-stablelm-3b-4e1t-instruct", "downloads": 510, "source": "Hugging Face", "score": -0.0841419232916422, "first_commit": "2023-10-16 07:50:31", "latest_commit": "2024-04-26 03:20:42", "languages": [], "model_or_dataset": "model", "model_size": 2.8, "model_architectures": "StableLMEpochForCausalLM" }, { "description": "読み込み方 from datasets import load_dataset dataset = load_dataset(\"YANS-official/senryu-shashin\", split=\"train\") 概要 株式会社東建コーポレーションが運営するホームメイト・リサーチによる『ホームメイト川柳大賞』のうち、お題が画像形式で提供される『写真川柳』に関するクロールデータです。", "url": "https://huggingface.co/datasets/YANS-official/senryu-shashin", "project_name": "senryu-shashin", "downloads": 509, "source": "Hugging Face", "score": -0.08415093430961372, "first_commit": "2024-08-28 18:50:08", "latest_commit": "2024-08-31 03:47:50", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "mt5_summarize_japanese (Japanese caption : 日本語の要約のモデル)", "url": "https://huggingface.co/tsmatz/mt5_summarize_japanese", "project_name": "mt5_summarize_japanese", "downloads": 507, "source": "Hugging Face", "score": -0.08416895634555678, "first_commit": "2022-11-26 10:51:27", "latest_commit": "2024-07-12 00:01:31", "languages": [], "model_or_dataset": "model", "model_size": 0.3, "model_architectures": "MT5ForConditionalGeneration" }, { "description": "Wav2Vec2-Large-Japanese Fine-tuned facebook/wav2vec2-large-xlsr-53 on Japanese using the Common Voice, JSUT, TEDxJP and some other data.", "url": "https://huggingface.co/NTQAI/wav2vec2-large-japanese", "project_name": "wav2vec2-large-japanese", "downloads": 506, "source": "Hugging Face", "score": -0.0841779673635283, "first_commit": "2021-07-05 02:44:40", "latest_commit": "2023-02-17 13:07:47", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "Wav2Vec2ForCTC" }, { "description": "このツールは、複数のデータセットを横断して日本語の大規模言語モデルを自動評価するものです.", "url": "https://github.com/llm-jp/llm-jp-eval", "project_name": "llm-jp-eval", "stargazers_count": 84, "source": "GitHub", "score": -0.08421079146869723, "first_commit": "2023-10-19 19:36:10", "latest_commit": "2024-07-18 21:31:53", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Japanese CLIP ViT-H/14 (Wider) Table of Contents Overview Usage Model Details Evaluation Limitations and Biases Citation See Also Contact Information Overview Developed by:", "url": "https://huggingface.co/hakuhodo-tech/japanese-clip-vit-h-14-bert-wider", "project_name": "japanese-clip-vit-h-14-bert-wider", "downloads": 500, "source": "Hugging Face", "score": -0.08423203347135745, "first_commit": "2024-03-06 03:30:25", "latest_commit": "2024-03-06 21:46:11", "languages": [], "model_or_dataset": "model", "model_size": 0.91, "model_architectures": "CustomCLIPModel" }, { "description": "Overview This dataset provides a convenient and user-friendly format of data from Aozora Bunko (青空文庫), a website that compiles public-domain books in Japan, ideal for Machine Learning applications.", "url": "https://huggingface.co/datasets/globis-university/aozorabunko-clean", "project_name": "aozorabunko-clean", "downloads": 497, "source": "Hugging Face", "score": -0.08425906652527203, "first_commit": "2023-06-26 13:31:28", "latest_commit": "2023-10-27 13:22:32", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "YuisekinAIEvol-Mistral-7B-ja-math-v0.1.1-gguf yuisekiさんが公開しているYuisekinAIEvol-Mistral-7B-ja-math-v0.1.1のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/YuisekinAIEvol-Mistral-7B-ja-math-v0.1.1-gguf", "project_name": "YuisekinAIEvol-Mistral-7B-ja-math-v0.1.1-gguf", "downloads": 496, "source": "Hugging Face", "score": -0.08426807754324354, "first_commit": "2024-04-29 14:18:07", "latest_commit": "2024-04-29 15:52:08", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "llm-jp-3-13b-instruct This repository provides large language models developed by the Research and Development Center for Large Language Models at the National Institute of Informatics.", "url": "https://huggingface.co/llm-jp/llm-jp-3-13b-instruct", "project_name": "llm-jp-3-13b-instruct", "downloads": 492, "source": "Hugging Face", "score": -0.08430412161512965, "first_commit": "2024-09-23 13:17:09", "latest_commit": "2024-09-26 18:21:20", "languages": [], "model_or_dataset": "model", "model_size": 13.7, "model_architectures": "LlamaForCausalLM" }, { "description": "QuantFactory/TinySlime-1.1B-Chat-v1.0-GGUF", "url": "https://huggingface.co/QuantFactory/TinySlime-1.1B-Chat-v1.0-GGUF", "project_name": "TinySlime-1.1B-Chat-v1.0-GGUF", "downloads": 491, "source": "Hugging Face", "score": -0.08431313263310117, "first_commit": "2024-09-11 05:27:47", "latest_commit": "2024-09-11 05:55:46", "languages": [], "model_or_dataset": "model", "model_size": 1.1, "model_architectures": null }, { "description": "Llama-3-8B-Japanese-Instruct-GGUF Original Model haqishen/Llama-3-8B-Japanese-Instruct Run with LlamaEdge LlamaEdge version: v0.10.1 and above Prompt template Prompt type: llama-3-chat Prompt string <|begin_of_text|><|start_header_id|>system<|end_header_id|> {{ system_prompt }}<|eot_id|><|start_header_id|>user<|end_header_id|> {{ user_message_1 }}<|eot_id|><|start_header_id|>assistant<|end_header_id|> {{ model_answer_1 }}<|eot_id|><|start_header", "url": "https://huggingface.co/second-state/Llama-3-8B-Japanese-Instruct-GGUF", "project_name": "Llama-3-8B-Japanese-Instruct-GGUF", "downloads": 481, "source": "Hugging Face", "score": -0.08440324281281641, "first_commit": "2024-05-14 05:37:53", "latest_commit": "2024-05-14 06:42:38", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", "url": "https://huggingface.co/TheBloke/japanese-stablelm-instruct-gamma-7B-GGUF", "project_name": "japanese-stablelm-instruct-gamma-7B-GGUF", "downloads": 455, "source": "Hugging Face", "score": -0.08463752928007606, "first_commit": "2023-10-28 19:03:17", "latest_commit": "2023-10-28 19:07:41", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "Anime with caption CC-0 dataset このデータセットはイラストに対する日本語キャプションを 倫理的に学習しやすくするためのデータセットです。 ", "url": "https://huggingface.co/datasets/alfredplpl/anime-with-caption-cc0", "project_name": "anime-with-caption-cc0", "downloads": 447, "source": "Hugging Face", "score": -0.08470961742384826, "first_commit": "2024-06-03 04:37:13", "latest_commit": "2024-06-03 05:49:20", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "japanese-gpt-neox-small This repository provides a small-sized Japanese GPT-NeoX model.", "url": "https://huggingface.co/rinna/japanese-gpt-neox-small", "project_name": "japanese-gpt-neox-small", "downloads": 443, "source": "Hugging Face", "score": -0.08474566149573436, "first_commit": "2022-08-31 05:58:25", "latest_commit": "2024-07-20 07:53:40", "languages": [], "model_or_dataset": "model", "model_size": 0.20400000000000001, "model_architectures": "GPTNeoXForCausalLM" }, { "description": "Qwen1.5-110B-Chat-gguf Qwenさんが公開しているQwen1.5-110B-Chatのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Qwen1.5-110B-Chat-gguf", "project_name": "Qwen1.5-110B-Chat-gguf", "downloads": 442, "source": "Hugging Face", "score": -0.08475467251370589, "first_commit": "2024-04-27 19:35:48", "latest_commit": "2024-04-28 08:09:17", "languages": [], "model_or_dataset": "model", "model_size": 111.0, "model_architectures": null }, { "description": "読み込み方 from datasets import load_dataset dataset = load_dataset(\"YANS-official/senryu-test\", split=\"test\") 概要 川柳投稿サイトの『写真川柳』と『川柳投稿まるせん』のクロールデータ、および YANS 委員が作成したデータを含みます。 ", "url": "https://huggingface.co/datasets/YANS-official/senryu-test", "project_name": "senryu-test", "downloads": 442, "source": "Hugging Face", "score": -0.08475467251370589, "first_commit": "2024-09-03 15:02:47", "latest_commit": "2024-09-09 05:53:26", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "calm3-22b-RP-v2 GGUF版はこちら/Click here for the GGUF version また、こちらで本モデルのデモを公開しています。", "url": "https://huggingface.co/Aratako/calm3-22b-RP-v2", "project_name": "calm3-22b-RP-v2", "downloads": 441, "source": "Hugging Face", "score": -0.08476368353167742, "first_commit": "2024-09-12 11:29:23", "latest_commit": "2024-09-16 05:53:42", "languages": [], "model_or_dataset": "model", "model_size": 22.5, "model_architectures": "LlamaForCausalLM" }, { "description": "PLaMo-13B-Instruct-NC Model Description PLaMo-13B-Instruct-NC is a noncommercial instruct fine-tuned model built upon the 8192 context length version of PLaMo-13B text generation model.", "url": "https://huggingface.co/pfnet/plamo-13b-instruct-nc", "project_name": "plamo-13b-instruct-nc", "downloads": 438, "source": "Hugging Face", "score": -0.08479071658559198, "first_commit": "2023-10-26 05:36:26", "latest_commit": "2024-01-25 07:46:45", "languages": [], "model_or_dataset": "model", "model_size": 13.1, "model_architectures": "PlamoForCausalLM" }, { "description": "This repository is publicly accessible, but you have to accept the conditions to access its files and content.", "url": "https://huggingface.co/stabilityai/japanese-stablelm-instruct-alpha-7b", "project_name": "japanese-stablelm-instruct-alpha-7b", "downloads": 437, "source": "Hugging Face", "score": -0.08479972760356351, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "bert-base-japanese-wikipedia-ud-head Model Description", "url": "https://huggingface.co/KoichiYasuoka/bert-base-japanese-wikipedia-ud-head", "project_name": "bert-base-japanese-wikipedia-ud-head", "downloads": 434, "source": "Hugging Face", "score": -0.08482676065747809, "first_commit": "2022-06-20 21:58:53", "latest_commit": "2023-03-04 20:16:55", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForQuestionAnswering" }, { "description": "Llama-3-Swallow-8B-Instruct-v0.1-gguf tokyotech-llmさんが公開しているLlama-3-Swallow-8B-Instruct-v0.1のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Llama-3-Swallow-8B-Instruct-v0.1-gguf", "project_name": "Llama-3-Swallow-8B-Instruct-v0.1-gguf", "downloads": 431, "source": "Hugging Face", "score": -0.08485379371139266, "first_commit": "2024-07-01 16:42:54", "latest_commit": "2024-07-02 10:43:55", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null }, { "description": "umiyuki-Umievo-itr012-Gleipnir-7B-gguf umiyukiさんが公開しているUmievo-itr012-Gleipnir-7Bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/umiyuki-Umievo-itr012-Gleipnir-7B-gguf", "project_name": "umiyuki-Umievo-itr012-Gleipnir-7B-gguf", "downloads": 431, "source": "Hugging Face", "score": -0.08485379371139266, "first_commit": "2024-05-29 15:05:32", "latest_commit": "2024-05-29 15:53:40", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "Model Card for Japanese BART base Model description This is a Japanese BART base model pre-trained on Japanese Wikipedia.", "url": "https://huggingface.co/ku-nlp/bart-base-japanese", "project_name": "bart-base-japanese", "downloads": 427, "source": "Hugging Face", "score": -0.08488983778327876, "first_commit": "2023-05-09 07:00:51", "latest_commit": "2023-05-12 11:03:20", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MBartForConditionalGeneration" }, { "description": "Model Card for Japanese DeBERTa V2 tiny Model description", "url": "https://huggingface.co/ku-nlp/deberta-v2-tiny-japanese", "project_name": "deberta-v2-tiny-japanese", "downloads": 426, "source": "Hugging Face", "score": -0.08489884880125029, "first_commit": "2023-01-18 13:36:09", "latest_commit": "2023-03-23 16:13:46", "languages": [], "model_or_dataset": "model", "model_size": 0.013900000000000001, "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-13b-instruct-v0.1", "project_name": "Swallow-13b-instruct-v0.1", "downloads": 425, "source": "Hugging Face", "score": -0.0849078598192218, "first_commit": "2024-03-04 11:30:28", "latest_commit": "2024-06-29 09:00:15", "languages": [], "model_or_dataset": "model", "model_size": 13.1, "model_architectures": "LlamaForCausalLM" }, { "description": "Ruri: Japanese General Text Embeddings Usage Direct Usage (Sentence Transformers)", "url": "https://huggingface.co/cl-nagoya/ruri-base", "project_name": "ruri-base", "downloads": 413, "source": "Hugging Face", "score": -0.08501599203488011, "first_commit": "2024-08-28 13:09:10", "latest_commit": "2024-09-04 08:49:23", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertModel" }, { "description": "aixsatoshi-Honyaku-13b-gguf aixsatoshiさんが公開しているHonyaku-13bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/aixsatoshi-Honyaku-13b-gguf", "project_name": "aixsatoshi-Honyaku-13b-gguf", "downloads": 413, "source": "Hugging Face", "score": -0.08501599203488011, "first_commit": "2024-05-19 08:07:15", "latest_commit": "2024-05-19 09:24:59", "languages": [], "model_or_dataset": "model", "model_size": 13.1, "model_architectures": null }, { "description": "hh-rlhf-12k-ja This repository provides a human preference dataset developed by LLM-jp, a collaborative project launched in Japan.", "url": "https://huggingface.co/datasets/llm-jp/hh-rlhf-12k-ja", "project_name": "hh-rlhf-12k-ja", "downloads": 413, "source": "Hugging Face", "score": -0.08501599203488011, "first_commit": "2024-02-04 21:19:53", "latest_commit": "2024-02-04 21:45:59", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "RoSEtta RoSEtta (RoFormer-based Sentence Encoder through Distillation) is a general Japanese text embedding model, excelling in retrieval tasks.", "url": "https://huggingface.co/pkshatech/RoSEtta-base-ja", "project_name": "RoSEtta-base-ja", "downloads": 402, "source": "Hugging Face", "score": -0.08511511323256687, "first_commit": "2024-08-22 03:25:13", "latest_commit": "2024-09-27 05:47:42", "languages": [], "model_or_dataset": "model", "model_size": 0.19, "model_architectures": "RetrievaBertModel" }, { "description": "GitHub リポジトリ ids-cv/wrime で公開されているデータセットを利用しています。 ", "url": "https://huggingface.co/datasets/llm-book/wrime-sentiment", "project_name": "wrime-sentiment", "downloads": 401, "source": "Hugging Face", "score": -0.0851241242505384, "first_commit": "2023-07-29 06:38:26", "latest_commit": "2023-10-06 00:56:38", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Additional pretrained BERT base Japanese finance This is a BERT model pretrained on texts in the Japanese language.", "url": "https://huggingface.co/izumi-lab/bert-base-japanese-fin-additional", "project_name": "bert-base-japanese-fin-additional", "downloads": 400, "source": "Hugging Face", "score": -0.08513313526850992, "first_commit": "2022-03-11 17:41:11", "latest_commit": "2022-12-09 00:40:25", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForPreTraining" }, { "description": "読み込み方 from datasets import load_dataset dataset = load_dataset(\"YANS-official/senryu-marusen\", split=\"train\") 概要 月に1万句以上の投稿がある国内最大級の川柳投稿サイト『川柳投稿まるせん』のクロールデータです。", "url": "https://huggingface.co/datasets/YANS-official/senryu-marusen", "project_name": "senryu-marusen", "downloads": 398, "source": "Hugging Face", "score": -0.08515115730445298, "first_commit": "2024-08-28 18:49:03", "latest_commit": "2024-08-30 11:41:46", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Rakuda - Questions for Japanese models Repository:", "url": "https://huggingface.co/datasets/yuzuai/rakuda-questions", "project_name": "rakuda-questions", "downloads": 397, "source": "Hugging Face", "score": -0.0851601683224245, "first_commit": "2023-06-23 01:08:54", "latest_commit": "2023-06-23 08:01:35", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Phi-3-medium-128k-instruct-gguf microsoftさんが公開しているPhi-3-medium-128k-instructのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Phi-3-medium-128k-instruct-gguf", "project_name": "Phi-3-medium-128k-instruct-gguf", "downloads": 391, "source": "Hugging Face", "score": -0.08521423443025365, "first_commit": "2024-05-22 15:27:33", "latest_commit": "2024-05-22 16:56:55", "languages": [], "model_or_dataset": "model", "model_size": 14.0, "model_architectures": null }, { "description": "llm-book/bert-base-japanese-v3-crf-ner-wikipedia-dataset 「大規模言語モデル入門」の第6章で紹介している固有表現認識のモデルです。 ", "url": "https://huggingface.co/llm-book/bert-base-japanese-v3-crf-ner-wikipedia-dataset", "project_name": "bert-base-japanese-v3-crf-ner-wikipedia-dataset", "downloads": 387, "source": "Hugging Face", "score": -0.08525027850213975, "first_commit": "2023-05-28 08:19:43", "latest_commit": "2023-07-25 15:04:39", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertWithCrfForTokenClassification" }, { "description": "datagemma-rag-27b-it-gguf googleさんが公開しているdatagemma-rag-27b-itのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/datagemma-rag-27b-it-gguf", "project_name": "datagemma-rag-27b-it-gguf", "downloads": 385, "source": "Hugging Face", "score": -0.0852683005380828, "first_commit": "2024-09-12 18:03:45", "latest_commit": "2024-09-12 19:57:32", "languages": [], "model_or_dataset": "model", "model_size": 27.2, "model_architectures": null }, { "description": "JA-VG-VQA-500 Dataset Description JA-VG-VQA-500 is a 500-sample subset of Japanese Visual Genome VQA dataset.", "url": "https://huggingface.co/datasets/SakanaAI/JA-VG-VQA-500", "project_name": "JA-VG-VQA-500", "downloads": 379, "source": "Hugging Face", "score": -0.08532236664591195, "first_commit": "2024-03-21 09:51:10", "latest_commit": "2024-05-14 04:11:31", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "hubert-base-jtube This repo provides model weights for the hubert-base model trained on the JTubeSpeech corpus. ", "url": "https://huggingface.co/sarulab-speech/hubert-base-jtube", "project_name": "hubert-base-jtube", "downloads": 377, "source": "Hugging Face", "score": -0.08534038868185499, "first_commit": "2024-02-02 04:15:22", "latest_commit": "2024-02-05 11:49:57", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "HubertModel" }, { "description": "このモデルはluke-japanese-baseをファインチューニングして、固有表現抽出(NER)に用いれるようにしたものです。 ", "url": "https://huggingface.co/Mizuiro-sakura/luke-japanese-base-finetuned-ner", "project_name": "luke-japanese-base-finetuned-ner", "downloads": 375, "source": "Hugging Face", "score": -0.08535841071779805, "first_commit": "2023-01-17 23:36:52", "latest_commit": "2023-05-12 00:36:17", "languages": [], "model_or_dataset": "model", "model_size": 0.279, "model_architectures": "LukeForTokenClassification" }, { "description": "Tanuki-8B-dpo-v1.0-GGUF 概要 GENIAC 松尾研 LLM開発プロジェクトで開発されたLLMであるweblab-GENIAC/Tanuki-8B-dpo-v1.0-4kのGGUF量子化モデルです。", "url": "https://huggingface.co/team-hatakeyama-phase2/Tanuki-8B-dpo-v1.0-4k-GGUF", "project_name": "Tanuki-8B-dpo-v1.0-4k-GGUF", "downloads": 374, "source": "Hugging Face", "score": -0.08536742173576957, "first_commit": "2024-08-16 12:39:31", "latest_commit": "2024-08-27 18:05:25", "languages": [], "model_or_dataset": "model", "model_size": 7.51, "model_architectures": null }, { "description": "Japanese GPT2 Lyric Model Model description", "url": "https://huggingface.co/skytnt/gpt2-japanese-lyric-small", "project_name": "gpt2-japanese-lyric-small", "downloads": 374, "source": "Hugging Face", "score": -0.08536742173576957, "first_commit": "2022-04-21 04:25:18", "latest_commit": "2023-10-23 12:46:36", "languages": [], "model_or_dataset": "model", "model_size": 0.123, "model_architectures": "GPT2LMHeadModel" }, { "description": "recruit-jp/japanese-typo-detector-roberta-base モデルの概要 日本語の文章を入力すると各文字ごとに誤字脱字である確率を出力します 各ラベルの意味は以下の通りです id label meaning 0 OK 誤字なし 1 deletion 1文字の抜け 2 insertion_a 余分な1文字の挿入 3 insertion_b 直前の文字列と一致する2文字以上の余分な文字の挿入 4 kanji-conversion_a 同一の読みを持つ漢字の入れ替え(誤変換) 5 kanji-conversion_b 近い読みを持つ漢字の入れ替え(誤変換) 6 substitution 1文字の入れ替え 7 transposition 隣接する2文字間の転置 8 others その他の入力誤り 誤り種類の詳細については学習データセットの元論文をご参照ください 日本語 Wikipedia の編集履歴に基づく 入力誤りデータセットと訂正システムの改良 その他、モデルの詳細については当社ブログ記事をご参照ください 誤字脱字検出モデルをHugging Face Hubに公開しました (Re", "url": "https://huggingface.co/recruit-jp/japanese-typo-detector-roberta-base", "project_name": "japanese-typo-detector-roberta-base", "downloads": 367, "source": "Hugging Face", "score": -0.08543049886157025, "first_commit": "2023-11-09 06:27:40", "latest_commit": "2023-12-21 03:07:31", "languages": [], "model_or_dataset": "model", "model_size": 0.0996, "model_architectures": "RobertaForTokenClassification" }, { "description": "Githubリポジトリstockmarkteam/ner-wikipedia-datasetで公開されているデータセットを利用しています。", "url": "https://huggingface.co/datasets/llm-book/ner-wikipedia-dataset", "project_name": "ner-wikipedia-dataset", "downloads": 367, "source": "Hugging Face", "score": -0.08543049886157025, "first_commit": "2023-04-15 10:43:21", "latest_commit": "2023-12-12 11:25:51", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "rinna/japanese-hubert-large Overview This is a Japanese HuBERT Large model trained by rinna Co.", "url": "https://huggingface.co/rinna/japanese-hubert-large", "project_name": "japanese-hubert-large", "downloads": 363, "source": "Hugging Face", "score": -0.08546654293345635, "first_commit": "2024-03-05 10:24:37", "latest_commit": "2024-07-22 08:12:21", "languages": [], "model_or_dataset": "model", "model_size": 0.315, "model_architectures": "HubertModel" }, { "description": "ELYZA-japanese-Llama-2-13b-fast-gguf ELYZAさんが公開しているELYZA-japanese-Llama-2-13b-fastのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/ELYZA-japanese-Llama-2-13b-fast-gguf", "project_name": "ELYZA-japanese-Llama-2-13b-fast-gguf", "downloads": 362, "source": "Hugging Face", "score": -0.08547555395142788, "first_commit": "2023-12-27 10:40:52", "latest_commit": "2023-12-27 13:18:46", "languages": [], "model_or_dataset": "model", "model_size": 13.1, "model_architectures": null }, { "description": "Wav2Vec2-Large-XLSR-53-Japanese Fine-tuned facebook/wav2vec2-large-xlsr-53 on Japanese using the Common Voice and Japanese speech corpus of Saruwatari-lab, University of Tokyo JSUT.", "url": "https://huggingface.co/vumichien/wav2vec2-large-xlsr-japanese", "project_name": "wav2vec2-large-xlsr-japanese", "downloads": 362, "source": "Hugging Face", "score": -0.08547555395142788, "first_commit": "2021-03-28 04:21:20", "latest_commit": "2023-02-08 00:15:23", "languages": [], "model_or_dataset": "model", "model_size": 0.318, "model_architectures": "Wav2Vec2ForCTC" }, { "description": "aixsatoshi-Llama-3-8b-Cosmopedia-japanese-gguf aixsatoshiさんが公開しているLlama-3-8b-Cosmopedia-japaneseのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/aixsatoshi-Llama-3-8b-Cosmopedia-japanese-gguf", "project_name": "aixsatoshi-Llama-3-8b-Cosmopedia-japanese-gguf", "downloads": 360, "source": "Hugging Face", "score": -0.08549357598737092, "first_commit": "2024-05-01 12:36:43", "latest_commit": "2024-05-19 08:27:21", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "日本語T5 Prefix Language Model", "url": "https://huggingface.co/sonoisa/t5-base-japanese-adapt", "project_name": "t5-base-japanese-adapt", "downloads": 357, "source": "Hugging Face", "score": -0.0855206090412855, "first_commit": "2022-08-27 08:51:11", "latest_commit": "2022-11-05 09:34:10", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration" }, { "description": "alabnii/jmedroberta-base-sentencepiece-vocab50000 Model description This is a Japanese RoBERTa base model pre-trained on academic articles in medical sciences collected by Japan Science and Technology Agency (JST).", "url": "https://huggingface.co/alabnii/jmedroberta-base-sentencepiece-vocab50000", "project_name": "jmedroberta-base-sentencepiece-vocab50000", "downloads": 354, "source": "Hugging Face", "score": -0.08554764209520006, "first_commit": "2022-12-22 17:22:14", "latest_commit": "2023-06-27 03:44:17", "languages": [], "model_or_dataset": "model", "model_size": 0.124, "model_architectures": "BertForMaskedLM" }, { "description": "このモデルはluke-japanese-baseをファインチューニングして、JNLI(文章の関係性判別)に用いれるようにしたものです。 ", "url": "https://huggingface.co/Mizuiro-sakura/luke-japanese-base-finetuned-jnli", "project_name": "luke-japanese-base-finetuned-jnli", "downloads": 353, "source": "Hugging Face", "score": -0.0855566531131716, "first_commit": "2023-02-11 18:39:14", "latest_commit": "2023-07-21 14:09:44", "languages": [], "model_or_dataset": "model", "model_size": 0.279, "model_architectures": "LukeForSequenceClassification" }, { "description": "Orion-14B 🌐English | 🇨", "url": "https://huggingface.co/OrionStarAI/Orion-14B-Chat-RAG", "project_name": "Orion-14B-Chat-RAG", "downloads": 352, "source": "Hugging Face", "score": -0.08556566413114312, "first_commit": "2024-01-16 12:19:08", "latest_commit": "2024-03-26 10:08:09", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "OrionForCausalLM" }, { "description": "DeBERTa V2 small Japanese This is a DeBERTaV2 model pretrained on Japanese texts.", "url": "https://huggingface.co/izumi-lab/deberta-v2-small-japanese", "project_name": "deberta-v2-small-japanese", "downloads": 352, "source": "Hugging Face", "score": -0.08556566413114312, "first_commit": "2023-10-21 13:24:28", "latest_commit": "2024-07-19 03:08:14", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "By clicking \"Agree\", you agree to the License Agreement and acknowledge Stability AI's Privacy Policy.", "url": "https://huggingface.co/stabilityai/japanese-stable-clip-vit-l-16", "project_name": "japanese-stable-clip-vit-l-16", "downloads": 351, "source": "Hugging Face", "score": -0.08557467514911464, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": 0.41400000000000003, "model_architectures": null }, { "description": "記事本文からタイトルを生成するモデル SEE: https://qiita.com/sonoisa/items/a9af64ff641f0bbfed44", "url": "https://huggingface.co/sonoisa/t5-base-japanese-title-generation", "project_name": "t5-base-japanese-title-generation", "downloads": 349, "source": "Hugging Face", "score": -0.08559269718505769, "first_commit": "2021-04-04 06:57:18", "latest_commit": "2022-02-21 13:38:09", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration" }, { "description": "Model Card for Japanese DeBERTa V2 large Model description This is a Japanese DeBERTa V2 large model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.", "url": "https://huggingface.co/ku-nlp/deberta-v2-large-japanese", "project_name": "deberta-v2-large-japanese", "downloads": 347, "source": "Hugging Face", "score": -0.08561071922100075, "first_commit": "2023-01-07 07:45:25", "latest_commit": "2023-05-12 14:10:35", "languages": [], "model_or_dataset": "model", "model_size": 0.373, "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "Japanese to Korean translator Japanese to Korean translator model based on EncoderDecoderModel(bert-japanese+kogpt2)", "url": "https://huggingface.co/sappho192/aihub-ja-ko-translator", "project_name": "aihub-ja-ko-translator", "downloads": 342, "source": "Hugging Face", "score": -0.08565577431085837, "first_commit": "2024-02-05 00:51:47", "latest_commit": "2024-06-28 06:38:39", "languages": [], "model_or_dataset": "model", "model_size": 0.265, "model_architectures": "EncoderDecoderModel" }, { "description": "Japanese-Starling-ChatV-7B-GGUF GGUF conversion of \"Japanese-Starling-ChatV-7B\" \"Japanese-Starling-ChatV-7B\" is a Japanese chat model built on top of \"chatntq-ja-7b-v1.0\", originally based on Mistral-7B-v0.1.", "url": "https://huggingface.co/TFMC/Japanese-Starling-ChatV-7B-GGUF", "project_name": "Japanese-Starling-ChatV-7B-GGUF", "downloads": 340, "source": "Hugging Face", "score": -0.08567379634680142, "first_commit": "2024-04-14 12:42:01", "latest_commit": "2024-04-20 01:23:10", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "日本語VL-T5事前学習済みモデル", "url": "https://huggingface.co/sonoisa/vl-t5-base-japanese", "project_name": "vl-t5-base-japanese", "downloads": 339, "source": "Hugging Face", "score": -0.08568280736477295, "first_commit": "2021-10-03 11:54:43", "latest_commit": "2021-10-04 11:13:35", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "VLT5ModelWrapper" }, { "description": "OcuteusのGGUF版です。 ", "url": "https://huggingface.co/Local-Novel-LLM-project/Ocuteus-v1-gguf", "project_name": "Ocuteus-v1-gguf", "downloads": 334, "source": "Hugging Face", "score": -0.08572786245463057, "first_commit": "2024-05-07 09:57:49", "latest_commit": "2024-05-10 06:18:35", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "This repository contains some GGUF quantizations of the merge of the VNTL LLaMA 3 8B qlora.", "url": "https://huggingface.co/lmg-anon/vntl-llama3-8b-gguf", "project_name": "vntl-llama3-8b-gguf", "downloads": 334, "source": "Hugging Face", "score": -0.08572786245463057, "first_commit": "2024-06-13 17:17:30", "latest_commit": "2024-06-15 17:33:02", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null }, { "description": "llm-book/t5-base-long-livedoor-news-corpus 「大規模言語モデル入門」の第7章で紹介している要約生成のモデルです。 ", "url": "https://huggingface.co/llm-book/t5-base-long-livedoor-news-corpus", "project_name": "t5-base-long-livedoor-news-corpus", "downloads": 329, "source": "Hugging Face", "score": -0.0857729175444882, "first_commit": "2023-06-27 13:32:54", "latest_commit": "2023-07-25 13:10:36", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration" }, { "description": "What’s this?", "url": "https://huggingface.co/globis-university/deberta-v3-japanese-base", "project_name": "deberta-v3-japanese-base", "downloads": 323, "source": "Hugging Face", "score": -0.08582698365231733, "first_commit": "2023-09-21 16:19:31", "latest_commit": "2024-07-05 05:49:13", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "tokyotech-llm-Swallow-13b-instruct-v0.1-gguf tokyotech-llmさんが公開しているSwallow-13b-instruct-v0.1のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/tokyotech-llm-Swallow-13b-instruct-v0.1-gguf", "project_name": "tokyotech-llm-Swallow-13b-instruct-v0.1-gguf", "downloads": 322, "source": "Hugging Face", "score": -0.08583599467028886, "first_commit": "2024-05-02 14:18:27", "latest_commit": "2024-05-03 04:36:24", "languages": [], "model_or_dataset": "model", "model_size": 13.1, "model_architectures": null }, { "description": "Swallow Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", "url": "https://huggingface.co/tokyotech-llm/Swallow-70b-instruct-v0.1", "project_name": "Swallow-70b-instruct-v0.1", "downloads": 321, "source": "Hugging Face", "score": -0.08584500568826038, "first_commit": "2024-03-06 14:39:34", "latest_commit": "2024-06-29 09:00:17", "languages": [], "model_or_dataset": "model", "model_size": 69.2, "model_architectures": "LlamaForCausalLM" }, { "description": "HODACHI-Borea-Phi-3.5-mini-Instruct-Common-gguf HODACHIさんが公開しているBorea-Phi-3.5-mini-Instruct-Commonのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/HODACHI-Borea-Phi-3.5-mini-Instruct-Common-gguf", "project_name": "HODACHI-Borea-Phi-3.5-mini-Instruct-Common-gguf", "downloads": 318, "source": "Hugging Face", "score": -0.08587203874217496, "first_commit": "2024-08-21 10:33:58", "latest_commit": "2024-08-21 11:42:56", "languages": [], "model_or_dataset": "model", "model_size": 3.82, "model_architectures": null }, { "description": "This is a Japanese sentence-LUKE model.", "url": "https://huggingface.co/cheonboy/sentence_embedding_japanese", "project_name": "sentence_embedding_japanese", "downloads": 317, "source": "Hugging Face", "score": -0.08588104976014649, "first_commit": "2023-10-05 05:10:25", "latest_commit": "2023-10-05 05:13:09", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LukeModel" }, { "description": "rinna/japanese-gpt-neox-3.6b-instruction-ppo rinnaさんが公開しているjapanese-gpt-neox-3.6b-instruction-ppoのgguf変換版です。 ", "url": "https://huggingface.co/mmnga/rinna-japanese-gpt-neox-3.6b-instruction-ppo-gguf", "project_name": "rinna-japanese-gpt-neox-3.6b-instruction-ppo-gguf", "downloads": 313, "source": "Hugging Face", "score": -0.08591709383203258, "first_commit": "2023-09-02 17:52:26", "latest_commit": "2023-09-08 02:39:00", "languages": [], "model_or_dataset": "model", "model_size": 3.61, "model_architectures": null }, { "description": "umiyuki-Japanese-Chat-Umievo-itr001-7b-gguf umiyukiさんが公開しているJapanese-Chat-Umievo-itr001-7bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/umiyuki-Japanese-Chat-Umievo-itr001-7b-gguf", "project_name": "umiyuki-Japanese-Chat-Umievo-itr001-7b-gguf", "downloads": 309, "source": "Hugging Face", "score": -0.08595313790391869, "first_commit": "2024-04-27 09:55:39", "latest_commit": "2024-04-27 10:52:17", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "Parakeet TDT-CTC 0.6B (ja) | | parakeet-tdt_ctc-0.6b-ja is an ASR model that transcribes Japanese speech with Punctuations.", "url": "https://huggingface.co/nvidia/parakeet-tdt_ctc-0.6b-ja", "project_name": "parakeet-tdt_ctc-0.6b-ja", "downloads": 308, "source": "Hugging Face", "score": -0.0859621489218902, "first_commit": "2024-05-13 15:39:30", "latest_commit": "2024-05-17 17:20:17", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "Fugaku-LLM利用規約 この利用規約(以下「本規約」といいます)は、富士通株式会社、国立研究開発法人理化学研究所、国立大学法人東京工業大学、国立大学法人東北大学、株式会社サイバーエージェント、国立大学法人東海国立大学機構、及び株式会社Kotoba Technologies Japan (以下「開発者」といいます)による、スーパーコンピュータ「富岳」政策対応枠における大規模言語モデル分散並列学習手法の開発の成果物として公開する大規模言語モデル(以下「Fugaku-LLM」といいます)の利用に関する条件を定めるものです。", "url": "https://huggingface.co/Fugaku-LLM/Fugaku-LLM-13B-instruct", "project_name": "Fugaku-LLM-13B-instruct", "downloads": 306, "source": "Hugging Face", "score": -0.08598017095783327, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": 13.2, "model_architectures": null }, { "description": "Ruri: Japanese General Text Embeddings Usage Direct Usage (Sentence Transformers)", "url": "https://huggingface.co/cl-nagoya/ruri-small", "project_name": "ruri-small", "downloads": 306, "source": "Hugging Face", "score": -0.08598017095783327, "first_commit": "2024-08-28 16:23:12", "latest_commit": "2024-09-04 08:49:30", "languages": [], "model_or_dataset": "model", "model_size": 0.0681, "model_architectures": "DistilBertModel" }, { "description": "AutoWikiQA 東工大が公開しているSwallow-MXを用いて、Wikipedia中のテキストを入力として「質問(query)」と「回答(answer)」を生成し、生成された質問と回答についてフィルタリングを行ったデータセットです。", "url": "https://huggingface.co/datasets/cl-nagoya/auto-wiki-qa", "project_name": "auto-wiki-qa", "downloads": 302, "source": "Hugging Face", "score": -0.08601621502971936, "first_commit": "2024-03-28 01:33:42", "latest_commit": "2024-04-20 12:17:33", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Model Card for Japanese character-level GPT-2 Medium Model description This is a Japanese character-level GPT-2 Medium (310M parameters) language model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.", "url": "https://huggingface.co/ku-nlp/gpt2-medium-japanese-char", "project_name": "gpt2-medium-japanese-char", "downloads": 301, "source": "Hugging Face", "score": -0.08602522604769089, "first_commit": "2023-05-18 06:29:28", "latest_commit": "2023-06-08 05:34:26", "languages": [], "model_or_dataset": "model", "model_size": 0.335, "model_architectures": "GPT2LMHeadModel" }, { "description": "WRIME-fine-tuned BERT base Japanese This model is a Japanese BERTBASE fine-tuned on the WRIME dataset.", "url": "https://huggingface.co/patrickramos/bert-base-japanese-v2-wrime-fine-tune", "project_name": "bert-base-japanese-v2-wrime-fine-tune", "downloads": 293, "source": "Hugging Face", "score": -0.08609731419146308, "first_commit": "2022-05-22 09:42:14", "latest_commit": "2023-03-22 08:11:34", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForSequenceClassification" }, { "description": "Ruri-Reranker: Japanese General Reranker Usage Direct Usage (Sentence Transformers)", "url": "https://huggingface.co/cl-nagoya/ruri-reranker-large", "project_name": "ruri-reranker-large", "downloads": 291, "source": "Hugging Face", "score": -0.08611533622740614, "first_commit": "2024-08-20 02:37:26", "latest_commit": "2024-09-04 08:50:12", "languages": [], "model_or_dataset": "model", "model_size": 0.337, "model_architectures": "BertForSequenceClassification" }, { "description": "rinna/japanese-gpt-neox-3.6b rinnaさんが公開しているjapanese-gpt-neox-3.6bのgguf変換版です。 ", "url": "https://huggingface.co/mmnga/rinna-japanese-gpt-neox-3.6b-gguf", "project_name": "rinna-japanese-gpt-neox-3.6b-gguf", "downloads": 288, "source": "Hugging Face", "score": -0.0861423692813207, "first_commit": "2023-09-02 18:46:08", "latest_commit": "2023-09-08 02:37:19", "languages": [], "model_or_dataset": "model", "model_size": 3.61, "model_architectures": null }, { "description": "ku-nlp/roberta-base-japanese-char-wwm Model description This is a Japanese RoBERTa base model pre-trained on Japanese Wikipedia and the Japanese portion of CC-100.", "url": "https://huggingface.co/ku-nlp/roberta-base-japanese-char-wwm", "project_name": "roberta-base-japanese-char-wwm", "downloads": 287, "source": "Hugging Face", "score": -0.08615138029929223, "first_commit": "2022-09-20 05:07:34", "latest_commit": "2023-03-20 08:05:45", "languages": [], "model_or_dataset": "model", "model_size": 0.1, "model_architectures": "RobertaForMaskedLM" }, { "description": "日本語版CLIPモデル This is a CLIP text/image encoder model for Japanese. ", "url": "https://huggingface.co/sonoisa/clip-vit-b-32-japanese-v1", "project_name": "clip-vit-b-32-japanese-v1", "downloads": 286, "source": "Hugging Face", "score": -0.08616039131726376, "first_commit": "2022-02-15 15:47:34", "latest_commit": "2022-04-19 14:18:58", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertModel" }, { "description": "QuantFactory/Llama3.1-ArrowSE-v0.4-GGUF This is quantized version of DataPilot/Llama3.1-ArrowSE-v0.4 created using llama.cpp Original Model Card 概要 このモデルはllama3.1-8B-instructをもとに日本語性能を高めることを目的にMergekit&ファインチューニングを用いて作成されました。 ", "url": "https://huggingface.co/QuantFactory/Llama3.1-ArrowSE-v0.4-GGUF", "project_name": "Llama3.1-ArrowSE-v0.4-GGUF", "downloads": 284, "source": "Hugging Face", "score": -0.0861784133532068, "first_commit": "2024-07-28 06:17:48", "latest_commit": "2024-07-28 06:57:40", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null }, { "description": "Fish Speech V1.2 Fish Speech V1.2 is a leading text-to-speech (TTS) model trained on 300k hours of English, Chinese, and Japanese audio data.", "url": "https://huggingface.co/fishaudio/fish-speech-1.2", "project_name": "fish-speech-1.2", "downloads": 283, "source": "Hugging Face", "score": -0.08618742437117834, "first_commit": "2024-07-02 04:24:09", "latest_commit": "2024-07-02 04:31:26", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "※llama.cpp Releases b3428(7/21)", "url": "https://huggingface.co/MCZK/EZO-Common-9B-gemma-2-it-GGUF", "project_name": "EZO-Common-9B-gemma-2-it-GGUF", "downloads": 283, "source": "Hugging Face", "score": -0.08618742437117834, "first_commit": "2024-07-10 11:12:59", "latest_commit": "2024-07-21 11:26:08", "languages": [], "model_or_dataset": "model", "model_size": 9.24, "model_architectures": null }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", "url": "https://huggingface.co/TheBloke/japanese-stablelm-instruct-beta-7B-GGUF", "project_name": "japanese-stablelm-instruct-beta-7B-GGUF", "downloads": 282, "source": "Hugging Face", "score": -0.08619643538914985, "first_commit": "2023-11-03 01:04:31", "latest_commit": "2023-11-03 12:54:55", "languages": [], "model_or_dataset": "model", "model_size": 6.74, "model_architectures": null }, { "description": "Orion-14B 🌐English | 🇨", "url": "https://huggingface.co/OrionStarAI/Orion-14B-Chat", "project_name": "Orion-14B-Chat", "downloads": 282, "source": "Hugging Face", "score": -0.08619643538914985, "first_commit": "2024-01-16 06:03:30", "latest_commit": "2024-04-11 10:48:51", "languages": [], "model_or_dataset": "model", "model_size": 14.5, "model_architectures": "OrionForCausalLM" }, { "description": "Model card for model ID", "url": "https://huggingface.co/retrieva-jp/t5-large-short", "project_name": "t5-large-short", "downloads": 281, "source": "Hugging Face", "score": -0.08620544640712138, "first_commit": "2023-04-26 08:18:58", "latest_commit": "2023-05-10 10:00:54", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration" }, { "description": "DataPilot-ArrowPro-7B-RobinHood-gguf DataPilotさんが公開しているArrowPro-7B-RobinHoodのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/DataPilot-ArrowPro-7B-RobinHood-gguf", "project_name": "DataPilot-ArrowPro-7B-RobinHood-gguf", "downloads": 276, "source": "Hugging Face", "score": -0.086250501496979, "first_commit": "2024-05-11 07:22:37", "latest_commit": "2024-05-11 13:43:09", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "oasst2-33k-ja This repository provides an instruction tuning dataset developed by LLM-jp, a collaborative project launched in Japan.", "url": "https://huggingface.co/datasets/llm-jp/oasst2-33k-ja", "project_name": "oasst2-33k-ja", "downloads": 276, "source": "Hugging Face", "score": -0.086250501496979, "first_commit": "2024-04-28 16:24:00", "latest_commit": "2024-04-28 16:39:03", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "rinna-llama-3-youko-70b-instruct-gguf rinnaさんが公開しているllama-3-youko-70b-instructのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/rinna-llama-3-youko-70b-instruct-gguf", "project_name": "rinna-llama-3-youko-70b-instruct-gguf", "downloads": 267, "source": "Hugging Face", "score": -0.08633160065872272, "first_commit": "2024-07-27 09:04:09", "latest_commit": "2024-07-31 14:35:52", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": null }, { "description": "BERT large Japanese (character-level tokenization with whole word masking, CC-100 and jawiki-20230102)", "url": "https://huggingface.co/tohoku-nlp/bert-large-japanese-char-v2", "project_name": "bert-large-japanese-char-v2", "downloads": 267, "source": "Hugging Face", "score": -0.08633160065872272, "first_commit": "2023-05-19 00:48:06", "latest_commit": "2023-05-19 00:54:57", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForPreTraining" }, { "description": "Ninja-v1-gguf Local-Novel-LLM-projectさんが公開しているNinja-v1のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Ninja-v1-gguf", "project_name": "Ninja-v1-gguf", "downloads": 265, "source": "Hugging Face", "score": -0.08634962269466577, "first_commit": "2024-05-03 14:03:22", "latest_commit": "2024-05-04 13:26:22", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "モデル概要 AWSのtrn1インスタンスを用いて開発した大喜利言語モデルです。", "url": "https://huggingface.co/watashiha/watashiha-gpt-6b", "project_name": "watashiha-gpt-6b", "downloads": 259, "source": "Hugging Face", "score": -0.08640368880249492, "first_commit": "2023-12-28 05:41:38", "latest_commit": "2024-03-04 05:21:14", "languages": [], "model_or_dataset": "model", "model_size": 5.83, "model_architectures": "GPT2LMHeadModel" }, { "description": "aya-23-35B-gguf CohereForAIさんが公開しているaya-23-35Bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/aya-23-35B-gguf", "project_name": "aya-23-35B-gguf", "downloads": 257, "source": "Hugging Face", "score": -0.08642171083843797, "first_commit": "2024-05-26 16:32:27", "latest_commit": "2024-05-27 00:47:56", "languages": [], "model_or_dataset": "model", "model_size": 35.0, "model_architectures": null }, { "description": "XLNet-japanese Model description This model require Mecab and senetencepiece with XLNetTokenizer.", "url": "https://huggingface.co/hajime9652/xlnet-japanese", "project_name": "xlnet-japanese", "downloads": 255, "source": "Hugging Face", "score": -0.08643973287438103, "first_commit": "2021-04-01 03:12:11", "latest_commit": "2023-01-05 04:28:36", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "XLNetLMHeadModel" }, { "description": "Model card for model ID", "url": "https://huggingface.co/retrieva-jp/t5-small-short", "project_name": "t5-small-short", "downloads": 255, "source": "Hugging Face", "score": -0.08643973287438103, "first_commit": "2023-04-25 04:37:20", "latest_commit": "2023-05-10 09:55:39", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration" }, { "description": "こちらでアップロードできないので、civitaiにて先に公開しています。 ", "url": "https://huggingface.co/sazyou-roukaku/AfterRealXL", "project_name": "AfterRealXL", "downloads": 255, "source": "Hugging Face", "score": -0.08643973287438103, "first_commit": "2023-09-23 08:43:02", "latest_commit": "2023-10-01 18:12:09", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "ELYZA-japanese-Llama-2-13b-fast-instruct-GGUF", "url": "https://huggingface.co/QuantFactory/ELYZA-japanese-Llama-2-13b-fast-instruct-GGUF", "project_name": "ELYZA-japanese-Llama-2-13b-fast-instruct-GGUF", "downloads": 252, "source": "Hugging Face", "score": -0.0864667659282956, "first_commit": "2024-07-05 05:56:09", "latest_commit": "2024-07-13 13:29:45", "languages": [], "model_or_dataset": "model", "model_size": 1.1, "model_architectures": null }, { "description": "読み込み方 from datasets import load_dataset dataset = load_dataset(\"YANS-official/ogiri-test\", split=\"test\") 概要 大喜利投稿サイトBoketeのクロールデータです。", "url": "https://huggingface.co/datasets/YANS-official/ogiri-test", "project_name": "ogiri-test", "downloads": 252, "source": "Hugging Face", "score": -0.0864667659282956, "first_commit": "2024-09-03 15:08:05", "latest_commit": "2024-09-09 05:53:54", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Japanese Natural Language Inference Model", "url": "https://huggingface.co/cyberagent/xlm-roberta-large-jnli-jsick", "project_name": "xlm-roberta-large-jnli-jsick", "downloads": 248, "source": "Hugging Face", "score": -0.0865028100001817, "first_commit": "2022-12-23 10:51:12", "latest_commit": "2022-12-23 10:51:12", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "XLMRobertaForSequenceClassification" }, { "description": "Converted from clu-ling/whisper-large-v2-japanese-5k-steps using CTranslate2.", "url": "https://huggingface.co/zh-plus/faster-whisper-large-v2-japanese-5k-steps", "project_name": "faster-whisper-large-v2-japanese-5k-steps", "downloads": 248, "source": "Hugging Face", "score": -0.0865028100001817, "first_commit": "2023-07-03 08:29:37", "latest_commit": "2023-07-03 18:42:31", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "Dataset Details Dataset Type:Japanese LLaVA Instruct 150K is a localized version of the original LLaVA Visual Instruct 150K dataset.", "url": "https://huggingface.co/datasets/turing-motors/LLaVA-Instruct-150K-JA", "project_name": "LLaVA-Instruct-150K-JA", "downloads": 248, "source": "Hugging Face", "score": -0.0865028100001817, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Model Card for Japanese BART large Model description", "url": "https://huggingface.co/ku-nlp/bart-large-japanese", "project_name": "bart-large-japanese", "downloads": 243, "source": "Hugging Face", "score": -0.08654786509003932, "first_commit": "2023-05-09 07:44:59", "latest_commit": "2023-05-12 11:05:03", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MBartForConditionalGeneration" }, { "description": "calm3-22b-RP-v2-GGUF 概要 Aratako/calm3-22b-RP-v2の量子化済みGGUF版です。", "url": "https://huggingface.co/Aratako/calm3-22b-RP-v2-GGUF", "project_name": "calm3-22b-RP-v2-GGUF", "downloads": 242, "source": "Hugging Face", "score": -0.08655687610801084, "first_commit": "2024-09-16 04:30:57", "latest_commit": "2024-09-16 09:55:09", "languages": [], "model_or_dataset": "model", "model_size": 22.5, "model_architectures": null }, { "description": "SpeechT5 (TTS task) for Japanese SpeechT5 model fine-tuned for Japanese speech synthesis (text-to-speech)", "url": "https://huggingface.co/esnya/japanese_speecht5_tts", "project_name": "japanese_speecht5_tts", "downloads": 241, "source": "Hugging Face", "score": -0.08656588712598237, "first_commit": "2023-08-08 18:37:40", "latest_commit": "2023-08-09 09:25:38", "languages": [], "model_or_dataset": "model", "model_size": 0.14400000000000002, "model_architectures": "SpeechT5ForTextToSpeech" }, { "description": "QuantFactory/shisa-7b-v1-GGUF This is quantized version of augmxnt/shisa-base-7b-v1 created using llama.cpp Model Description shisa-base-7b-v1 takes Mistral 7B and adds an additional 8B tokens of primarily Japanese pre-training.", "url": "https://huggingface.co/QuantFactory/shisa-7b-v1-GGUF", "project_name": "shisa-7b-v1-GGUF", "downloads": 239, "source": "Hugging Face", "score": -0.08658390916192542, "first_commit": "2024-06-14 01:44:05", "latest_commit": "2024-06-18 05:53:41", "languages": [], "model_or_dataset": "model", "model_size": 7.96, "model_architectures": null }, { "description": "llm-jp-3-1.8b", "url": "https://huggingface.co/llm-jp/llm-jp-3-1.8b", "project_name": "llm-jp-3-1.8b", "downloads": 236, "source": "Hugging Face", "score": -0.08661094221584, "first_commit": "2024-09-23 12:49:46", "latest_commit": "2024-09-26 18:19:48", "languages": [], "model_or_dataset": "model", "model_size": 1.87, "model_architectures": "LlamaForCausalLM" }, { "description": "Japanese-Starling-ChatV-7B このモデルは\"chatntq-ja-7b-v1.0\"をベースにした7Bパラメータの日本語チャットモデルです。", "url": "https://huggingface.co/TFMC/Japanese-Starling-ChatV-7B", "project_name": "Japanese-Starling-ChatV-7B", "downloads": 232, "source": "Hugging Face", "score": -0.08664698628772609, "first_commit": "2024-04-14 12:18:31", "latest_commit": "2024-04-14 15:26:06", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM" }, { "description": "lightblue-suzume-llama-3-8B-japanese-gguf lightblueさんが公開しているsuzume-llama-3-8B-japaneseのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/lightblue-suzume-llama-3-8B-japanese-gguf", "project_name": "lightblue-suzume-llama-3-8B-japanese-gguf", "downloads": 230, "source": "Hugging Face", "score": -0.08666500832366915, "first_commit": "2024-04-23 13:30:08", "latest_commit": "2024-05-07 12:58:06", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null }, { "description": "What’s this?", "url": "https://huggingface.co/globis-university/deberta-v3-japanese-xsmall", "project_name": "deberta-v3-japanese-xsmall", "downloads": 224, "source": "Hugging Face", "score": -0.08671907443149829, "first_commit": "2023-09-21 16:12:53", "latest_commit": "2024-07-05 05:48:15", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "Tanuki-8x8B-dpo-v1.0-GPTQ-4bit 概要 GENIAC 松尾研 LLM開発プロジェクトで開発されたLLMであるweblab-GENIAC/Tanuki-8x8B-dpo-v1.0のGPTQ 4bit量子化モデルです。", "url": "https://huggingface.co/team-hatakeyama-phase2/Tanuki-8x8B-dpo-v1.0-GPTQ-4bit", "project_name": "Tanuki-8x8B-dpo-v1.0-GPTQ-4bit", "downloads": 224, "source": "Hugging Face", "score": -0.08671907443149829, "first_commit": "2024-08-27 18:19:13", "latest_commit": "2024-09-03 09:27:14", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "TanukiForCausalLM" }, { "description": "Ruri-Reranker: Japanese General Reranker Usage Direct Usage (Sentence Transformers)", "url": "https://huggingface.co/cl-nagoya/ruri-reranker-base", "project_name": "ruri-reranker-base", "downloads": 223, "source": "Hugging Face", "score": -0.08672808544946982, "first_commit": "2024-08-20 01:10:40", "latest_commit": "2024-09-04 08:50:21", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForSequenceClassification" }, { "description": "GPT-2 small Japanese model This repository contains a GPT2-small model trained on Japanese Wikipedia dataset.", "url": "https://huggingface.co/colorfulscoop/gpt2-small-ja", "project_name": "gpt2-small-ja", "downloads": 222, "source": "Hugging Face", "score": -0.08673709646744135, "first_commit": "2021-03-27 02:27:05", "latest_commit": "2021-09-27 20:50:17", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel" }, { "description": "オリジナルのサイトと同じものを使用しています。 ", "url": "https://huggingface.co/datasets/llm-book/livedoor-news-corpus", "project_name": "livedoor-news-corpus", "downloads": 222, "source": "Hugging Face", "score": -0.08673709646744135, "first_commit": "2023-06-21 07:16:52", "latest_commit": "2023-12-12 11:19:43", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "ichikara-instruction (Non Commercial) LLMのための日本語インストラクションデータ 公開ページ 公開ページより、 本データに関して、言語処理学会第30回年次大会において発表を行います。", "url": "https://huggingface.co/datasets/p1atdev/ichikara-instruction", "project_name": "ichikara-instruction", "downloads": 222, "source": "Hugging Face", "score": -0.08673709646744135, "first_commit": "2024-03-12 07:09:56", "latest_commit": "2024-03-12 08:36:40", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "License:CreativeML Open RAIL-M Additional Copyright: sazyou_roukaku (TwitterID @sazyou_roukaku) as of June 25, 2023 このモデルは『CreativeML Open RAIL-M』でLicenseそのものに変更はありません。 ", "url": "https://huggingface.co/sazyou-roukaku/LittleStepMix", "project_name": "LittleStepMix", "downloads": 220, "source": "Hugging Face", "score": -0.0867551185033844, "first_commit": "2023-06-25 06:57:42", "latest_commit": "2023-07-04 10:47:46", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "Mistral-Nemo-Instruct-2407-gguf mistralaiさんが公開しているMistral-Nemo-Instruct-2407のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Mistral-Nemo-Instruct-2407-gguf", "project_name": "Mistral-Nemo-Instruct-2407-gguf", "downloads": 220, "source": "Hugging Face", "score": -0.0867551185033844, "first_commit": "2024-07-22 13:28:13", "latest_commit": "2024-07-22 17:25:48", "languages": [], "model_or_dataset": "model", "model_size": 12.2, "model_architectures": null }, { "description": "Model card for model ID", "url": "https://huggingface.co/retrieva-jp/t5-base-long", "project_name": "t5-base-long", "downloads": 220, "source": "Hugging Face", "score": -0.0867551185033844, "first_commit": "2023-04-26 08:30:59", "latest_commit": "2023-05-10 10:00:00", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration" }, { "description": "bilingual-gpt-neox-4b-8k Overview Notice: This model requires transformers>=4.31.0 to work properly.", "url": "https://huggingface.co/rinna/bilingual-gpt-neox-4b-8k", "project_name": "bilingual-gpt-neox-4b-8k", "downloads": 217, "source": "Hugging Face", "score": -0.08678215155729897, "first_commit": "2023-07-31 02:34:21", "latest_commit": "2024-07-20 08:03:16", "languages": [], "model_or_dataset": "model", "model_size": 3.95, "model_architectures": "GPTNeoXForCausalLM" }, { "description": "Wav2Vec2-Large-XLSR-53-Japanese Fine-tuned facebook/wav2vec2-large-xlsr-53 on Japanese using the Common Voice and Japanese speech corpus of Saruwatari-lab, University of Tokyo JSUT.", "url": "https://huggingface.co/vumichien/wav2vec2-large-xlsr-japanese-hiragana", "project_name": "wav2vec2-large-xlsr-japanese-hiragana", "downloads": 214, "source": "Hugging Face", "score": -0.08680918461121354, "first_commit": "2021-06-18 07:15:24", "latest_commit": "2023-02-08 00:36:47", "languages": [], "model_or_dataset": "model", "model_size": 0.316, "model_architectures": "Wav2Vec2ForCTC" }, { "description": "Mistral-Large-Instruct-2407-gguf mistralaiさんが公開しているMistral-Large-Instruct-2407のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Mistral-Large-Instruct-2407-gguf", "project_name": "Mistral-Large-Instruct-2407-gguf", "downloads": 213, "source": "Hugging Face", "score": -0.08681819562918507, "first_commit": "2024-07-24 18:59:58", "latest_commit": "2024-07-26 12:21:45", "languages": [], "model_or_dataset": "model", "model_size": 123.0, "model_architectures": null }, { "description": "c4ai-command-r-plus-gguf CohereForAIさんが公開しているc4ai-command-r-plusのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/c4ai-command-r-plus-gguf", "project_name": "c4ai-command-r-plus-gguf", "downloads": 211, "source": "Hugging Face", "score": -0.08683621766512811, "first_commit": "2024-04-22 14:46:41", "latest_commit": "2024-04-23 16:13:37", "languages": [], "model_or_dataset": "model", "model_size": 104.0, "model_architectures": null }, { "description": "llm-japanese-dataset LLM構築用の日本語インストラクション(チャット)データセット 主に,英語で構築されたLLMモデルなどに対して,チャット(Instruction)応答タスクに関してLoRAなどでチューニングするために使用できます. ", "url": "https://huggingface.co/datasets/izumi-lab/llm-japanese-dataset", "project_name": "llm-japanese-dataset", "downloads": 211, "source": "Hugging Face", "score": -0.08683621766512811, "first_commit": "2023-04-30 06:13:24", "latest_commit": "2024-01-18 13:42:50", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Mistral-7B-Instruct-v0.3-gguf mistralaiさんが公開しているMistral-7B-Instruct-v0.3のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Mistral-7B-Instruct-v0.3-gguf", "project_name": "Mistral-7B-Instruct-v0.3-gguf", "downloads": 210, "source": "Hugging Face", "score": -0.08684522868309964, "first_commit": "2024-05-23 14:44:25", "latest_commit": "2024-05-23 15:58:46", "languages": [], "model_or_dataset": "model", "model_size": 7.25, "model_architectures": null }, { "description": "bert-japanese_finetuned-sentiment-analysis This model was trained from scratch on the Japanese Sentiment Polarity Dictionary dataset.", "url": "https://huggingface.co/minutillamolinara/bert-japanese_finetuned-sentiment-analysis", "project_name": "bert-japanese_finetuned-sentiment-analysis", "downloads": 208, "source": "Hugging Face", "score": -0.08686325071904269, "first_commit": "2023-03-31 02:28:09", "latest_commit": "2023-03-31 13:13:37", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification" }, { "description": "fio-base-japanese-v0.1 日本語版は近日公開予定です(日本語を勉強中なので、間違いはご容赦ください!", "url": "https://huggingface.co/bclavie/fio-base-japanese-v0.1", "project_name": "fio-base-japanese-v0.1", "downloads": 205, "source": "Hugging Face", "score": -0.08689028377295727, "first_commit": "2023-12-18 11:01:07", "latest_commit": "2023-12-19 10:28:16", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertModel" }, { "description": "Evaluation on MIRACL japanese These models don't train on the MIRACL training data.", "url": "https://huggingface.co/aken12/splade-japanese-v3", "project_name": "splade-japanese-v3", "downloads": 204, "source": "Hugging Face", "score": -0.08689929479092878, "first_commit": "2024-03-29 12:35:47", "latest_commit": "2024-05-22 02:59:37", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM" }, { "description": "roberta_qa_japanese (Japanese caption : 日本語の (抽出型) 質問応答のモデル)", "url": "https://huggingface.co/tsmatz/roberta_qa_japanese", "project_name": "roberta_qa_japanese", "downloads": 204, "source": "Hugging Face", "score": -0.08689929479092878, "first_commit": "2022-12-11 03:41:07", "latest_commit": "2024-07-12 00:00:07", "languages": [], "model_or_dataset": "model", "model_size": 0.11, "model_architectures": "RobertaForQuestionAnswering" }, { "description": "日本語T5事前学習済みモデル This is a T5 (Text-to-Text Transfer Transformer) model pretrained on Japanese corpus. ", "url": "https://huggingface.co/sonoisa/t5-base-japanese-v1.1", "project_name": "t5-base-japanese-v1.1", "downloads": 198, "source": "Hugging Face", "score": -0.08695336089875794, "first_commit": "2022-08-12 15:41:28", "latest_commit": "2022-08-27 09:21:01", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration" }, { "description": "By clicking \"Agree\", you agree to the License Agreement and acknowledge Stability AI's Privacy Policy.", "url": "https://huggingface.co/stabilityai/japanese-stablelm-2-instruct-1_6b", "project_name": "japanese-stablelm-2-instruct-1_6b", "downloads": 197, "source": "Hugging Face", "score": -0.08696237191672947, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": 1.64, "model_architectures": null }, { "description": "japanese-stablelm-2-instruct-1_6b-gguf stabilityaiさんが公開しているjapanese-stablelm-2-instruct-1_6bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/japanese-stablelm-2-instruct-1_6b-gguf", "project_name": "japanese-stablelm-2-instruct-1_6b-gguf", "downloads": 196, "source": "Hugging Face", "score": -0.08697138293470098, "first_commit": "2024-05-11 07:26:43", "latest_commit": "2024-05-11 09:56:19", "languages": [], "model_or_dataset": "model", "model_size": 1.64, "model_architectures": null }, { "description": "I'm constantly enhancing these model descriptions to provide you with the most relevant and comprehensive information japanese-stablelm-3b-4e1t-base - GGUF Model creator: stabilityai Original model: japanese-stablelm-3b-4e1t-base StableLM", "url": "https://huggingface.co/maddes8cht/stabilityai-japanese-stablelm-3b-4e1t-base-gguf", "project_name": "stabilityai-japanese-stablelm-3b-4e1t-base-gguf", "downloads": 196, "source": "Hugging Face", "score": -0.08697138293470098, "first_commit": "2023-11-16 10:23:21", "latest_commit": "2023-11-16 11:18:48", "languages": [], "model_or_dataset": "model", "model_size": 2.8, "model_architectures": null }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", "url": "https://huggingface.co/TheBloke/japanese-stablelm-base-beta-70B-GGUF", "project_name": "japanese-stablelm-base-beta-70B-GGUF", "downloads": 196, "source": "Hugging Face", "score": -0.08697138293470098, "first_commit": "2023-11-06 11:33:47", "latest_commit": "2023-11-06 12:14:36", "languages": [], "model_or_dataset": "model", "model_size": 69.0, "model_architectures": null }, { "description": "nlp-waseda/roberta-large-japanese Model description This is a Japanese RoBERTa large model pretrained on Japanese Wikipedia and the Japanese portion of CC-100.", "url": "https://huggingface.co/nlp-waseda/roberta-large-japanese", "project_name": "roberta-large-japanese", "downloads": 187, "source": "Hugging Face", "score": -0.08705248209644471, "first_commit": "2022-05-10 08:37:48", "latest_commit": "2022-10-21 14:48:46", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForMaskedLM" }, { "description": "stockmark-100b-gguf stockmarkさんが公開しているstockmark-100bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/stockmark-100b-gguf", "project_name": "stockmark-100b-gguf", "downloads": 187, "source": "Hugging Face", "score": -0.08705248209644471, "first_commit": "2024-05-17 12:45:56", "latest_commit": "2024-05-18 09:14:46", "languages": [], "model_or_dataset": "model", "model_size": 96.2, "model_architectures": null }, { "description": "Model Trained Using AutoNLP Problem type: Binary Classification Model ID: 59362 Validation Metrics Loss: 0.13092292845249176 Accuracy: 0.9527127414314258 Precision: 0.9634070704982427 Recall: 0.9842171959602166 AUC: 0.9667289746092403 F1: 0.9737009564152002 Usage You can use cURL to access this model: $ curl -X POST -H \"Authorization: Bearer YOUR_API_KEY\" -H \"Content-Type: application/json\" -d '{\"inputs\": \"I love AutoNLP\"}' https://api-inference.huggingface.co/models/abhishek/autonlp-japanese-sentiment-5936", "url": "https://huggingface.co/abhishek/autonlp-japanese-sentiment-59362", "project_name": "autonlp-japanese-sentiment-59362", "downloads": 186, "source": "Hugging Face", "score": -0.08706149311441623, "first_commit": "2021-04-21 11:28:11", "latest_commit": "2021-05-18 22:55:03", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification" }, { "description": "モデル説明 (model explanation) CoolJapanDiffusion 2.1.1とWaifuDiffusion 1.4 anime epoch2のマージ。", "url": "https://huggingface.co/ThePioneer/CoolerWaifuDiffusion", "project_name": "CoolerWaifuDiffusion", "downloads": 186, "source": "Hugging Face", "score": -0.08706149311441623, "first_commit": "2023-01-20 23:52:39", "latest_commit": "2023-01-22 19:16:59", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "aya-23-8B-gguf CohereForAIさんが公開しているaya-23-8Bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/aya-23-8B-gguf", "project_name": "aya-23-8B-gguf", "downloads": 185, "source": "Hugging Face", "score": -0.08707050413238776, "first_commit": "2024-05-26 16:32:53", "latest_commit": "2024-05-27 00:54:36", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null }, { "description": "Dataset overview This dataset identifies whether a GitHub repository description pertains to Japanese natural language processing (NLP).", "url": "https://huggingface.co/datasets/taishi-i/awesome-japanese-nlp-classification-dataset", "project_name": "awesome-japanese-nlp-classification-dataset", "downloads": 184, "source": "Hugging Face", "score": -0.08707951515035929, "first_commit": "2023-09-09 06:37:36", "latest_commit": "2023-09-09 20:09:04", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "electra-base-cyberbullying This is an ELECTRA Base model for the Japanese language finetuned for automatic cyberbullying detection.", "url": "https://huggingface.co/kit-nlp/transformers-ud-japanese-electra-base-discriminator-cyberbullying", "project_name": "transformers-ud-japanese-electra-base-discriminator-cyberbullying", "downloads": 183, "source": "Hugging Face", "score": -0.08708852616833081, "first_commit": "2022-09-09 04:08:15", "latest_commit": "2022-11-01 07:18:40", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ElectraForSequenceClassification" }, { "description": "By clicking \"Agree\", you agree to the License Agreement and acknowledge Stability AI's Privacy Policy.", "url": "https://huggingface.co/stabilityai/japanese-stable-vlm", "project_name": "japanese-stable-vlm", "downloads": 183, "source": "Hugging Face", "score": -0.08708852616833081, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": 7.57, "model_architectures": null }, { "description": "Model card for model ID", "url": "https://huggingface.co/retrieva-jp/t5-large-long", "project_name": "t5-large-long", "downloads": 180, "source": "Hugging Face", "score": -0.08711555922224538, "first_commit": "2023-04-26 08:33:12", "latest_commit": "2023-05-10 10:00:35", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration" }, { "description": "lightblue-suzume-llama-3-8B-multilingual-gguf lightblueさんが公開しているsuzume-llama-3-8B-multilingualのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/lightblue-suzume-llama-3-8B-multilingual-gguf", "project_name": "lightblue-suzume-llama-3-8B-multilingual-gguf", "downloads": 178, "source": "Hugging Face", "score": -0.08713358125818843, "first_commit": "2024-05-06 16:31:55", "latest_commit": "2024-05-07 12:59:57", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null }, { "description": "Japanese to emotions I fine-tuned LINE DistillBERT as the base model using WRIME Ver2 as the teacher data.", "url": "https://huggingface.co/koshin2001/Japanese-to-emotions", "project_name": "Japanese-to-emotions", "downloads": 177, "source": "Hugging Face", "score": -0.08714259227615996, "first_commit": "2024-09-09 13:28:59", "latest_commit": "2024-09-11 01:49:55", "languages": [], "model_or_dataset": "model", "model_size": 0.06870000000000001, "model_architectures": "DistilBertForSequenceClassification" }, { "description": "BERT small Japanese finance This is a BERT model pretrained on texts in the Japanese language.", "url": "https://huggingface.co/izumi-lab/bert-small-japanese", "project_name": "bert-small-japanese", "downloads": 176, "source": "Hugging Face", "score": -0.08715160329413148, "first_commit": "2021-10-04 13:09:36", "latest_commit": "2022-12-09 00:40:57", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM" }, { "description": "Model Trained Using AutoNLP Problem type: Binary Classification Model ID: 59363 Validation Metrics Loss: 0.12651239335536957 Accuracy: 0.9532079853817648 Precision: 0.9729688278823665 Recall: 0.9744633462616643 AUC: 0.9717333684823413 F1: 0.9737155136027014 Usage You can use cURL to access this model: $ curl -X POST -H \"Authorization: Bearer YOUR_API_KEY\" -H \"Content-Type: application/json\" -d '{\"inputs\": \"I love AutoNLP\"}' https://api-inference.huggingface.co/models/abhishek/autonlp-japanese-sentiment-5936", "url": "https://huggingface.co/abhishek/autonlp-japanese-sentiment-59363", "project_name": "autonlp-japanese-sentiment-59363", "downloads": 174, "source": "Hugging Face", "score": -0.08716962533007454, "first_commit": "2021-04-21 11:28:24", "latest_commit": "2021-05-18 22:56:15", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification" }, { "description": "We provide an Amazon product reviews dataset for multilingual text classification.", "url": "https://huggingface.co/datasets/defunct-datasets/amazon_reviews_multi", "project_name": "amazon_reviews_multi", "downloads": 174, "source": "Hugging Face", "score": -0.08716962533007454, "first_commit": "2022-01-25 16:34:54", "latest_commit": "2023-11-02 14:52:21", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "J-ResearchCorpus Update: 2024/3/16言語処理学会第30回年次大会(NLP2024)を含む、論文 1,343 本のデータを追加 2024/2/25言語処理学会誌「自然言語処理」のうち CC-BY-4.0 で公開されている論文 360 本のデータを追加 概要 CC-BY-* ライセンスで公開されている日本語論文や学会誌等から抜粋した高品質なテキストのデータセットです。", "url": "https://huggingface.co/datasets/kunishou/J-ResearchCorpus", "project_name": "J-ResearchCorpus", "downloads": 173, "source": "Hugging Face", "score": -0.08717863634804605, "first_commit": "2024-02-12 14:03:42", "latest_commit": "2024-03-16 07:55:08", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "llm-lora-classification", "url": "https://github.com/hppRC/llm-lora-classification", "project_name": "llm-lora-classification", "stargazers_count": 83, "source": "GitHub", "score": -0.0871814392443442, "first_commit": "2023-07-17 12:42:57", "latest_commit": "2023-07-22 19:46:45", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Word2vec (word to vectors) approach for Japanese language using Gensim and Mecab.", "url": "https://github.com/philipperemy/japanese-words-to-vectors", "project_name": "japanese-words-to-vectors", "stargazers_count": 83, "source": "GitHub", "score": -0.0871814392443442, "first_commit": "2016-09-04 09:43:00", "latest_commit": "2020-08-09 19:48:23", "languages": [ "Python" ], "model_or_dataset": "model" }, { "description": "pfnet-nekomata-14b-pfn-qfin-inst-merge-gguf pfnetさんが公開しているnekomata-14b-pfn-qfin-inst-mergeのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/pfnet-nekomata-14b-pfn-qfin-inst-merge-gguf", "project_name": "pfnet-nekomata-14b-pfn-qfin-inst-merge-gguf", "downloads": 172, "source": "Hugging Face", "score": -0.08718764736601758, "first_commit": "2024-04-23 14:53:08", "latest_commit": "2024-04-24 14:39:32", "languages": [], "model_or_dataset": "model", "model_size": 14.2, "model_architectures": null }, { "description": "読み込み方 from datasets import load_dataset dataset = load_dataset(\"YANS-official/ogiri-bokete\", split=\"train\") 概要 大喜利投稿サイトBoketeのクロールデータです。", "url": "https://huggingface.co/datasets/YANS-official/ogiri-bokete", "project_name": "ogiri-bokete", "downloads": 170, "source": "Hugging Face", "score": -0.08720566940196063, "first_commit": "2024-07-21 09:58:15", "latest_commit": "2024-08-31 09:24:55", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "llm-jp-3-3.7b", "url": "https://huggingface.co/llm-jp/llm-jp-3-3.7b", "project_name": "llm-jp-3-3.7b", "downloads": 168, "source": "Hugging Face", "score": -0.08722369143790368, "first_commit": "2024-09-23 12:25:32", "latest_commit": "2024-09-26 18:20:09", "languages": [], "model_or_dataset": "model", "model_size": 3.78, "model_architectures": "LlamaForCausalLM" }, { "description": "tokyotech-llm-Swallow-70b-instruct-v0.1-gguf tokyotech-llmさんが公開しているSwallow-70b-instruct-v0.1のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/tokyotech-llm-Swallow-70b-instruct-v0.1-gguf", "project_name": "tokyotech-llm-Swallow-70b-instruct-v0.1-gguf", "downloads": 167, "source": "Hugging Face", "score": -0.08723270245587521, "first_commit": "2024-05-03 09:00:00", "latest_commit": "2024-05-04 06:52:16", "languages": [], "model_or_dataset": "model", "model_size": 69.2, "model_architectures": null }, { "description": "databricks-dolly-15k-ja This repository provides an instruction tuning dataset developed by LLM-jp, a collaborative project launched in Japan.", "url": "https://huggingface.co/datasets/llm-jp/databricks-dolly-15k-ja", "project_name": "databricks-dolly-15k-ja", "downloads": 165, "source": "Hugging Face", "score": -0.08725072449181825, "first_commit": "2024-01-27 07:11:25", "latest_commit": "2024-01-30 18:09:37", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "shisa-7b-v1-gguf augmxntさんが公開しているshisa-7b-v1のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/shisa-7b-v1-gguf", "project_name": "shisa-7b-v1-gguf", "downloads": 164, "source": "Hugging Face", "score": -0.08725973550978978, "first_commit": "2023-12-09 14:02:20", "latest_commit": "2023-12-10 12:24:25", "languages": [], "model_or_dataset": "model", "model_size": 7.96, "model_architectures": null }, { "description": "ElanMT ElanMT-BT-en-ja is a English to Japanese translation model developed by ELAN MITSUA Project / Abstract Engine.", "url": "https://huggingface.co/Mitsua/elan-mt-bt-en-ja", "project_name": "elan-mt-bt-en-ja", "downloads": 164, "source": "Hugging Face", "score": -0.08725973550978978, "first_commit": "2024-05-20 01:51:18", "latest_commit": "2024-05-20 01:53:38", "languages": [], "model_or_dataset": "model", "model_size": 0.0606, "model_architectures": "MarianMTModel" }, { "description": "Stanza model for Japanese (ja)", "url": "https://huggingface.co/stanfordnlp/stanza-ja", "project_name": "stanza-ja", "downloads": 164, "source": "Hugging Face", "score": -0.08725973550978978, "first_commit": "2021-09-07 12:05:41", "latest_commit": "2024-07-31 05:09:43", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "JaCWIR: Japanese Casual Web IR - 日本語情報検索評価のための小規模でカジュアルなWebタイトルと概要のデータセット 近年、大規模言語モデル(LLM)の台頭により、一般的な日本語を用いた自然な検索クエリで質問するユースケースが増えています。", "url": "https://huggingface.co/datasets/hotchpotch/JaCWIR", "project_name": "JaCWIR", "downloads": 164, "source": "Hugging Face", "score": -0.08725973550978978, "first_commit": "2024-03-23 05:57:58", "latest_commit": "2024-04-01 02:34:34", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Llama 3 Youko 70B (rinna/llama-3-youko-70b)", "url": "https://huggingface.co/rinna/llama-3-youko-70b", "project_name": "llama-3-youko-70b", "downloads": 162, "source": "Hugging Face", "score": -0.08727775754573283, "first_commit": "2024-07-21 14:13:34", "latest_commit": "2024-07-25 05:16:28", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": "LlamaForCausalLM" }, { "description": "tokyotech-llm様の Llama-3-Swallow-8B-Instruct-v0.1 をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/Llama-3-Swallow-8B-Instruct-v0.1-GGUF", "project_name": "Llama-3-Swallow-8B-Instruct-v0.1-GGUF", "downloads": 161, "source": "Hugging Face", "score": -0.08728676856370436, "first_commit": "2024-07-01 11:45:22", "latest_commit": "2024-07-01 17:54:05", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null }, { "description": "whisper-large-v2-japanese-5k-steps This model is a fine-tuned version of openai/whisper-large-v2 on the Japanese CommonVoice dataset (v11)..", "url": "https://huggingface.co/clu-ling/whisper-large-v2-japanese-5k-steps", "project_name": "whisper-large-v2-japanese-5k-steps", "downloads": 158, "source": "Hugging Face", "score": -0.08731380161761892, "first_commit": "2023-01-28 22:14:29", "latest_commit": "2023-03-03 21:11:39", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "WhisperForConditionalGeneration" }, { "description": "SakanaAI-EvoLLM-JP-v1-7B-gguf SakanaAIさんが公開しているEvoLLM-JP-v1-7Bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/SakanaAI-EvoLLM-JP-v1-7B-gguf", "project_name": "SakanaAI-EvoLLM-JP-v1-7B-gguf", "downloads": 158, "source": "Hugging Face", "score": -0.08731380161761892, "first_commit": "2024-03-21 13:04:25", "latest_commit": "2024-03-21 14:41:04", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "記事本文からタイトルを生成するモデル SEE: https://qiita.com/sonoisa/items/30876467ad5a8a81821f", "url": "https://huggingface.co/sonoisa/t5-qiita-title-generation", "project_name": "t5-qiita-title-generation", "downloads": 157, "source": "Hugging Face", "score": -0.08732281263559045, "first_commit": "2021-10-17 14:46:56", "latest_commit": "2022-02-21 13:39:01", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration" }, { "description": "Tanuki-8B-dpo-v1.0-GPTQ-8bit 概要 GENIAC 松尾研 LLM開発プロジェクトで開発されたLLMであるweblab-GENIAC/Tanuki-8B-dpo-v1.0のGPTQ 8bit量子化モデルです。", "url": "https://huggingface.co/team-hatakeyama-phase2/Tanuki-8B-dpo-v1.0-GPTQ-8bit", "project_name": "Tanuki-8B-dpo-v1.0-GPTQ-8bit", "downloads": 157, "source": "Hugging Face", "score": -0.08732281263559045, "first_commit": "2024-08-27 17:32:47", "latest_commit": "2024-09-03 09:28:59", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM" }, { "description": "Llama-3-8B-Japanese-Instruct-GGUF Original Model haqishen/Llama-3-8B-Japanese-Instruct Run with Gaianet Prompt template: prompt template: llama-3-chat Context size: chat_ctx_size: 4096 Run with GaiaNet:", "url": "https://huggingface.co/gaianet/Llama-3-8B-Japanese-Instruct-GGUF", "project_name": "Llama-3-8B-Japanese-Instruct-GGUF", "downloads": 154, "source": "Hugging Face", "score": -0.08734984568950503, "first_commit": "2024-05-14 05:38:05", "latest_commit": "2024-05-16 13:44:53", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null }, { "description": "ELYZA-japanese-CodeLlama-7b-gguf ELYZAさんが公開しているELYZA-japanese-CodeLlama-7b-instructのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/ELYZA-japanese-CodeLlama-7b-gguf", "project_name": "ELYZA-japanese-CodeLlama-7b-gguf", "downloads": 154, "source": "Hugging Face", "score": -0.08734984568950503, "first_commit": "2023-11-15 09:53:42", "latest_commit": "2023-11-16 14:28:03", "languages": [], "model_or_dataset": "model", "model_size": 6.74, "model_architectures": null }, { "description": "Fugaku-LLM利用規約 この利用規約(以下「本規約」といいます)は、富士通株式会社、国立研究開発法人理化学研究所、国立大学法人東京工業大学、国立大学法人東北大学、株式会社サイバーエージェント、国立大学法人東海国立大学機構、及び株式会社Kotoba Technologies Japan (以下「開発者」といいます)による、スーパーコンピュータ「富岳」政策対応枠における大規模言語モデル分散並列学習手法の開発の成果物として公開する大規模言語モデル(以下「Fugaku-LLM」といいます)の利用に関する条件を定めるものです。", "url": "https://huggingface.co/Fugaku-LLM/Fugaku-LLM-13B-instruct-gguf", "project_name": "Fugaku-LLM-13B-instruct-gguf", "downloads": 151, "source": "Hugging Face", "score": -0.08737687874341961, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": 13.4, "model_architectures": null }, { "description": "NVIDIA が公開している SteerLM 向けのトライアルデータセット HelpSteer2を日本語に自動翻訳したデータセットになります。", "url": "https://huggingface.co/datasets/kunishou/HelpSteer2-20k-ja", "project_name": "HelpSteer2-20k-ja", "downloads": 150, "source": "Hugging Face", "score": -0.08738588976139113, "first_commit": "2024-06-21 08:09:33", "latest_commit": "2024-06-21 08:44:21", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "NVIDIA が公開している SteerLM 向けのトライアルデータセット HelpSteerを日本語に自動翻訳したデータセットになります。", "url": "https://huggingface.co/datasets/kunishou/HelpSteer-35k-ja", "project_name": "HelpSteer-35k-ja", "downloads": 150, "source": "Hugging Face", "score": -0.08738588976139113, "first_commit": "2024-03-02 16:45:19", "latest_commit": "2024-03-03 10:10:54", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Leia-Swallow-7B LEIA is a training technique for autoregressive LLMs that effectively improves their performance in languages other than English by enhancing cross-lingual knowledge transfer from English to a target language.", "url": "https://huggingface.co/leia-llm/Leia-Swallow-7b", "project_name": "Leia-Swallow-7b", "downloads": 149, "source": "Hugging Face", "score": -0.08739490077936266, "first_commit": "2024-04-17 07:12:28", "latest_commit": "2024-04-17 10:29:56", "languages": [], "model_or_dataset": "model", "model_size": 6.83, "model_architectures": "LlamaForCausalLM" }, { "description": "luke-japanese-large-lite luke-japanese is the Japanese version of LUKE (Language Understanding with Knowledge-based Embeddings), a pre-trained knowledge-enhanced contextualized representation of words and entities.", "url": "https://huggingface.co/studio-ousia/luke-japanese-large-lite", "project_name": "luke-japanese-large-lite", "downloads": 146, "source": "Hugging Face", "score": -0.08742193383327723, "first_commit": "2022-11-07 14:26:40", "latest_commit": "2022-11-09 11:19:36", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LukeForMaskedLM" }, { "description": "COMET-T5 ja Finetuned T5 on ATOMIC ja using a text-to-text language modeling objective.", "url": "https://huggingface.co/nlp-waseda/comet-t5-base-japanese", "project_name": "comet-t5-base-japanese", "downloads": 145, "source": "Hugging Face", "score": -0.08743094485124875, "first_commit": "2022-11-12 15:07:40", "latest_commit": "2023-02-08 09:26:55", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration" }, { "description": "Finetuned Waseda RoBERTa to evaluate the generated answers on JTruthfulQA.", "url": "https://huggingface.co/nlp-waseda/roberta_jtruthfulqa", "project_name": "roberta_jtruthfulqa", "downloads": 145, "source": "Hugging Face", "score": -0.08743094485124875, "first_commit": "2023-12-06 01:33:02", "latest_commit": "2023-12-06 04:31:12", "languages": [], "model_or_dataset": "model", "model_size": 0.337, "model_architectures": "RobertaForSequenceClassification" }, { "description": "JaQuAD is developed to provide a SQuAD-like QA dataset in Japanese.", "url": "https://huggingface.co/datasets/SkelterLabsInc/JaQuAD", "project_name": "JaQuAD", "downloads": 145, "source": "Hugging Face", "score": -0.08743094485124875, "first_commit": "2022-01-26 01:34:38", "latest_commit": "2022-10-25 09:06:40", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Leia-Swallow-13B LEIA is a training technique for autoregressive LLMs that effectively improves their performance in languages other than English by enhancing cross-lingual knowledge transfer from English to a target language.", "url": "https://huggingface.co/leia-llm/Leia-Swallow-13b", "project_name": "Leia-Swallow-13b", "downloads": 144, "source": "Hugging Face", "score": -0.08743995586922028, "first_commit": "2024-04-17 07:32:11", "latest_commit": "2024-04-18 05:21:10", "languages": [], "model_or_dataset": "model", "model_size": 13.1, "model_architectures": "LlamaForCausalLM" }, { "description": "mbpp-ja", "url": "https://huggingface.co/datasets/llm-jp/mbpp-ja", "project_name": "mbpp-ja", "downloads": 144, "source": "Hugging Face", "score": -0.08743995586922028, "first_commit": "2024-04-19 00:26:56", "latest_commit": "2024-04-20 06:26:51", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "このデータセットについて このデータは、日本の官公庁のWebサイトに掲載されている「よくある質問」を手作業で抽出し、インストラクション用のデータセットとしたものです。 ", "url": "https://huggingface.co/datasets/matsuxr/JaGovFaqs-22k", "project_name": "JaGovFaqs-22k", "downloads": 144, "source": "Hugging Face", "score": -0.08743995586922028, "first_commit": "2023-12-31 13:58:41", "latest_commit": "2024-02-29 02:51:20", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Japanese InstructBLIP Alpha Model Details Japanese InstructBLIP Alpha is a vision-language instruction-following model that enables to generate Japanese descriptions for input images and optionally input texts such as questions.", "url": "https://huggingface.co/stabilityai/japanese-instructblip-alpha", "project_name": "japanese-instructblip-alpha", "downloads": 142, "source": "Hugging Face", "score": -0.08745797790516333, "first_commit": "2023-08-16 23:49:58", "latest_commit": "2023-11-17 03:57:41", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "JapaneseInstructBlipAlphaForConditionalGeneration" }, { "description": "JQaRA : Japanese Question Answering with Retrieval Augmentation - 検索拡張(RAG)評価のための日本語 Q&A データセット 高性能な LLM の台頭に伴い、LLM を用いた質疑応答のユースケースが増加しています。", "url": "https://huggingface.co/datasets/hotchpotch/JQaRA", "project_name": "JQaRA", "downloads": 141, "source": "Hugging Face", "score": -0.08746698892313486, "first_commit": "2024-03-03 01:58:34", "latest_commit": "2024-08-10 02:56:05", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "tokyotech-llm-Swallow-MS-7b-instruct-v0.1-gguf tokyotech-llmさんが公開しているSwallow-MS-7b-instruct-v0.1のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/tokyotech-llm-Swallow-MS-7b-instruct-v0.1-gguf", "project_name": "tokyotech-llm-Swallow-MS-7b-instruct-v0.1-gguf", "downloads": 140, "source": "Hugging Face", "score": -0.08747599994110637, "first_commit": "2024-05-02 13:37:22", "latest_commit": "2024-05-03 04:35:34", "languages": [], "model_or_dataset": "model", "model_size": 7.33, "model_architectures": null }, { "description": "Umievo-itr012-Gleipnir-7B-GGUF", "url": "https://huggingface.co/QuantFactory/Umievo-itr012-Gleipnir-7B-GGUF", "project_name": "Umievo-itr012-Gleipnir-7B-GGUF", "downloads": 138, "source": "Hugging Face", "score": -0.08749402197704943, "first_commit": "2024-06-09 03:48:10", "latest_commit": "2024-06-09 13:12:32", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "HODACHI-EZO-Common-9B-gemma-2-it-gguf HODACHIさんが公開しているEZO-Common-9B-gemma-2-itのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/HODACHI-EZO-Common-9B-gemma-2-it-gguf", "project_name": "HODACHI-EZO-Common-9B-gemma-2-it-gguf", "downloads": 137, "source": "Hugging Face", "score": -0.08750303299502095, "first_commit": "2024-07-15 15:42:39", "latest_commit": "2024-07-15 16:20:33", "languages": [], "model_or_dataset": "model", "model_size": 9.24, "model_architectures": null }, { "description": "I'm constantly enhancing these model descriptions to provide you with the most relevant and comprehensive information japanese-stablelm-3b-4e1t-instruct - GGUF Model creator: stabilityai Original model: japanese-stablelm-3b-4e1t-instruct StableLM", "url": "https://huggingface.co/maddes8cht/stabilityai-japanese-stablelm-3b-4e1t-instruct-gguf", "project_name": "stabilityai-japanese-stablelm-3b-4e1t-instruct-gguf", "downloads": 137, "source": "Hugging Face", "score": -0.08750303299502095, "first_commit": "2023-11-16 10:25:20", "latest_commit": "2023-11-16 12:53:33", "languages": [], "model_or_dataset": "model", "model_size": 2.8, "model_architectures": null }, { "description": "This model is traned with guanaco dataset.", "url": "https://huggingface.co/ganchengguang/Yoko-7B-Japanese-v0", "project_name": "Yoko-7B-Japanese-v0", "downloads": 137, "source": "Hugging Face", "score": -0.08750303299502095, "first_commit": "2023-08-09 16:28:38", "latest_commit": "2023-08-10 13:00:34", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM" }, { "description": "What’s this?", "url": "https://huggingface.co/globis-university/deberta-v3-japanese-large", "project_name": "deberta-v3-japanese-large", "downloads": 136, "source": "Hugging Face", "score": -0.08751204401299248, "first_commit": "2023-09-21 16:15:15", "latest_commit": "2024-07-05 05:50:06", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "This repository contains some GGUF quantizations of the VNTL Gemma 2 27B model.", "url": "https://huggingface.co/lmg-anon/vntl-gemma2-27b-gguf", "project_name": "vntl-gemma2-27b-gguf", "downloads": 136, "source": "Hugging Face", "score": -0.08751204401299248, "first_commit": "2024-07-07 00:28:06", "latest_commit": "2024-07-08 16:13:54", "languages": [], "model_or_dataset": "model", "model_size": 27.2, "model_architectures": null }, { "description": "This is for (private) DEMO only.", "url": "https://huggingface.co/Bagus/wav2vec2-xlsr-japanese-speech-emotion-recognition", "project_name": "wav2vec2-xlsr-japanese-speech-emotion-recognition", "downloads": 135, "source": "Hugging Face", "score": -0.087521055030964, "first_commit": "2021-09-22 04:10:36", "latest_commit": "2023-10-19 01:31:17", "languages": [], "model_or_dataset": "model", "model_size": 0.316, "model_architectures": "HubertForSequenceClassification" }, { "description": "ryota39-Phi-3-mini-4k-instruct-dpo-gguf ryota39さんが公開しているPhi-3-mini-4k-instruct-dpoのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/ryota39-Phi-3-mini-4k-instruct-dpo-gguf", "project_name": "ryota39-Phi-3-mini-4k-instruct-dpo-gguf", "downloads": 135, "source": "Hugging Face", "score": -0.087521055030964, "first_commit": "2024-04-29 14:27:31", "latest_commit": "2024-04-29 16:53:45", "languages": [], "model_or_dataset": "model", "model_size": 3.82, "model_architectures": null }, { "description": "※llama.cpp Releases b3428(7/21)", "url": "https://huggingface.co/MCZK/EZO-Humanities-9B-gemma-2-it-GGUF", "project_name": "EZO-Humanities-9B-gemma-2-it-GGUF", "downloads": 135, "source": "Hugging Face", "score": -0.087521055030964, "first_commit": "2024-07-10 22:02:03", "latest_commit": "2024-07-21 18:11:21", "languages": [], "model_or_dataset": "model", "model_size": 9.24, "model_architectures": null }, { "description": "Tanuki-8B-dpo-v1.0-GPTQ-4bit 概要 GENIAC 松尾研 LLM開発プロジェクトで開発されたLLMであるweblab-GENIAC/Tanuki-8B-dpo-v1.0のGPTQ 4bit量子化モデルです。", "url": "https://huggingface.co/team-hatakeyama-phase2/Tanuki-8B-dpo-v1.0-GPTQ-4bit", "project_name": "Tanuki-8B-dpo-v1.0-GPTQ-4bit", "downloads": 134, "source": "Hugging Face", "score": -0.08753006604893553, "first_commit": "2024-08-27 16:17:17", "latest_commit": "2024-09-03 09:29:10", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM" }, { "description": "line-corporation/japanese-large-lm-1.7b-instruction-sft line-corporationさんが公開しているjapanese-large-lm-1.7b-instruction-sftのgguf変換版です。 ", "url": "https://huggingface.co/mmnga/line-corp-japanese-large-lm-1.7b-instruction-sft-gguf", "project_name": "line-corp-japanese-large-lm-1.7b-instruction-sft-gguf", "downloads": 132, "source": "Hugging Face", "score": -0.08754808808487857, "first_commit": "2023-09-03 22:30:23", "latest_commit": "2024-03-24 05:54:56", "languages": [], "model_or_dataset": "model", "model_size": 1.77, "model_architectures": null }, { "description": "HODACHI様の Llama-3.1-8B-EZO-1.1-it をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/Llama-3.1-8B-EZO-1.1-it-GGUF", "project_name": "Llama-3.1-8B-EZO-1.1-it-GGUF", "downloads": 129, "source": "Hugging Face", "score": -0.08757512113879315, "first_commit": "2024-07-31 12:12:01", "latest_commit": "2024-07-31 18:13:59", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null }, { "description": "MaziyarPanahi/japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1-GGUF Model creator: MaziyarPanahi Original model: MaziyarPanahi/japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1 Description MaziyarPanahi/japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1-GGUF contains GGUF format model files for MaziyarPanahi/japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1.", "url": "https://huggingface.co/MaziyarPanahi/japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1-GGUF", "project_name": "japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1-GGUF", "downloads": 128, "source": "Hugging Face", "score": -0.08758413215676468, "first_commit": "2024-01-28 16:13:26", "latest_commit": "2024-01-28 16:24:30", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "MaziyarPanahi/japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1-GGUF Model creator: MaziyarPanahi Original model: MaziyarPanahi/japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1 Description MaziyarPanahi/japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1-GGUF contains GGUF format model files for MaziyarPanahi/japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1.", "url": "https://huggingface.co/MaziyarPanahi/japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1-GGUF", "project_name": "japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1-GGUF", "downloads": 126, "source": "Hugging Face", "score": -0.08760215419270773, "first_commit": "2024-01-26 06:13:55", "latest_commit": "2024-01-26 06:36:22", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "line-corporation/japanese-large-lm-3.6b-instruction-sft line-corporationさんが公開しているjapanese-large-lm-3.6b-instruction-sftのgguf変換版です。 ", "url": "https://huggingface.co/mmnga/line-corp-japanese-large-lm-3.6b-instruction-sft-gguf", "project_name": "line-corp-japanese-large-lm-3.6b-instruction-sft-gguf", "downloads": 126, "source": "Hugging Face", "score": -0.08760215419270773, "first_commit": "2023-09-02 18:01:40", "latest_commit": "2023-09-08 02:52:29", "languages": [], "model_or_dataset": "model", "model_size": 3.71, "model_architectures": null }, { "description": "JaWiki WikipediaのHTML形式のダンプファイルから抽出したテキストデータセットです。 ", "url": "https://huggingface.co/datasets/hpprc/jawiki", "project_name": "jawiki", "downloads": 125, "source": "Hugging Face", "score": -0.08761116521067924, "first_commit": "2024-02-02 06:36:01", "latest_commit": "2024-02-13 15:19:49", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Meta-Llama-3-8B-Instruct-gguf meta-llamaさんが公開しているMeta-Llama-3-8B-Instructのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Meta-Llama-3-8B-Instruct-gguf", "project_name": "Meta-Llama-3-8B-Instruct-gguf", "downloads": 122, "source": "Hugging Face", "score": -0.08763819826459382, "first_commit": "2024-05-12 07:18:00", "latest_commit": "2024-05-12 08:08:38", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null }, { "description": "Llama3-ArrowSE-8B-v0.3-gguf DataPilotさんが公開しているLlama3-ArrowSE-8B-v0.3のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Llama3-ArrowSE-8B-v0.3-gguf", "project_name": "Llama3-ArrowSE-8B-v0.3-gguf", "downloads": 120, "source": "Hugging Face", "score": -0.08765622030053687, "first_commit": "2024-07-07 07:27:12", "latest_commit": "2024-07-07 09:30:16", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "このモデルはluke-japanese-baseをファインチューニングして、MARC-ja(positive or negativeの二値分類)に用いれるようにしたものです。 ", "url": "https://huggingface.co/Mizuiro-sakura/luke-japanese-base-marcja", "project_name": "luke-japanese-base-marcja", "downloads": 119, "source": "Hugging Face", "score": -0.0876652313185084, "first_commit": "2023-03-02 03:57:33", "latest_commit": "2023-07-21 14:10:48", "languages": [], "model_or_dataset": "model", "model_size": 0.279, "model_architectures": "LukeForSequenceClassification" }, { "description": "このモデルはluke-japanese-base-liteをファインチューニングして、Question-Answeringに用いれるようにしたものです。 ", "url": "https://huggingface.co/Mizuiro-sakura/luke-japanese-base-finetuned-QA", "project_name": "luke-japanese-base-finetuned-QA", "downloads": 117, "source": "Hugging Face", "score": -0.08768325335445144, "first_commit": "2023-01-15 23:38:30", "latest_commit": "2023-07-21 14:11:02", "languages": [], "model_or_dataset": "model", "model_size": 0.132, "model_architectures": "LukeForQuestionAnswering" }, { "description": "ChatNTQ JA 7B V1.0 Model Description", "url": "https://huggingface.co/NTQAI/chatntq-ja-7b-v1.0", "project_name": "chatntq-ja-7b-v1.0", "downloads": 117, "source": "Hugging Face", "score": -0.08768325335445144, "first_commit": "2023-12-26 06:22:59", "latest_commit": "2023-12-26 09:22:34", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM" }, { "description": "概要 NHKで定期的に放送されていた『着信御礼!", "url": "https://huggingface.co/datasets/YANS-official/ogiri-keitai", "project_name": "ogiri-keitai", "downloads": 117, "source": "Hugging Face", "score": -0.08768325335445144, "first_commit": "2024-07-20 10:11:36", "latest_commit": "2024-08-30 10:13:20", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Ruri-Reranker: Japanese General Reranker Usage Direct Usage (Sentence Transformers)", "url": "https://huggingface.co/cl-nagoya/ruri-reranker-small", "project_name": "ruri-reranker-small", "downloads": 115, "source": "Hugging Face", "score": -0.08770127539039449, "first_commit": "2024-08-19 12:39:07", "latest_commit": "2024-09-04 08:50:32", "languages": [], "model_or_dataset": "model", "model_size": 0.06870000000000001, "model_architectures": "DistilBertForSequenceClassification" }, { "description": "https://huggingface.co/kotoba-tech/kotoba-whisper-v1.1 上記のモデルを訓練し、アダルト用語を認識できるようにしたものです。", "url": "https://huggingface.co/swdq/Visual-novel-whisper", "project_name": "Visual-novel-whisper", "downloads": 114, "source": "Hugging Face", "score": -0.08771028640836602, "first_commit": "2024-07-24 10:09:29", "latest_commit": "2024-07-24 10:29:47", "languages": [], "model_or_dataset": "model", "model_size": 0.756, "model_architectures": "WhisperForConditionalGeneration" }, { "description": "DataPilot様の ArrowPro-7B-RobinHood をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/ArrowPro-7B-RobinHood-GGUF", "project_name": "ArrowPro-7B-RobinHood-GGUF", "downloads": 112, "source": "Hugging Face", "score": -0.08772830844430907, "first_commit": "2024-05-10 12:03:26", "latest_commit": "2024-05-10 18:14:28", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "mathstral-7B-v0.1-gguf mistralaiさんが公開しているmathstral-7B-v0.1のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/mathstral-7B-v0.1-gguf", "project_name": "mathstral-7B-v0.1-gguf", "downloads": 110, "source": "Hugging Face", "score": -0.08774633048025213, "first_commit": "2024-07-17 17:49:56", "latest_commit": "2024-07-17 18:54:27", "languages": [], "model_or_dataset": "model", "model_size": 7.25, "model_architectures": null }, { "description": "By clicking \"Agree\", you agree to the License Agreement and acknowledge Stability AI's Privacy Policy.", "url": "https://huggingface.co/stabilityai/japanese-stable-diffusion-xl", "project_name": "japanese-stable-diffusion-xl", "downloads": 107, "source": "Hugging Face", "score": -0.08777336353416669, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "Local-Novel-LLM-project様の Assistance をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/Assistance-7B-GGUF", "project_name": "Assistance-7B-GGUF", "downloads": 106, "source": "Hugging Face", "score": -0.08778237455213822, "first_commit": "2024-05-03 12:16:29", "latest_commit": "2024-05-04 07:48:41", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "QuantFactory/shisa-gamma-7b-v1-GGUF", "url": "https://huggingface.co/QuantFactory/shisa-gamma-7b-v1-GGUF", "project_name": "shisa-gamma-7b-v1-GGUF", "downloads": 106, "source": "Hugging Face", "score": -0.08778237455213822, "first_commit": "2024-06-12 17:16:36", "latest_commit": "2024-06-18 06:17:30", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "Japanese-WizardLM2-ChatV-7B-GGUF GGUF conversion of \"Japanese-WizardLM2-ChatV-7B\" This model, Japanese-WizardLM2-ChatV-7B, is based on \"chatntq-ja-7b-v1.0 \", and was created by subtracting \"Mistral-7B-v0.1\" from \"WizardLM-2-7b\" ChatVector was added by a factor of 1.0.", "url": "https://huggingface.co/umiyuki/Japanese-WizardLM2-ChatV-7B-GGUF", "project_name": "Japanese-WizardLM2-ChatV-7B-GGUF", "downloads": 104, "source": "Hugging Face", "score": -0.08780039658808127, "first_commit": "2024-04-16 14:45:30", "latest_commit": "2024-04-17 01:41:16", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "reazonspeech-espnet-next ReazonSpeech is a project to maintain freely-available Japanese audio datasets and ML models.", "url": "https://huggingface.co/reazon-research/reazonspeech-espnet-next", "project_name": "reazonspeech-espnet-next", "downloads": 104, "source": "Hugging Face", "score": -0.08780039658808127, "first_commit": "2023-03-29 07:20:03", "latest_commit": "2023-03-29 17:28:01", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "Umievo-itr012-Gleipnir-7B このモデルは強力な4つの日本語モデルを進化的アルゴリズムで進化的マージしたものです。", "url": "https://huggingface.co/umiyuki/Umievo-itr012-Gleipnir-7B", "project_name": "Umievo-itr012-Gleipnir-7B", "downloads": 103, "source": "Hugging Face", "score": -0.0878094076060528, "first_commit": "2024-05-29 12:32:29", "latest_commit": "2024-05-29 13:51:31", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM" }, { "description": "QuantFactory/llama-3-youko-8b-GGUF", "url": "https://huggingface.co/QuantFactory/llama-3-youko-8b-GGUF", "project_name": "llama-3-youko-8b-GGUF", "downloads": 103, "source": "Hugging Face", "score": -0.0878094076060528, "first_commit": "2024-06-24 05:04:12", "latest_commit": "2024-06-24 06:35:40", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null }, { "description": "deberta-base-japanese-aozora-ud-head Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-aozora-ud-head", "project_name": "deberta-base-japanese-aozora-ud-head", "downloads": 102, "source": "Hugging Face", "score": -0.08781841862402431, "first_commit": "2022-06-15 04:02:27", "latest_commit": "2023-03-04 20:10:16", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForQuestionAnswering" }, { "description": "SakanaAI-EvoLLM-JP-A-v1-7B-gguf SakanaAIさんが公開しているEvoLLM-JP-A-v1-7Bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/SakanaAI-EvoLLM-JP-A-v1-7B-gguf", "project_name": "SakanaAI-EvoLLM-JP-A-v1-7B-gguf", "downloads": 102, "source": "Hugging Face", "score": -0.08781841862402431, "first_commit": "2024-03-21 13:25:41", "latest_commit": "2024-03-21 14:48:28", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "Ninja-v1-128k-gguf Local-Novel-LLM-projectさんが公開しているNinja-v1-128kのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Ninja-v1-128k-gguf", "project_name": "Ninja-v1-128k-gguf", "downloads": 102, "source": "Hugging Face", "score": -0.08781841862402431, "first_commit": "2024-05-01 17:48:06", "latest_commit": "2024-05-04 13:25:20", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "Japanese Anime Speech Dataset V2 日本語はこちら japanese-anime-speech-v2 is an audio-text dataset designed for training automatic speech recognition models.", "url": "https://huggingface.co/datasets/joujiboi/japanese-anime-speech-v2", "project_name": "japanese-anime-speech-v2", "downloads": 102, "source": "Hugging Face", "score": -0.08781841862402431, "first_commit": "2024-06-26 14:18:01", "latest_commit": "2024-07-24 19:06:51", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "[Under Construction]", "url": "https://huggingface.co/datasets/bclavie/mmarco-japanese-hard-negatives", "project_name": "mmarco-japanese-hard-negatives", "downloads": 102, "source": "Hugging Face", "score": -0.08781841862402431, "first_commit": "2023-12-24 13:04:27", "latest_commit": "2023-12-24 18:52:04", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "ElanMT ElanMT-BT-ja-en is a Japanese to English translation model developed by ELAN MITSUA Project / Abstract Engine.", "url": "https://huggingface.co/Mitsua/elan-mt-bt-ja-en", "project_name": "elan-mt-bt-ja-en", "downloads": 101, "source": "Hugging Face", "score": -0.08782742964199584, "first_commit": "2024-05-20 01:56:12", "latest_commit": "2024-05-20 01:56:57", "languages": [], "model_or_dataset": "model", "model_size": 0.0606, "model_architectures": "MarianMTModel" }, { "description": "pfnet-Llama3-Preferred-MedSwallow-70B-gguf pfnetさんが公開しているLlama3-Preferred-MedSwallow-70Bのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/pfnet-Llama3-Preferred-MedSwallow-70B-gguf", "project_name": "pfnet-Llama3-Preferred-MedSwallow-70B-gguf", "downloads": 101, "source": "Hugging Face", "score": -0.08782742964199584, "first_commit": "2024-07-18 15:45:16", "latest_commit": "2024-07-19 09:14:38", "languages": [], "model_or_dataset": "model", "model_size": 70.6, "model_architectures": null }, { "description": "BERT for Sentiment Analysis of Japanese Twitter", "url": "https://huggingface.co/LoneWolfgang/bert-for-japanese-twitter-sentiment", "project_name": "bert-for-japanese-twitter-sentiment", "downloads": 101, "source": "Hugging Face", "score": -0.08782742964199584, "first_commit": "2024-05-13 10:19:52", "latest_commit": "2024-08-09 12:03:25", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForSequenceClassification" }, { "description": "Swallow-8Bは追加の日本語継続事前学習により日本語が大変流暢なLlama-3派生モデルです。", "url": "https://huggingface.co/aixsatoshi/Meta-Llama-3.1-8B-Instruct-plus-Swallow", "project_name": "Meta-Llama-3.1-8B-Instruct-plus-Swallow", "downloads": 99, "source": "Hugging Face", "score": -0.08784545167793889, "first_commit": "2024-07-24 03:10:38", "latest_commit": "2024-07-24 04:03:21", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM" }, { "description": "electra-base-cyberbullying This is a BERT Base model for the Japanese language finetuned for automatic cyberbullying detection.", "url": "https://huggingface.co/kit-nlp/bert-base-japanese-sentiment-cyberbullying", "project_name": "bert-base-japanese-sentiment-cyberbullying", "downloads": 98, "source": "Hugging Face", "score": -0.08785446269591042, "first_commit": "2022-09-09 02:16:34", "latest_commit": "2022-11-01 07:18:05", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification" }, { "description": "alabnii/jmedroberta-base-manbyo-wordpiece-vocab50000 Model description This is a Japanese RoBERTa base model pre-trained on academic articles in medical sciences collected by Japan Science and Technology Agency (JST).", "url": "https://huggingface.co/alabnii/jmedroberta-base-manbyo-wordpiece-vocab50000", "project_name": "jmedroberta-base-manbyo-wordpiece-vocab50000", "downloads": 96, "source": "Hugging Face", "score": -0.08787248473185347, "first_commit": "2022-12-22 17:19:15", "latest_commit": "2023-03-08 01:47:12", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM" }, { "description": "このモデルはdeberta-v2-base-japaneseをファインチューニングしてQAタスクに用いれるようにしたものです。 ", "url": "https://huggingface.co/Mizuiro-sakura/deberta-v2-base-japanese-finetuned-QAe", "project_name": "deberta-v2-base-japanese-finetuned-QAe", "downloads": 95, "source": "Hugging Face", "score": -0.087881495749825, "first_commit": "2023-01-09 11:59:13", "latest_commit": "2023-03-27 02:43:35", "languages": [], "model_or_dataset": "model", "model_size": 0.112, "model_architectures": "DebertaV2ForQuestionAnswering" }, { "description": "回答と回答が出てくるパラグラフを与えると質問文を生成するモデル SEE: https://github.com/sonoisa/deep-question-generation 本モデルの作成ステップ概要 SQuAD 1.1を日本語に機械翻訳し、不正なデータをクレンジング(有効なデータは約半分)。", "url": "https://huggingface.co/sonoisa/t5-base-japanese-question-generation", "project_name": "t5-base-japanese-question-generation", "downloads": 94, "source": "Hugging Face", "score": -0.08789050676779651, "first_commit": "2021-04-03 14:08:55", "latest_commit": "2022-03-11 02:50:33", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration" }, { "description": "Cross-Encoder for Natural Language Inference(NLI) for Japanese Considering the results of the JNLI evaluation result, we recommend using akiFQC/bert-base-japanese-v3_nli-jsnli-jnli-jsick for natural language inference in Japanese.", "url": "https://huggingface.co/akiFQC/bert-base-japanese-v3_nli-jsnli", "project_name": "bert-base-japanese-v3_nli-jsnli", "downloads": 93, "source": "Hugging Face", "score": -0.08789951778576804, "first_commit": "2024-04-11 05:38:09", "latest_commit": "2024-04-26 06:27:05", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForSequenceClassification" }, { "description": "line-corporation/japanese-large-lm-1.7b line-corporationさんが公開しているjapanese-large-lm-1.7bのgguf変換版です。 ", "url": "https://huggingface.co/mmnga/line-corp-japanese-large-lm-1.7b-gguf", "project_name": "line-corp-japanese-large-lm-1.7b-gguf", "downloads": 93, "source": "Hugging Face", "score": -0.08789951778576804, "first_commit": "2023-09-03 22:35:34", "latest_commit": "2024-03-24 05:54:30", "languages": [], "model_or_dataset": "model", "model_size": 1.77, "model_architectures": null }, { "description": "埋め込みモデルの学習、評価のためのクラスタリングデータセットです。 ", "url": "https://huggingface.co/datasets/oshizo/ASRClustering-ja", "project_name": "ASRClustering-ja", "downloads": 93, "source": "Hugging Face", "score": -0.08789951778576804, "first_commit": "2024-06-22 12:12:34", "latest_commit": "2024-06-23 15:35:03", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "This dataset was created by automatically translating \"OpenAssistant/oasst1\" into Japanese.", "url": "https://huggingface.co/datasets/kunishou/oasst1-89k-ja", "project_name": "oasst1-89k-ja", "downloads": 93, "source": "Hugging Face", "score": -0.08789951778576804, "first_commit": "2023-05-06 09:12:30", "latest_commit": "2024-04-01 17:15:31", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "「LLM-jp-3 172B beta1」利用規約 この利用規約(以下「本規約」といいます)は、大学共同利用機関法人 情報・システム研究機構 国立情報学研究所(以下「提供者」といいます)による開発の成果物として公開する大規模言語モデル「LLM-jp-3 172B beta1」(以下「本プログラム」といいます)の利用に関する条件を定めるものです。", "url": "https://huggingface.co/llm-jp/llm-jp-3-172b-beta1-instruct", "project_name": "llm-jp-3-172b-beta1-instruct", "downloads": 91, "source": "Hugging Face", "score": -0.08791753982171109, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": 172.0, "model_architectures": null }, { "description": "モデル概要 このモデルは、 sonoisa/sentence-luke-japanese-base-lite をSNS上のコメントに人手で攻撃性評価を行ったデータセットでFine-tuningすることで作成しました。 ", "url": "https://huggingface.co/TomokiFujihara/luke-japanese-base-lite-offensiveness-estimation", "project_name": "luke-japanese-base-lite-offensiveness-estimation", "downloads": 91, "source": "Hugging Face", "score": -0.08791753982171109, "first_commit": "2023-12-08 03:20:14", "latest_commit": "2024-03-24 12:35:36", "languages": [], "model_or_dataset": "model", "model_size": 0.133, "model_architectures": "OffensivenessEstimationModel" }, { "description": "Our Models Vecteus Ninja-v1 Ninja-v1-NSFW Ninja-v1-128k Ninja-v1-NSFW-128k Model Card for VecTeus-v1.0 The Mistral-7B--based Large Language Model (LLM) is an noveldataset fine-tuned version of the Mistral-7B-v0.1 VecTeus has the following changes compared to Mistral-7B-v0.1.", "url": "https://huggingface.co/Local-Novel-LLM-project/Vecteus-v1", "project_name": "Vecteus-v1", "downloads": 90, "source": "Hugging Face", "score": -0.08792655083968262, "first_commit": "2024-05-01 02:08:01", "latest_commit": "2024-05-04 04:07:22", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM" }, { "description": "ryota39様の Tora-7B-v0.1 をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/Tora-7B-v0.1-GGUF", "project_name": "Tora-7B-v0.1-GGUF", "downloads": 90, "source": "Hugging Face", "score": -0.08792655083968262, "first_commit": "2024-05-07 11:24:35", "latest_commit": "2024-06-15 03:16:21", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "Genji-JP 6B Please check our blog post for more details, samples, evaluations and more: Blogpost Model Description Genji-JP 6B is a model finetuned on our Japanese storytelling dataset based on EleutherAI's GPT-J 6B model.", "url": "https://huggingface.co/NovelAI/genji-jp", "project_name": "genji-jp", "downloads": 90, "source": "Hugging Face", "score": -0.08792655083968262, "first_commit": "2021-11-03 15:07:47", "latest_commit": "2022-08-09 17:36:02", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTJForCausalLM" }, { "description": "Ninja-v1-RP-expressive-GGUF 概要 Aratako/Oumuamua-7b-RPの量子化済みGGUF版です。", "url": "https://huggingface.co/Aratako/Oumuamua-7b-RP-GGUF", "project_name": "Oumuamua-7b-RP-GGUF", "downloads": 89, "source": "Hugging Face", "score": -0.08793556185765414, "first_commit": "2024-06-23 13:00:02", "latest_commit": "2024-06-23 14:45:14", "languages": [], "model_or_dataset": "model", "model_size": 7.33, "model_architectures": null }, { "description": "GitHub リポジトリ singletongue/wikipedia-utils で公開されているデータセットを利用しています。 ", "url": "https://huggingface.co/datasets/llm-book/jawiki-sentences", "project_name": "jawiki-sentences", "downloads": 89, "source": "Hugging Face", "score": -0.08793556185765414, "first_commit": "2023-06-03 03:02:08", "latest_commit": "2023-10-25 15:22:05", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "One more step before getting this model.", "url": "https://huggingface.co/rinna/japanese-stable-diffusion", "project_name": "japanese-stable-diffusion", "downloads": 87, "source": "Hugging Face", "score": -0.08795358389359718, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "c4ai-command-r-v01-japanese-instruct-GGUF 概要 Aratako/c4ai-command-r-v01-japanese-instructの量子化済みGGUF版です。", "url": "https://huggingface.co/Aratako/c4ai-command-r-v01-japanese-instruct-GGUF", "project_name": "c4ai-command-r-v01-japanese-instruct-GGUF", "downloads": 87, "source": "Hugging Face", "score": -0.08795358389359718, "first_commit": "2024-04-05 17:10:51", "latest_commit": "2024-04-07 03:19:34", "languages": [], "model_or_dataset": "model", "model_size": 35.0, "model_architectures": null }, { "description": "概要 このモデルはllama3.1-8B-instructをもとに日本語性能を高めることを目的にMergekit&ファインチューニングを用いて作成されました。 ", "url": "https://huggingface.co/DataPilot/Llama3.1-ArrowSE-v0.4", "project_name": "Llama3.1-ArrowSE-v0.4", "downloads": 86, "source": "Hugging Face", "score": -0.08796259491156871, "first_commit": "2024-07-24 07:37:16", "latest_commit": "2024-07-24 12:00:46", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM" }, { "description": "pfnet-nekomata-14b-pfn-qfin-gguf pfnetさんが公開しているnekomata-14b-pfn-qfinのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/pfnet-nekomata-14b-pfn-qfin-gguf", "project_name": "pfnet-nekomata-14b-pfn-qfin-gguf", "downloads": 86, "source": "Hugging Face", "score": -0.08796259491156871, "first_commit": "2024-04-24 12:58:10", "latest_commit": "2024-04-24 14:46:15", "languages": [], "model_or_dataset": "model", "model_size": 14.2, "model_architectures": null }, { "description": "Qwen2.5-ja-zh", "url": "https://huggingface.co/hakutaku/qwen2.5-ja-zh", "project_name": "qwen2.5-ja-zh", "downloads": 85, "source": "Hugging Face", "score": -0.08797160592954024, "first_commit": "2024-09-19 14:15:49", "latest_commit": "2024-09-20 07:45:25", "languages": [], "model_or_dataset": "model", "model_size": 7.62, "model_architectures": "Qwen2ForCausalLM" }, { "description": "Heron BLIP Japanese StableLM", "url": "https://huggingface.co/turing-motors/heron-chat-blip-ja-stablelm-base-7b-v1", "project_name": "heron-chat-blip-ja-stablelm-base-7b-v1", "downloads": 85, "source": "Hugging Face", "score": -0.08797160592954024, "first_commit": "2024-02-20 11:32:57", "latest_commit": "2024-02-27 13:57:20", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "VideoBlipForConditionalGeneration" }, { "description": "whisper-large-v3-japanese-4k-steps This model is a fine-tuned version of openai/whisper-large-v3 on the Common Voice 16.1 dataset.", "url": "https://huggingface.co/drewschaub/whisper-large-v3-japanese-4k-steps", "project_name": "whisper-large-v3-japanese-4k-steps", "downloads": 85, "source": "Hugging Face", "score": -0.08797160592954024, "first_commit": "2024-02-17 01:01:51", "latest_commit": "2024-02-18 01:31:35", "languages": [], "model_or_dataset": "model", "model_size": 1.54, "model_architectures": "WhisperForConditionalGeneration" }, { "description": "range3/wiki40b-ja This dataset consists of three parquet files from the wiki40b dataset with only Japanese data extracted.", "url": "https://huggingface.co/datasets/range3/wiki40b-ja", "project_name": "wiki40b-ja", "downloads": 85, "source": "Hugging Face", "score": -0.08797160592954024, "first_commit": "2023-02-04 04:54:17", "latest_commit": "2023-02-04 05:44:21", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Ninja-v1-RP-expressive-GGUF 概要 Aratako/Ninja-v1-RP-expressiveの量子化済みGGUF版です。", "url": "https://huggingface.co/Aratako/Ninja-v1-RP-expressive-GGUF", "project_name": "Ninja-v1-RP-expressive-GGUF", "downloads": 83, "source": "Hugging Face", "score": -0.08798962796548329, "first_commit": "2024-05-21 12:16:42", "latest_commit": "2024-05-24 15:11:25", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "[EZO model card]", "url": "https://huggingface.co/AXCXEPT/EZO-InternVL2-26B", "project_name": "EZO-InternVL2-26B", "downloads": 83, "source": "Hugging Face", "score": -0.08798962796548329, "first_commit": "2024-08-19 08:03:55", "latest_commit": "2024-08-23 10:56:47", "languages": [], "model_or_dataset": "model", "model_size": 25.5, "model_architectures": "InternVLChatModel" }, { "description": "zenz-v1 Checkpoints zenz-v1 is a language model specialized for kana-kanji conversion tasks based on the GPT-2 architecture.", "url": "https://huggingface.co/Miwa-Keita/zenz-v1-checkpoints", "project_name": "zenz-v1-checkpoints", "downloads": 83, "source": "Hugging Face", "score": -0.08798962796548329, "first_commit": "2024-06-28 14:26:33", "latest_commit": "2024-06-28 14:53:43", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel" }, { "description": "By clicking \"Agree\", you agree to the License Agreement and acknowledge Stability AI's Privacy Policy.", "url": "https://huggingface.co/stabilityai/japanese-stablelm-2-base-1_6b", "project_name": "japanese-stablelm-2-base-1_6b", "downloads": 83, "source": "Hugging Face", "score": -0.08798962796548329, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": 1.64, "model_architectures": null }, { "description": "Aerner LM-v2 事前学習から全部日本語で学習させたモデルのバージョン2です。 ", "url": "https://huggingface.co/aerner/lm-v2", "project_name": "lm-v2", "downloads": 81, "source": "Hugging Face", "score": -0.08800765000142634, "first_commit": "2023-06-09 15:19:12", "latest_commit": "2023-06-09 16:08:47", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "OpenLlamaForCausalLM" }, { "description": "Model card for model ID", "url": "https://huggingface.co/retrieva-jp/t5-base-medium", "project_name": "t5-base-medium", "downloads": 81, "source": "Hugging Face", "score": -0.08800765000142634, "first_commit": "2023-04-26 08:27:09", "latest_commit": "2023-05-10 10:00:12", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration" }, { "description": "Fine-tuned Japanese Whisper model for speech recognition using whisper-base Fine-tuned openai/whisper-base on Japanese using Common Voice, JVS and JSUT.", "url": "https://huggingface.co/Ivydata/whisper-base-japanese", "project_name": "whisper-base-japanese", "downloads": 80, "source": "Hugging Face", "score": -0.08801666101939787, "first_commit": "2023-05-17 04:36:41", "latest_commit": "2023-06-08 00:17:50", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "WhisperForConditionalGeneration" }, { "description": "Kotoba-Speech-v0.1 Kotoba-Speech v0.1 is a 1.2B Transformer-based speech generative model.", "url": "https://huggingface.co/kotoba-tech/kotoba-speech-v0.1", "project_name": "kotoba-speech-v0.1", "downloads": 80, "source": "Hugging Face", "score": -0.08801666101939787, "first_commit": "2024-03-14 01:21:58", "latest_commit": "2024-04-17 07:54:48", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "Ninja-v1-RP-expressive-GGUF 概要 Aratako/Ninja-v1-RP-expressive-v2の量子化済みGGUF版です。", "url": "https://huggingface.co/Aratako/Ninja-v1-RP-expressive-v2-GGUF", "project_name": "Ninja-v1-RP-expressive-v2-GGUF", "downloads": 80, "source": "Hugging Face", "score": -0.08801666101939787, "first_commit": "2024-05-26 06:09:57", "latest_commit": "2024-05-26 15:22:01", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "albert-base-japanese-v1-with-japanese 日本語事前学習済みALBERTモデルですこのモデルではTokenizerにBertJapaneseTokenizerクラスを利用していますalbert-base-japanese-v1よりトークナイズ処理が楽になっています How to use ファインチューニング このモデルはPreTrainedモデルです基本的には各種タスク用にファインチューニングして使用されることを想定しています Fill-Mask for PyTorch from transformers import ( AutoModelForMaskedLM, AutoTokenizer ) tokenizer = AutoTokenizer.from_pretrained(\"ken11/albert-base-japanese-v1-with-japanese-tokenizer\")", "url": "https://huggingface.co/ken11/albert-base-japanese-v1-with-japanese-tokenizer", "project_name": "albert-base-japanese-v1-with-japanese-tokenizer", "downloads": 80, "source": "Hugging Face", "score": -0.08801666101939787, "first_commit": "2022-04-20 16:34:22", "latest_commit": "2022-04-21 02:28:13", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "AlbertForMaskedLM" }, { "description": "roberta-long-japanese (jumanpp + sentencepiece, mC4 Japanese)", "url": "https://huggingface.co/megagonlabs/roberta-long-japanese", "project_name": "roberta-long-japanese", "downloads": 79, "source": "Hugging Face", "score": -0.08802567203736938, "first_commit": "2022-09-04 14:31:06", "latest_commit": "2022-10-04 23:36:27", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForMaskedLM" }, { "description": "tokyotech-llm-Swallow-7b-instruct-v0.1-gguf tokyotech-llmさんが公開しているSwallow-7b-instruct-v0.1のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/tokyotech-llm-Swallow-7b-instruct-v0.1-gguf", "project_name": "tokyotech-llm-Swallow-7b-instruct-v0.1-gguf", "downloads": 79, "source": "Hugging Face", "score": -0.08802567203736938, "first_commit": "2024-05-03 04:09:27", "latest_commit": "2024-05-03 04:53:43", "languages": [], "model_or_dataset": "model", "model_size": 6.83, "model_architectures": null }, { "description": "HODACHI様の EZO-Common-T2-2B-gemma-2-it をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/EZO-Common-T2-2B-gemma-2-it-GGUF", "project_name": "EZO-Common-T2-2B-gemma-2-it-GGUF", "downloads": 79, "source": "Hugging Face", "score": -0.08802567203736938, "first_commit": "2024-08-01 11:38:48", "latest_commit": "2024-08-01 13:42:20", "languages": [], "model_or_dataset": "model", "model_size": 2.61, "model_architectures": null }, { "description": "Summary This is a text classifier for assigning a JLPT level.", "url": "https://huggingface.co/bennexx/cl-tohoku-bert-base-japanese-v3-jlpt-classifier", "project_name": "cl-tohoku-bert-base-japanese-v3-jlpt-classifier", "downloads": 77, "source": "Hugging Face", "score": -0.08804369407331245, "first_commit": "2024-01-19 00:32:15", "latest_commit": "2024-07-10 13:41:08", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification" }, { "description": "Dataset Summary RealPersonaChat は,話者本人のペルソナと性格特性を含む,約14,000件の日本語雑談対話からなるコーパスです.", "url": "https://huggingface.co/datasets/nu-dialogue/real-persona-chat", "project_name": "real-persona-chat", "downloads": 77, "source": "Hugging Face", "score": -0.08804369407331245, "first_commit": "2024-03-09 22:52:22", "latest_commit": "2024-03-13 10:26:42", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Japanese E5 Mixtral 7B Slerp GGUF GGUF conversion of oshizo/japanese-e5-mistral-7b_slerp Avaiable formats: Q2_K.gguf Q3_K.gguf Q4_K.gguf Q5_K.gguf", "url": "https://huggingface.co/mm/japanese-e5-mistral-7b_slerp_gguf", "project_name": "japanese-e5-mistral-7b_slerp_gguf", "downloads": 76, "source": "Hugging Face", "score": -0.08805270509128396, "first_commit": "2024-06-09 08:34:37", "latest_commit": "2024-06-14 16:12:17", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "AIBunCho/japanese-novel-gpt-j-6b AI BunChoで利用しているモデルです。", "url": "https://huggingface.co/AIBunCho/japanese-novel-gpt-j-6b", "project_name": "japanese-novel-gpt-j-6b", "downloads": 75, "source": "Hugging Face", "score": -0.08806171610925549, "first_commit": "2023-08-11 00:52:32", "latest_commit": "2023-08-26 04:20:51", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTJForCausalLM" }, { "description": "Model Card for Japanese character-level GPT-2 Large Model description", "url": "https://huggingface.co/ku-nlp/gpt2-large-japanese-char", "project_name": "gpt2-large-japanese-char", "downloads": 75, "source": "Hugging Face", "score": -0.08806171610925549, "first_commit": "2023-12-27 11:18:45", "latest_commit": "2023-12-27 12:07:30", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel" }, { "description": "roberta-base-japanese-char-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-base-japanese-char-luw-upos", "project_name": "roberta-base-japanese-char-luw-upos", "downloads": 75, "source": "Hugging Face", "score": -0.08806171610925549, "first_commit": "2021-12-28 05:01:56", "latest_commit": "2024-08-20 18:21:15", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForTokenClassification" }, { "description": "このデータセットはkunishou氏が公開している\"databricks-dolly-15k\"を日本語訳したkunishou/databricks-dolly-15k-jaデータセットの語尾をArrowPro-7B-KUJIRAを用いて「にゃん!", "url": "https://huggingface.co/datasets/DataPilot/databricks-dolly-15k-Nyan-ja", "project_name": "databricks-dolly-15k-Nyan-ja", "downloads": 75, "source": "Hugging Face", "score": -0.08806171610925549, "first_commit": "2024-05-18 13:03:25", "latest_commit": "2024-05-19 10:24:16", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "This is a BERT Base model for emotion analysis in Japanese additionally fine-tuned for emotion detection and classification.", "url": "https://huggingface.co/alter-wang/bert-base-japanese-emotion-lily", "project_name": "bert-base-japanese-emotion-lily", "downloads": 74, "source": "Hugging Face", "score": -0.08807072712722701, "first_commit": "2024-04-25 06:05:51", "latest_commit": "2024-06-17 01:44:16", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForSequenceClassification" }, { "description": "GitHub リポジトリ cl-tohoku/quiz-datasets で公開されているデータセットを利用しています。 ", "url": "https://huggingface.co/datasets/llm-book/aio-retriever", "project_name": "aio-retriever", "downloads": 72, "source": "Hugging Face", "score": -0.08808874916317007, "first_commit": "2023-07-04 04:53:47", "latest_commit": "2023-10-25 15:31:08", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "DataPilot様の ArrowPro-7B-KUJIRA をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/ArrowPro-7B-KUJIRA-GGUF", "project_name": "ArrowPro-7B-KUJIRA-GGUF", "downloads": 71, "source": "Hugging Face", "score": -0.08809776018114159, "first_commit": "2024-05-09 13:34:05", "latest_commit": "2024-05-09 23:32:52", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "ELYZA-japanese-CodeLlama-7b-instruct-GPTQ-calib-ja-1k elyzaさんが公開しているELYZA-japanese-CodeLlama-7b-instructを 日本語のキャリブレーションセットで生成したGPTQモデルになります。", "url": "https://huggingface.co/mmnga/ELYZA-japanese-CodeLlama-7b-instruct-GPTQ-calib-ja-1k", "project_name": "ELYZA-japanese-CodeLlama-7b-instruct-GPTQ-calib-ja-1k", "downloads": 71, "source": "Hugging Face", "score": -0.08809776018114159, "first_commit": "2023-11-15 16:33:25", "latest_commit": "2023-11-16 14:28:39", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM" }, { "description": "HODACHI-EZO-Humanities-9B-gemma-2-it-gguf HODACHIさんが公開しているEZO-Humanities-9B-gemma-2-itのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/HODACHI-EZO-Humanities-9B-gemma-2-it-gguf", "project_name": "HODACHI-EZO-Humanities-9B-gemma-2-it-gguf", "downloads": 70, "source": "Hugging Face", "score": -0.08810677119911312, "first_commit": "2024-07-15 15:43:00", "latest_commit": "2024-07-15 17:01:09", "languages": [], "model_or_dataset": "model", "model_size": 9.24, "model_architectures": null }, { "description": "RoBERTa base Japanese - JaQuAD Description A Japanese Question Answering model fine-tuned on JaQuAD.", "url": "https://huggingface.co/ybelkada/japanese-roberta-question-answering", "project_name": "japanese-roberta-question-answering", "downloads": 70, "source": "Hugging Face", "score": -0.08810677119911312, "first_commit": "2022-04-08 08:52:22", "latest_commit": "2022-04-08 11:38:39", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForQuestionAnswering" }, { "description": "LLM-jp Toxicity Dataset 日本語有害文書データセット「LLM-jp Toxicity Dataset」 See https://gitlab.llm-jp.nii.ac.jp/datasets/llm-jp-toxicity-dataset", "url": "https://huggingface.co/datasets/p1atdev/LLM-jp-Toxicity-Dataset", "project_name": "LLM-jp-Toxicity-Dataset", "downloads": 69, "source": "Hugging Face", "score": -0.08811578221708463, "first_commit": "2024-08-07 07:11:08", "latest_commit": "2024-08-07 07:21:07", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "GGUF conversion of NTQAI/chatntq-ja-7b-v1.0 ChatNTQ-JA-7b-v1.0 is a Japanese chat fine-tuned model built on top of the stabilityai/japanese-stablelm-base-gamma-7b, which is originally based on Mistral 7B v0.1.", "url": "https://huggingface.co/TFMC/ChatNTQ-JA-7b-v1.0-GGUF", "project_name": "ChatNTQ-JA-7b-v1.0-GGUF", "downloads": 68, "source": "Hugging Face", "score": -0.08812479323505616, "first_commit": "2024-04-03 22:42:14", "latest_commit": "2024-04-04 23:10:54", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "bert-japanese-ner このモデルは日本語の固有表現抽出タスクを目的として、京都大学 黒橋・褚・村脇研究室が公開しているBERT日本語Pretrainedモデルをベースにストックマーク株式会社が公開しているner-wikipedia-datasetでファインチューニングしたものです。 ", "url": "https://huggingface.co/ken11/bert-japanese-ner", "project_name": "bert-japanese-ner", "downloads": 67, "source": "Hugging Face", "score": -0.08813380425302769, "first_commit": "2021-11-13 16:28:23", "latest_commit": "2021-11-14 02:34:01", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForTokenClassification" }, { "description": "Tanuki-8x8B-dpo-v1.0-GPTQ-8bit 概要 GENIAC 松尾研 LLM開発プロジェクトで開発されたLLMであるweblab-GENIAC/Tanuki-8x8B-dpo-v1.0のGPTQ 8bit量子化モデルです。", "url": "https://huggingface.co/team-hatakeyama-phase2/Tanuki-8x8B-dpo-v1.0-GPTQ-8bit", "project_name": "Tanuki-8x8B-dpo-v1.0-GPTQ-8bit", "downloads": 67, "source": "Hugging Face", "score": -0.08813380425302769, "first_commit": "2024-08-28 02:30:27", "latest_commit": "2024-09-03 09:26:02", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "TanukiForCausalLM" }, { "description": "Convert from: drewschaub/whisper-large-v3-japanese-4k-steps Whisper large-v3 model for CTranslate2 This repository contains the conversion of drewschaub/whisper-large-v3-japanese-4k-steps to the CTranslate2 model format.", "url": "https://huggingface.co/JhonVanced/whisper-large-v3-japanese-4k-steps-ct2", "project_name": "whisper-large-v3-japanese-4k-steps-ct2", "downloads": 64, "source": "Hugging Face", "score": -0.08816083730694226, "first_commit": "2024-02-20 13:41:17", "latest_commit": "2024-02-22 01:11:59", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "Orion-14B 🌐English | 🇨", "url": "https://huggingface.co/OrionStarAI/Orion-14B-Chat-Int4", "project_name": "Orion-14B-Chat-Int4", "downloads": 64, "source": "Hugging Face", "score": -0.08816083730694226, "first_commit": "2024-01-18 09:54:07", "latest_commit": "2024-03-26 10:04:46", "languages": [], "model_or_dataset": "model", "model_size": 2.69, "model_architectures": "OrionForCausalLM" }, { "description": "alfredplpl-Llama-3-8B-Instruct-Ja-gguf alfredplplさんが公開しているLlama-3-8B-Instruct-Jaのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/alfredplpl-Llama-3-8B-Instruct-Ja-gguf", "project_name": "alfredplpl-Llama-3-8B-Instruct-Ja-gguf", "downloads": 63, "source": "Hugging Face", "score": -0.08816984832491379, "first_commit": "2024-04-23 14:18:57", "latest_commit": "2024-04-23 15:24:47", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null }, { "description": "Model Card for Model ID Fine tunned ASR model from distil-whisper/distil-large-v2.", "url": "https://huggingface.co/spow12/Visual-novel-transcriptor", "project_name": "Visual-novel-transcriptor", "downloads": 63, "source": "Hugging Face", "score": -0.08816984832491379, "first_commit": "2024-04-15 01:43:08", "latest_commit": "2024-08-12 12:39:52", "languages": [], "model_or_dataset": "model", "model_size": 0.756, "model_architectures": "WhisperForConditionalGeneration" }, { "description": "更新履歴 2023年5月7日 「oasst1-89k-ja」データセットを追加して対話システムに対応しました。", "url": "https://huggingface.co/inu-ai/dolly-japanese-gpt-1b", "project_name": "dolly-japanese-gpt-1b", "downloads": 62, "source": "Hugging Face", "score": -0.08817885934288532, "first_commit": "2023-04-13 22:46:07", "latest_commit": "2023-08-01 07:55:27", "languages": [], "model_or_dataset": "model", "model_size": 1.33, "model_architectures": "GPT2LMHeadModel" }, { "description": "luke-japanese-base-lite-xlm-roberta studio-ousia/luke-japanese-base-liteの重みの名前をXLMRoberta形式に置き換え、XLMRobertaモデルとして扱えるようにした物です。 ", "url": "https://huggingface.co/hotchpotch/luke-japanese-base-lite-xlm-roberta", "project_name": "luke-japanese-base-lite-xlm-roberta", "downloads": 62, "source": "Hugging Face", "score": -0.08817885934288532, "first_commit": "2024-09-09 18:18:38", "latest_commit": "2024-09-09 18:33:44", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "XLMRobertaForMaskedLM" }, { "description": "BERT base Japanese model This repository contains a BERT base model trained on Japanese Wikipedia dataset.", "url": "https://huggingface.co/colorfulscoop/bert-base-ja", "project_name": "bert-base-ja", "downloads": 61, "source": "Hugging Face", "score": -0.08818787036085683, "first_commit": "2021-07-30 10:11:35", "latest_commit": "2021-09-23 15:46:05", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForPreTraining" }, { "description": "QuantFactory/Llama3-ArrowSE-8B-v0.3-GGUF This is quantized version of DataPilot/Llama3-ArrowSE-8B-v0.3 created using llama.cpp Original Model Card 概要 elyza/Llama-3-ELYZA-JP-8Bを元にchat vectorを用いて改良しAItuberに特化させました。 ", "url": "https://huggingface.co/QuantFactory/Llama3-ArrowSE-8B-v0.3-GGUF", "project_name": "Llama3-ArrowSE-8B-v0.3-GGUF", "downloads": 58, "source": "Hugging Face", "score": -0.08821490341477141, "first_commit": "2024-07-28 15:51:47", "latest_commit": "2024-07-28 16:29:51", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null }, { "description": "Japanese stopwords for nagisa", "url": "https://huggingface.co/datasets/taishi-i/nagisa_stopwords", "project_name": "nagisa_stopwords", "downloads": 58, "source": "Hugging Face", "score": -0.08821490341477141, "first_commit": "2023-08-06 17:10:10", "latest_commit": "2023-08-07 02:58:31", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "bert-large-japanese-unidic-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/bert-large-japanese-unidic-luw-upos", "project_name": "bert-large-japanese-unidic-luw-upos", "downloads": 56, "source": "Hugging Face", "score": -0.08823292545071446, "first_commit": "2022-02-13 01:00:41", "latest_commit": "2023-11-05 18:44:20", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForTokenClassification" }, { "description": "c4ai-command-r-v01-japanese-instruct GGUF版はこちら/Click here for the GGUF version 概要 CohereForAI/c4ai-command-r-v01を、ichikara-instructionを使って追加で日本語インストラクションチューニングを施したモデルです。 ", "url": "https://huggingface.co/Aratako/c4ai-command-r-v01-japanese-instruct", "project_name": "c4ai-command-r-v01-japanese-instruct", "downloads": 56, "source": "Hugging Face", "score": -0.08823292545071446, "first_commit": "2024-04-04 03:56:52", "latest_commit": "2024-04-07 15:18:37", "languages": [], "model_or_dataset": "model", "model_size": 35.0, "model_architectures": "CohereForCausalLM" }, { "description": "To load a language pair which isn't part of the config, all you need to do is specify the language code as pairs.", "url": "https://huggingface.co/datasets/Helsinki-NLP/tatoeba", "project_name": "tatoeba", "downloads": 56, "source": "Hugging Face", "score": -0.08823292545071446, "first_commit": "2022-01-25 16:36:30", "latest_commit": "2024-01-18 11:16:48", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "electra-base-cyberbullying This is an ELECTRA Small model for the Japanese language finetuned for automatic cyberbullying detection.", "url": "https://huggingface.co/kit-nlp/electra-small-japanese-discriminator-cyberbullying", "project_name": "electra-small-japanese-discriminator-cyberbullying", "downloads": 55, "source": "Hugging Face", "score": -0.08824193646868599, "first_commit": "2022-09-09 02:43:59", "latest_commit": "2022-11-01 07:14:15", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ElectraForSequenceClassification" }, { "description": "DataPilot様の Llama3-ArrowSE-8B-v0.3 をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/Llama3-ArrowSE-8B-v0.3-GGUF", "project_name": "Llama3-ArrowSE-8B-v0.3-GGUF", "downloads": 55, "source": "Hugging Face", "score": -0.08824193646868599, "first_commit": "2024-07-07 07:53:32", "latest_commit": "2024-07-07 13:40:26", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null }, { "description": "This is a Japanese+English sentence-BERT model.", "url": "https://huggingface.co/sonoisa/sentence-bert-base-ja-en-mean-tokens", "project_name": "sentence-bert-base-ja-en-mean-tokens", "downloads": 54, "source": "Hugging Face", "score": -0.08825094748665752, "first_commit": "2022-05-08 03:05:08", "latest_commit": "2022-05-08 03:29:28", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertModel" }, { "description": "BERT base Japanese - JaQuAD Description A Japanese Question Answering model fine-tuned on JaQuAD.", "url": "https://huggingface.co/SkelterLabsInc/bert-base-japanese-jaquad", "project_name": "bert-base-japanese-jaquad", "downloads": 54, "source": "Hugging Face", "score": -0.08825094748665752, "first_commit": "2022-01-27 08:08:53", "latest_commit": "2022-02-04 02:39:25", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForQuestionAnswering" }, { "description": "Japanese Anime Speech Dataset 日本語はこちら japanese-anime-speech is an audio-text dataset designed for the training of automatic speech recognition models.", "url": "https://huggingface.co/datasets/joujiboi/japanese-anime-speech", "project_name": "japanese-anime-speech", "downloads": 54, "source": "Hugging Face", "score": -0.08825094748665752, "first_commit": "2023-11-07 13:53:40", "latest_commit": "2024-06-30 10:06:34", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "rinna/japanese-data2vec-audio-base Overview This is a Japanese data2vec Audio Base model trained by rinna Co.", "url": "https://huggingface.co/rinna/japanese-data2vec-audio-base", "project_name": "japanese-data2vec-audio-base", "downloads": 52, "source": "Hugging Face", "score": -0.08826896952260056, "first_commit": "2024-03-05 10:32:32", "latest_commit": "2024-07-22 08:12:56", "languages": [], "model_or_dataset": "model", "model_size": 0.0932, "model_architectures": "Data2VecAudioModel" }, { "description": "Fine-tuned XLSR-53 large model for speech diarization in Japanese phone-call 2 speakers diarization model which was fine-tuned facebook/wav2vec2-large-xlsr-53 on Japanese using phone-call data CallHome.", "url": "https://huggingface.co/Ivydata/wav2vec2-large-speech-diarization-jp", "project_name": "wav2vec2-large-speech-diarization-jp", "downloads": 52, "source": "Hugging Face", "score": -0.08826896952260056, "first_commit": "2023-05-08 10:10:43", "latest_commit": "2023-05-10 00:32:23", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "Wav2Vec2ForAudioFrameClassification" }, { "description": "bert-base-japanese-jsnli This model is a fine-tuned version of cl-tohoku/bert-base-japanese-v2 on the JSNLI dataset.", "url": "https://huggingface.co/Formzu/bert-base-japanese-jsnli", "project_name": "bert-base-japanese-jsnli", "downloads": 52, "source": "Hugging Face", "score": -0.08826896952260056, "first_commit": "2022-10-14 07:50:13", "latest_commit": "2022-10-18 12:13:20", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification" }, { "description": "Asian Language Treebank (ALT) Project ALT Parallel Corpusのうち、日英対訳部分のみを抽出したデータセットです。", "url": "https://huggingface.co/datasets/hpprc/alt-parallel-en-ja", "project_name": "alt-parallel-en-ja", "downloads": 52, "source": "Hugging Face", "score": -0.08826896952260056, "first_commit": "2024-03-21 02:24:27", "latest_commit": "2024-03-21 12:40:15", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "bert-large-japanese-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/bert-large-japanese-luw-upos", "project_name": "bert-large-japanese-luw-upos", "downloads": 51, "source": "Hugging Face", "score": -0.08827798054057208, "first_commit": "2021-10-26 13:54:17", "latest_commit": "2022-09-18 19:43:45", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForTokenClassification" }, { "description": "ArrowPro-7B-KillerWhale-gguf DataPilotさんが公開しているArrowPro-7B-KillerWhaleのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/ArrowPro-7B-KillerWhale-gguf", "project_name": "ArrowPro-7B-KillerWhale-gguf", "downloads": 51, "source": "Hugging Face", "score": -0.08827798054057208, "first_commit": "2024-05-29 15:06:55", "latest_commit": "2024-05-29 15:53:17", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "Ruri-Reranker: Japanese General Reranker Usage Direct Usage (Sentence Transformers)", "url": "https://huggingface.co/cl-nagoya/ruri-reranker-stage1-large", "project_name": "ruri-reranker-stage1-large", "downloads": 51, "source": "Hugging Face", "score": -0.08827798054057208, "first_commit": "2024-08-19 23:48:54", "latest_commit": "2024-09-04 08:54:05", "languages": [], "model_or_dataset": "model", "model_size": 0.337, "model_architectures": "BertForSequenceClassification" }, { "description": "モデルの説明(English explanation is below.", "url": "https://huggingface.co/keitokei1994/Llama-3-ELYZA-sqlcoder-2x8B-GGUF", "project_name": "Llama-3-ELYZA-sqlcoder-2x8B-GGUF", "downloads": 50, "source": "Hugging Face", "score": -0.08828699155854361, "first_commit": "2024-06-28 01:51:50", "latest_commit": "2024-06-28 05:56:23", "languages": [], "model_or_dataset": "model", "model_size": 13.7, "model_architectures": null }, { "description": "Model Description", "url": "https://huggingface.co/knosing/japanese_ner_model", "project_name": "japanese_ner_model", "downloads": 50, "source": "Hugging Face", "score": -0.08828699155854361, "first_commit": "2024-05-08 06:15:37", "latest_commit": "2024-05-08 07:06:22", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForTokenClassification" }, { "description": "Introduction Who am I: Qishen Ha", "url": "https://huggingface.co/haqishen/h2o-Llama-3-8B-Japanese-Instruct", "project_name": "h2o-Llama-3-8B-Japanese-Instruct", "downloads": 50, "source": "Hugging Face", "score": -0.08828699155854361, "first_commit": "2024-04-24 07:48:45", "latest_commit": "2024-06-24 08:57:49", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM" }, { "description": "Introduction Who am I: Qishen Ha", "url": "https://huggingface.co/haqishen/Llama-3-8B-Japanese-Instruct", "project_name": "Llama-3-8B-Japanese-Instruct", "downloads": 50, "source": "Hugging Face", "score": -0.08828699155854361, "first_commit": "2024-04-23 04:41:19", "latest_commit": "2024-05-02 03:36:10", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM" }, { "description": "luke-large-defamation-detection-japanese 日本語誹謗中傷検出器", "url": "https://huggingface.co/kubota/luke-large-defamation-detection-japanese", "project_name": "luke-large-defamation-detection-japanese", "downloads": 50, "source": "Hugging Face", "score": -0.08828699155854361, "first_commit": "2023-01-23 06:25:08", "latest_commit": "2023-02-07 15:49:33", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LukeForSequenceClassification" }, { "description": "Dataset.", "url": "https://huggingface.co/datasets/hpprc/jsick", "project_name": "jsick", "downloads": 50, "source": "Hugging Face", "score": -0.08828699155854361, "first_commit": "2023-04-08 16:02:06", "latest_commit": "2023-04-11 15:18:09", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "GPT-Neo 1.3B pre-trained model for Japanese Model Description GPT2/GPT3 like model trained on Japanese.corpus.", "url": "https://huggingface.co/yellowback/gpt-neo-japanese-1.3B", "project_name": "gpt-neo-japanese-1.3B", "downloads": 49, "source": "Hugging Face", "score": -0.08829600257651514, "first_commit": "2021-12-09 08:09:40", "latest_commit": "2021-12-09 17:59:05", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTNeoForCausalLM" }, { "description": "SambaLingo-Japanese-Chat SambaLingo-Japanese-Chat is a human aligned chat model trained in Japanese and English.", "url": "https://huggingface.co/sambanovasystems/SambaLingo-Japanese-Chat", "project_name": "SambaLingo-Japanese-Chat", "downloads": 49, "source": "Hugging Face", "score": -0.08829600257651514, "first_commit": "2024-02-15 22:45:08", "latest_commit": "2024-04-16 22:32:15", "languages": [], "model_or_dataset": "model", "model_size": 6.95, "model_architectures": "LlamaForCausalLM" }, { "description": "ryota39様の Tora-7B-v0.2 をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/Tora-7B-v0.2-GGUF", "project_name": "Tora-7B-v0.2-GGUF", "downloads": 49, "source": "Hugging Face", "score": -0.08829600257651514, "first_commit": "2024-05-06 12:50:49", "latest_commit": "2024-06-15 03:17:32", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "bert-base-japanese-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/bert-base-japanese-luw-upos", "project_name": "bert-base-japanese-luw-upos", "downloads": 48, "source": "Hugging Face", "score": -0.08830501359448666, "first_commit": "2021-10-26 13:26:38", "latest_commit": "2022-09-18 19:43:18", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForTokenClassification" }, { "description": "Japanese-LLaMA-2-13B-GGUF Japanese-LLaMA-2-13B-GGUFはJapanese-LLaMA-2-13BのGGUF形式です。 ", "url": "https://huggingface.co/owner203/japanese-llama-2-13b-gguf", "project_name": "japanese-llama-2-13b-gguf", "downloads": 48, "source": "Hugging Face", "score": -0.08830501359448666, "first_commit": "2023-12-20 05:37:09", "latest_commit": "2023-12-26 11:45:15", "languages": [], "model_or_dataset": "model", "model_size": 13.3, "model_architectures": null }, { "description": "Model Card for Model ID", "url": "https://huggingface.co/Respair/Japanese_Phoneme_to_Grapheme_LLM", "project_name": "Japanese_Phoneme_to_Grapheme_LLM", "downloads": 48, "source": "Hugging Face", "score": -0.08830501359448666, "first_commit": "2024-09-06 23:01:09", "latest_commit": "2024-09-09 23:16:12", "languages": [], "model_or_dataset": "model", "model_size": 3.09, "model_architectures": "Qwen2Model" }, { "description": "Fugaku-LLM利用規約 この利用規約(以下「本規約」といいます)は、富士通株式会社、国立研究開発法人理化学研究所、国立大学法人東京工業大学、国立大学法人東北大学、株式会社サイバーエージェント、国立大学法人東海国立大学機構、及び株式会社Kotoba Technologies Japan (以下「開発者」といいます)による、スーパーコンピュータ「富岳」政策対応枠における大規模言語モデル分散並列学習手法の開発の成果物として公開する大規模言語モデル(以下「Fugaku-LLM」といいます)の利用に関する条件を定めるものです。", "url": "https://huggingface.co/Fugaku-LLM/Fugaku-LLM-13B", "project_name": "Fugaku-LLM-13B", "downloads": 48, "source": "Hugging Face", "score": -0.08830501359448666, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "BERT large Japanese (character-level tokenization with whole word masking, jawiki-20200831)", "url": "https://huggingface.co/tohoku-nlp/bert-large-japanese-char", "project_name": "bert-large-japanese-char", "downloads": 47, "source": "Hugging Face", "score": -0.08831402461245819, "first_commit": "2021-03-05 06:36:24", "latest_commit": "2021-09-23 15:45:39", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM" }, { "description": "deberta-large-japanese-unidic-ud-head Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-unidic-ud-head", "project_name": "deberta-large-japanese-unidic-ud-head", "downloads": 47, "source": "Hugging Face", "score": -0.08831402461245819, "first_commit": "2022-06-19 00:10:56", "latest_commit": "2023-11-05 17:51:08", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForQuestionAnswering" }, { "description": "Local-Novel-LLM-project様の Vecteus-V2-7B をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/Vecteus-V2-7B-GGUF", "project_name": "Vecteus-V2-7B-GGUF", "downloads": 47, "source": "Hugging Face", "score": -0.08831402461245819, "first_commit": "2024-06-16 05:26:00", "latest_commit": "2024-06-16 11:32:15", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "Japanese-TextGen-Kage-v0.1-2x7B Kage is \"影\" in Japanese or \"Shadow\" in English.", "url": "https://huggingface.co/dddump/Japanese-TextGen-Kage-v0.1-2x7B-gguf", "project_name": "Japanese-TextGen-Kage-v0.1-2x7B-gguf", "downloads": 47, "source": "Hugging Face", "score": -0.08831402461245819, "first_commit": "2024-05-04 07:03:38", "latest_commit": "2024-05-19 08:54:19", "languages": [], "model_or_dataset": "model", "model_size": 12.9, "model_architectures": null }, { "description": "karasu-lora-jp-qa-chat karasu fine tuned model by lora method with the original Q&A dataset.", "url": "https://huggingface.co/aipib/karasu-lora-jp-qa-chat", "project_name": "karasu-lora-jp-qa-chat", "downloads": 47, "source": "Hugging Face", "score": -0.08831402461245819, "first_commit": "2024-04-24 02:26:58", "latest_commit": "2024-06-03 01:02:33", "languages": [], "model_or_dataset": "model", "model_size": 1.1, "model_architectures": "LlamaForCausalLM" }, { "description": "The English document is here. ", "url": "https://huggingface.co/watashiha/Watashiha-Llama-2-13B-Ogiri-sft", "project_name": "Watashiha-Llama-2-13B-Ogiri-sft", "downloads": 47, "source": "Hugging Face", "score": -0.08831402461245819, "first_commit": "2024-01-19 06:59:08", "latest_commit": "2024-03-04 05:24:31", "languages": [], "model_or_dataset": "model", "model_size": 13.1, "model_architectures": "LlamaForCausalLM" }, { "description": "Oumuamua-7b-instruct-GGUF This is quantized version of nitky/Oumuamua-7b-instruct created using llama.cpp Model Description This is a merge of pre-trained language models created using mergekit. ", "url": "https://huggingface.co/QuantFactory/Oumuamua-7b-instruct-GGUF", "project_name": "Oumuamua-7b-instruct-GGUF", "downloads": 46, "source": "Hugging Face", "score": -0.0883230356304297, "first_commit": "2024-06-19 08:52:12", "latest_commit": "2024-06-19 11:40:58", "languages": [], "model_or_dataset": "model", "model_size": 7.33, "model_architectures": null }, { "description": "つくよみちゃんデータセットを用いて calm-2-7b-chat をファインチューニングしたモデルです。", "url": "https://huggingface.co/offtoung/tsukuyomi-chan-calm2-7b", "project_name": "tsukuyomi-chan-calm2-7b", "downloads": 46, "source": "Hugging Face", "score": -0.0883230356304297, "first_commit": "2023-12-21 08:46:37", "latest_commit": "2023-12-27 04:07:20", "languages": [], "model_or_dataset": "model", "model_size": 7.01, "model_architectures": "LlamaForCausalLM" }, { "description": "sonoisa/t5-base-japaneseをファインチューニングして、タイトル生成に用いれるようにしたモデルです。 ", "url": "https://huggingface.co/Mizuiro-sakura/t5-CAMERA-title-generation", "project_name": "t5-CAMERA-title-generation", "downloads": 45, "source": "Hugging Face", "score": -0.08833204664840123, "first_commit": "2023-03-21 10:49:27", "latest_commit": "2023-07-21 14:11:13", "languages": [], "model_or_dataset": "model", "model_size": 0.223, "model_architectures": "T5ForConditionalGeneration" }, { "description": "Llama-3-8B-Instruct-JP-nk2t-v0.2 Model Details: Built with Meta Llama 3", "url": "https://huggingface.co/nk2t/Llama-3-8B-Instruct-japanese-nk2t-v0.2", "project_name": "Llama-3-8B-Instruct-japanese-nk2t-v0.2", "downloads": 44, "source": "Hugging Face", "score": -0.08834105766637276, "first_commit": "2024-05-04 04:16:35", "latest_commit": "2024-05-15 12:56:34", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM" }, { "description": "Barba Barba is a multilingual natural language inference model for textual entailment and zero-shot text classification, available as an end-to-end service through TensorFlow Serving.", "url": "https://huggingface.co/hyperonym/barba", "project_name": "barba", "downloads": 44, "source": "Hugging Face", "score": -0.08834105766637276, "first_commit": "2023-04-29 06:27:12", "latest_commit": "2023-04-29 13:45:12", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "XLMRobertaForSequenceClassification" }, { "description": "モデルの説明(English explanation is below.", "url": "https://huggingface.co/keitokei1994/swallow-3-8B-sqlcoder-2x8B-GGUF", "project_name": "swallow-3-8B-sqlcoder-2x8B-GGUF", "downloads": 43, "source": "Hugging Face", "score": -0.08835006868434428, "first_commit": "2024-07-03 11:02:45", "latest_commit": "2024-07-04 07:20:41", "languages": [], "model_or_dataset": "model", "model_size": 13.7, "model_architectures": null }, { "description": "英語+日本語T5事前学習済みモデル This is a T5 (Text-to-Text Transfer Transformer) model pretrained on English and Japanese balanced corpus. ", "url": "https://huggingface.co/sonoisa/t5-base-english-japanese", "project_name": "t5-base-english-japanese", "downloads": 43, "source": "Hugging Face", "score": -0.08835006868434428, "first_commit": "2022-07-28 11:31:28", "latest_commit": "2022-08-27 09:07:53", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration" }, { "description": "Ruri: Japanese General Text Embeddings Usage First install the Sentence Transformers library: pip install -U sentence-transformers Then you can load this model and run inference.", "url": "https://huggingface.co/cl-nagoya/ruri-pt-base", "project_name": "ruri-pt-base", "downloads": 42, "source": "Hugging Face", "score": -0.08835907970231581, "first_commit": "2024-08-17 10:38:19", "latest_commit": "2024-09-13 01:38:07", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertModel" }, { "description": "deberta-large-japanese-wikipedia-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-wikipedia-luw-upos", "project_name": "deberta-large-japanese-wikipedia-luw-upos", "downloads": 42, "source": "Hugging Face", "score": -0.08835907970231581, "first_commit": "2022-07-06 03:15:12", "latest_commit": "2024-08-20 17:54:58", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "This is a model for named entity recognition of Japanese medical documents.", "url": "https://huggingface.co/sociocom/MedNERN-CR-JA", "project_name": "MedNERN-CR-JA", "downloads": 41, "source": "Hugging Face", "score": -0.08836809072028733, "first_commit": "2023-04-13 08:25:56", "latest_commit": "2024-02-26 13:53:06", "languages": [], "model_or_dataset": "model", "model_size": 0.11, "model_architectures": "BertForTokenClassification" }, { "description": "bert-base-japanese-v3-jcommonsenseqa 「大規模言語モデル入門」の第5章で紹介している(多肢選択式質問応答)のモデルです。 ", "url": "https://huggingface.co/llm-book/bert-base-japanese-v3-jcommonsenseqa", "project_name": "bert-base-japanese-v3-jcommonsenseqa", "downloads": 41, "source": "Hugging Face", "score": -0.08836809072028733, "first_commit": "2023-06-20 07:01:53", "latest_commit": "2023-07-24 06:49:16", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMultipleChoice" }, { "description": "Deepreneur-blue-lizard-gguf Deepreneurさんが公開しているblue-lizardのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Deepreneur-blue-lizard-gguf", "project_name": "Deepreneur-blue-lizard-gguf", "downloads": 41, "source": "Hugging Face", "score": -0.08836809072028733, "first_commit": "2024-02-13 15:18:15", "latest_commit": "2024-02-13 16:26:26", "languages": [], "model_or_dataset": "model", "model_size": 6.74, "model_architectures": null }, { "description": "Japanese-Alpaca-2-13B-GGUF Japanese-Alpaca-2-13B-GGUFはJapanese-Alpaca-2-13BのGGUF形式です。 ", "url": "https://huggingface.co/owner203/japanese-alpaca-2-13b-gguf", "project_name": "japanese-alpaca-2-13b-gguf", "downloads": 40, "source": "Hugging Face", "score": -0.08837710173825886, "first_commit": "2023-12-20 10:56:08", "latest_commit": "2023-12-26 11:46:41", "languages": [], "model_or_dataset": "model", "model_size": 13.3, "model_architectures": null }, { "description": "umiyuki様の Japanese-Chat-Umievo-itr004-7b をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/Japanese-Chat-Umievo-itr004-7b-GGUF", "project_name": "Japanese-Chat-Umievo-itr004-7b-GGUF", "downloads": 40, "source": "Hugging Face", "score": -0.08837710173825886, "first_commit": "2024-05-13 16:28:41", "latest_commit": "2024-05-13 23:33:49", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "Kurage Kurage is a multipurpose RAG model from Lightblue.", "url": "https://huggingface.co/lightblue/kurage-ja", "project_name": "kurage-ja", "downloads": 40, "source": "Hugging Face", "score": -0.08837710173825886, "first_commit": "2024-09-11 03:39:10", "latest_commit": "2024-09-16 08:12:19", "languages": [], "model_or_dataset": "model", "model_size": 7.61, "model_architectures": "Qwen2ForCausalLM" }, { "description": "This dataset is the data that corrected the translation errors and untranslated data of the Japanese data in MBZUAI/multilingual-llava-bench-in-the-wild.", "url": "https://huggingface.co/datasets/toshi456/llava-bench-in-the-wild-ja", "project_name": "llava-bench-in-the-wild-ja", "downloads": 40, "source": "Hugging Face", "score": -0.08837710173825886, "first_commit": "2024-03-06 21:56:53", "latest_commit": "2024-04-01 15:15:57", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "以下のデータ源からランダムに抽出した日本語のテキストをもとに、RAG形式のQ&Aを自動生成したものです。 Wikibooks Wikipedia 判例データ instruction datasetとしてではなく、事前学習での利用を想定しています(質疑応答をするための訓練)。 一部の計算には東京工業大学のスーパーコンピュータTSUBAME4.0を利用しました。", "url": "https://huggingface.co/datasets/kanhatakeyama/CreativeCommons-RAG-QA-Mixtral8x22b", "project_name": "CreativeCommons-RAG-QA-Mixtral8x22b", "downloads": 40, "source": "Hugging Face", "score": -0.08837710173825886, "first_commit": "2024-07-03 07:54:49", "latest_commit": "2024-07-12 06:43:18", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "bert-base-japanese-char-extended Model Description", "url": "https://huggingface.co/KoichiYasuoka/bert-base-japanese-char-extended", "project_name": "bert-base-japanese-char-extended", "downloads": 39, "source": "Hugging Face", "score": -0.08838611275623039, "first_commit": "2021-08-26 22:44:12", "latest_commit": "2022-06-21 07:21:54", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM" }, { "description": "nlp-waseda/gpt2-small-japanese This model is Japanese GPT-2 pretrained on Japanese Wikipedia and CC-100.", "url": "https://huggingface.co/nlp-waseda/gpt2-small-japanese", "project_name": "gpt2-small-japanese", "downloads": 39, "source": "Hugging Face", "score": -0.08838611275623039, "first_commit": "2022-03-30 03:34:11", "latest_commit": "2022-03-30 04:28:17", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel" }, { "description": "deberta-base-japanese-unidic-ud-head Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-unidic-ud-head", "project_name": "deberta-base-japanese-unidic-ud-head", "downloads": 39, "source": "Hugging Face", "score": -0.08838611275623039, "first_commit": "2022-06-18 10:20:24", "latest_commit": "2024-08-20 20:09:13", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForQuestionAnswering" }, { "description": "rinna/nekomata-7b-instruction-gguf Overview The model is the GGUF version of rinna/nekomata-7b-instruction.", "url": "https://huggingface.co/rinna/nekomata-7b-instruction-gguf", "project_name": "nekomata-7b-instruction-gguf", "downloads": 39, "source": "Hugging Face", "score": -0.08838611275623039, "first_commit": "2023-12-19 08:11:08", "latest_commit": "2024-07-20 08:38:34", "languages": [], "model_or_dataset": "model", "model_size": 7.72, "model_architectures": null }, { "description": "Original Model Optical character recognition for Japanese text, with the main focus being Japanese manga.", "url": "https://huggingface.co/TareHimself/manga-ocr-base", "project_name": "manga-ocr-base", "downloads": 39, "source": "Hugging Face", "score": -0.08838611275623039, "first_commit": "2023-09-14 04:15:52", "latest_commit": "2024-06-03 05:10:11", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "VisionEncoderDecoderModel" }, { "description": "bart-base-japanese-news(base-sized model)", "url": "https://huggingface.co/stockmark/bart-base-japanese-news", "project_name": "bart-base-japanese-news", "downloads": 39, "source": "Hugging Face", "score": -0.08838611275623039, "first_commit": "2023-01-20 04:23:07", "latest_commit": "2023-12-08 03:39:50", "languages": [], "model_or_dataset": "model", "model_size": 0.125, "model_architectures": "BartForConditionalGeneration" }, { "description": "Model trained on 800,000 Japanese sentences after reducing oshizo/japanese-e5-mistral-7b_slerp to 8 layers.", "url": "https://huggingface.co/oshizo/japanese-e5-mistral-1.9b", "project_name": "japanese-e5-mistral-1.9b", "downloads": 38, "source": "Hugging Face", "score": -0.0883951237742019, "first_commit": "2024-02-02 12:39:11", "latest_commit": "2024-02-03 00:28:28", "languages": [], "model_or_dataset": "model", "model_size": 1.88, "model_architectures": "MistralForEmbedding" }, { "description": "japanese-sentiment-analysis This model is the work of jarvisx17 and was trained from scratch on the chABSA dataset.", "url": "https://huggingface.co/RPAmodels/PN-analysis", "project_name": "PN-analysis", "downloads": 38, "source": "Hugging Face", "score": -0.0883951237742019, "first_commit": "2022-11-15 06:28:39", "latest_commit": "2024-09-27 05:20:33", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForSequenceClassification" }, { "description": "roberta-small-japanese-aozora-char Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-small-japanese-aozora-char", "project_name": "roberta-small-japanese-aozora-char", "downloads": 38, "source": "Hugging Face", "score": -0.0883951237742019, "first_commit": "2021-12-23 02:38:26", "latest_commit": "2021-12-23 11:55:42", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForMaskedLM" }, { "description": "bert-base-japanese-unidic-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/bert-base-japanese-unidic-luw-upos", "project_name": "bert-base-japanese-unidic-luw-upos", "downloads": 37, "source": "Hugging Face", "score": -0.08840413479217343, "first_commit": "2022-02-13 01:00:01", "latest_commit": "2023-11-05 18:44:10", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForTokenClassification" }, { "description": "ltgbert-base-japanese-ud-goeswith Model Description", "url": "https://huggingface.co/KoichiYasuoka/ltgbert-base-japanese-ud-goeswith", "project_name": "ltgbert-base-japanese-ud-goeswith", "downloads": 37, "source": "Hugging Face", "score": -0.08840413479217343, "first_commit": "2024-09-13 16:29:53", "latest_commit": "2024-09-14 07:34:21", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LtgbertForTokenClassification" }, { "description": "electra-base-cyberbullying This is a BERT Base model for the Japanese language finetuned for automatic cyberbullying detection.", "url": "https://huggingface.co/kit-nlp/bert-base-japanese-basic-char-v2-cyberbullying", "project_name": "bert-base-japanese-basic-char-v2-cyberbullying", "downloads": 36, "source": "Hugging Face", "score": -0.08841314581014495, "first_commit": "2022-09-08 09:09:39", "latest_commit": "2022-11-01 07:20:52", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification" }, { "description": "rinna/nekomata-7b-gguf Overview The model is the GGUF version of rinna/nekomata-7b.", "url": "https://huggingface.co/rinna/nekomata-7b-gguf", "project_name": "nekomata-7b-gguf", "downloads": 36, "source": "Hugging Face", "score": -0.08841314581014495, "first_commit": "2023-12-19 08:10:42", "latest_commit": "2024-07-20 08:36:15", "languages": [], "model_or_dataset": "model", "model_size": 7.72, "model_architectures": null }, { "description": "TinySlime-1.1B-Chat-v1.0 TinySlime は日本語に特化した小規模言語モデルです。 ", "url": "https://huggingface.co/2121-8/TinySlime-1.1B-Chat-v1.0", "project_name": "TinySlime-1.1B-Chat-v1.0", "downloads": 36, "source": "Hugging Face", "score": -0.08841314581014495, "first_commit": "2024-07-02 03:34:30", "latest_commit": "2024-07-02 08:53:11", "languages": [], "model_or_dataset": "model", "model_size": 1.1, "model_architectures": "LlamaForCausalLM" }, { "description": "rinna/nekomata-14b-instruction-gguf Overview The model is the GGUF version of rinna/nekomata-14b-instruction.", "url": "https://huggingface.co/rinna/nekomata-14b-instruction-gguf", "project_name": "nekomata-14b-instruction-gguf", "downloads": 36, "source": "Hugging Face", "score": -0.08841314581014495, "first_commit": "2023-12-19 08:12:06", "latest_commit": "2024-07-20 08:34:05", "languages": [], "model_or_dataset": "model", "model_size": 14.2, "model_architectures": null }, { "description": "Fine-tuned Japanese Whisper model for speech recognition using whisper-small Fine-tuned openai/whisper-small on Japanese using Common Voice, JVS and JSUT.", "url": "https://huggingface.co/Ivydata/whisper-small-japanese", "project_name": "whisper-small-japanese", "downloads": 36, "source": "Hugging Face", "score": -0.08841314581014495, "first_commit": "2023-05-19 10:42:27", "latest_commit": "2023-05-19 10:50:13", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "WhisperForConditionalGeneration" }, { "description": "roberta-large-japanese-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-large-japanese-luw-upos", "project_name": "roberta-large-japanese-luw-upos", "downloads": 36, "source": "Hugging Face", "score": -0.08841314581014495, "first_commit": "2021-12-26 13:51:46", "latest_commit": "2024-08-20 18:34:07", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForTokenClassification" }, { "description": "bert-large-japanese-char-extended Model Description", "url": "https://huggingface.co/KoichiYasuoka/bert-large-japanese-char-extended", "project_name": "bert-large-japanese-char-extended", "downloads": 35, "source": "Hugging Face", "score": -0.08842215682811648, "first_commit": "2021-06-04 13:29:34", "latest_commit": "2024-08-20 17:45:37", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM" }, { "description": "Model card for model ID", "url": "https://huggingface.co/retrieva-jp/t5-xl", "project_name": "t5-xl", "downloads": 35, "source": "Hugging Face", "score": -0.08842215682811648, "first_commit": "2023-04-26 07:19:08", "latest_commit": "2023-05-10 10:01:04", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration" }, { "description": "Japanese-LLaMA-2-7B-GGUF Japanese-LLaMA-2-7B-GGUFはJapanese-LLaMA-2-7BのGGUF形式です。 ", "url": "https://huggingface.co/owner203/japanese-llama-2-7b-gguf", "project_name": "japanese-llama-2-7b-gguf", "downloads": 35, "source": "Hugging Face", "score": -0.08842215682811648, "first_commit": "2024-01-22 03:00:02", "latest_commit": "2024-06-05 02:30:01", "languages": [], "model_or_dataset": "model", "model_size": 6.97, "model_architectures": null }, { "description": "Heron GIT Japanese StableLM", "url": "https://huggingface.co/turing-motors/heron-chat-git-ja-stablelm-base-7b-v1", "project_name": "heron-chat-git-ja-stablelm-base-7b-v1", "downloads": 35, "source": "Hugging Face", "score": -0.08842215682811648, "first_commit": "2024-03-29 09:09:32", "latest_commit": "2024-05-02 07:55:57", "languages": [], "model_or_dataset": "model", "model_size": 7.32, "model_architectures": "GitJapaneseStableLMAlphaForCausalLM" }, { "description": "Hibiki ASR Phonemizer This model is a Phoneme Level Speech Recognition network, originally a fine-tuned version of openai/whisper-large-v3 on a mixture of Different Japanese datasets.", "url": "https://huggingface.co/Respair/Hibiki_ASR_Phonemizer_v0.2", "project_name": "Hibiki_ASR_Phonemizer_v0.2", "downloads": 35, "source": "Hugging Face", "score": -0.08842215682811648, "first_commit": "2024-08-12 01:30:08", "latest_commit": "2024-08-19 18:13:01", "languages": [], "model_or_dataset": "model", "model_size": 1.54, "model_architectures": "WhisperForConditionalGeneration" }, { "description": "deberta-large-japanese-juman-ud-goeswith Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-juman-ud-goeswith", "project_name": "deberta-large-japanese-juman-ud-goeswith", "downloads": 35, "source": "Hugging Face", "score": -0.08842215682811648, "first_commit": "2023-02-05 13:24:47", "latest_commit": "2024-08-30 14:27:11", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "deberta-base-japanese-juman-ud-goeswith Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-juman-ud-goeswith", "project_name": "deberta-base-japanese-juman-ud-goeswith", "downloads": 35, "source": "Hugging Face", "score": -0.08842215682811648, "first_commit": "2023-02-05 06:48:32", "latest_commit": "2023-05-12 01:16:53", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "deberta-large-japanese-wikipedia-ud-goeswith Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-wikipedia-ud-goeswith", "project_name": "deberta-large-japanese-wikipedia-ud-goeswith", "downloads": 35, "source": "Hugging Face", "score": -0.08842215682811648, "first_commit": "2022-09-18 08:41:06", "latest_commit": "2023-05-12 01:29:13", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "deberta-large-japanese-wikipedia Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-wikipedia", "project_name": "deberta-large-japanese-wikipedia", "downloads": 35, "source": "Hugging Face", "score": -0.08842215682811648, "first_commit": "2022-07-05 22:01:16", "latest_commit": "2023-02-27 10:15:35", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "roberta-base-japanese-aozora Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-base-japanese-aozora", "project_name": "roberta-base-japanese-aozora", "downloads": 34, "source": "Hugging Face", "score": -0.08843116784608801, "first_commit": "2021-12-21 00:04:03", "latest_commit": "2022-10-15 14:20:11", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForMaskedLM" }, { "description": "roberta-large-japanese-juman-ud-goeswith Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-large-japanese-juman-ud-goeswith", "project_name": "roberta-large-japanese-juman-ud-goeswith", "downloads": 34, "source": "Hugging Face", "score": -0.08843116784608801, "first_commit": "2023-02-21 06:38:32", "latest_commit": "2024-08-30 14:49:26", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForTokenClassification" }, { "description": "yacis-electra-small-cyberbullying", "url": "https://huggingface.co/ptaszynski/yacis-electra-small-japanese-cyberbullying", "project_name": "yacis-electra-small-japanese-cyberbullying", "downloads": 34, "source": "Hugging Face", "score": -0.08843116784608801, "first_commit": "2022-01-12 03:57:13", "latest_commit": "2022-01-16 13:51:28", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ElectraForSequenceClassification" }, { "description": "BERT small Japanese finance This is a BERT model pretrained on texts in the Japanese language.", "url": "https://huggingface.co/izumi-lab/bert-small-japanese-fin", "project_name": "bert-small-japanese-fin", "downloads": 34, "source": "Hugging Face", "score": -0.08843116784608801, "first_commit": "2021-10-04 13:15:37", "latest_commit": "2022-12-09 00:41:24", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM" }, { "description": "[github].", "url": "https://huggingface.co/datasets/fujiki/japanese_alpaca_data", "project_name": "japanese_alpaca_data", "downloads": 34, "source": "Hugging Face", "score": -0.08843116784608801, "first_commit": "2023-05-18 07:13:15", "latest_commit": "2023-05-19 12:54:13", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "JP Voice-Text Dataset for", "url": "https://huggingface.co/datasets/deepghs/fgo_voices_jp", "project_name": "fgo_voices_jp", "downloads": 34, "source": "Hugging Face", "score": -0.08843116784608801, "first_commit": "2024-08-28 08:56:04", "latest_commit": "2024-08-28 09:14:22", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "transformers-ud-japanese-electra-ginza (sudachitra-wordpiece, mC4 Japanese) -", "url": "https://huggingface.co/megagonlabs/transformers-ud-japanese-electra-base-discriminator", "project_name": "transformers-ud-japanese-electra-base-discriminator", "downloads": 33, "source": "Hugging Face", "score": -0.08844017886405953, "first_commit": "2021-08-23 09:54:09", "latest_commit": "2021-09-22 11:00:15", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ElectraForPreTraining" }, { "description": "deberta-large-japanese-unidic Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-unidic", "project_name": "deberta-large-japanese-unidic", "downloads": 33, "source": "Hugging Face", "score": -0.08844017886405953, "first_commit": "2022-06-10 12:49:12", "latest_commit": "2022-06-19 09:15:35", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "Japanese BERT-base (Juman++ + BPE) How to load the tokenizer Please download the dictionary file for Juman++ + BPE from our GitHub repository.", "url": "https://huggingface.co/hitachi-nlp/bert-base-japanese_jumanpp-bpe", "project_name": "bert-base-japanese_jumanpp-bpe", "downloads": 33, "source": "Hugging Face", "score": -0.08844017886405953, "first_commit": "2023-06-14 07:01:02", "latest_commit": "2023-06-16 01:02:19", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM" }, { "description": "japanese-large-lm-3.6b-instruction-sft-8bit-1g-actorder_True", "url": "https://huggingface.co/line-corporation/japanese-large-lm-3.6b-instruction-sft-8bit-1g-actorder_True", "project_name": "japanese-large-lm-3.6b-instruction-sft-8bit-1g-actorder_True", "downloads": 33, "source": "Hugging Face", "score": -0.08844017886405953, "first_commit": "2023-09-26 06:16:23", "latest_commit": "2023-09-28 00:02:06", "languages": [], "model_or_dataset": "model", "model_size": 1.17, "model_architectures": "GPTNeoXForCausalLM" }, { "description": "deberta-v3-base-japanese-ud-goeswith Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-v3-base-japanese-ud-goeswith", "project_name": "deberta-v3-base-japanese-ud-goeswith", "downloads": 33, "source": "Hugging Face", "score": -0.08844017886405953, "first_commit": "2024-05-21 11:42:12", "latest_commit": "2024-09-12 23:31:12", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "Ninja-v1-RP-GGUF 概要 Aratako/Ninja-v1-RPの量子化済みGGUF版です。", "url": "https://huggingface.co/Aratako/Ninja-v1-RP-GGUF", "project_name": "Ninja-v1-RP-GGUF", "downloads": 33, "source": "Hugging Face", "score": -0.08844017886405953, "first_commit": "2024-05-20 17:08:50", "latest_commit": "2024-05-24 15:11:08", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "Tanuki-ZeRo-gguf kanhatakeyamaさんが公開しているTanuki-ZeRoのggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/Tanuki-ZeRo-gguf", "project_name": "Tanuki-ZeRo-gguf", "downloads": 33, "source": "Hugging Face", "score": -0.08844017886405953, "first_commit": "2024-03-30 10:49:02", "latest_commit": "2024-03-30 17:01:16", "languages": [], "model_or_dataset": "model", "model_size": 13.1, "model_architectures": null }, { "description": "nlp-waseda/gpt2-xl-japanese This is Japanese GPT2 with approximately 1.5B parameters pretrained on Japanese Wikipedia and CC-100", "url": "https://huggingface.co/nlp-waseda/gpt2-xl-japanese", "project_name": "gpt2-xl-japanese", "downloads": 33, "source": "Hugging Face", "score": -0.08844017886405953, "first_commit": "2022-11-30 04:33:31", "latest_commit": "2023-06-21 04:29:10", "languages": [], "model_or_dataset": "model", "model_size": 1.61, "model_architectures": "GPT2LMHeadModel" }, { "description": "deberta-base-japanese-aozora Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-aozora", "project_name": "deberta-base-japanese-aozora", "downloads": 33, "source": "Hugging Face", "score": -0.08844017886405953, "first_commit": "2022-05-24 04:30:28", "latest_commit": "2023-01-08 08:41:04", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "doc2query/msmarco-japanese-mt5-base-v1 This is a doc2query model based on mT5 (also known as docT5query).", "url": "https://huggingface.co/doc2query/msmarco-japanese-mt5-base-v1", "project_name": "msmarco-japanese-mt5-base-v1", "downloads": 32, "source": "Hugging Face", "score": -0.08844918988203106, "first_commit": "2022-04-29 12:05:21", "latest_commit": "2022-04-29 14:05:37", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MT5ForConditionalGeneration" }, { "description": "deberta-large-japanese-unidic-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-unidic-luw-upos", "project_name": "deberta-large-japanese-unidic-luw-upos", "downloads": 32, "source": "Hugging Face", "score": -0.08844918988203106, "first_commit": "2022-06-10 12:53:45", "latest_commit": "2024-08-20 20:16:18", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "Sarashina2-7B Instruct sarashina2-7Bを会話できるようにフルファインチューニングしたものです。", "url": "https://huggingface.co/alfredplpl/sarashina2-7b-it", "project_name": "sarashina2-7b-it", "downloads": 32, "source": "Hugging Face", "score": -0.08844918988203106, "first_commit": "2024-06-12 02:24:28", "latest_commit": "2024-06-12 03:00:35", "languages": [], "model_or_dataset": "model", "model_size": 7.32, "model_architectures": "LlamaForCausalLM" }, { "description": "概要 elyza/Llama-3-ELYZA-JP-8Bを元にchat vectorを用いて改良しAItuberに特化させました。 ", "url": "https://huggingface.co/DataPilot/Llama3-ArrowSE-8B-v0.3", "project_name": "Llama3-ArrowSE-8B-v0.3", "downloads": 32, "source": "Hugging Face", "score": -0.08844918988203106, "first_commit": "2024-07-06 15:39:54", "latest_commit": "2024-07-07 14:18:02", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM" }, { "description": "roberta-base-japanese-juman-ud-goeswith Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-base-japanese-juman-ud-goeswith", "project_name": "roberta-base-japanese-juman-ud-goeswith", "downloads": 32, "source": "Hugging Face", "score": -0.08844918988203106, "first_commit": "2023-02-21 03:43:52", "latest_commit": "2024-08-30 14:46:25", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForTokenClassification" }, { "description": "ku-nlp/roberta-large-japanese-char-wwm Model description This is a Japanese RoBERTa large model pre-trained on Japanese Wikipedia and the Japanese portion of CC-100.", "url": "https://huggingface.co/ku-nlp/roberta-large-japanese-char-wwm", "project_name": "roberta-large-japanese-char-wwm", "downloads": 32, "source": "Hugging Face", "score": -0.08844918988203106, "first_commit": "2022-09-18 08:10:44", "latest_commit": "2023-03-19 01:58:12", "languages": [], "model_or_dataset": "model", "model_size": 0.323, "model_architectures": "RobertaForMaskedLM" }, { "description": "deberta-large-japanese-wikipedia-ud-head Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-wikipedia-ud-head", "project_name": "deberta-large-japanese-wikipedia-ud-head", "downloads": 32, "source": "Hugging Face", "score": -0.08844918988203106, "first_commit": "2022-07-06 03:51:14", "latest_commit": "2024-08-20 19:51:21", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForQuestionAnswering" }, { "description": "deberta-large-japanese-aozora Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-aozora", "project_name": "deberta-large-japanese-aozora", "downloads": 32, "source": "Hugging Face", "score": -0.08844918988203106, "first_commit": "2022-05-26 14:46:58", "latest_commit": "2023-01-14 00:27:22", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "roberta-large-japanese-aozora-char Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-large-japanese-aozora-char", "project_name": "roberta-large-japanese-aozora-char", "downloads": 32, "source": "Hugging Face", "score": -0.08844918988203106, "first_commit": "2021-12-30 14:19:53", "latest_commit": "2022-06-22 10:22:43", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForMaskedLM" }, { "description": "Dataset 5M (5121625) clean Japanese full sentence with the context.", "url": "https://huggingface.co/datasets/AhmedSSabir/Japanese-wiki-dump-sentence-dataset", "project_name": "Japanese-wiki-dump-sentence-dataset", "downloads": 32, "source": "Hugging Face", "score": -0.08844918988203106, "first_commit": "2022-06-08 11:34:04", "latest_commit": "2023-07-11 12:22:09", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Japanese-Heron-Bench Dataset Description Japanese-Heron-Bench is a benchmark for evaluating Japanese VLMs (Vision-Language Models).", "url": "https://huggingface.co/datasets/turing-motors/Japanese-Heron-Bench", "project_name": "Japanese-Heron-Bench", "downloads": 32, "source": "Hugging Face", "score": -0.08844918988203106, "first_commit": "2024-04-12 01:54:01", "latest_commit": "2024-04-12 08:59:36", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "ELYZA-japanese-CodeLlama-7b Model Description ELYZA-japanese-CodeLlama-7b は、 Code Llamaをベースとして日本語能力を拡張するために追加事前学習を行ったモデルです。 ", "url": "https://huggingface.co/elyza/ELYZA-japanese-CodeLlama-7b", "project_name": "ELYZA-japanese-CodeLlama-7b", "downloads": 31, "source": "Hugging Face", "score": -0.08845820090000257, "first_commit": "2023-11-07 12:48:15", "latest_commit": "2023-11-15 00:38:12", "languages": [], "model_or_dataset": "model", "model_size": 6.74, "model_architectures": "LlamaForCausalLM" }, { "description": "Japanese BERT-base (Sudachi + Unigram)", "url": "https://huggingface.co/hitachi-nlp/bert-base-japanese_sudachi-unigram", "project_name": "bert-base-japanese_sudachi-unigram", "downloads": 31, "source": "Hugging Face", "score": -0.08845820090000257, "first_commit": "2023-06-14 07:16:29", "latest_commit": "2023-06-16 01:03:54", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM" }, { "description": "Orion-14B 🌐English | 🇨", "url": "https://huggingface.co/nold/Orion-14B-Base-GGUF", "project_name": "Orion-14B-Base-GGUF", "downloads": 31, "source": "Hugging Face", "score": -0.08845820090000257, "first_commit": "2024-03-07 14:56:51", "latest_commit": "2024-03-07 19:33:53", "languages": [], "model_or_dataset": "model", "model_size": 14.5, "model_architectures": null }, { "description": "bert-base-japanese-v3-jsts 「大規模言語モデル入門」の第5章で紹介している(意味類似度計算)のモデルです。 ", "url": "https://huggingface.co/masato12/bert-base-japanese-v3-jsts-with-tokenizer", "project_name": "bert-base-japanese-v3-jsts-with-tokenizer", "downloads": 31, "source": "Hugging Face", "score": -0.08845820090000257, "first_commit": "2024-07-21 04:58:46", "latest_commit": "2024-07-21 18:21:41", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification" }, { "description": "Llama3ベースの日本語医療LLM MedLlama3-JP このモデルはLlama3の継続学習により作成された4種類のLLMから成るマージモデルです。 ", "url": "https://huggingface.co/EQUES/MedLLama3-JP-v2", "project_name": "MedLLama3-JP-v2", "downloads": 31, "source": "Hugging Face", "score": -0.08845820090000257, "first_commit": "2024-07-01 13:42:17", "latest_commit": "2024-07-13 06:12:43", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM" }, { "description": "lightblue-Karasu-Mixtral-8x22B-v0.1-gguf lightblueさんが公開しているKarasu-Mixtral-8x22B-v0.1のggufフォーマット変換版です。 ", "url": "https://huggingface.co/mmnga/lightblue-Karasu-Mixtral-8x22B-v0.1-gguf", "project_name": "lightblue-Karasu-Mixtral-8x22B-v0.1-gguf", "downloads": 31, "source": "Hugging Face", "score": -0.08845820090000257, "first_commit": "2024-05-07 12:53:56", "latest_commit": "2024-05-07 18:07:43", "languages": [], "model_or_dataset": "model", "model_size": 141.0, "model_architectures": null }, { "description": "このモデルはluke-japanese-largeをファインチューニングして、JCommonsenseQA(選択式応答)に用いれるようにしたものです。 ", "url": "https://huggingface.co/Mizuiro-sakura/luke-large-commonsenseqa-japanese", "project_name": "luke-large-commonsenseqa-japanese", "downloads": 31, "source": "Hugging Face", "score": -0.08845820090000257, "first_commit": "2023-02-05 16:17:54", "latest_commit": "2023-02-05 17:04:47", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LukeForMultipleChoice" }, { "description": "ELECTRA base Japanese discriminator This is a ELECTRA model pretrained on texts in the Japanese language.", "url": "https://huggingface.co/izumi-lab/electra-base-japanese-discriminator", "project_name": "electra-base-japanese-discriminator", "downloads": 31, "source": "Hugging Face", "score": -0.08845820090000257, "first_commit": "2021-11-15 17:39:41", "latest_commit": "2022-12-09 00:43:19", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ElectraForPreTraining" }, { "description": "roberta-base-japanese-aozora-ud-goeswith Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-base-japanese-aozora-ud-goeswith", "project_name": "roberta-base-japanese-aozora-ud-goeswith", "downloads": 30, "source": "Hugging Face", "score": -0.0884672119179741, "first_commit": "2022-10-15 04:01:29", "latest_commit": "2024-08-20 18:49:41", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForTokenClassification" }, { "description": "transformer-lm-japanese-0.1b", "url": "https://huggingface.co/fukugawa/transformer-lm-japanese-0.1b", "project_name": "transformer-lm-japanese-0.1b", "downloads": 30, "source": "Hugging Face", "score": -0.0884672119179741, "first_commit": "2023-07-12 02:11:11", "latest_commit": "2024-06-03 06:17:19", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "TransformerLMForCausalLM" }, { "description": "Ruri-Reranker: Japanese General Reranker Usage Direct Usage (Sentence Transformers)", "url": "https://huggingface.co/cl-nagoya/ruri-reranker-stage1-small", "project_name": "ruri-reranker-stage1-small", "downloads": 30, "source": "Hugging Face", "score": -0.0884672119179741, "first_commit": "2024-08-19 09:44:00", "latest_commit": "2024-09-04 08:53:02", "languages": [], "model_or_dataset": "model", "model_size": 0.06870000000000001, "model_architectures": "DistilBertForSequenceClassification" }, { "description": "deberta-base-japanese-wikipedia-ud-goeswith Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-wikipedia-ud-goeswith", "project_name": "deberta-base-japanese-wikipedia-ud-goeswith", "downloads": 30, "source": "Hugging Face", "score": -0.0884672119179741, "first_commit": "2022-09-18 06:02:55", "latest_commit": "2024-08-20 19:38:50", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "Japanese GPT2 Lyric Model Model description", "url": "https://huggingface.co/skytnt/gpt2-japanese-lyric-medium", "project_name": "gpt2-japanese-lyric-medium", "downloads": 30, "source": "Hugging Face", "score": -0.0884672119179741, "first_commit": "2022-07-08 13:28:12", "latest_commit": "2023-10-21 14:53:57", "languages": [], "model_or_dataset": "model", "model_size": 0.361, "model_architectures": "GPT2LMHeadModel" }, { "description": "deberta-base-japanese-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-luw-upos", "project_name": "deberta-base-japanese-luw-upos", "downloads": 30, "source": "Hugging Face", "score": -0.0884672119179741, "first_commit": "2022-05-24 06:55:47", "latest_commit": "2024-08-20 19:21:57", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "abc-multiple-choice Dataset abc-multiple-choice は、競技クイズの大会「abc」で使用された4択問題を元に作成された、多肢選択式の質問応答データセットです。 ", "url": "https://huggingface.co/datasets/tohoku-nlp/abc-multiple-choice", "project_name": "abc-multiple-choice", "downloads": 30, "source": "Hugging Face", "score": -0.0884672119179741, "first_commit": "2024-03-02 03:58:25", "latest_commit": "2024-03-12 07:32:13", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Local-Novel-LLM-project様の Ninja-V3 をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/Ninja-V3-GGUF", "project_name": "Ninja-V3-GGUF", "downloads": 29, "source": "Hugging Face", "score": -0.08847622293594563, "first_commit": "2024-07-03 11:52:04", "latest_commit": "2024-07-03 16:59:05", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "モデルの説明(English explanation is below.", "url": "https://huggingface.co/keitokei1994/Llama-3-Umievo-Shizuko-sqlcoder-2x8B", "project_name": "Llama-3-Umievo-Shizuko-sqlcoder-2x8B", "downloads": 29, "source": "Hugging Face", "score": -0.08847622293594563, "first_commit": "2024-06-09 12:17:00", "latest_commit": "2024-06-11 07:39:45", "languages": [], "model_or_dataset": "model", "model_size": 13.7, "model_architectures": "MixtralForCausalLM" }, { "description": "rinna/nekomata-14b-gguf Overview The model is the GGUF version of rinna/nekomata-14b.", "url": "https://huggingface.co/rinna/nekomata-14b-gguf", "project_name": "nekomata-14b-gguf", "downloads": 29, "source": "Hugging Face", "score": -0.08847622293594563, "first_commit": "2023-12-19 08:11:51", "latest_commit": "2024-07-20 08:29:58", "languages": [], "model_or_dataset": "model", "model_size": 14.2, "model_architectures": null }, { "description": "Model card for model ID", "url": "https://huggingface.co/retrieva-jp/t5-base-short", "project_name": "t5-base-short", "downloads": 29, "source": "Hugging Face", "score": -0.08847622293594563, "first_commit": "2023-04-26 08:20:52", "latest_commit": "2023-05-10 10:00:23", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration" }, { "description": "deberta-base-japanese-wikipedia-ud-head Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-wikipedia-ud-head", "project_name": "deberta-base-japanese-wikipedia-ud-head", "downloads": 29, "source": "Hugging Face", "score": -0.08847622293594563, "first_commit": "2022-06-25 13:03:09", "latest_commit": "2024-08-20 19:47:27", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForQuestionAnswering" }, { "description": "ELECTRA small Japanese finance generator This is a ELECTRA model pretrained on texts in the Japanese language.", "url": "https://huggingface.co/izumi-lab/electra-small-paper-japanese-fin-generator", "project_name": "electra-small-paper-japanese-fin-generator", "downloads": 29, "source": "Hugging Face", "score": -0.08847622293594563, "first_commit": "2021-10-04 13:38:47", "latest_commit": "2023-10-21 13:21:24", "languages": [], "model_or_dataset": "model", "model_size": 0.00491, "model_architectures": "ElectraForMaskedLM" }, { "description": "Dataset Summary This is the Business Scene Dialogue (BSD) dataset, a Japanese-English parallel corpus containing written conversations in various business scenarios.", "url": "https://huggingface.co/datasets/ryo0634/bsd_ja_en", "project_name": "bsd_ja_en", "downloads": 29, "source": "Hugging Face", "score": -0.08847622293594563, "first_commit": "2022-01-25 16:35:02", "latest_commit": "2024-01-11 07:36:44", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "タイトルから記事本文を生成するモデル SEE: https://qiita.com/sonoisa/items/a9af64ff641f0bbfed44", "url": "https://huggingface.co/sonoisa/t5-base-japanese-article-generation", "project_name": "t5-base-japanese-article-generation", "downloads": 28, "source": "Hugging Face", "score": -0.08848523395391715, "first_commit": "2021-04-03 13:55:25", "latest_commit": "2024-04-17 11:39:12", "languages": [], "model_or_dataset": "model", "model_size": 0.223, "model_architectures": "T5ForConditionalGeneration" }, { "description": "deberta-small-japanese-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-small-japanese-upos", "project_name": "deberta-small-japanese-upos", "downloads": 28, "source": "Hugging Face", "score": -0.08848523395391715, "first_commit": "2022-05-23 23:55:56", "latest_commit": "2024-07-26 15:38:41", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "deberta-base-japanese-unidic Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-unidic", "project_name": "deberta-base-japanese-unidic", "downloads": 28, "source": "Hugging Face", "score": -0.08848523395391715, "first_commit": "2022-06-08 08:05:33", "latest_commit": "2022-06-18 23:02:31", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "deberta-large-japanese-aozora-ud-goeswith Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-aozora-ud-goeswith", "project_name": "deberta-large-japanese-aozora-ud-goeswith", "downloads": 28, "source": "Hugging Face", "score": -0.08848523395391715, "first_commit": "2022-10-14 11:35:36", "latest_commit": "2024-08-20 19:20:44", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "COMET-GPT2 ja Finetuned GPT-2 on ATOMIC ja using a causal language modeling (CLM) objective.", "url": "https://huggingface.co/nlp-waseda/comet-gpt2-small-japanese", "project_name": "comet-gpt2-small-japanese", "downloads": 28, "source": "Hugging Face", "score": -0.08848523395391715, "first_commit": "2022-11-15 05:14:35", "latest_commit": "2023-02-13 10:26:12", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel" }, { "description": "bert-base-irony", "url": "https://huggingface.co/kit-nlp/bert-base-japanese-basic-char-v2-irony", "project_name": "bert-base-japanese-basic-char-v2-irony", "downloads": 28, "source": "Hugging Face", "score": -0.08848523395391715, "first_commit": "2022-11-07 07:33:23", "latest_commit": "2022-11-08 00:10:26", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification" }, { "description": "whisper-large-v2-mix-jp model for CTranslate2 This repository contains the conversion of vumichien/whisper-large-v2-mix-jp to the CTranslate2 model format.", "url": "https://huggingface.co/arc-r/faster-whisper-large-v2-mix-jp", "project_name": "faster-whisper-large-v2-mix-jp", "downloads": 28, "source": "Hugging Face", "score": -0.08848523395391715, "first_commit": "2023-07-07 05:53:52", "latest_commit": "2023-07-07 17:56:03", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "Llama-3-8B-Instruct-JP-nk2t-v0.3 Model Details: Built with Meta Llama 3 llama-3-8bの日本語継続学習モデルにChatVectorを適用し、さらにQLoraでファインチューニングしたモデルです。 ", "url": "https://huggingface.co/nk2t/Llama-3-8B-Instruct-japanese-nk2t-v0.3", "project_name": "Llama-3-8B-Instruct-japanese-nk2t-v0.3", "downloads": 28, "source": "Hugging Face", "score": -0.08848523395391715, "first_commit": "2024-05-15 12:24:06", "latest_commit": "2024-05-22 11:02:28", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM" }, { "description": "Local-Novel-LLM-project様の Ninja-V2-7B をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/Ninja-V2-7B-GGUF", "project_name": "Ninja-V2-7B-GGUF", "downloads": 28, "source": "Hugging Face", "score": -0.08848523395391715, "first_commit": "2024-06-15 16:23:41", "latest_commit": "2024-06-15 21:25:59", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "This repository contains some GGUF quantizations of the merged VNTL LLaMA3 8B 202409 qlora model, created using a custom version of the VNTL dataset combined with the VNTL-Chat dataset.", "url": "https://huggingface.co/lmg-anon/vntl-llama3-8b-202409-gguf", "project_name": "vntl-llama3-8b-202409-gguf", "downloads": 28, "source": "Hugging Face", "score": -0.08848523395391715, "first_commit": "2024-09-25 15:04:57", "latest_commit": "2024-09-25 16:29:08", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null }, { "description": "Riga_collectionとは? ", "url": "https://huggingface.co/natsusakiyomi/Riga_Collection", "project_name": "Riga_Collection", "downloads": 28, "source": "Hugging Face", "score": -0.08848523395391715, "first_commit": "2023-03-18 17:40:43", "latest_commit": "2023-03-24 16:13:45", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "Dataset Preprocessing Supported Tasks and Leaderboards Languages 注釈はすべて日本語を主要言語としています。", "url": "https://huggingface.co/datasets/shunk031/jsnli", "project_name": "jsnli", "downloads": 28, "source": "Hugging Face", "score": -0.08848523395391715, "first_commit": "2022-12-01 01:31:32", "latest_commit": "2022-12-12 16:36:58", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "japanese-soseki-gpt2-1b", "url": "https://huggingface.co/jweb/japanese-soseki-gpt2-1b", "project_name": "japanese-soseki-gpt2-1b", "downloads": 27, "source": "Hugging Face", "score": -0.08849424497188868, "first_commit": "2022-03-03 04:53:15", "latest_commit": "2023-03-27 12:09:04", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel" }, { "description": "deberta-base-japanese-aozora-ud-goeswith Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-aozora-ud-goeswith", "project_name": "deberta-base-japanese-aozora-ud-goeswith", "downloads": 27, "source": "Hugging Face", "score": -0.08849424497188868, "first_commit": "2022-10-14 09:43:58", "latest_commit": "2024-08-20 18:52:19", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "bert-base-japanese-v3-bpr-question-aio 「大規模言語モデル入門」の第9章で紹介している文書検索モデルBPRの質問エンコーダです。 ", "url": "https://huggingface.co/llm-book/bert-base-japanese-v3-bpr-question-aio", "project_name": "bert-base-japanese-v3-bpr-question-aio", "downloads": 27, "source": "Hugging Face", "score": -0.08849424497188868, "first_commit": "2023-06-06 08:21:13", "latest_commit": "2023-07-24 07:12:05", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertModel" }, { "description": "Model Card for Japanese DeBERTa V2 base Model description This is a Japanese DeBERTa V2 base model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.", "url": "https://huggingface.co/ku-nlp/deberta-v2-base-japanese-with-auto-jumanpp", "project_name": "deberta-v2-base-japanese-with-auto-jumanpp", "downloads": 27, "source": "Hugging Face", "score": -0.08849424497188868, "first_commit": "2023-09-07 06:04:29", "latest_commit": "2023-11-20 06:00:08", "languages": [], "model_or_dataset": "model", "model_size": 0.137, "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "INPUT: Japanese name in ROMAJI FORM OUTPUT:", "url": "https://huggingface.co/tarudesu/gendec-with-distilmbert", "project_name": "gendec-with-distilmbert", "downloads": 27, "source": "Hugging Face", "score": -0.08849424497188868, "first_commit": "2023-11-14 02:12:39", "latest_commit": "2024-03-23 16:49:33", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DistilBertForSequenceClassification" }, { "description": "Orion-14B 🌐English | 🇨", "url": "https://huggingface.co/OrionStarAI/Orion-14B-Base-Int4", "project_name": "Orion-14B-Base-Int4", "downloads": 27, "source": "Hugging Face", "score": -0.08849424497188868, "first_commit": "2024-01-18 09:50:31", "latest_commit": "2024-03-26 09:55:37", "languages": [], "model_or_dataset": "model", "model_size": 2.69, "model_architectures": "OrionForCausalLM" }, { "description": "Model card for model ID", "url": "https://huggingface.co/retrieva-jp/t5-small-medium", "project_name": "t5-small-medium", "downloads": 27, "source": "Hugging Face", "score": -0.08849424497188868, "first_commit": "2023-04-26 08:26:19", "latest_commit": "2023-05-10 10:01:16", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration" }, { "description": "deberta-base-japanese-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-upos", "project_name": "deberta-base-japanese-upos", "downloads": 27, "source": "Hugging Face", "score": -0.08849424497188868, "first_commit": "2022-05-24 08:12:05", "latest_commit": "2024-07-26 15:59:24", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "Dataset Summary SNOW T15:The simplified corpus for the Japanese language.", "url": "https://huggingface.co/datasets/SNOW-NLP/snow_simplified_japanese_corpus", "project_name": "snow_simplified_japanese_corpus", "downloads": 27, "source": "Hugging Face", "score": -0.08849424497188868, "first_commit": "2022-01-25 16:36:23", "latest_commit": "2024-01-18 11:16:01", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "recruit-jp/japanese-image-classification-evaluation-dataset Overview Developed by: Recruit Co.", "url": "https://huggingface.co/datasets/recruit-jp/japanese-image-classification-evaluation-dataset", "project_name": "japanese-image-classification-evaluation-dataset", "downloads": 27, "source": "Hugging Face", "score": -0.08849424497188868, "first_commit": "2023-12-19 09:17:24", "latest_commit": "2024-01-22 10:48:13", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "roberta-large-japanese-aozora-ud-head Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-large-japanese-aozora-ud-head", "project_name": "roberta-large-japanese-aozora-ud-head", "downloads": 26, "source": "Hugging Face", "score": -0.08850325598986021, "first_commit": "2022-06-22 00:49:08", "latest_commit": "2024-08-20 19:54:48", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForQuestionAnswering" }, { "description": "roberta-large-japanese-aozora-ud-goeswith Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-large-japanese-aozora-ud-goeswith", "project_name": "roberta-large-japanese-aozora-ud-goeswith", "downloads": 26, "source": "Hugging Face", "score": -0.08850325598986021, "first_commit": "2022-10-15 04:15:39", "latest_commit": "2024-08-20 18:51:15", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForTokenClassification" }, { "description": "roberta-base-japanese-jsnli This model is a fine-tuned version of nlp-waseda/roberta-base-japanese on the JSNLI dataset.", "url": "https://huggingface.co/Formzu/roberta-base-japanese-jsnli", "project_name": "roberta-base-japanese-jsnli", "downloads": 26, "source": "Hugging Face", "score": -0.08850325598986021, "first_commit": "2022-10-14 07:50:47", "latest_commit": "2022-10-19 11:08:59", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForSequenceClassification" }, { "description": "HODACHI様の Llama-3-EZO-8b-Common-it をGGUF形式に変換したものです。 ", "url": "https://huggingface.co/MCZK/Llama-3-EZO-8b-Common-it-GGUF", "project_name": "Llama-3-EZO-8b-Common-it-GGUF", "downloads": 26, "source": "Hugging Face", "score": -0.08850325598986021, "first_commit": "2024-07-15 11:58:12", "latest_commit": "2024-07-15 20:08:22", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": null }, { "description": "Ruri: Japanese General Text Embeddings Usage First install the Sentence Transformers library: pip install -U sentence-transformers Then you can load this model and run inference.", "url": "https://huggingface.co/cl-nagoya/ruri-pt-small", "project_name": "ruri-pt-small", "downloads": 26, "source": "Hugging Face", "score": -0.08850325598986021, "first_commit": "2024-08-17 10:39:05", "latest_commit": "2024-08-30 03:11:20", "languages": [], "model_or_dataset": "model", "model_size": 0.0681, "model_architectures": "DistilBertModel" }, { "description": "Orion-14B 🌐English | 🇨", "url": "https://huggingface.co/OrionStarAI/Orion-14B-LongChat", "project_name": "Orion-14B-LongChat", "downloads": 26, "source": "Hugging Face", "score": -0.08850325598986021, "first_commit": "2024-01-19 07:15:36", "latest_commit": "2024-03-26 10:10:34", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "OrionForCausalLM" }, { "description": "line-corporation/japanese-large-lm-3.6b line-corporationさんが公開しているjapanese-large-lm-3.6bのgguf変換版です。 ", "url": "https://huggingface.co/mmnga/line-corp-japanese-large-lm-3.6b-gguf", "project_name": "line-corp-japanese-large-lm-3.6b-gguf", "downloads": 26, "source": "Hugging Face", "score": -0.08850325598986021, "first_commit": "2023-09-02 18:18:41", "latest_commit": "2023-09-08 02:53:05", "languages": [], "model_or_dataset": "model", "model_size": 3.71, "model_architectures": null }, { "description": "Fine-tuned Japanese Wav2Vec2 model for speech recognition using XLSR-53 large Fine-tuned facebook/wav2vec2-large-xlsr-53 on Japanese using Common Voice, JVS and JSUT.", "url": "https://huggingface.co/Ivydata/wav2vec2-large-xlsr-53-japanese", "project_name": "wav2vec2-large-xlsr-53-japanese", "downloads": 26, "source": "Hugging Face", "score": -0.08850325598986021, "first_commit": "2023-05-11 08:47:29", "latest_commit": "2023-05-12 02:15:39", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "Wav2Vec2ForCTC" }, { "description": "deberta-base-japanese-wikipedia-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-wikipedia-luw-upos", "project_name": "deberta-base-japanese-wikipedia-luw-upos", "downloads": 26, "source": "Hugging Face", "score": -0.08850325598986021, "first_commit": "2022-06-25 06:28:11", "latest_commit": "2024-08-20 17:53:34", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "roberta-large-japanese-char-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-large-japanese-char-luw-upos", "project_name": "roberta-large-japanese-char-luw-upos", "downloads": 26, "source": "Hugging Face", "score": -0.08850325598986021, "first_commit": "2021-12-30 15:56:46", "latest_commit": "2022-09-18 19:44:49", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForTokenClassification" }, { "description": "This dataset is a clarified version of the image, context, and question set included in the Japanese-Heron-Bench for the construction of the Japanese evaluation benchmark suite.", "url": "https://huggingface.co/datasets/Silviase/Japanese-Heron-Bench", "project_name": "Japanese-Heron-Bench", "downloads": 26, "source": "Hugging Face", "score": -0.08850325598986021, "first_commit": "2024-07-16 08:12:30", "latest_commit": "2024-07-28 12:33:15", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "固有表現ラベルはllm-book/ner-wikipedia-datasetと同様のものを採用しており、全部で8種類 (人名、法人名、地名、製品名、政治的組織名、施設名、その他の組織名、イベント名)あります。 ", "url": "https://huggingface.co/datasets/llm-book/ner-wikinews-dataset", "project_name": "ner-wikinews-dataset", "downloads": 26, "source": "Hugging Face", "score": -0.08850325598986021, "first_commit": "2023-04-22 14:32:21", "latest_commit": "2023-12-12 11:22:26", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "deberta-small-japanese-aozora Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-small-japanese-aozora", "project_name": "deberta-small-japanese-aozora", "downloads": 25, "source": "Hugging Face", "score": -0.08851226700783173, "first_commit": "2022-05-23 04:58:53", "latest_commit": "2023-01-15 15:25:14", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "roberta-base-japanese-aozora-ud-head Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-base-japanese-aozora-ud-head", "project_name": "roberta-base-japanese-aozora-ud-head", "downloads": 25, "source": "Hugging Face", "score": -0.08851226700783173, "first_commit": "2022-06-21 05:21:38", "latest_commit": "2024-08-20 19:52:34", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForQuestionAnswering" }, { "description": "deberta-large-japanese-aozora-ud-head Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-aozora-ud-head", "project_name": "deberta-large-japanese-aozora-ud-head", "downloads": 25, "source": "Hugging Face", "score": -0.08851226700783173, "first_commit": "2022-06-17 15:00:25", "latest_commit": "2023-03-04 20:17:12", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForQuestionAnswering" }, { "description": "bert-base-sudachitra-v11", "url": "https://huggingface.co/hiroshi-matsuda-rit/bert-base-sudachitra-v11", "project_name": "bert-base-sudachitra-v11", "downloads": 25, "source": "Hugging Face", "score": -0.08851226700783173, "first_commit": "2023-05-06 11:00:39", "latest_commit": "2024-01-14 16:29:56", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "ベースモデル:cl-tohoku/bert-base-japanese-whole-word-masking データセット:llm-book/wrime-sentiment オプティマイザ: adamw Optunaでハイパーパラメータ探索 学習率スケジュールのタイプ(lr_scheduler_type):", "url": "https://huggingface.co/A-Funakoshi/bert-base-japanese-v3-wrime-v2", "project_name": "bert-base-japanese-v3-wrime-v2", "downloads": 25, "source": "Hugging Face", "score": -0.08851226700783173, "first_commit": "2023-10-27 12:05:45", "latest_commit": "2023-10-27 12:16:22", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification" }, { "description": "japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1 japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1 is a merge of the following models: mistralai/Mistral-7B-Instruct-v0.1 stabilityai/japanese-stablelm-instruct-gamma-7b 🧩 Configuration slices: - sources: - model: mistralai/Mistral-7B-Instruct-v0.1 layer_range:", "url": "https://huggingface.co/MaziyarPanahi/japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1", "project_name": "japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1", "downloads": 25, "source": "Hugging Face", "score": -0.08851226700783173, "first_commit": "2024-01-16 12:23:01", "latest_commit": "2024-01-16 12:27:54", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM" }, { "description": "Mixtral-8x7B-Instruct-v0.1-japanese-alpha-merged Mixtral-8x7B-Instruct-v0.1-japanese-alpha-mergedはMixtral-8x7B-Instruct-v0.1をベースに日本語の語彙拡張継続事前学習を実施した学習途中のモデルに対して、差分マージを実施したモデルです。", "url": "https://huggingface.co/abeja/Mixtral-8x7B-Instruct-v0.1-japanese-alpha-merged", "project_name": "Mixtral-8x7B-Instruct-v0.1-japanese-alpha-merged", "downloads": 25, "source": "Hugging Face", "score": -0.08851226700783173, "first_commit": "2024-04-16 07:54:14", "latest_commit": "2024-04-20 09:14:59", "languages": [], "model_or_dataset": "model", "model_size": 46.9, "model_architectures": "MixtralForCausalLM" }, { "description": "Overview This model is based on rinna's [rinna/llama-3-youko-8b], fine-tuned using LoRA on a small number of parallel sentences from English to Japanese.", "url": "https://huggingface.co/lyu-boxuan/llama-3-youko-8b-En-Ja-MT-LoRA", "project_name": "llama-3-youko-8b-En-Ja-MT-LoRA", "downloads": 25, "source": "Hugging Face", "score": -0.08851226700783173, "first_commit": "2024-05-10 14:33:57", "latest_commit": "2024-05-21 14:54:46", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM" }, { "description": "Ruri-Reranker: Japanese General Reranker Usage Direct Usage (Sentence Transformers)", "url": "https://huggingface.co/cl-nagoya/ruri-reranker-stage1-base", "project_name": "ruri-reranker-stage1-base", "downloads": 25, "source": "Hugging Face", "score": -0.08851226700783173, "first_commit": "2024-08-19 16:14:12", "latest_commit": "2024-09-04 08:52:18", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForSequenceClassification" }, { "description": "Japanese BERT-base (Nothing + WordPiece) How to load the tokenizer Please download the dictionary file for Nothing + WordPiece from our GitHub repository.", "url": "https://huggingface.co/hitachi-nlp/bert-base-japanese_nothing-wordpiece", "project_name": "bert-base-japanese_nothing-wordpiece", "downloads": 25, "source": "Hugging Face", "score": -0.08851226700783173, "first_commit": "2023-06-14 08:08:06", "latest_commit": "2023-06-16 01:07:33", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM" }, { "description": "このモデルはdeberta-v2-large-japaneseをファインチューニングして固有表現抽出(NER)に用いれるようにしたものです。 ", "url": "https://huggingface.co/Mizuiro-sakura/deberta-v2-large-japanese-finetuned-ner", "project_name": "deberta-v2-large-japanese-finetuned-ner", "downloads": 25, "source": "Hugging Face", "score": -0.08851226700783173, "first_commit": "2023-05-10 13:22:23", "latest_commit": "2023-07-21 14:10:02", "languages": [], "model_or_dataset": "model", "model_size": 0.339, "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "electra-base-japanese-discriminator (sudachitra-wordpiece, mC4 Japanese) -", "url": "https://huggingface.co/hiroshi-matsuda-rit/electra-base-japanese-discriminator-v2", "project_name": "electra-base-japanese-discriminator-v2", "downloads": 25, "source": "Hugging Face", "score": -0.08851226700783173, "first_commit": "2023-05-07 08:14:12", "latest_commit": "2023-05-07 17:41:34", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ElectraForPreTraining" }, { "description": "roberta-small-japanese-char-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-small-japanese-char-luw-upos", "project_name": "roberta-small-japanese-char-luw-upos", "downloads": 25, "source": "Hugging Face", "score": -0.08851226700783173, "first_commit": "2021-12-23 02:47:23", "latest_commit": "2024-08-20 18:36:17", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForTokenClassification" }, { "description": "Dataset Summary JMultiWOZ is a large-scale Japanese multi-domain task-oriented dialogue dataset.", "url": "https://huggingface.co/datasets/nu-dialogue/jmultiwoz", "project_name": "jmultiwoz", "downloads": 25, "source": "Hugging Face", "score": -0.08851226700783173, "first_commit": "2024-02-29 15:38:29", "latest_commit": "2024-03-13 02:15:37", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "nlp-waseda/bigbird-base-japanese Model description This is a Japanese BigBird base model pretrained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.", "url": "https://huggingface.co/nlp-waseda/bigbird-base-japanese", "project_name": "bigbird-base-japanese", "downloads": 24, "source": "Hugging Face", "score": -0.08852127802580326, "first_commit": "2023-06-03 12:51:12", "latest_commit": "2023-06-20 10:49:17", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BigBirdForMaskedLM" }, { "description": "Orion-14B 🌐English | 🇨", "url": "https://huggingface.co/OrionStarAI/Orion-14B-Chat-Plugin", "project_name": "Orion-14B-Chat-Plugin", "downloads": 24, "source": "Hugging Face", "score": -0.08852127802580326, "first_commit": "2024-01-16 12:19:45", "latest_commit": "2024-03-26 10:12:37", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "OrionForCausalLM" }, { "description": "SambaLingo-Japanese-Chat SambaLingo-Japanese-Chat is a human aligned chat model trained in Japanese and English.", "url": "https://huggingface.co/LoneStriker/SambaLingo-Japanese-Chat-GGUF", "project_name": "SambaLingo-Japanese-Chat-GGUF", "downloads": 24, "source": "Hugging Face", "score": -0.08852127802580326, "first_commit": "2024-03-07 06:38:01", "latest_commit": "2024-03-07 06:48:27", "languages": [], "model_or_dataset": "model", "model_size": 6.95, "model_architectures": null }, { "description": "Miwa-Keita/zenz-v1-checkpoints を optimum 用に ONNX に変換したモデルです。", "url": "https://huggingface.co/p1atdev/zenz-v1-onnx", "project_name": "zenz-v1-onnx", "downloads": 24, "source": "Hugging Face", "score": -0.08852127802580326, "first_commit": "2024-06-29 03:03:03", "latest_commit": "2024-06-29 03:40:34", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel" }, { "description": "Ruri: Japanese General Text Embeddings Usage First install the Sentence Transformers library: pip install -U sentence-transformers Then you can load this model and run inference.", "url": "https://huggingface.co/cl-nagoya/ruri-pt-large", "project_name": "ruri-pt-large", "downloads": 24, "source": "Hugging Face", "score": -0.08852127802580326, "first_commit": "2024-08-19 00:58:49", "latest_commit": "2024-08-30 00:59:26", "languages": [], "model_or_dataset": "model", "model_size": 0.337, "model_architectures": "BertModel" }, { "description": "このモデルはdeberta-v2-base-japaneseをファインチューニングして固有表現抽出(NER)に用いれるようにしたものです。 ", "url": "https://huggingface.co/Mizuiro-sakura/deberta-v2-base-japanese-finetuned-ner", "project_name": "deberta-v2-base-japanese-finetuned-ner", "downloads": 24, "source": "Hugging Face", "score": -0.08852127802580326, "first_commit": "2023-01-20 05:57:37", "latest_commit": "2023-03-27 08:05:06", "languages": [], "model_or_dataset": "model", "model_size": 0.112, "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "This pre-trained model is work in progress!", "url": "https://huggingface.co/naclbit/gpt-j-japanese-6.8b", "project_name": "gpt-j-japanese-6.8b", "downloads": 24, "source": "Hugging Face", "score": -0.08852127802580326, "first_commit": "2021-10-17 08:02:54", "latest_commit": "2021-11-10 15:28:57", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTJForCausalLM" }, { "description": "roberta-base-japanese-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/roberta-base-japanese-luw-upos", "project_name": "roberta-base-japanese-luw-upos", "downloads": 24, "source": "Hugging Face", "score": -0.08852127802580326, "first_commit": "2021-12-21 00:41:00", "latest_commit": "2022-09-18 19:44:22", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForTokenClassification" }, { "description": "Dataset Description This is the Japanese Translation version of sciq.", "url": "https://huggingface.co/datasets/izumi-lab/sciq-ja-mbartm2m", "project_name": "sciq-ja-mbartm2m", "downloads": 24, "source": "Hugging Face", "score": -0.08852127802580326, "first_commit": "2023-05-19 02:03:47", "latest_commit": "2023-05-19 03:54:18", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Sakura_dataset 商用利用可能な超小規模高品質日本語データセット。 ", "url": "https://huggingface.co/datasets/saldra/sakura_japanese_dataset", "project_name": "sakura_japanese_dataset", "downloads": 24, "source": "Hugging Face", "score": -0.08852127802580326, "first_commit": "2023-06-07 05:44:23", "latest_commit": "2023-06-08 11:31:06", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "cyberagent/calm2-7b-chatの出力を人手でチェック・修正することで作成した日本語Instructionデータセットです。 ", "url": "https://huggingface.co/datasets/Kendamarron/jimba-instuction-1k-beta", "project_name": "jimba-instuction-1k-beta", "downloads": 24, "source": "Hugging Face", "score": -0.08852127802580326, "first_commit": "2024-02-29 15:23:48", "latest_commit": "2024-04-25 12:49:28", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Synthetic-Japanese-Roleplay-gpt-4o-mini-39.6k-formatted 20240907 データ増量(約19800件→約39600件) 概要 gpt-4o-miniを用いて作成した日本語ロールプレイデータセットであるAratako/Synthetic-Japanese-Roleplay-gpt-4o-mini-39.6kにsystem messageを追加して整形したデータセットです。 ", "url": "https://huggingface.co/datasets/Aratako/Synthetic-Japanese-Roleplay-gpt-4o-mini-39.6k-formatted", "project_name": "Synthetic-Japanese-Roleplay-gpt-4o-mini-39.6k-formatted", "downloads": 24, "source": "Hugging Face", "score": -0.08852127802580326, "first_commit": "2024-08-16 16:46:06", "latest_commit": "2024-09-07 12:34:01", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Google's mt5-base fine-tuned in Japanese to summarize patent claims in a limited Pharmaceutical domain. ", "url": "https://huggingface.co/kz/mt5base-finetuned-patentsum-japanese-small", "project_name": "mt5base-finetuned-patentsum-japanese-small", "downloads": 23, "source": "Hugging Face", "score": -0.08853028904377477, "first_commit": "2021-04-10 00:31:15", "latest_commit": "2022-05-19 06:50:32", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MT5ForConditionalGeneration" }, { "description": "TakoMT", "url": "https://huggingface.co/staka/takomt", "project_name": "takomt", "downloads": 23, "source": "Hugging Face", "score": -0.08853028904377477, "first_commit": "2022-05-08 03:52:40", "latest_commit": "2023-08-15 17:32:13", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MarianMTModel" }, { "description": "モデル概要 このモデルは、 Twitter/twhin-bert-large をSNS上のコメントに人手で攻撃性評価を行ったデータセットでFine-tuningすることで作成しました", "url": "https://huggingface.co/TomokiFujihara/twhin-bert-large-japanese-offensiveness-estimation", "project_name": "twhin-bert-large-japanese-offensiveness-estimation", "downloads": 23, "source": "Hugging Face", "score": -0.08853028904377477, "first_commit": "2024-03-24 10:28:39", "latest_commit": "2024-03-24 16:46:53", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "OffensivenessEstimationModel" }, { "description": "japanese-gpt-1b-PII-masking Model Description japanese-gpt-1b-PII-masking は、 日本語事前学習済み1B GPTモデルをベースとして、日本語の文章から個人情報をマスキングするように学習したモデルです。 ", "url": "https://huggingface.co/cameltech/japanese-gpt-1b-PII-masking", "project_name": "japanese-gpt-1b-PII-masking", "downloads": 23, "source": "Hugging Face", "score": -0.08853028904377477, "first_commit": "2024-04-05 07:26:29", "latest_commit": "2024-05-17 11:42:00", "languages": [], "model_or_dataset": "model", "model_size": 1.3, "model_architectures": "GPT2LMHeadModel" }, { "description": "Llama-3-Umievo-itr014-Shizuko-8b このモデルは日本語に対応しているLlama-3ベースの4つのモデルを進化的アルゴリズムで進化的マージしたものです。", "url": "https://huggingface.co/umiyuki/Llama-3-Umievo-itr014-Shizuko-8b", "project_name": "Llama-3-Umievo-itr014-Shizuko-8b", "downloads": 23, "source": "Hugging Face", "score": -0.08853028904377477, "first_commit": "2024-06-08 05:25:05", "latest_commit": "2024-06-08 07:47:59", "languages": [], "model_or_dataset": "model", "model_size": 8.03, "model_architectures": "LlamaForCausalLM" }, { "description": "RakutenAI-7B-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/RakutenAI-7B-upos", "project_name": "RakutenAI-7B-upos", "downloads": 23, "source": "Hugging Face", "score": -0.08853028904377477, "first_commit": "2024-07-11 06:42:57", "latest_commit": "2024-08-20 17:20:57", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MistralForTokenClassification" }, { "description": "transformer-lm-japanese-1.0b This is a JAX/Flax-based transformer language model trained on a Japanese dataset.", "url": "https://huggingface.co/fukugawa/transformer-lm-japanese-1.0b", "project_name": "transformer-lm-japanese-1.0b", "downloads": 23, "source": "Hugging Face", "score": -0.08853028904377477, "first_commit": "2024-07-25 04:27:53", "latest_commit": "2024-09-06 12:44:00", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "TransformerLMForCausalLM" }, { "description": "Llama-3-EZO-VLM-1 Based on SakanaAI/Llama-3-EvoVLM-JP-v2, it has been enhanced for Japanese usage through additional pre-training and instruction tuning.", "url": "https://huggingface.co/AXCXEPT/Llama-3-EZO-VLM-1", "project_name": "Llama-3-EZO-VLM-1", "downloads": 23, "source": "Hugging Face", "score": -0.08853028904377477, "first_commit": "2024-08-03 17:15:09", "latest_commit": "2024-08-23 10:55:53", "languages": [], "model_or_dataset": "model", "model_size": 8.48, "model_architectures": "LlavaForConditionalGeneration" }, { "description": "Tanuki-8B-dpo-v1.0-4k-AWQ 概要 GENIAC 松尾研 LLM開発プロジェクトで開発されたLLMであるweblab-GENIAC/Tanuki-8B-dpo-v1.0-4kのAWQ 4bit量子化モデルです。", "url": "https://huggingface.co/team-hatakeyama-phase2/Tanuki-8B-dpo-v1.0-4k-AWQ", "project_name": "Tanuki-8B-dpo-v1.0-4k-AWQ", "downloads": 23, "source": "Hugging Face", "score": -0.08853028904377477, "first_commit": "2024-08-27 05:48:42", "latest_commit": "2024-09-03 09:28:33", "languages": [], "model_or_dataset": "model", "model_size": 1.47, "model_architectures": "LlamaForCausalLM" }, { "description": "Heron BLIP Japanese StableLM", "url": "https://huggingface.co/turing-motors/heron-chat-blip-ja-stablelm-base-7b-v1-llava-620k", "project_name": "heron-chat-blip-ja-stablelm-base-7b-v1-llava-620k", "downloads": 23, "source": "Hugging Face", "score": -0.08853028904377477, "first_commit": "2024-02-27 13:48:02", "latest_commit": "2024-02-27 13:59:23", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "VideoBlipForConditionalGeneration" }, { "description": "This model is a fine-tuned version of facebook/wav2vec2-xls-r-1b on the MOZILLA-FOUNDATION/COMMON_VOICE_8_0 - JA dataset.", "url": "https://huggingface.co/AndrewMcDowell/wav2vec2-xls-r-1b-japanese-hiragana-katakana", "project_name": "wav2vec2-xls-r-1b-japanese-hiragana-katakana", "downloads": 22, "source": "Hugging Face", "score": -0.0885393000617463, "first_commit": "2022-02-04 11:27:09", "latest_commit": "2022-03-24 11:56:32", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "Wav2Vec2ForCTC" }, { "description": "deberta-base-japanese-unidic-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-unidic-luw-upos", "project_name": "deberta-base-japanese-unidic-luw-upos", "downloads": 22, "source": "Hugging Face", "score": -0.0885393000617463, "first_commit": "2022-06-08 08:26:25", "latest_commit": "2024-08-20 20:15:13", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "alpaca-guanaco-japanese-gpt-1b 1.3Bパラメータの日本語GPTモデルを使用した対話AIです。", "url": "https://huggingface.co/inu-ai/alpaca-guanaco-japanese-gpt-1b", "project_name": "alpaca-guanaco-japanese-gpt-1b", "downloads": 22, "source": "Hugging Face", "score": -0.0885393000617463, "first_commit": "2023-04-12 00:18:29", "latest_commit": "2023-04-13 10:25:48", "languages": [], "model_or_dataset": "model", "model_size": 1.33, "model_architectures": "GPT2LMHeadModel" }, { "description": "zenz-v1 zenz-v1はGPT-2アーキテクチャに基づくかな漢字変換タスクに特化した言語モデルです。", "url": "https://huggingface.co/Miwa-Keita/zenz-v1", "project_name": "zenz-v1", "downloads": 22, "source": "Hugging Face", "score": -0.0885393000617463, "first_commit": "2024-05-12 15:48:46", "latest_commit": "2024-05-13 16:34:02", "languages": [], "model_or_dataset": "model", "model_size": 0.09509999999999999, "model_architectures": "GPT2LMHeadModel" }, { "description": "Model card for model ID", "url": "https://huggingface.co/retrieva-jp/t5-small-long", "project_name": "t5-small-long", "downloads": 22, "source": "Hugging Face", "score": -0.0885393000617463, "first_commit": "2023-04-26 08:26:49", "latest_commit": "2023-05-10 10:01:29", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration" }, { "description": "モデル説明 (model explanation) V1 = MoeDiffusion 1.0 + (HassanBlend 1.5 - VMix03) * 0.2 V2 = MoeDiffusion 0.6 : HassanBlend 1.5 0.2 : VMix03 : 0.2 マージ元のルーツにNAIリークやInsta系モデルが含まれるという噂があるので、NAIリークアンチ・Insta系モデルアンチには非推奨 理想の黒髪ポニテ顔が出せるYaguruMagikuを、ある程度顔が近くて制御しやすいAbyssOrangeMix2と混ぜてみた。 ", "url": "https://huggingface.co/ThePioneer/MoeDiffusionPlusPlus", "project_name": "MoeDiffusionPlusPlus", "downloads": 22, "source": "Hugging Face", "score": -0.0885393000617463, "first_commit": "2023-01-19 13:04:02", "latest_commit": "2023-01-21 02:05:54", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "deberta-large-japanese-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-upos", "project_name": "deberta-large-japanese-upos", "downloads": 22, "source": "Hugging Face", "score": -0.0885393000617463, "first_commit": "2022-05-27 06:50:55", "latest_commit": "2024-07-26 16:00:59", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "deberta-large-japanese-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-luw-upos", "project_name": "deberta-large-japanese-luw-upos", "downloads": 22, "source": "Hugging Face", "score": -0.0885393000617463, "first_commit": "2022-05-26 14:52:32", "latest_commit": "2023-01-14 23:15:30", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "AIO with extended answers AIO (AI王) is a Japanese quiz dataset.", "url": "https://huggingface.co/datasets/sbintuitions/aio-extended-answers", "project_name": "aio-extended-answers", "downloads": 22, "source": "Hugging Face", "score": -0.0885393000617463, "first_commit": "2024-06-21 08:15:23", "latest_commit": "2024-07-29 08:26:02", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "roberta-small-hi-char-mlm Model Description", "url": "https://huggingface.co/nakamura196/roberta-small-hi-char-mlm", "project_name": "roberta-small-hi-char-mlm", "downloads": 21, "source": "Hugging Face", "score": -0.08854831107971783, "first_commit": "2022-07-14 20:34:59", "latest_commit": "2022-07-22 09:10:42", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForMaskedLM" }, { "description": "japanese-large-lm-1.7b-instruction-sft-4bit-128g-actorder_False", "url": "https://huggingface.co/line-corporation/japanese-large-lm-1.7b-instruction-sft-4bit-128g-actorder_False", "project_name": "japanese-large-lm-1.7b-instruction-sft-4bit-128g-actorder_False", "downloads": 21, "source": "Hugging Face", "score": -0.08854831107971783, "first_commit": "2023-09-26 06:15:16", "latest_commit": "2023-09-29 03:19:23", "languages": [], "model_or_dataset": "model", "model_size": 0.446, "model_architectures": "GPT2LMHeadModel" }, { "description": "Vecteus-V2-7B このモデルは、ベクトルマージなどを用い作成された高性能ベースモデルです。 ", "url": "https://huggingface.co/Local-Novel-LLM-project/Vecteus-V2-7B", "project_name": "Vecteus-V2-7B", "downloads": 21, "source": "Hugging Face", "score": -0.08854831107971783, "first_commit": "2024-06-16 03:51:43", "latest_commit": "2024-07-06 13:39:41", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM" }, { "description": "Deepreneur-blue-lizard Model Description Deepreneur-blue-lizardは、MetaのLlama-2-7bに対して、Wikipediaや書籍等の日本語の学習データを用いて追加事前学習と独自データによるファインチューニングを実施したモデルです。", "url": "https://huggingface.co/Deepreneur/blue-lizard", "project_name": "blue-lizard", "downloads": 21, "source": "Hugging Face", "score": -0.08854831107971783, "first_commit": "2024-02-05 16:29:48", "latest_commit": "2024-02-12 14:43:33", "languages": [], "model_or_dataset": "model", "model_size": 6.74, "model_architectures": "LlamaForCausalLM" }, { "description": "Kaidan Nihonbunka: A Journey Through Hyakumonogatari's Ghostly Tales Welcome to the Kaidan Nihonbunka Dataset About Name kaidan Nihonbunka translates to 怪談日本文化 in Japanese: 怪談 (Kwaidan): Ghost story or supernatural tale.", "url": "https://huggingface.co/datasets/mohamed-khalil/KaidanNihonbunka", "project_name": "KaidanNihonbunka", "downloads": 21, "source": "Hugging Face", "score": -0.08854831107971783, "first_commit": "2024-04-15 15:43:41", "latest_commit": "2024-04-15 16:03:13", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Japanese ELECTRA-small We provide a Japanese ELECTRA-Small model, as described in ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators.", "url": "https://huggingface.co/cinmodel/electra-small-japanese-generator", "project_name": "electra-small-japanese-generator", "downloads": 20, "source": "Hugging Face", "score": -0.08855732209768935, "first_commit": "2020-11-13 06:49:52", "latest_commit": "2020-12-11 22:26:17", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "ElectraForMaskedLM" }, { "description": "bert-large-japanese-wikipedia-ud-head Model Description", "url": "https://huggingface.co/KoichiYasuoka/bert-large-japanese-wikipedia-ud-head", "project_name": "bert-large-japanese-wikipedia-ud-head", "downloads": 20, "source": "Hugging Face", "score": -0.08855732209768935, "first_commit": "2022-06-21 07:38:19", "latest_commit": "2024-08-20 19:45:52", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForQuestionAnswering" }, { "description": "Mixtral-8x7B-v0.1-japanese Mixtral-8x7B-v0.1-japaneseはMixtral-8x7B-v0.1をベースに日本語の語彙拡張継続事前学習を実施したモデルです。", "url": "https://huggingface.co/abeja/Mixtral-8x7B-v0.1-japanese", "project_name": "Mixtral-8x7B-v0.1-japanese", "downloads": 20, "source": "Hugging Face", "score": -0.08855732209768935, "first_commit": "2024-04-16 03:06:14", "latest_commit": "2024-04-20 09:14:10", "languages": [], "model_or_dataset": "model", "model_size": 46.9, "model_architectures": "MixtralForCausalLM" }, { "description": "XML-RoBERTa-NER-Japanese This model is a fine-tuned version of xlm-roberta-base on the Wikipedia Japanese NER dataset from Stockmark Inc.", "url": "https://huggingface.co/ithattieu/XML-RoBERTa-NER-Japanese", "project_name": "XML-RoBERTa-NER-Japanese", "downloads": 20, "source": "Hugging Face", "score": -0.08855732209768935, "first_commit": "2024-08-17 08:18:04", "latest_commit": "2024-08-18 04:03:33", "languages": [], "model_or_dataset": "model", "model_size": 0.277, "model_architectures": "RobertaForTokenClassification" }, { "description": "このモデルはdeberta-v2-tiny-japaneseをファインチューニングしてQAタスクに用いれるようにしたものです。 ", "url": "https://huggingface.co/Mizuiro-sakura/deberta-v2-tiny-japanese-finetuned-QA", "project_name": "deberta-v2-tiny-japanese-finetuned-QA", "downloads": 20, "source": "Hugging Face", "score": -0.08855732209768935, "first_commit": "2023-05-11 10:34:38", "latest_commit": "2023-05-11 10:38:32", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForQuestionAnswering" }, { "description": "bart-base-japanese This model is converted from the original Japanese BART Pretrained model released by Kyoto University.", "url": "https://huggingface.co/Formzu/bart-base-japanese", "project_name": "bart-base-japanese", "downloads": 20, "source": "Hugging Face", "score": -0.08855732209768935, "first_commit": "2022-10-31 06:52:38", "latest_commit": "2022-11-07 11:13:39", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MBartForConditionalGeneration" }, { "description": "ESを書くAI Japanese GPT-2 modelをファインチューニングしました ファインチューニングには、あらゆる分野から140,000件ほどのESを用いました。 ", "url": "https://huggingface.co/huranokuma/es2", "project_name": "es2", "downloads": 20, "source": "Hugging Face", "score": -0.08855732209768935, "first_commit": "2022-08-09 08:20:00", "latest_commit": "2022-08-20 04:26:36", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel" }, { "description": "Danbooru2023:", "url": "https://huggingface.co/datasets/nyanko7/danbooru2023", "project_name": "danbooru2023", "downloads": 20, "source": "Hugging Face", "score": -0.08855732209768935, "first_commit": "2024-01-07 19:51:58", "latest_commit": "2024-05-22 18:43:24", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "OpenMathInstruct-1 を日本語に自動翻訳した商用利用可能な180万件の指示チューニングデータセットになります。 ", "url": "https://huggingface.co/datasets/kunishou/OpenMathInstruct-1-1.8m-ja", "project_name": "OpenMathInstruct-1-1.8m-ja", "downloads": 20, "source": "Hugging Face", "score": -0.08855732209768935, "first_commit": "2024-02-23 16:31:34", "latest_commit": "2024-02-24 18:29:28", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Synthetic-JP-Conversations-Magpie-Nemotron-4-10k Magpieの手法をnvidia/Nemotron-4-340B-Instructに対して適用し作成した、約10000件の日本語instruction tuning用データセットです。 ", "url": "https://huggingface.co/datasets/Aratako/Synthetic-JP-Conversations-Magpie-Nemotron-4-10k", "project_name": "Synthetic-JP-Conversations-Magpie-Nemotron-4-10k", "downloads": 20, "source": "Hugging Face", "score": -0.08855732209768935, "first_commit": "2024-07-05 13:53:45", "latest_commit": "2024-07-05 13:57:08", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "range3/cc100-ja This dataset consists of parquet files from the cc100 dataset with only the Japanese language extracted and sharded.", "url": "https://huggingface.co/datasets/range3/cc100-ja", "project_name": "cc100-ja", "downloads": 20, "source": "Hugging Face", "score": -0.08855732209768935, "first_commit": "2023-02-04 05:10:34", "latest_commit": "2023-02-04 05:43:32", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Google's mt5-base fine-tuned in Japanese to solve error detection and correction task. ", "url": "https://huggingface.co/kz/mt5base-finetuned-ECC-japanese-small", "project_name": "mt5base-finetuned-ECC-japanese-small", "downloads": 19, "source": "Hugging Face", "score": -0.08856633311566088, "first_commit": "2021-03-21 19:07:13", "latest_commit": "2022-05-26 13:50:56", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MT5ForConditionalGeneration" }, { "description": "モデル説明 (model explanation) YaguruMagiku 0.6 : AbyssOrangeMix2_sfw 0.4 マージ元のルーツにNAIリークが含まれるという噂があるので、NAIリークアンチには非推奨 理想の黒髪ポニテ顔が出せるYaguruMagikuを、ある程度顔が近くて制御しやすいAbyssOrangeMix2と混ぜてみた。 ", "url": "https://huggingface.co/ThePioneer/MoeDiffusion", "project_name": "MoeDiffusion", "downloads": 19, "source": "Hugging Face", "score": -0.08856633311566088, "first_commit": "2023-01-18 11:14:31", "latest_commit": "2023-01-21 02:10:41", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "LINE DistilBERT Japanese (forked by liwii)", "url": "https://huggingface.co/liwii/line-distilbert-base-japanese-fork", "project_name": "line-distilbert-base-japanese-fork", "downloads": 19, "source": "Hugging Face", "score": -0.08856633311566088, "first_commit": "2023-12-01 08:26:36", "latest_commit": "2023-12-01 09:16:46", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DistilBertForMaskedLM" }, { "description": "japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1 japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1 is a merge of the following models: mistralai/Mistral-7B-Instruct-v0.1 stabilityai/japanese-stablelm-base-gamma-7b 🧩 Configuration slices: - sources: - model: mistralai/Mistral-7B-Instruct-v0.1 layer_range:", "url": "https://huggingface.co/MaziyarPanahi/japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1", "project_name": "japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1", "downloads": 19, "source": "Hugging Face", "score": -0.08856633311566088, "first_commit": "2024-01-17 04:41:20", "latest_commit": "2024-01-17 04:46:18", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM" }, { "description": "japanese-sexual-moderation-v2は、studio-ousia/luke-japanese-large-liteをファインチューニングしたモデルです。", "url": "https://huggingface.co/oshizo/japanese-sexual-moderation-v2", "project_name": "japanese-sexual-moderation-v2", "downloads": 19, "source": "Hugging Face", "score": -0.08856633311566088, "first_commit": "2024-01-03 04:58:17", "latest_commit": "2024-01-03 07:09:05", "languages": [], "model_or_dataset": "model", "model_size": 0.41400000000000003, "model_architectures": "LukeForSequenceClassification" }, { "description": "Model Card for Japanese DeBERTa V2 base Model description This is a Japanese DeBERTa V2 base model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.", "url": "https://huggingface.co/G-Root/deberta-v2-base-japanese", "project_name": "deberta-v2-base-japanese", "downloads": 19, "source": "Hugging Face", "score": -0.08856633311566088, "first_commit": "2023-09-14 10:06:00", "latest_commit": "2023-09-14 17:24:52", "languages": [], "model_or_dataset": "model", "model_size": 0.137, "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "yuyuyui-chatbot", "url": "https://huggingface.co/ushikado/yuyuyui-chatbot", "project_name": "yuyuyui-chatbot", "downloads": 19, "source": "Hugging Face", "score": -0.08856633311566088, "first_commit": "2021-05-04 14:52:12", "latest_commit": "2021-05-23 13:27:10", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel" }, { "description": "ELECTRA small Japanese generator This is a ELECTRA model pretrained on texts in the Japanese language.", "url": "https://huggingface.co/izumi-lab/electra-small-paper-japanese-generator", "project_name": "electra-small-paper-japanese-generator", "downloads": 19, "source": "Hugging Face", "score": -0.08856633311566088, "first_commit": "2021-10-04 13:47:24", "latest_commit": "2023-10-21 13:21:31", "languages": [], "model_or_dataset": "model", "model_size": 0.00491, "model_architectures": "ElectraForMaskedLM" }, { "description": "Example ESPnet2 TTS model kan-bayashi/jsut_fastspeech2 ♻", "url": "https://huggingface.co/espnet/kan-bayashi_jsut_fastspeech2", "project_name": "kan-bayashi_jsut_fastspeech2", "downloads": 19, "source": "Hugging Face", "score": -0.08856633311566088, "first_commit": "2021-07-03 14:45:57", "latest_commit": "2021-07-03 10:46:00", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "It covers multiple fields such as tourism, medical treatment, daily life, news, etc. ", "url": "https://huggingface.co/datasets/Nexdata/English-Japanese_Parallel_Corpus_Data", "project_name": "English-Japanese_Parallel_Corpus_Data", "downloads": 19, "source": "Hugging Face", "score": -0.08856633311566088, "first_commit": "2023-11-08 10:47:40", "latest_commit": "2024-08-05 03:14:27", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "This dataset is a collection of Korean, Chinese, and Japanese OpenOrca translation datasets.", "url": "https://huggingface.co/datasets/werty1248/OpenOrca-EnKoZhJa-18k", "project_name": "OpenOrca-EnKoZhJa-18k", "downloads": 19, "source": "Hugging Face", "score": -0.08856633311566088, "first_commit": "2024-08-10 18:54:09", "latest_commit": "2024-08-10 19:16:35", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "BERT base Japanese (character-level tokenization with whole word masking, jawiki-20200831)", "url": "https://huggingface.co/hiroshi-matsuda-rit/bert-base-japanese-basic-char-v2", "project_name": "bert-base-japanese-basic-char-v2", "downloads": 18, "source": "Hugging Face", "score": -0.0885753441336324, "first_commit": "2021-08-04 11:01:49", "latest_commit": "2021-09-23 16:49:50", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM" }, { "description": "日本語ByT5事前学習済みモデル This is a ByT5 (a tokenizer-free extension of the Text-to-Text Transfer Transformer) model pretrained on Japanese corpus. ", "url": "https://huggingface.co/sonoisa/byt5-small-japanese", "project_name": "byt5-small-japanese", "downloads": 18, "source": "Hugging Face", "score": -0.0885753441336324, "first_commit": "2021-06-04 13:14:22", "latest_commit": "2021-09-23 18:29:53", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MT5ForConditionGeneration" }, { "description": "Cross-Encoder for Natural Language Inference(NLI) for Japanese This model was trained using SentenceTransformers Cross-Encoder class.", "url": "https://huggingface.co/akiFQC/bert-base-japanese-v3_nli-jsnli-jnli-jsick", "project_name": "bert-base-japanese-v3_nli-jsnli-jnli-jsick", "downloads": 18, "source": "Hugging Face", "score": -0.0885753441336324, "first_commit": "2024-04-26 05:15:05", "latest_commit": "2024-04-26 06:02:55", "languages": [], "model_or_dataset": "model", "model_size": 0.111, "model_architectures": "BertForSequenceClassification" }, { "description": "モデルについて Qwen/Qwen1.5-0.5Bを日英データ5Bトークンで継続事前学習したモデルです。 ", "url": "https://huggingface.co/Kendamarron/Tokara-0.5B-v0.1", "project_name": "Tokara-0.5B-v0.1", "downloads": 18, "source": "Hugging Face", "score": -0.0885753441336324, "first_commit": "2024-05-06 11:39:26", "latest_commit": "2024-05-08 12:44:05", "languages": [], "model_or_dataset": "model", "model_size": 0.464, "model_architectures": "Qwen2ForCausalLM" }, { "description": "Oumuamua-7b-instruct-v2 🚨 If you want to avoid outputs that appear to be literal translations, please prompt this model to role-play as a Japanese person.", "url": "https://huggingface.co/nitky/Oumuamua-7b-instruct-v2", "project_name": "Oumuamua-7b-instruct-v2", "downloads": 18, "source": "Hugging Face", "score": -0.0885753441336324, "first_commit": "2024-06-14 07:08:07", "latest_commit": "2024-06-19 22:29:07", "languages": [], "model_or_dataset": "model", "model_size": 7.33, "model_architectures": "MistralForCausalLM" }, { "description": "Kotoba-Whisper-Bilingual (v1.0)", "url": "https://huggingface.co/kotoba-tech/kotoba-whisper-bilingual-v1.0", "project_name": "kotoba-whisper-bilingual-v1.0", "downloads": 18, "source": "Hugging Face", "score": -0.0885753441336324, "first_commit": "2024-09-27 06:18:39", "latest_commit": "2024-09-30 06:57:43", "languages": [], "model_or_dataset": "model", "model_size": 0.756, "model_architectures": "WhisperForConditionalGeneration" }, { "description": "Overview of bert-japanese-12M The bert-japanese-12M model is a transformer-based model with BERT architecture, which is designed to be used on Japanese text.", "url": "https://huggingface.co/nptdat/bert-japanese-12M", "project_name": "bert-japanese-12M", "downloads": 18, "source": "Hugging Face", "score": -0.0885753441336324, "first_commit": "2024-08-16 16:46:49", "latest_commit": "2024-08-19 02:56:14", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM" }, { "description": "This model learned the proceedings of the Japanese parliament in 2022.", "url": "https://huggingface.co/ohtaman/falcon-7b-kokkai2022-lora", "project_name": "falcon-7b-kokkai2022-lora", "downloads": 18, "source": "Hugging Face", "score": -0.0885753441336324, "first_commit": "2023-07-14 20:05:55", "latest_commit": "2023-09-20 16:36:19", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "モデル説明 (model explanation) MoeDiffusionPlusPlus 0.7 : DreamShaper 3.3 (full) 0.3。 ", "url": "https://huggingface.co/ThePioneer/MoeSharpV1", "project_name": "MoeSharpV1", "downloads": 18, "source": "Hugging Face", "score": -0.0885753441336324, "first_commit": "2023-01-21 07:30:06", "latest_commit": "2023-02-03 23:46:40", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "Synthetic-JP-Roleplay-Instruction-Nemotron-4 Magpieの手法をnvidia/Nemotron-4-340B-Instructに対して適用し作成した、約1000件の日本語ロールプレイ用のinstructionデータセットです。 ", "url": "https://huggingface.co/datasets/Aratako/Synthetic-JP-Roleplay-Instruction-Nemotron-4-1k", "project_name": "Synthetic-JP-Roleplay-Instruction-Nemotron-4-1k", "downloads": 18, "source": "Hugging Face", "score": -0.0885753441336324, "first_commit": "2024-06-23 08:28:26", "latest_commit": "2024-06-23 08:42:32", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Dataset Summary 53,640 Japanese tweets with annotation if a tweet is related to COVID-19 or not.", "url": "https://huggingface.co/datasets/community-datasets/covid_tweets_japanese", "project_name": "covid_tweets_japanese", "downloads": 18, "source": "Hugging Face", "score": -0.0885753441336324, "first_commit": "2022-01-25 16:35:12", "latest_commit": "2024-06-24 11:21:23", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "This model is a fine-tuned version of facebook/wav2vec2-xls-r-300m on the MOZILLA-FOUNDATION/COMMON_VOICE_8_0 - JA dataset.", "url": "https://huggingface.co/AndrewMcDowell/wav2vec2-xls-r-300m-japanese", "project_name": "wav2vec2-xls-r-300m-japanese", "downloads": 17, "source": "Hugging Face", "score": -0.08858435515160393, "first_commit": "2022-01-26 15:43:02", "latest_commit": "2022-03-23 18:34:20", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "Wav2Vec2ForCTC" }, { "description": "reazonspeech-espnet-v1 reazonspeech-espnet-v1 is an ESPnet model trained for Japanese automatic speech recognition (ASR).", "url": "https://huggingface.co/reazon-research/reazonspeech-espnet-v1", "project_name": "reazonspeech-espnet-v1", "downloads": 17, "source": "Hugging Face", "score": -0.08858435515160393, "first_commit": "2023-01-13 07:44:37", "latest_commit": "2023-01-16 16:44:20", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "275.86Mのmixtralを日本語データセットでpretrainingしたものです sample from transformers import AutoTokenizer, AutoModelForCausalLM model = AutoModelForCausalLM.from_pretrained(\"if001/tiny_mixtral_ja\")", "url": "https://huggingface.co/if001/tiny_mixtral_ja", "project_name": "tiny_mixtral_ja", "downloads": 17, "source": "Hugging Face", "score": -0.08858435515160393, "first_commit": "2024-01-22 15:02:21", "latest_commit": "2024-01-23 00:42:05", "languages": [], "model_or_dataset": "model", "model_size": 0.276, "model_architectures": "MixtralForCausalLM" }, { "description": "A very tiny 33.5M Llama3 model trained on a Macbook Pro with M3 Max for 10 hours.", "url": "https://huggingface.co/frost-beta/Llama3-33.5M-Japanese", "project_name": "Llama3-33.5M-Japanese", "downloads": 17, "source": "Hugging Face", "score": -0.08858435515160393, "first_commit": "2024-07-16 23:58:54", "latest_commit": "2024-07-17 08:27:07", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "deberta-base-japanese-wikipedia Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-wikipedia", "project_name": "deberta-base-japanese-wikipedia", "downloads": 17, "source": "Hugging Face", "score": -0.08858435515160393, "first_commit": "2022-06-25 03:46:58", "latest_commit": "2023-01-27 17:51:51", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForMaskedLM" }, { "description": "nlp-waseda/gpt2-small-japanese-wikipedia This model is Japanese GPT-2 pretrained on Japanese Wikipedia.", "url": "https://huggingface.co/nlp-waseda/gpt2-small-japanese-wikipedia", "project_name": "gpt2-small-japanese-wikipedia", "downloads": 17, "source": "Hugging Face", "score": -0.08858435515160393, "first_commit": "2021-12-28 01:22:40", "latest_commit": "2021-12-28 15:31:38", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel" }, { "description": "ELECTRA small Japanese finance generator This is a ELECTRA model pretrained on texts in the Japanese language.", "url": "https://huggingface.co/izumi-lab/electra-small-japanese-fin-generator", "project_name": "electra-small-japanese-fin-generator", "downloads": 17, "source": "Hugging Face", "score": -0.08858435515160393, "first_commit": "2021-10-04 14:07:16", "latest_commit": "2023-10-21 13:21:23", "languages": [], "model_or_dataset": "model", "model_size": 0.013800000000000002, "model_architectures": "ElectraForMaskedLM" }, { "description": "ELECTRA base Japanese generator This is a ELECTRA model pretrained on texts in the Japanese language.", "url": "https://huggingface.co/izumi-lab/electra-base-japanese-generator", "project_name": "electra-base-japanese-generator", "downloads": 17, "source": "Hugging Face", "score": -0.08858435515160393, "first_commit": "2021-11-15 17:23:50", "latest_commit": "2023-10-21 13:21:16", "languages": [], "model_or_dataset": "model", "model_size": 0.035500000000000004, "model_architectures": "ElectraForMaskedLM" }, { "description": "「LLM-jp-3 172B beta1」利用規約 この利用規約(以下「本規約」といいます)は、大学共同利用機関法人 情報・システム研究機構 国立情報学研究所(以下「提供者」といいます)による開発の成果物として公開する大規模言語モデル「LLM-jp-3 172B beta1」(以下「本プログラム」といいます)の利用に関する条件を定めるものです。", "url": "https://huggingface.co/llm-jp/llm-jp-3-172b-beta1", "project_name": "llm-jp-3-172b-beta1", "downloads": 17, "source": "Hugging Face", "score": -0.08858435515160393, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "model", "model_size": 172.0, "model_architectures": null }, { "description": "A slightly modified version of the parsing and chunking method for singletongue/wikipedia-utils.", "url": "https://huggingface.co/datasets/oshizo/japanese-wikipedia-paragraphs", "project_name": "japanese-wikipedia-paragraphs", "downloads": 17, "source": "Hugging Face", "score": -0.08858435515160393, "first_commit": "2023-12-09 11:14:53", "latest_commit": "2023-12-09 14:09:30", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "概要 このデータセットはnull-instruct-jaとDeepSeek-v2.5のq4を用いて合成されました。 ", "url": "https://huggingface.co/datasets/DataPilot/Generated-dataset-by-deepseek-v2.5", "project_name": "Generated-dataset-by-deepseek-v2.5", "downloads": 17, "source": "Hugging Face", "score": -0.08858435515160393, "first_commit": "2024-09-11 10:38:57", "latest_commit": "2024-09-11 12:20:29", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Dataset Details Dataset Type:Japanese LLaVA v1.5", "url": "https://huggingface.co/datasets/turing-motors/LLaVA-v1.5-Instruct-620K-JA", "project_name": "LLaVA-v1.5-Instruct-620K-JA", "downloads": 17, "source": "Hugging Face", "score": -0.08858435515160393, "first_commit": "2024-04-10 05:04:58", "latest_commit": "2024-04-12 09:18:42", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Example ESPnet2 TTS model kan-bayashi/jsut_tacotron2 ♻", "url": "https://huggingface.co/espnet/kan-bayashi_jsut_tacotron2", "project_name": "kan-bayashi_jsut_tacotron2", "downloads": 16, "source": "Hugging Face", "score": -0.08859336616957546, "first_commit": "2021-07-03 14:43:58", "latest_commit": "2021-07-03 10:44:00", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "GPT2 Japanese base model version 2 Prerequisites transformers==4.19.2 Model architecture This model uses GPT2 base setttings except vocabulary size.", "url": "https://huggingface.co/ClassCat/gpt2-base-japanese-v2", "project_name": "gpt2-base-japanese-v2", "downloads": 16, "source": "Hugging Face", "score": -0.08859336616957546, "first_commit": "2022-06-04 02:30:34", "latest_commit": "2022-06-25 15:36:22", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel" }, { "description": "bert-base-japanese-v3-bpr-passage-aio 「大規模言語モデル入門」の第9章で紹介している文書検索モデルBPRのパッセージエンコーダです。 ", "url": "https://huggingface.co/llm-book/bert-base-japanese-v3-bpr-passage-aio", "project_name": "bert-base-japanese-v3-bpr-passage-aio", "downloads": 16, "source": "Hugging Face", "score": -0.08859336616957546, "first_commit": "2023-06-06 08:22:28", "latest_commit": "2023-07-24 07:14:59", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertModel" }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", "url": "https://huggingface.co/TheBloke/japanese-stablelm-instruct-beta-70B-GPTQ", "project_name": "japanese-stablelm-instruct-beta-70B-GPTQ", "downloads": 16, "source": "Hugging Face", "score": -0.08859336616957546, "first_commit": "2023-11-02 15:45:24", "latest_commit": "2023-11-02 20:04:07", "languages": [], "model_or_dataset": "model", "model_size": 9.1, "model_architectures": "LlamaForCausalLM" }, { "description": "ELYZA-japanese-Llama-2-MoE-2x13B-v0.1-GGUF 概要 Aratako/ELYZA-japanese-Llama-2-MoE-2x13B-v0.1の量子化済みGGUF版です。", "url": "https://huggingface.co/Aratako/ELYZA-japanese-Llama-2-MoE-2x13B-v0.1-GGUF", "project_name": "ELYZA-japanese-Llama-2-MoE-2x13B-v0.1-GGUF", "downloads": 16, "source": "Hugging Face", "score": -0.08859336616957546, "first_commit": "2024-03-03 12:51:40", "latest_commit": "2024-03-03 13:39:01", "languages": [], "model_or_dataset": "model", "model_size": 21.5, "model_architectures": null }, { "description": "ELYZA-japanese-Llama-2-fast-MoE-2x7B-v0.1-GGUF 概要 Aratako/ELYZA-japanese-Llama-2-fast-MoE-2x7B-v0.1の量子化済みGGUF版です。", "url": "https://huggingface.co/Aratako/ELYZA-japanese-Llama-2-fast-MoE-2x7B-v0.1-GGUF", "project_name": "ELYZA-japanese-Llama-2-fast-MoE-2x7B-v0.1-GGUF", "downloads": 16, "source": "Hugging Face", "score": -0.08859336616957546, "first_commit": "2024-03-07 13:21:38", "latest_commit": "2024-03-07 13:47:58", "languages": [], "model_or_dataset": "model", "model_size": 11.2, "model_architectures": null }, { "description": "Our Models Vecteus Ninja-v1 Ninja-v1-NSFW Ninja-v1-128k Ninja-v1-NSFW-128k Model Card for Ninja-v1-128k The Mistral-7B--based Large Language Model (LLM) is an noveldataset fine-tuned version of the Mistral-7B-v0.1 Ninja-128k has the following changes compared to Mistral-7B-v0.1.", "url": "https://huggingface.co/Local-Novel-LLM-project/Ninja-v1-128k", "project_name": "Ninja-v1-128k", "downloads": 16, "source": "Hugging Face", "score": -0.08859336616957546, "first_commit": "2024-05-01 02:56:38", "latest_commit": "2024-05-04 04:07:00", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM" }, { "description": "モデル ベースモデル:microsoft/Phi-3-mini-4k-instruct 学習データセット:llm-jp/hh-rlhf-12k-ja 学習方式:フルパラメータチューニング サンプル import torch from transformers import AutoTokenizer, AutoModelForCausalLM tokenizer = AutoTokenizer.from_pretrained( \"ryota39/Phi-3-mini-4k-instruct-dpo\", trust_remote_code=True, ) model = AutoModelForCausalLM.from_pretrained( \"ryota39/Phi-3-mini-4k-instruct-dpo\", device_map=\"auto\", torch_dtype='auto', trust_remote_code=True, ) text = \"<|user|>\\n与えられた質問に対して英語で思考し、日本語で答えてください。", "url": "https://huggingface.co/ryota39/Phi-3-mini-4k-instruct-dpo", "project_name": "Phi-3-mini-4k-instruct-dpo", "downloads": 16, "source": "Hugging Face", "score": -0.08859336616957546, "first_commit": "2024-04-24 16:21:32", "latest_commit": "2024-05-01 07:41:46", "languages": [], "model_or_dataset": "model", "model_size": 3.82, "model_architectures": "Phi3ForCausalLM" }, { "description": "HPLT Bert for Japanese This is one of the encoder-only monolingual language models trained as a first release by the HPLT project.", "url": "https://huggingface.co/HPLT/hplt_bert_base_ja", "project_name": "hplt_bert_base_ja", "downloads": 16, "source": "Hugging Face", "score": -0.08859336616957546, "first_commit": "2024-04-22 01:23:46", "latest_commit": "2024-07-11 11:36:10", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LtgbertForMaskedLM" }, { "description": "deberta-small-japanese-luw-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/deberta-small-japanese-luw-upos", "project_name": "deberta-small-japanese-luw-upos", "downloads": 16, "source": "Hugging Face", "score": -0.08859336616957546, "first_commit": "2022-05-24 03:52:45", "latest_commit": "2024-08-20 17:28:44", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "DebertaV2ForTokenClassification" }, { "description": "名言推論モデル", "url": "https://huggingface.co/Momerio/meigen_generate_Japanese", "project_name": "meigen_generate_Japanese", "downloads": 16, "source": "Hugging Face", "score": -0.08859336616957546, "first_commit": "2021-10-13 15:30:14", "latest_commit": "2021-10-26 01:19:59", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel" }, { "description": "Japanese Wikipedia Human Retrieval dataset This is a Japanese question answereing dataset with retrieval on Wikipedia articles by trained human workers.", "url": "https://huggingface.co/datasets/baobab-trees/wikipedia-human-retrieval-ja", "project_name": "wikipedia-human-retrieval-ja", "downloads": 16, "source": "Hugging Face", "score": -0.08859336616957546, "first_commit": "2024-01-15 13:52:30", "latest_commit": "2024-03-19 04:25:44", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "ShareGPT-Processed The RyokoAI/ShareGPT52K dataset, converted to Markdown and labeled with the language used.", "url": "https://huggingface.co/datasets/zetavg/ShareGPT-Processed", "project_name": "ShareGPT-Processed", "downloads": 16, "source": "Hugging Face", "score": -0.08859336616957546, "first_commit": "2023-05-16 19:50:04", "latest_commit": "2023-05-21 03:50:14", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Synthetic-JP-10-Turns-Roleplay-Dialogues-Nemotron-4-1k nvidia/Nemotron-4-340B-Instructを用いて作成した、約1000件・各10ターンの日本語ロールプレイの対話を収録した合成対話データセットです。 ", "url": "https://huggingface.co/datasets/Aratako/Synthetic-JP-10-Turns-Roleplay-Dialogues-Nemotron-4-1k", "project_name": "Synthetic-JP-10-Turns-Roleplay-Dialogues-Nemotron-4-1k", "downloads": 16, "source": "Hugging Face", "score": -0.08859336616957546, "first_commit": "2024-07-03 13:21:22", "latest_commit": "2024-07-03 13:53:20", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "GitHub リポジトリ cl-tohoku/quiz-datasets で公開されているデータセットを利用しています。 ", "url": "https://huggingface.co/datasets/llm-book/aio-passages", "project_name": "aio-passages", "downloads": 16, "source": "Hugging Face", "score": -0.08859336616957546, "first_commit": "2023-06-06 02:03:34", "latest_commit": "2023-06-24 05:55:37", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "日本語T5事前学習済みモデル This is a T5 (Text-to-Text Transfer Transformer) model pretrained on Japanese corpus. ", "url": "https://huggingface.co/sonoisa/t5-base-japanese-mC4-Wikipedia", "project_name": "t5-base-japanese-mC4-Wikipedia", "downloads": 15, "source": "Hugging Face", "score": -0.08860237718754697, "first_commit": "2021-06-30 12:53:09", "latest_commit": "2021-09-23 18:29:58", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "Manga OCR Optical character recognition for Japanese text, with the main focus being Japanese manga.", "url": "https://huggingface.co/TeamFnord/manga-ocr", "project_name": "manga-ocr", "downloads": 15, "source": "Hugging Face", "score": -0.08860237718754697, "first_commit": "2022-01-15 17:39:06", "latest_commit": "2022-02-10 07:50:15", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "VisionEncoderDecoderModel" }, { "description": "◆REV-Mix \"レボリューション\"なモデルです。 ", "url": "https://huggingface.co/Hemlok/REV-Mix", "project_name": "REV-Mix", "downloads": 15, "source": "Hugging Face", "score": -0.08860237718754697, "first_commit": "2023-08-06 17:04:53", "latest_commit": "2023-08-26 16:19:02", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "japanese-large-lm-3.6b-instruction-sft-4bit-128g-actorder_False", "url": "https://huggingface.co/line-corporation/japanese-large-lm-3.6b-instruction-sft-4bit-128g-actorder_False", "project_name": "japanese-large-lm-3.6b-instruction-sft-4bit-128g-actorder_False", "downloads": 15, "source": "Hugging Face", "score": -0.08860237718754697, "first_commit": "2023-09-26 06:16:04", "latest_commit": "2023-09-27 23:54:44", "languages": [], "model_or_dataset": "model", "model_size": 0.771, "model_architectures": "GPTNeoXForCausalLM" }, { "description": "モデル概要 このモデルは、 Twitter/twhin-bert-base をSNS上のコメントに人手で攻撃性評価を行ったデータセットでFine-tuningすることで作成しました", "url": "https://huggingface.co/TomokiFujihara/twhin-bert-base-japanese-offensiveness-estimation", "project_name": "twhin-bert-base-japanese-offensiveness-estimation", "downloads": 15, "source": "Hugging Face", "score": -0.08860237718754697, "first_commit": "2024-03-24 10:15:19", "latest_commit": "2024-03-24 16:05:46", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "OffensivenessEstimationModel" }, { "description": "rinna-gpt-neox-small-japanese-ud-causal Model Description", "url": "https://huggingface.co/KoichiYasuoka/rinna-gpt-neox-small-japanese-ud-causal", "project_name": "rinna-gpt-neox-small-japanese-ud-causal", "downloads": 15, "source": "Hugging Face", "score": -0.08860237718754697, "first_commit": "2024-09-08 01:53:03", "latest_commit": "2024-09-12 22:30:27", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTNeoXForTokenClassification" }, { "description": "Summary This is an LLaMA 3 Youko qlora, created using a custom version of the VNTL dataset combined with the VNTL-Chat dataset.", "url": "https://huggingface.co/lmg-anon/vntl-llama3-8b-202409-qlora", "project_name": "vntl-llama3-8b-202409-qlora", "downloads": 15, "source": "Hugging Face", "score": -0.08860237718754697, "first_commit": "2024-09-24 03:18:52", "latest_commit": "2024-09-25 16:23:41", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "gpt2-medium-japanese-unidic-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/gpt2-medium-japanese-unidic-upos", "project_name": "gpt2-medium-japanese-unidic-upos", "downloads": 15, "source": "Hugging Face", "score": -0.08860237718754697, "first_commit": "2024-08-30 04:34:52", "latest_commit": "2024-08-30 14:09:41", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2ForTokenClassification" }, { "description": "studio-ousia/luke-japanese-baseに対して次の変更を加えたモデルです。 ", "url": "https://huggingface.co/uzabase/luke-japanese-wordpiece-base", "project_name": "luke-japanese-wordpiece-base", "downloads": 15, "source": "Hugging Face", "score": -0.08860237718754697, "first_commit": "2023-08-10 06:04:58", "latest_commit": "2023-11-28 13:35:07", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LukeForMaskedLM" }, { "description": "NLLB 1.3B fine-tuned on Japanese to English Light Novel translation This model was fine-tuned on light and web novel for Japanese to English translation.", "url": "https://huggingface.co/thefrigidliquidation/nllb-jaen-1.3B-lightnovels", "project_name": "nllb-jaen-1.3B-lightnovels", "downloads": 15, "source": "Hugging Face", "score": -0.08860237718754697, "first_commit": "2022-10-01 00:43:59", "latest_commit": "2023-06-04 13:38:43", "languages": [], "model_or_dataset": "model", "model_size": 1.37, "model_architectures": "M2M100ForConditionalGeneration" }, { "description": "NLLB-200 1.3B fine-tuned on Ascendance of a Bookworm", "url": "https://huggingface.co/thefrigidliquidation/nllb-200-distilled-1.3B-bookworm", "project_name": "nllb-200-distilled-1.3B-bookworm", "downloads": 15, "source": "Hugging Face", "score": -0.08860237718754697, "first_commit": "2022-07-27 20:39:08", "latest_commit": "2024-04-14 18:45:22", "languages": [], "model_or_dataset": "model", "model_size": 1.37, "model_architectures": "M2M100ForConditionalGeneration" }, { "description": "jpn-heb source group: Japanese target group:", "url": "https://huggingface.co/Helsinki-NLP/opus-mt-ja-he", "project_name": "opus-mt-ja-he", "downloads": 15, "source": "Hugging Face", "score": -0.08860237718754697, "first_commit": "2020-08-19 00:28:58", "latest_commit": "2023-08-16 11:59:12", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MarianMTModel" }, { "description": "オリジナルのサイトと同じものを使用しています。 ", "url": "https://huggingface.co/datasets/llm-book/ja-vicuna-qa-benchmark", "project_name": "ja-vicuna-qa-benchmark", "downloads": 15, "source": "Hugging Face", "score": -0.08860237718754697, "first_commit": "2024-06-25 22:14:55", "latest_commit": "2024-08-31 12:37:25", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Unihan LM: Coarse-to-Fine Chinese-Japanese Language Model Pretraining with the Unihan Database Model description Chinese and Japanese share many characters with similar surface morphology.", "url": "https://huggingface.co/microsoft/unihanlm-base", "project_name": "unihanlm-base", "downloads": 14, "source": "Hugging Face", "score": -0.0886113882055185, "first_commit": "2020-09-27 11:23:02", "latest_commit": "2021-09-22 11:00:56", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "XLMModel" }, { "description": "MobileBERT 日本語事前学習済みモデル爆誕!! ", "url": "https://huggingface.co/ysakuramoto/mobilebert-ja", "project_name": "mobilebert-ja", "downloads": 14, "source": "Hugging Face", "score": -0.0886113882055185, "first_commit": "2022-01-23 11:29:39", "latest_commit": "2022-01-24 05:25:31", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "Aerner LM-v1 事前学習から全部日本語で学習させたモデルです。 ", "url": "https://huggingface.co/aerner/lm-v1", "project_name": "lm-v1", "downloads": 14, "source": "Hugging Face", "score": -0.0886113882055185, "first_commit": "2023-05-25 12:35:32", "latest_commit": "2023-05-25 13:35:34", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM" }, { "description": "whisper-large-v2-jp model for CTranslate2 This repository contains the conversion of vumichien/whisper-large-v2-jp to the CTranslate2 model format.", "url": "https://huggingface.co/arc-r/faster-whisper-large-v2-jp", "project_name": "faster-whisper-large-v2-jp", "downloads": 14, "source": "Hugging Face", "score": -0.0886113882055185, "first_commit": "2023-07-07 06:16:06", "latest_commit": "2023-07-07 18:09:09", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "Model overview This model is the baseline model for awesome-japanese-nlp-classification-dataset.", "url": "https://huggingface.co/taishi-i/awesome-japanese-nlp-classification-model", "project_name": "awesome-japanese-nlp-classification-model", "downloads": 14, "source": "Hugging Face", "score": -0.0886113882055185, "first_commit": "2023-09-09 09:23:05", "latest_commit": "2023-09-10 00:18:22", "languages": [], "model_or_dataset": "model", "model_size": 0.178, "model_architectures": "BertForSequenceClassification" }, { "description": "rinna-gpt2-medium-japanese-ud-causal Model Description", "url": "https://huggingface.co/KoichiYasuoka/rinna-gpt2-medium-japanese-ud-causal", "project_name": "rinna-gpt2-medium-japanese-ud-causal", "downloads": 14, "source": "Hugging Face", "score": -0.0886113882055185, "first_commit": "2024-09-07 07:54:18", "latest_commit": "2024-09-12 22:28:53", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2ForTokenClassification" }, { "description": "KoichiYasuoka/karasu-1.1B-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/karasu-1.1B-upos", "project_name": "karasu-1.1B-upos", "downloads": 14, "source": "Hugging Face", "score": -0.0886113882055185, "first_commit": "2024-08-30 04:04:28", "latest_commit": "2024-08-30 13:13:05", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForTokenClassification" }, { "description": "This is a model for named entity recognition of Japanese medical documents.", "url": "https://huggingface.co/Tomohiro/RealMedNLP_CR_JA", "project_name": "RealMedNLP_CR_JA", "downloads": 14, "source": "Hugging Face", "score": -0.0886113882055185, "first_commit": "2022-08-08 08:55:23", "latest_commit": "2022-08-13 03:06:31", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForTokenClassification" }, { "description": "ESを書くAI Japanese GPT-2 modelをファインチューニングしました ファインチューニングには、内定者の二万件以上のESを用いました。 ", "url": "https://huggingface.co/huranokuma/es", "project_name": "es", "downloads": 14, "source": "Hugging Face", "score": -0.0886113882055185, "first_commit": "2022-08-01 14:59:47", "latest_commit": "2022-08-14 05:47:18", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel" }, { "description": "日本語 gpt2 蒸留モデル このモデルはrinna/japanese-gpt2-meduimを教師として蒸留したものです。 ", "url": "https://huggingface.co/knok/japanese-distilgpt2", "project_name": "japanese-distilgpt2", "downloads": 14, "source": "Hugging Face", "score": -0.0886113882055185, "first_commit": "2022-04-14 09:32:23", "latest_commit": "2022-04-15 06:00:51", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "JSNLI Version 1.1 のデータセットのうち、フィルタリング後の訓練セット (train_w_filtering)", "url": "https://huggingface.co/datasets/llm-book/jsnli", "project_name": "jsnli", "downloads": 14, "source": "Hugging Face", "score": -0.0886113882055185, "first_commit": "2023-06-19 12:31:46", "latest_commit": "2023-10-25 15:22:46", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "JBLiMP This is the data from \"JBLiMP: Japanese Benchmark of Linguistic Minimal Pairs\" (Someya and Oseki, 2023).", "url": "https://huggingface.co/datasets/polm-stability/jblimp", "project_name": "jblimp", "downloads": 14, "source": "Hugging Face", "score": -0.0886113882055185, "first_commit": "2023-05-29 09:31:31", "latest_commit": "2023-05-29 18:49:16", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "このモデルはcl-tohoku/bert-large-japanese-v2をファインチューニングして、固有表現抽出(NER)に用いれるようにしたものです。 ", "url": "https://huggingface.co/Mizuiro-sakura/bert-large-japanese-v2-finetuned-ner", "project_name": "bert-large-japanese-v2-finetuned-ner", "downloads": 13, "source": "Hugging Face", "score": -0.08862039922349002, "first_commit": "2023-05-26 09:38:08", "latest_commit": "2023-07-21 14:10:18", "languages": [], "model_or_dataset": "model", "model_size": 0.336, "model_architectures": "BertForTokenClassification" }, { "description": "MPT-7B-inst このモデルは、MosaicMLのllm-foundryリポジトリを使用してmosaicml/mpt-7b-instructをファインチューニングしたモデルです。 ", "url": "https://huggingface.co/Jumtra/mpt-7b-inst", "project_name": "mpt-7b-inst", "downloads": 13, "source": "Hugging Face", "score": -0.08862039922349002, "first_commit": "2023-05-24 14:22:33", "latest_commit": "2023-06-26 01:09:06", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MPTForCausalLM" }, { "description": "Chat-Vector-LLaVA-v1.5-7b-JA Model Card Model detail Model type: Chat-Vector-LLaVA-v1.5-7b-JA is a vision-language model that can converse about input images in Japanese.", "url": "https://huggingface.co/toshi456/chat-vector-llava-v1.5-7b-ja", "project_name": "chat-vector-llava-v1.5-7b-ja", "downloads": 13, "source": "Hugging Face", "score": -0.08862039922349002, "first_commit": "2024-05-06 04:07:19", "latest_commit": "2024-05-06 11:33:32", "languages": [], "model_or_dataset": "model", "model_size": 7.06, "model_architectures": "LlavaLlamaForCausalLM" }, { "description": "Mixtral-8x7B-Instruct-v0.1-japanese Mixtral-8x7B-Instruct-v0.1-japaneseはMixtral-8x7B-Instruct-v0.1をベースに日本語の語彙拡張継続事前学習を実施したモデルです。", "url": "https://huggingface.co/abeja/Mixtral-8x7B-Instruct-v0.1-japanese", "project_name": "Mixtral-8x7B-Instruct-v0.1-japanese", "downloads": 13, "source": "Hugging Face", "score": -0.08862039922349002, "first_commit": "2024-04-17 02:56:15", "latest_commit": "2024-04-20 09:14:27", "languages": [], "model_or_dataset": "model", "model_size": 46.9, "model_architectures": "MixtralForCausalLM" }, { "description": "日本語でtrainingしたllama2 model size: 417.12M trainingは以下のscript参照https://github.com/Lightning-AI/lit-gpt/tree/main use from transformers import AutoTokenizer, AutoModelForCausalLM tokenizer = AutoTokenizer.from_pretrained(\"if001/sentencepiece_ja\", trust_remote_code=True) model = AutoModelForCausalLM.from_pretrained(\"if001/llama2_ja_small\")", "url": "https://huggingface.co/if001/llama2_ja_small", "project_name": "llama2_ja_small", "downloads": 13, "source": "Hugging Face", "score": -0.08862039922349002, "first_commit": "2023-10-11 09:11:41", "latest_commit": "2023-10-14 13:50:54", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM" }, { "description": "ベースモデル:cl-tohoku/bert-base-japanese-whole-word-masking データセット:llm-book/wrime-sentiment オプティマイザ: adafactor Optunaでハイパーパラメータ探索 学習率スケジュールのタイプ(lr_scheduler_type):", "url": "https://huggingface.co/A-Funakoshi/bert-base-japanese-v3-wrime-v1", "project_name": "bert-base-japanese-v3-wrime-v1", "downloads": 13, "source": "Hugging Face", "score": -0.08862039922349002, "first_commit": "2023-09-29 10:52:53", "latest_commit": "2023-10-25 22:58:40", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForSequenceClassification" }, { "description": "bert-large-japanese-v2-finetuned-wrime", "url": "https://huggingface.co/MuneK/bert-large-japanese-v2-finetuned-jed", "project_name": "bert-large-japanese-v2-finetuned-jed", "downloads": 13, "source": "Hugging Face", "score": -0.08862039922349002, "first_commit": "2023-09-11 09:37:42", "latest_commit": "2023-11-07 11:47:01", "languages": [], "model_or_dataset": "model", "model_size": 0.337, "model_architectures": "BertForSequenceClassification" }, { "description": "Model Card for Model ID このモデルはrinna/japanese-gpt-1bをベースモデルとして、 コンテキストからの抽出型QAと、解答を新たなコンテキストでリファインするための学習を行ったモデルです。 ", "url": "https://huggingface.co/oshizo/qa-refine-japanese-gpt-1b", "project_name": "qa-refine-japanese-gpt-1b", "downloads": 13, "source": "Hugging Face", "score": -0.08862039922349002, "first_commit": "2023-01-18 15:43:39", "latest_commit": "2023-01-19 10:14:36", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel" }, { "description": "calm3-22bを使って簡単な日本語の例文を作成したデータセットです。 ", "url": "https://huggingface.co/datasets/if001/elementray_m", "project_name": "elementray_m", "downloads": 13, "source": "Hugging Face", "score": -0.08862039922349002, "first_commit": "2024-09-25 11:22:49", "latest_commit": "2024-09-28 08:59:49", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "自動生成のマルチターンデータセット オープンなデータソースから、Calm3-22bを使ってQ&Aを自動生成したものです。 一部の計算には東京工業大学のスーパーコンピュータTSUBAME4.0を利用しました。 データソース はじめの質問(q1)を、種々のデータソースから収集しました。その後のやりとりはすべて、Calmが生成しました。質問文については、元データのライセンスに準拠します。 oasst2-33k-ja apache 2.0 databricks-dolly-15k-ja cc-by-sa-3.0 minnade CC0 cyberagent/chatbot-arena-ja-calm2-7b-chat-experimental cc-by-4.0", "url": "https://huggingface.co/datasets/kanhatakeyama/AutoMultiTurnByCalm3-22B", "project_name": "AutoMultiTurnByCalm3-22B", "downloads": 13, "source": "Hugging Face", "score": -0.08862039922349002, "first_commit": "2024-07-17 09:53:20", "latest_commit": "2024-07-17 10:03:02", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "クイズの杜様に掲載のクイズのうち、2024年8月5日時点において取得可能だったクイズのうち「二次利用許諾レベル」が「フリー」であったものを収載したデータセットです。 ", "url": "https://huggingface.co/datasets/hpprc/quiz-no-mori", "project_name": "quiz-no-mori", "downloads": 13, "source": "Hugging Face", "score": -0.08862039922349002, "first_commit": "2024-08-05 01:24:25", "latest_commit": "2024-08-05 08:04:34", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "mqaデータセットのquery--passageのペアについて重複を削除したデータセットです。 ", "url": "https://huggingface.co/datasets/hpprc/mqa-ja", "project_name": "mqa-ja", "downloads": 13, "source": "Hugging Face", "score": -0.08862039922349002, "first_commit": "2024-04-07 06:23:02", "latest_commit": "2024-04-07 15:16:42", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "japanese-gpt-1b This repository provides a 1.3B-parameter Japanese GPT model.", "url": "https://huggingface.co/yohida/yoshida_gpt", "project_name": "yoshida_gpt", "downloads": 12, "source": "Hugging Face", "score": -0.08862941024146155, "first_commit": "2022-02-04 10:03:54", "latest_commit": "2022-02-04 10:13:45", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2LMHeadModel" }, { "description": "nlp-waseda/roberta-large-japanese-with-auto-jumanpp Model description", "url": "https://huggingface.co/nlp-waseda/roberta-large-japanese-with-auto-jumanpp", "project_name": "roberta-large-japanese-with-auto-jumanpp", "downloads": 12, "source": "Hugging Face", "score": -0.08862941024146155, "first_commit": "2022-10-15 05:40:40", "latest_commit": "2022-10-21 15:55:27", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "RobertaForMaskedLM" }, { "description": "japanese-gpt2-medium-unidic This is a medium-sized Japanese GPT-2 model using BERT-like tokenizer.", "url": "https://huggingface.co/okazaki-lab/japanese-gpt2-medium-unidic", "project_name": "japanese-gpt2-medium-unidic", "downloads": 12, "source": "Hugging Face", "score": -0.08862941024146155, "first_commit": "2023-02-27 05:42:22", "latest_commit": "2023-03-22 06:22:32", "languages": [], "model_or_dataset": "model", "model_size": 0.362, "model_architectures": "GPT2LMHeadModel" }, { "description": "ku-accms/bert-base-japanese-ssuw Model description This is a pre-trained Japanese BERT base model for super short unit words (SSUW).", "url": "https://huggingface.co/ku-accms/bert-base-japanese-ssuw", "project_name": "bert-base-japanese-ssuw", "downloads": 12, "source": "Hugging Face", "score": -0.08862941024146155, "first_commit": "2023-04-11 13:57:30", "latest_commit": "2023-04-12 04:40:42", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "BertForMaskedLM" }, { "description": "ebisuke/liz-nojaloli-ja License MIT Licenseベースとしてrinna/japanese-gpt-neox-3.6bを使用しています。 ", "url": "https://huggingface.co/ebisuke/liz-nojaloli-ja", "project_name": "liz-nojaloli-ja", "downloads": 12, "source": "Hugging Face", "score": -0.08862941024146155, "first_commit": "2023-05-23 16:59:22", "latest_commit": "2023-05-30 16:01:20", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTNeoXForCausalLM" }, { "description": "Model Card for Model ID Original model elyza/ELYZA-japanese-Llama-2-7b-instruct which is based on Meta's \"Llama 2\" and has undergone additional pre-training in Japanese instruction.", "url": "https://huggingface.co/dahara1/ELYZA-japanese-Llama-2-7b-instruct-AWQ", "project_name": "ELYZA-japanese-Llama-2-7b-instruct-AWQ", "downloads": 12, "source": "Hugging Face", "score": -0.08862941024146155, "first_commit": "2023-09-08 08:35:31", "latest_commit": "2023-09-17 04:24:55", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "LlamaForCausalLM" }, { "description": "japanese-large-lm-3.6b-instruction-sft-4bit-32g-actorder_False", "url": "https://huggingface.co/line-corporation/japanese-large-lm-3.6b-instruction-sft-4bit-32g-actorder_False", "project_name": "japanese-large-lm-3.6b-instruction-sft-4bit-32g-actorder_False", "downloads": 12, "source": "Hugging Face", "score": -0.08862941024146155, "first_commit": "2023-09-26 06:15:51", "latest_commit": "2023-09-27 23:56:05", "languages": [], "model_or_dataset": "model", "model_size": 0.861, "model_architectures": "GPTNeoXForCausalLM" }, { "description": "japanese-large-lm-1.7b-instruction-sft-4bit-32g-actorder_False", "url": "https://huggingface.co/line-corporation/japanese-large-lm-1.7b-instruction-sft-4bit-32g-actorder_False", "project_name": "japanese-large-lm-1.7b-instruction-sft-4bit-32g-actorder_False", "downloads": 12, "source": "Hugging Face", "score": -0.08862941024146155, "first_commit": "2023-09-26 06:14:25", "latest_commit": "2023-09-27 01:23:34", "languages": [], "model_or_dataset": "model", "model_size": 0.487, "model_architectures": "GPT2LMHeadModel" }, { "description": "llm-jp-13b-instruct-lora-jaster-v1.0", "url": "https://huggingface.co/llm-jp/llm-jp-13b-instruct-lora-jaster-v1.0", "project_name": "llm-jp-13b-instruct-lora-jaster-v1.0", "downloads": 12, "source": "Hugging Face", "score": -0.08862941024146155, "first_commit": "2023-10-18 18:53:58", "latest_commit": "2023-10-20 08:41:20", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "japanese-novel-gpt-j-6b https://huggingface.co/AIBunCho/japanese-novel-gpt-j-6b\" に合計216個の評価の高いなろう小説、青空文庫、ウィキペディアなどの文章をQLoRA学習させた小説生成用モデルです。 ", "url": "https://huggingface.co/akineAItech/Jeneri-SAMA-6B", "project_name": "Jeneri-SAMA-6B", "downloads": 12, "source": "Hugging Face", "score": -0.08862941024146155, "first_commit": "2024-02-25 10:30:06", "latest_commit": "2024-03-16 15:00:14", "languages": [], "model_or_dataset": "model", "model_size": 6.05, "model_architectures": "GPTJForCausalLM" }, { "description": "Swallow-MoE-4x7B-lisa 概要 tokyotech-llm/Swallow-7b-hfをベースに、以下の4モデルをgate_mode=randomでMoEし、その後LISAという手法でインストラクションチューニングを施したモデルです。 ", "url": "https://huggingface.co/Aratako/Swallow-MoE-4x7B-lisa", "project_name": "Swallow-MoE-4x7B-lisa", "downloads": 12, "source": "Hugging Face", "score": -0.08862941024146155, "first_commit": "2024-04-02 01:20:21", "latest_commit": "2024-04-05 11:35:25", "languages": [], "model_or_dataset": "model", "model_size": 19.8, "model_architectures": "MixtralForCausalLM" }, { "description": "abeja-gpt2-large-japanese-ud-causal Model Description", "url": "https://huggingface.co/KoichiYasuoka/abeja-gpt2-large-japanese-ud-causal", "project_name": "abeja-gpt2-large-japanese-ud-causal", "downloads": 12, "source": "Hugging Face", "score": -0.08862941024146155, "first_commit": "2024-09-08 02:06:47", "latest_commit": "2024-09-12 22:35:04", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2ForTokenClassification" }, { "description": "Japanese-Starling-ChatV-7B このモデルは\"chatntq-ja-7b-v1.0\"をベースにした7Bパラメータの日本語チャットモデルです。", "url": "https://huggingface.co/AbeShinzo0708/Japanese-Starling-ChatV-7B-exl2", "project_name": "Japanese-Starling-ChatV-7B-exl2", "downloads": 12, "source": "Hugging Face", "score": -0.08862941024146155, "first_commit": "2024-04-22 09:34:13", "latest_commit": "2024-04-22 09:39:09", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "MistralForCausalLM" }, { "description": "This model was created by merging intfloat/e5-mistral-7b-instruct and stabilityai/japanese-stablelm-base-gamma-7b.", "url": "https://huggingface.co/oshizo/japanese-e5-mistral-7b_slerp", "project_name": "japanese-e5-mistral-7b_slerp", "downloads": 12, "source": "Hugging Face", "score": -0.08862941024146155, "first_commit": "2024-01-04 12:33:19", "latest_commit": "2024-01-05 15:48:24", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralModel" }, { "description": "spekulatius マージしているとたまに出てくる「目的の意図とは違うのだけどなんだか消すにはもったいないモデル」をおすそ分けするシリーズです。 ", "url": "https://huggingface.co/Lasorco/spekulatius", "project_name": "spekulatius", "downloads": 12, "source": "Hugging Face", "score": -0.08862941024146155, "first_commit": "2023-10-24 13:56:21", "latest_commit": "2023-10-26 04:21:35", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "A pretrained Japanese TTS model intended for use in VITS-JaPros-WebUI.", "url": "https://huggingface.co/litagin/vits-japros-pretrained", "project_name": "vits-japros-pretrained", "downloads": 12, "source": "Hugging Face", "score": -0.08862941024146155, "first_commit": "2023-09-30 00:16:22", "latest_commit": "2023-10-11 09:55:47", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "Allganize RAG Leaderboard とは Allganize RAG Leaderboard は、5つの業種ドメイン(金融、情報通信、製造、公共、流通・小売)において、日本語のRAGの性能評価を実施したものです。", "url": "https://huggingface.co/datasets/allganize/RAG-Evaluation-Dataset-JA", "project_name": "RAG-Evaluation-Dataset-JA", "downloads": 12, "source": "Hugging Face", "score": -0.08862941024146155, "first_commit": "2024-09-03 09:00:27", "latest_commit": "2024-09-13 00:53:44", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "データセット概要 手動で作成したDatabricksに関する質問と回答ペアの日本語データセットです。 ", "url": "https://huggingface.co/datasets/yulanfmy/databricks-qa-ja", "project_name": "databricks-qa-ja", "downloads": 12, "source": "Hugging Face", "score": -0.08862941024146155, "first_commit": "2023-05-15 13:27:23", "latest_commit": "2023-05-15 14:55:06", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Yandere2023:", "url": "https://huggingface.co/datasets/nyanko7/yandere2023", "project_name": "yandere2023", "downloads": 12, "source": "Hugging Face", "score": -0.08862941024146155, "first_commit": "2024-01-07 10:31:53", "latest_commit": "2024-05-06 08:22:23", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "alpaca_jp_python alpaca_jp_pythonは、 Stanford Alpacaの手法 mistralai/Mixtral-8x22B-Instruct-v0.1 で作った合成データ(Synthetic data)です。", "url": "https://huggingface.co/datasets/HachiML/alpaca_jp_python", "project_name": "alpaca_jp_python", "downloads": 12, "source": "Hugging Face", "score": -0.08862941024146155, "first_commit": "2024-05-16 02:02:09", "latest_commit": "2024-05-20 01:44:32", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "friendly_JA-Model (T5 fine-tuned model) MT model trained using the friendly_JA Corpus attempting to make Japanese easier/more accessible to occidental people by using the Latin/English derived katakana lexicon instead of the standard Sino-Japanese lexicon Examples input output 最適化を応用した機械翻訳モデルは高精度だ オプティマイゼーションを応用したマシントランスレーションモデルは高いアキュラシーだ 彼は架空の世界に住んでいる 彼はイマジナリー世界に住んでいる 新型コロナウイルスに感染してしまった コロナウイルスにかかってしまった 深層学習は難しい ディープラーニングはむずかしい 新たな概念を紹介する 新しいコンセプトを紹介する 津波の警報が流れた ツナミのアラートが流れた 南海トラフの災害は震源地による 南海トラフのディザスターはエピ", "url": "https://huggingface.co/astremo/friendly_JA", "project_name": "friendly_JA", "downloads": 11, "source": "Hugging Face", "score": -0.08863842125943308, "first_commit": "2022-01-10 06:31:18", "latest_commit": "2022-05-22 14:57:21", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "T5ForConditionalGeneration" }, { "description": "Japanese transformer pipeline (bert-base).", "url": "https://huggingface.co/hiroshi-matsuda-rit/ja_gsd_bert_wwm_unidic_lite", "project_name": "ja_gsd_bert_wwm_unidic_lite", "downloads": 11, "source": "Hugging Face", "score": -0.08863842125943308, "first_commit": "2021-07-08 12:11:06", "latest_commit": "2021-08-11 20:25:04", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "Model Card Summary This model was trained using H2O LLM Studio.", "url": "https://huggingface.co/yukismd/JapaneseQuizChatbot_v1", "project_name": "JapaneseQuizChatbot_v1", "downloads": 11, "source": "Hugging Face", "score": -0.08863842125943308, "first_commit": "2023-06-08 00:25:01", "latest_commit": "2023-06-08 00:48:50", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPTNeoXForCausalLM" }, { "description": "ebisuke/liz-nojaloli-nxja-ja License MIT ベースとしてabeja/gpt-neox-japanese-2.7bを使用しています。 ", "url": "https://huggingface.co/ebisuke/liz-nojaloli-nxja-ja", "project_name": "liz-nojaloli-nxja-ja", "downloads": 11, "source": "Hugging Face", "score": -0.08863842125943308, "first_commit": "2023-06-02 02:12:20", "latest_commit": "2023-06-09 02:07:42", "languages": [], "model_or_dataset": "model", "model_size": 2.6, "model_architectures": "GPTNeoXJapaneseForCausalLM" }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", "url": "https://huggingface.co/TheBloke/japanese-stablelm-instruct-beta-7B-AWQ", "project_name": "japanese-stablelm-instruct-beta-7B-AWQ", "downloads": 11, "source": "Hugging Face", "score": -0.08863842125943308, "first_commit": "2023-11-03 01:04:31", "latest_commit": "2023-11-09 18:16:12", "languages": [], "model_or_dataset": "model", "model_size": 1.13, "model_architectures": "LlamaForCausalLM" }, { "description": "ELYZA-japanese-Llama-2-MoE-2x13B-v0.1 English description here 概要 Llama-2ベースの学習済み日本語モデルであるelyza/ELYZA-japanese-Llama-2-13bと、そのinstruction tuningモデルであるelyza/ELYZA-japanese-Llama-2-13b-instruct を、mergekitを使ってMoEを行い作成したモデルです。 ", "url": "https://huggingface.co/Aratako/ELYZA-japanese-Llama-2-MoE-2x13B-v0.1", "project_name": "ELYZA-japanese-Llama-2-MoE-2x13B-v0.1", "downloads": 11, "source": "Hugging Face", "score": -0.08863842125943308, "first_commit": "2024-03-03 09:25:37", "latest_commit": "2024-03-19 02:34:53", "languages": [], "model_or_dataset": "model", "model_size": 21.5, "model_architectures": "MixtralForCausalLM" }, { "description": "swallow-hermes-st-v1 物語作成に強めなモデルが出来ないかと考えて作ったモデルです。", "url": "https://huggingface.co/napopoa32/swallow-hermes-st-v1", "project_name": "swallow-hermes-st-v1", "downloads": 11, "source": "Hugging Face", "score": -0.08863842125943308, "first_commit": "2024-03-24 06:19:48", "latest_commit": "2024-03-26 12:36:41", "languages": [], "model_or_dataset": "model", "model_size": 7.33, "model_architectures": "MistralForCausalLM" }, { "description": "Mixtral-8x7B-Instruct-v0.1-japanese-alpha Mixtral-8x7B-Instruct-v0.1-japanese-alphaはMixtral-8x7B-Instruct-v0.1をベースに日本語の語彙拡張継続事前学習を実施した学習途中のモデルです。", "url": "https://huggingface.co/abeja/Mixtral-8x7B-Instruct-v0.1-japanese-alpha", "project_name": "Mixtral-8x7B-Instruct-v0.1-japanese-alpha", "downloads": 11, "source": "Hugging Face", "score": -0.08863842125943308, "first_commit": "2024-04-16 07:52:55", "latest_commit": "2024-04-20 09:14:43", "languages": [], "model_or_dataset": "model", "model_size": 46.9, "model_architectures": "MixtralForCausalLM" }, { "description": "sehiro/EvoLLM-JP-A-v1-7B-IQ4_XS-GGUF", "url": "https://huggingface.co/sehiro/EvoLLM-JP-A-v1-7B-IQ4_XS-GGUF", "project_name": "EvoLLM-JP-A-v1-7B-IQ4_XS-GGUF", "downloads": 11, "source": "Hugging Face", "score": -0.08863842125943308, "first_commit": "2024-09-15 03:54:05", "latest_commit": "2024-09-15 03:54:23", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": null }, { "description": "rinna-gpt2-xsmall-japanese-ud-causal Model Description", "url": "https://huggingface.co/KoichiYasuoka/rinna-gpt2-xsmall-japanese-ud-causal", "project_name": "rinna-gpt2-xsmall-japanese-ud-causal", "downloads": 11, "source": "Hugging Face", "score": -0.08863842125943308, "first_commit": "2024-09-07 07:29:26", "latest_commit": "2024-09-12 22:22:40", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2ForTokenClassification" }, { "description": "gpt2-medium-japanese-unidic-ud-causal Model Description", "url": "https://huggingface.co/KoichiYasuoka/gpt2-medium-japanese-unidic-ud-causal", "project_name": "gpt2-medium-japanese-unidic-ud-causal", "downloads": 11, "source": "Hugging Face", "score": -0.08863842125943308, "first_commit": "2024-08-30 13:44:48", "latest_commit": "2024-08-30 22:48:46", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2ForTokenClassification" }, { "description": "gpt2-small-japanese-juman-upos Model Description", "url": "https://huggingface.co/KoichiYasuoka/gpt2-small-japanese-juman-upos", "project_name": "gpt2-small-japanese-juman-upos", "downloads": 11, "source": "Hugging Face", "score": -0.08863842125943308, "first_commit": "2024-08-30 10:23:29", "latest_commit": "2024-09-12 22:49:59", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "GPT2ForTokenClassification" }, { "description": "This model is a voice clone of myself created specifically for Style Bert VITS2.", "url": "https://huggingface.co/ThePioneer/MyVoiceClone-Style-Bert-VITS2", "project_name": "MyVoiceClone-Style-Bert-VITS2", "downloads": 11, "source": "Hugging Face", "score": -0.08863842125943308, "first_commit": "2024-02-29 19:34:12", "latest_commit": "2024-03-04 10:43:27", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "Japanese Stable LM Instruct Gamma 7B +", "url": "https://huggingface.co/ohwi/japanese-stablelm-instruct-gamma-7b-dpo-uf-v0", "project_name": "japanese-stablelm-instruct-gamma-7b-dpo-uf-v0", "downloads": 11, "source": "Hugging Face", "score": -0.08863842125943308, "first_commit": "2023-12-21 14:10:58", "latest_commit": "2023-12-23 06:53:31", "languages": [], "model_or_dataset": "model", "model_size": 7.24, "model_architectures": "MistralForCausalLM" }, { "description": "Chat & support: TheBloke's Discord server Want to contribute?", "url": "https://huggingface.co/TheBloke/japanese-stablelm-instruct-gamma-7B-GPTQ", "project_name": "japanese-stablelm-instruct-gamma-7B-GPTQ", "downloads": 11, "source": "Hugging Face", "score": -0.08863842125943308, "first_commit": "2023-10-28 19:03:17", "latest_commit": "2023-10-28 20:24:40", "languages": [], "model_or_dataset": "model", "model_size": 1.2, "model_architectures": "MistralForCausalLM" }, { "description": "Wav2Vec2-XLS-R-300M-Japanese-Hiragana Fine-tuned facebook/wav2vec2-xls-r-300m on Japanese Hiragana characters using the Common Voice and JSUT.", "url": "https://huggingface.co/slplab/wav2vec2-xls-r-300m-japanese-hiragana", "project_name": "wav2vec2-xls-r-300m-japanese-hiragana", "downloads": 11, "source": "Hugging Face", "score": -0.08863842125943308, "first_commit": "2022-09-16 07:34:58", "latest_commit": "2022-09-16 11:01:54", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": "Wav2Vec2ForCTC" }, { "description": "Example ESPnet2 TTS model kan-bayashi/jsut_fastspeech ♻", "url": "https://huggingface.co/espnet/kan-bayashi_jsut_fastspeech", "project_name": "kan-bayashi_jsut_fastspeech", "downloads": 11, "source": "Hugging Face", "score": -0.08863842125943308, "first_commit": "2021-07-03 14:44:06", "latest_commit": "2021-07-03 10:44:10", "languages": [], "model_or_dataset": "model", "model_size": null, "model_architectures": null }, { "description": "更新情報 日本語機能とinstructベクトルのバランス調整したver.2をアップロードしましたSwallow-MX-8x7b-NVE-chatvector-Mixtral-instruct-v2 モデル概要 Swallow-MX-8x7b-NVE-v0.1に対し、 Mixtral-8x7B-Instruct-v0.1とMixtral-8x7B-v0.1の差分をマージしたモデルです。 ", "url": "https://huggingface.co/aixsatoshi/Swallow-MX-8x7b-NVE-chatvector-Mixtral-instruct", "project_name": "Swallow-MX-8x7b-NVE-chatvector-Mixtral-instruct", "downloads": 11, "source": "Hugging Face", "score": -0.08863842125943308, "first_commit": "2024-03-20 16:15:26", "latest_commit": "2024-03-23 04:14:49", "languages": [], "model_or_dataset": "model", "model_size": 46.7, "model_architectures": "MixtralForCausalLM" }, { "description": "Wikipediaを用いた日本語の固有表現抽出データセット GitHub: https://github.com/stockmarkteam/ner-wikipedia-dataset/ LICENSE: CC-BY-SA 3.0 Developed by Stockmark Inc.", "url": "https://huggingface.co/datasets/stockmark/ner-wikipedia-dataset", "project_name": "ner-wikipedia-dataset", "downloads": 11, "source": "Hugging Face", "score": -0.08863842125943308, "first_commit": "2023-09-02 14:38:55", "latest_commit": "2023-09-02 14:42:18", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Japanese-Vietnamese Translated Sentence Pairs.", "url": "https://huggingface.co/datasets/dichmau/ja_vi_translation", "project_name": "ja_vi_translation", "downloads": 11, "source": "Hugging Face", "score": -0.08863842125943308, "first_commit": "2024-03-13 17:52:50", "latest_commit": "2024-04-08 19:35:06", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Dataset Details Dataset Type:Japanese LLaVA Pretrain is a localized version of the original LLaVA Pretrain dataset.", "url": "https://huggingface.co/datasets/turing-motors/LLaVA-Pretrain-JA", "project_name": "LLaVA-Pretrain-JA", "downloads": 11, "source": "Hugging Face", "score": -0.08863842125943308, "first_commit": "2024-04-10 05:07:24", "latest_commit": "2024-04-12 09:15:37", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Synthetic-JP-EN-Coding-Dataset-Magpie-69k Magpieの手法を様々なモデルに対して適用し作成した、約69000件の日本語・英語のコーディング対話データセットです。 ", "url": "https://huggingface.co/datasets/Aratako/Synthetic-JP-EN-Coding-Dataset-Magpie-69k", "project_name": "Synthetic-JP-EN-Coding-Dataset-Magpie-69k", "downloads": 11, "source": "Hugging Face", "score": -0.08863842125943308, "first_commit": "2024-07-11 10:19:45", "latest_commit": "2024-07-11 12:07:01", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "Introduction This is a LLM-filtered set of the first 1M rows from ntt's JParaCrawl v3 large English-Japanese parallel corpus.", "url": "https://huggingface.co/datasets/Verah/JParaCrawl-Filtered-English-Japanese-Parallel-Corpus", "project_name": "JParaCrawl-Filtered-English-Japanese-Parallel-Corpus", "downloads": 11, "source": "Hugging Face", "score": -0.08863842125943308, "first_commit": "2024-03-01 06:17:09", "latest_commit": "2024-03-07 21:20:21", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "OpenOrcaデータセットの日本語翻訳版です https://huggingface.co/datasets/Open-Orca/OpenOrca 現在翻訳作業が続行中で、OpenOrca全体の1/5程度の翻訳が終わった状態でひとまず公開します。", "url": "https://huggingface.co/datasets/shi3z/OpenOrcaJapanese", "project_name": "OpenOrcaJapanese", "downloads": 11, "source": "Hugging Face", "score": -0.08863842125943308, "first_commit": "2023-10-27 06:15:27", "latest_commit": "2023-10-28 02:50:27", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "For the English version, please click here. ", "url": "https://huggingface.co/datasets/sakusakumura/databricks-dolly-15k-ja-scored", "project_name": "databricks-dolly-15k-ja-scored", "downloads": 11, "source": "Hugging Face", "score": -0.08863842125943308, "first_commit": "2023-06-27 09:14:41", "latest_commit": "2023-06-27 09:18:39", "languages": [], "model_or_dataset": "dataset", "model_size": null, "model_architectures": null }, { "description": "External dictionary importer for Yomichan.", "url": "https://github.com/FooSoft/yomichan-import", "project_name": "yomichan-import", "stargazers_count": 82, "source": "GitHub", "score": -0.09015208701999117, "first_commit": "2016-07-26 20:24:33", "latest_commit": "2023-02-25 12:43:03", "languages": [ "Go" ], "model_or_dataset": null }, { "description": "日英変換・英語略語展開のための IME 追加辞書 orange_book 日本語から英語への和英変換や英語略語の展開を Google 日本語入力や ATOK などで可能にする IME 拡張辞書", "url": "https://github.com/peaceiris/google-ime-dictionary", "project_name": "google-ime-dictionary", "stargazers_count": 82, "source": "GitHub", "score": -0.09015208701999117, "first_commit": "2018-09-13 01:54:32", "latest_commit": "2023-01-16 10:47:31", "languages": [], "model_or_dataset": "dataset" }, { "description": "databricks/dolly-v2-12b の学習データに使用されたdatabricks-dolly-15k.jsonl を日本語に翻訳したデータセットになります。", "url": "https://github.com/kunishou/databricks-dolly-15k-ja", "project_name": "databricks-dolly-15k-ja", "stargazers_count": 81, "source": "GitHub", "score": -0.09312273479563814, "first_commit": "2023-04-14 23:43:27", "latest_commit": "2023-07-26 00:08:32", "languages": [], "model_or_dataset": "dataset" }, { "description": "Python library for CJK (Chinese, Japanese, and Korean) language dictionary", "url": "https://github.com/cihai/cihai", "project_name": "cihai", "stargazers_count": 80, "source": "GitHub", "score": -0.09609338257128511, "first_commit": "2013-12-03 09:42:52", "latest_commit": "2024-08-10 13:13:53", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "日本語文字変換ライブラリ (javascript)", "url": "https://github.com/kazuhikoarase/jaconv", "project_name": "jaconv", "stargazers_count": 80, "source": "GitHub", "score": -0.09609338257128511, "first_commit": "2016-10-22 05:22:02", "latest_commit": "2024-01-20 05:35:13", "languages": [ "TypeScript", "JavaScript", "Java" ], "model_or_dataset": null }, { "description": "Neologism dictionary based on the language resources on the Web for mecab-unidic", "url": "https://github.com/neologd/mecab-unidic-neologd", "project_name": "mecab-unidic-neologd", "stargazers_count": 80, "source": "GitHub", "score": -0.09609338257128511, "first_commit": "2015-03-19 10:52:02", "latest_commit": "2020-09-14 19:58:39", "languages": [], "model_or_dataset": "dataset" }, { "description": "Japanese Dictionary", "url": "https://github.com/gojp/nihongo", "project_name": "nihongo", "stargazers_count": 78, "source": "GitHub", "score": -0.10203467812257905, "first_commit": "2013-09-03 00:22:50", "latest_commit": "2024-02-07 18:36:24", "languages": [ "Go", "Python" ], "model_or_dataset": null }, { "description": "Japanese SKK input method library", "url": "https://github.com/ueno/libskk", "project_name": "libskk", "stargazers_count": 78, "source": "GitHub", "score": -0.10203467812257905, "first_commit": "2011-10-05 18:18:07", "latest_commit": "2024-09-02 12:09:00", "languages": [ "C" ], "model_or_dataset": "dataset" }, { "description": "A large parallel corpus of English and Japanese", "url": "https://github.com/rpryzant/JESC", "project_name": "JESC", "stargazers_count": 78, "source": "GitHub", "score": -0.10203467812257905, "first_commit": "2017-10-25 07:41:35", "latest_commit": "2017-10-31 21:08:56", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "Japanese Morphological Analysis written in Rust", "url": "https://github.com/Leko/goya", "project_name": "goya", "stargazers_count": 77, "source": "GitHub", "score": -0.10500532589822602, "first_commit": "2021-09-08 19:51:11", "latest_commit": "2021-12-30 19:44:15", "languages": [ "Rust", "JavaScript", "TypeScript" ], "model_or_dataset": null }, { "description": "Japanese tokenizer for Transformers", "url": "https://github.com/WorksApplications/SudachiTra", "project_name": "SudachiTra", "stargazers_count": 77, "source": "GitHub", "score": -0.10500532589822602, "first_commit": "2021-06-22 19:48:29", "latest_commit": "2023-12-15 08:13:45", "languages": [ "Python" ], "model_or_dataset": "model" }, { "description": "UNICODE絵文字の日本語読み/キーワード/分類辞書", "url": "https://github.com/yagays/emoji-ja", "project_name": "emoji-ja", "stargazers_count": 77, "source": "GitHub", "score": -0.10500532589822602, "first_commit": "2018-08-24 08:25:08", "latest_commit": "2023-05-09 14:57:44", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "Kyoto University Web Document Leads Corpus", "url": "https://github.com/ku-nlp/KWDLC", "project_name": "KWDLC", "stargazers_count": 75, "source": "GitHub", "score": -0.11094662144951996, "first_commit": "2015-05-20 19:13:17", "latest_commit": "2023-12-18 14:13:14", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "LLM構築用の日本語チャットデータセット", "url": "https://github.com/masanorihirano/llm-japanese-dataset", "project_name": "llm-japanese-dataset", "stargazers_count": 75, "source": "GitHub", "score": -0.11094662144951996, "first_commit": "2023-04-19 14:34:02", "latest_commit": "2024-01-23 09:37:30", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "Japanese Realistic Textual Entailment Corpus (NLP 2020, LREC 2020)", "url": "https://github.com/megagonlabs/jrte-corpus", "project_name": "jrte-corpus", "stargazers_count": 75, "source": "GitHub", "score": -0.11094662144951996, "first_commit": "2020-10-15 14:59:37", "latest_commit": "2023-06-23 14:06:26", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "get japanese manga from url to translate manga image", "url": "https://github.com/ttop32/JMTrans", "project_name": "JMTrans", "stargazers_count": 74, "source": "GitHub", "score": -0.11391726922516693, "first_commit": "2020-08-19 23:30:03", "latest_commit": "2021-01-16 21:44:37", "languages": [ "Python", "C#" ], "model_or_dataset": null }, { "description": "A PyTorch Implementation of japanese chatbot using BERT and Transformer's decoder", "url": "https://github.com/reppy4620/Dialog", "project_name": "Dialog", "stargazers_count": 73, "source": "GitHub", "score": -0.1168879170008139, "first_commit": "2019-09-12 13:05:51", "latest_commit": "2020-10-01 17:25:08", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": "model" }, { "description": "JMdict, JMnedict, KANJIDIC for Yomitan/Yomichan.", "url": "https://github.com/themoeway/jmdict-yomitan", "project_name": "jmdict-yomitan", "stargazers_count": 73, "source": "GitHub", "score": -0.1168879170008139, "first_commit": "2023-09-24 20:01:40", "latest_commit": "2024-07-30 10:44:53", "languages": [ "TypeScript" ], "model_or_dataset": "dataset" }, { "description": "「言語処理100本ノック 2020」をPythonで解く", "url": "https://github.com/upura/nlp100v2020", "project_name": "nlp100v2020", "stargazers_count": 73, "source": "GitHub", "score": -0.1168879170008139, "first_commit": "2020-04-07 20:23:08", "latest_commit": "2023-11-05 23:36:07", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Laboro BERT Japanese: Japanese BERT Pre-Trained With Web-Corpus", "url": "https://github.com/laboroai/Laboro-BERT-Japanese", "project_name": "Laboro-BERT-Japanese", "stargazers_count": 72, "source": "GitHub", "score": -0.11985856477646087, "first_commit": "2020-03-31 12:05:07", "latest_commit": "2022-05-12 17:06:31", "languages": [ "Python" ], "model_or_dataset": "model" }, { "description": "Resembla: Word-based Japanese similar sentence search library", "url": "https://github.com/tuem/resembla", "project_name": "resembla", "stargazers_count": 71, "source": "GitHub", "score": -0.12282921255210784, "first_commit": "2017-07-24 17:07:39", "latest_commit": "2019-01-27 00:08:52", "languages": [ "C++", "Python", "Ruby" ], "model_or_dataset": null }, { "description": "Unidic packaged for installation via pip.", "url": "https://github.com/polm/unidic-py", "project_name": "unidic-py", "stargazers_count": 71, "source": "GitHub", "score": -0.12282921255210784, "first_commit": "2020-01-05 16:19:49", "latest_commit": "2023-06-16 20:50:30", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "Utility scripts for preprocessing Wikipedia texts for NLP", "url": "https://github.com/singletongue/wikipedia-utils", "project_name": "wikipedia-utils", "stargazers_count": 71, "source": "GitHub", "score": -0.12282921255210784, "first_commit": "2022-01-09 16:42:14", "latest_commit": "2024-04-10 08:41:09", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "TinySegmenter用の学習モデルを自作するためのツール.", "url": "https://github.com/shogo82148/TinySegmenterMaker", "project_name": "TinySegmenterMaker", "stargazers_count": 69, "source": "GitHub", "score": -0.1287705081034018, "first_commit": "2012-11-15 22:24:06", "latest_commit": "2022-09-30 13:41:19", "languages": [ "Python", "JavaScript", "C++", "Perl", "Go", "Julia", "Ruby", "Java", "C#" ], "model_or_dataset": null }, { "description": "text-only archives of www.aozora.gr.jp", "url": "https://github.com/aozorahack/aozorabunko_text", "project_name": "aozorabunko_text", "stargazers_count": 69, "source": "GitHub", "score": -0.1287705081034018, "first_commit": "2019-02-11 03:06:07", "latest_commit": "2023-03-22 01:21:29", "languages": [ "Ruby" ], "model_or_dataset": "dataset" }, { "description": "5chの過去ログをスクレイピングして、過去流行った単語(ex, 香具師, orz)などを追跡調査", "url": "https://github.com/GINK03/5ch-analysis", "project_name": "5ch-analysis", "stargazers_count": 67, "source": "GitHub", "score": -0.13471180365469573, "first_commit": "2018-11-11 16:58:44", "latest_commit": "2018-11-11 23:37:16", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "and Romaji", "url": "https://github.com/PSeitz/wana_kana_rust", "project_name": "wana_kana_rust", "stargazers_count": 67, "source": "GitHub", "score": -0.13471180365469573, "first_commit": "2018-02-02 18:39:03", "latest_commit": "2023-01-19 21:51:26", "languages": [ "Rust", "JavaScript" ], "model_or_dataset": null }, { "description": "Japanese CLIP by rinna Co., Ltd.", "url": "https://github.com/rinnakk/japanese-clip", "project_name": "japanese-clip", "stargazers_count": 66, "source": "GitHub", "score": -0.1376824514303427, "first_commit": "2022-04-25 17:19:28", "latest_commit": "2022-07-19 18:20:52", "languages": [ "Python" ], "model_or_dataset": "model" }, { "description": "ニコニコ大百科とピクシブ百科事典の共通部分のIME辞書", "url": "https://github.com/ncaq/dic-nico-intersection-pixiv", "project_name": "dic-nico-intersection-pixiv", "stargazers_count": 66, "source": "GitHub", "score": -0.1376824514303427, "first_commit": "2017-03-09 08:44:44", "latest_commit": "2024-04-19 13:29:16", "languages": [ "Haskell" ], "model_or_dataset": "dataset" }, { "description": "マウスオーバーした単語を自動で読み取る汎用辞書ツール", "url": "https://github.com/kengo700/mouse_over_dictionary", "project_name": "mouse_over_dictionary", "stargazers_count": 66, "source": "GitHub", "score": -0.1376824514303427, "first_commit": "2020-01-09 20:26:17", "latest_commit": "2020-01-24 08:57:39", "languages": [ "C++" ], "model_or_dataset": "dataset" }, { "description": "The Business Scene Dialogue corpus", "url": "https://github.com/tsuruoka-lab/BSD", "project_name": "BSD", "stargazers_count": 66, "source": "GitHub", "score": -0.1376824514303427, "first_commit": "2020-07-25 01:04:11", "latest_commit": "2021-11-10 21:33:34", "languages": [], "model_or_dataset": "dataset" }, { "description": "Neural IME: Neural Input Method Engine", "url": "https://github.com/yohokuno/neural_ime", "project_name": "neural_ime", "stargazers_count": 65, "source": "GitHub", "score": -0.14065309920598967, "first_commit": "2016-10-31 15:23:42", "latest_commit": "2016-12-27 21:10:30", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "SKK (Simple Kana Kanji henkan) library", "url": "https://github.com/naokiri/cskk", "project_name": "cskk", "stargazers_count": 65, "source": "GitHub", "score": -0.14065309920598967, "first_commit": "2018-06-17 15:36:26", "latest_commit": "2024-03-10 13:45:41", "languages": [ "Rust", "C" ], "model_or_dataset": null }, { "description": "ひらがなIME for IBus", "url": "https://github.com/esrille/ibus-hiragana", "project_name": "ibus-hiragana", "stargazers_count": 64, "source": "GitHub", "score": -0.14362374698163663, "first_commit": "2017-04-28 03:50:59", "latest_commit": "2024-08-15 04:09:53", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "デジタル化資料OCRテキスト化事業において作成されたOCR学習用データセット", "url": "https://github.com/ndl-lab/pdmocrdataset-part1", "project_name": "pdmocrdataset-part1", "stargazers_count": 64, "source": "GitHub", "score": -0.14362374698163663, "first_commit": "2022-04-20 11:55:33", "latest_commit": "2024-06-26 16:10:44", "languages": [], "model_or_dataset": "dataset" }, { "description": "japanese sentence segmentation library for python", "url": "https://github.com/wwwcojp/ja_sentence_segmenter", "project_name": "ja_sentence_segmenter", "stargazers_count": 63, "source": "GitHub", "score": -0.1465943947572836, "first_commit": "2019-12-15 13:50:07", "latest_commit": "2023-04-03 13:09:20", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "This repository has implementations of data augmentation for NLP for Japanese.", "url": "https://github.com/kajyuuen/daaja", "project_name": "daaja", "stargazers_count": 63, "source": "GitHub", "score": -0.1465943947572836, "first_commit": "2022-02-12 20:22:34", "latest_commit": "2023-02-16 19:39:30", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "Samples codes for natural language processing in Japanese", "url": "https://github.com/upura/nlp-recipes-ja", "project_name": "nlp-recipes-ja", "stargazers_count": 63, "source": "GitHub", "score": -0.1465943947572836, "first_commit": "2020-08-01 09:09:07", "latest_commit": "2021-04-11 08:07:45", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "spaCy tutorial in English and Japanese. spacy-transformers, BERT, GiNZA.", "url": "https://github.com/yuibi/spacy_tutorial", "project_name": "spacy_tutorial", "stargazers_count": 63, "source": "GitHub", "score": -0.1465943947572836, "first_commit": "2019-12-29 04:28:30", "latest_commit": "2020-01-24 20:02:24", "languages": [ "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "GPTがYouTuberをやります", "url": "https://github.com/karakuri-ai/gptuber-by-langchain", "project_name": "gptuber-by-langchain", "stargazers_count": 62, "source": "GitHub", "score": -0.14956504253293057, "first_commit": "2023-01-07 00:37:20", "latest_commit": "2023-01-07 00:37:20", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "GPTがYouTuberをやります", "url": "https://github.com/karakuri-ai/gptuber-by-langchain", "project_name": "gptuber-by-langchain", "stargazers_count": 62, "source": "GitHub", "score": -0.14956504253293057, "first_commit": "2023-01-07 00:37:20", "latest_commit": "2023-01-07 00:37:20", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "ぷるーふおぶこんせぷと で公開した機械翻訳エンジンを利用する翻訳環境です。 フォームに入力された文字列の翻訳、PDFの翻訳が可能です。", "url": "https://github.com/s-taka/fugumt", "project_name": "fugumt", "stargazers_count": 61, "source": "GitHub", "score": -0.15253569030857755, "first_commit": "2021-01-02 20:35:49", "latest_commit": "2021-02-28 11:46:52", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Java library and command-line tool to transliterate Japanese kanji to romaji (Latin alphabet)", "url": "https://github.com/nicolas-raoul/jakaroma", "project_name": "jakaroma", "stargazers_count": 61, "source": "GitHub", "score": -0.15253569030857755, "first_commit": "2016-04-11 18:21:38", "latest_commit": "2021-03-30 23:21:16", "languages": [ "Java" ], "model_or_dataset": null }, { "description": "Pure Python Japanese address geocoder", "url": "https://github.com/t-sagara/jageocoder", "project_name": "jageocoder", "stargazers_count": 60, "source": "GitHub", "score": -0.1555063380842245, "first_commit": "2021-02-20 17:31:56", "latest_commit": "2024-07-03 06:01:58", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Project of llm evaluation to Japanese tasks", "url": "https://github.com/wandb/llm-leaderboard", "project_name": "llm-leaderboard", "stargazers_count": 60, "source": "GitHub", "score": -0.1555063380842245, "first_commit": "2023-06-27 15:09:25", "latest_commit": "2024-08-07 00:38:16", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "OpenAIのChatGPT APIをSlack上で利用するためのSlackbotスクリプト (日本語での利用が前提)", "url": "https://github.com/sifue/chatgpt-slackbot", "project_name": "chatgpt-slackbot", "stargazers_count": 60, "source": "GitHub", "score": -0.1555063380842245, "first_commit": "2022-12-06 22:50:09", "latest_commit": "2024-07-22 18:50:41", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Japanese IOB2 tagged corpus for Named Entity Recognition.", "url": "https://github.com/Hironsan/IOB2Corpus", "project_name": "IOB2Corpus", "stargazers_count": 60, "source": "GitHub", "score": -0.1555063380842245, "first_commit": "2016-01-29 09:21:25", "latest_commit": "2020-02-25 09:34:11", "languages": [], "model_or_dataset": "dataset" }, { "description": "This repo contains a list of the 44,998 most common Japanese words in order of frequency, as determined by the University of Leeds Corpus.", "url": "https://github.com/hingston/japanese", "project_name": "japanese", "stargazers_count": 60, "source": "GitHub", "score": -0.1555063380842245, "first_commit": "2018-09-13 21:10:10", "latest_commit": "2018-09-13 22:02:23", "languages": [], "model_or_dataset": "dataset" }, { "description": "Converts Japanese Numerals into number", "url": "https://github.com/twada/japanese-numerals-to-number", "project_name": "japanese-numerals-to-number", "stargazers_count": 58, "source": "GitHub", "score": -0.16144763363551848, "first_commit": "2017-02-25 22:53:18", "latest_commit": "2023-02-17 01:34:12", "languages": [ "JavaScript" ], "model_or_dataset": null }, { "description": "Exploring Japanese SimCSE", "url": "https://github.com/hpprc/simple-simcse-ja", "project_name": "simple-simcse-ja", "stargazers_count": 57, "source": "GitHub", "score": -0.16441828141116543, "first_commit": "2022-11-11 19:05:53", "latest_commit": "2023-10-31 14:18:17", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Safe Rust bindings for mecab a part-of-speech and morphological analyzer library", "url": "https://github.com/tsurai/mecab-rs", "project_name": "mecab-rs", "stargazers_count": 55, "source": "GitHub", "score": -0.17035957696245937, "first_commit": "2015-04-19 09:30:14", "latest_commit": "2023-09-03 22:03:49", "languages": [ "Rust" ], "model_or_dataset": null }, { "description": "Extractive summarizer using BertSum as summarization model", "url": "https://github.com/neilctwu/YouyakuMan", "project_name": "YouyakuMan", "stargazers_count": 53, "source": "GitHub", "score": -0.17630087251375332, "first_commit": "2019-10-29 17:43:01", "latest_commit": "2020-09-02 13:37:05", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Google 日本語入力用DvorakJPローマ字テーブル / DvorakJP Roman Table for Google Japanese Input", "url": "https://github.com/shinespark/dvorakjp-romantable", "project_name": "dvorakjp-romantable", "stargazers_count": 53, "source": "GitHub", "score": -0.17630087251375332, "first_commit": "2015-10-11 16:49:41", "latest_commit": "2024-06-24 12:24:34", "languages": [ "Rust" ], "model_or_dataset": null }, { "description": "Convert external words into Mozc system dictionary", "url": "https://github.com/reasonset/mozcdict-ext", "project_name": "mozcdict-ext", "stargazers_count": 52, "source": "GitHub", "score": -0.1792715202894003, "first_commit": "2023-01-12 18:13:26", "latest_commit": "2024-07-13 17:37:43", "languages": [ "Ruby" ], "model_or_dataset": "dataset" }, { "description": "Japanese Language Model Financial Evaluation Harness", "url": "https://github.com/pfnet-research/japanese-lm-fin-harness", "project_name": "japanese-lm-fin-harness", "stargazers_count": 51, "source": "GitHub", "score": -0.18224216806504726, "first_commit": "2023-09-28 10:48:05", "latest_commit": "2024-08-07 13:01:57", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Kanji transliteration to hiragana/katakana/romaji, in Java", "url": "https://github.com/nicolas-raoul/kakasi-java", "project_name": "kakasi-java", "stargazers_count": 51, "source": "GitHub", "score": -0.18224216806504726, "first_commit": "2012-01-18 17:33:15", "latest_commit": "2016-04-13 15:56:34", "languages": [ "Java" ], "model_or_dataset": null }, { "description": "Japanese synonym library", "url": "https://github.com/WorksApplications/chikkarpy", "project_name": "chikkarpy", "stargazers_count": 50, "source": "GitHub", "score": -0.18521281584069424, "first_commit": "2021-05-24 17:10:56", "latest_commit": "2022-02-07 15:11:36", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "jp-localization", "url": "https://github.com/yantaisa11/Retrieval-based-Voice-Conversion-WebUI-JP-localization", "project_name": "Retrieval-based-Voice-Conversion-WebUI-JP-localization", "stargazers_count": 50, "source": "GitHub", "score": -0.18521281584069424, "first_commit": "2023-03-27 17:59:11", "latest_commit": "2023-04-11 11:08:47", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "Lindera tokenizer for Tantivy.", "url": "https://github.com/lindera-morphology/lindera-tantivy", "project_name": "lindera-tantivy", "stargazers_count": 50, "source": "GitHub", "score": -0.18521281584069424, "first_commit": "2020-02-19 10:37:34", "latest_commit": "2023-12-02 21:03:48", "languages": [ "Rust" ], "model_or_dataset": null }, { "description": "Kuromoji morphological analyzer for kuroshiro.", "url": "https://github.com/hexenq/kuroshiro-analyzer-kuromoji", "project_name": "kuroshiro-analyzer-kuromoji", "stargazers_count": 50, "source": "GitHub", "score": -0.18521281584069424, "first_commit": "2018-03-09 17:41:37", "latest_commit": "2018-08-05 12:41:55", "languages": [ "JavaScript" ], "model_or_dataset": null }, { "description": "JLPT level tags for words in Yomichan", "url": "https://github.com/stephenmk/yomichan-jlpt-vocab", "project_name": "yomichan-jlpt-vocab", "stargazers_count": 50, "source": "GitHub", "score": -0.18521281584069424, "first_commit": "2021-09-01 18:36:57", "latest_commit": "2023-04-06 22:29:12", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "モーラバランス型日本語コーパス", "url": "https://github.com/mmorise/rohan4600", "project_name": "rohan4600", "stargazers_count": 50, "source": "GitHub", "score": -0.18521281584069424, "first_commit": "2021-07-31 23:43:43", "latest_commit": "2023-02-01 12:51:29", "languages": [], "model_or_dataset": "dataset" }, { "description": "fasttextとword2vecの比較と、実行スクリプト、学習スクリプトです", "url": "https://github.com/GINK03/fasttext-vs-word2vec-on-twitter-data", "project_name": "fasttext-vs-word2vec-on-twitter-data", "stargazers_count": 49, "source": "GitHub", "score": -0.1881834636163412, "first_commit": "2017-03-30 23:10:04", "latest_commit": "2017-08-23 10:53:09", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Python Implementation of EmbedRank", "url": "https://github.com/yagays/embedrank", "project_name": "embedrank", "stargazers_count": 48, "source": "GitHub", "score": -0.19115411139198818, "first_commit": "2019-02-01 11:40:50", "latest_commit": "2019-03-19 09:05:41", "languages": [ "Python" ], "model_or_dataset": "model" }, { "description": "Japanese LLaMa experiment", "url": "https://github.com/lighttransport/japanese-llama-experiment", "project_name": "japanese-llama-experiment", "stargazers_count": 48, "source": "GitHub", "score": -0.19115411139198818, "first_commit": "2023-06-28 17:43:53", "latest_commit": "2024-03-10 23:31:45", "languages": [ "Python", "C", "C++" ], "model_or_dataset": "model" }, { "description": "Tokenizer POS-Tagger and Dependency-parser with BERT/RoBERTa/DeBERTa models for Japanese and other languages", "url": "https://github.com/KoichiYasuoka/esupar", "project_name": "esupar", "stargazers_count": 47, "source": "GitHub", "score": -0.19412475916763514, "first_commit": "2021-09-18 07:28:30", "latest_commit": "2024-08-15 20:37:47", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "This repository supports YuzuAI's Rakuda leaderboard of Japanese LLMs, which is a Japanese-focused analogue of LMSYS' Vicuna eval.", "url": "https://github.com/yuzu-ai/japanese-llm-ranking", "project_name": "japanese-llm-ranking", "stargazers_count": 47, "source": "GitHub", "score": -0.19412475916763514, "first_commit": "2023-06-28 18:31:52", "latest_commit": "2024-03-04 18:17:06", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "tokenizer specified for Japanese", "url": "https://github.com/SamuraiT/tinysegmenter", "project_name": "tinysegmenter", "stargazers_count": 47, "source": "GitHub", "score": -0.19412475916763514, "first_commit": "2014-07-04 17:23:23", "latest_commit": "2015-11-03 21:49:19", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Javascript libraries to process text: Arabic, Japanese, etc.", "url": "https://github.com/kariminf/jslingua", "project_name": "jslingua", "stargazers_count": 47, "source": "GitHub", "score": -0.19412475916763514, "first_commit": "2016-03-22 10:52:37", "latest_commit": "2023-10-19 22:01:23", "languages": [ "JavaScript", "Python" ], "model_or_dataset": null }, { "description": "LLaVA-JP is a Japanese VLM trained by LLaVA method", "url": "https://github.com/tosiyuki/LLaVA-JP", "project_name": "LLaVA-JP", "stargazers_count": 46, "source": "GitHub", "score": -0.19709540694328212, "first_commit": "2023-12-01 12:26:17", "latest_commit": "2024-06-05 23:42:35", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "BERT and ELECTRA models of PyTorch implementations for Japanese text.", "url": "https://github.com/retarfi/language-pretraining", "project_name": "language-pretraining", "stargazers_count": 46, "source": "GitHub", "score": -0.19709540694328212, "first_commit": "2021-07-07 12:07:22", "latest_commit": "2023-05-19 23:15:30", "languages": [ "Python" ], "model_or_dataset": "model" }, { "description": "このサンプルでは、Retrieval Augmented Generation パターンを使用して、独自のデータに対してChatGPT のような体験を作成するためのいくつかのアプローチを示しています。", "url": "https://github.com/nohanaga/azure-search-openai-demo", "project_name": "azure-search-openai-demo", "stargazers_count": 46, "source": "GitHub", "score": -0.19709540694328212, "first_commit": "2023-02-08 13:00:55", "latest_commit": "2023-12-07 18:16:07", "languages": [ "Python", "TypeScript", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "Generate SKK/MeCab dictionary from Wikipedia(Japanese edition)", "url": "https://github.com/tokuhirom/jawiki-kana-kanji-dict", "project_name": "jawiki-kana-kanji-dict", "stargazers_count": 46, "source": "GitHub", "score": -0.19709540694328212, "first_commit": "2020-08-23 02:36:22", "latest_commit": "2024-08-09 14:54:51", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "GoogleIME用カタカナ語辞書プロジェクトのアーカイブです。Project archive of Google IME user dictionary from Katakana word ( Japanese loanword ) to English.", "url": "https://github.com/KEINOS/google-ime-user-dictionary-ja-en", "project_name": "google-ime-user-dictionary-ja-en", "stargazers_count": 45, "source": "GitHub", "score": -0.20006605471892908, "first_commit": "2009-12-11 10:24:53", "latest_commit": "2016-12-23 19:44:09", "languages": [], "model_or_dataset": "dataset" }, { "description": "DistilBERT model pre-trained on 131 GB of Japanese web text. The teacher model is BERT-base that built in-house at LINE.", "url": "https://github.com/line/LINE-DistilBERT-Japanese", "project_name": "LINE-DistilBERT-Japanese", "stargazers_count": 44, "source": "GitHub", "score": -0.20303670249457606, "first_commit": "2023-03-09 18:50:06", "latest_commit": "2023-03-22 15:09:22", "languages": [], "model_or_dataset": "model" }, { "description": "Japanese Sentences Involving Compositional Knowledge (JSICK) Dataset/JSICK-stress Test Set", "url": "https://github.com/verypluming/JSICK", "project_name": "JSICK", "stargazers_count": 44, "source": "GitHub", "score": -0.20303670249457606, "first_commit": "2021-05-24 18:12:15", "latest_commit": "2023-05-31 17:48:45", "languages": [], "model_or_dataset": "dataset" }, { "description": "Converts Arabic numerals, or 'western' style numbers, to a Japanese context.", "url": "https://github.com/Greatdane/Convert-Numbers-to-Japanese", "project_name": "Convert-Numbers-to-Japanese", "stargazers_count": 43, "source": "GitHub", "score": -0.20600735027022302, "first_commit": "2017-03-24 12:30:39", "latest_commit": "2020-11-26 16:37:30", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Japanese Daily Dialogue, or 日本語日常対話コーパス in Japanese, is a high-quality multi-turn dialogue dataset containing daily conversations on five topics: dailylife, school, travel, health, and entertainment.", "url": "https://github.com/jqk09a/japanese-daily-dialogue", "project_name": "japanese-daily-dialogue", "stargazers_count": 43, "source": "GitHub", "score": -0.20600735027022302, "first_commit": "2023-03-15 16:53:41", "latest_commit": "2023-03-17 18:53:28", "languages": [], "model_or_dataset": "dataset" }, { "description": "Character Based Named Entity Recognition.", "url": "https://github.com/chakki-works/namaco", "project_name": "namaco", "stargazers_count": 41, "source": "GitHub", "score": -0.21194864582151696, "first_commit": "2017-10-11 09:53:23", "latest_commit": "2018-02-09 06:27:36", "languages": [ "Python", "JavaScript" ], "model_or_dataset": null }, { "description": "This repository contains the code for supervised fine-tuning of LLM-jp models.", "url": "https://github.com/llm-jp/llm-jp-sft", "project_name": "llm-jp-sft", "stargazers_count": 41, "source": "GitHub", "score": -0.21194864582151696, "first_commit": "2023-09-22 14:30:09", "latest_commit": "2024-06-13 13:17:38", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Normalize and fix common issues with Romaji-based Japanese names.", "url": "https://github.com/jeresig/node-romaji-name", "project_name": "node-romaji-name", "stargazers_count": 41, "source": "GitHub", "score": -0.21194864582151696, "first_commit": "2013-08-24 10:50:11", "latest_commit": "2023-12-27 13:27:03", "languages": [ "JavaScript" ], "model_or_dataset": null }, { "description": "Monorepo for Kanji, Furigana, Japanese DB, and others", "url": "https://github.com/echamudi/japanese-toolkit", "project_name": "japanese-toolkit", "stargazers_count": 41, "source": "GitHub", "score": -0.21194864582151696, "first_commit": "2020-07-09 05:58:18", "latest_commit": "2023-01-08 23:53:43", "languages": [ "JavaScript", "TypeScript" ], "model_or_dataset": null }, { "description": "Get Japanese dialogue corpus", "url": "https://github.com/knok/make-meidai-dialogue", "project_name": "make-meidai-dialogue", "stargazers_count": 40, "source": "GitHub", "score": -0.21491929359716394, "first_commit": "2016-11-10 13:45:38", "latest_commit": "2017-09-29 07:53:24", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "日本語TTS(VITS)の学習と音声合成のGradio WebUI", "url": "https://github.com/litagin02/vits-japros-webui", "project_name": "vits-japros-webui", "stargazers_count": 40, "source": "GitHub", "score": -0.21491929359716394, "first_commit": "2023-09-30 17:09:14", "latest_commit": "2024-01-05 22:42:43", "languages": [ "Python", "Jupyter Notebook", "C++" ], "model_or_dataset": null }, { "description": "Japanese BERT trained on Aozora Bunko and Wikipedia, pre-tokenized by MeCab with UniDic & SudachiPy", "url": "https://github.com/akirakubo/bert-japanese-aozora", "project_name": "bert-japanese-aozora", "stargazers_count": 40, "source": "GitHub", "score": -0.21491929359716394, "first_commit": "2020-03-08 10:20:43", "latest_commit": "2020-08-08 12:06:20", "languages": [], "model_or_dataset": "model" }, { "description": "An open collection of annotated voices in Japanese language", "url": "https://github.com/koniwa/koniwa", "project_name": "koniwa", "stargazers_count": 40, "source": "GitHub", "score": -0.21491929359716394, "first_commit": "2021-10-29 21:19:06", "latest_commit": "2024-08-02 10:48:21", "languages": [ "Python", "JavaScript" ], "model_or_dataset": "dataset" }, { "description": "DocumentClassificationUsingBERT-Japanese", "url": "https://github.com/nekoumei/DocumentClassificationUsingBERT-Japanese", "project_name": "DocumentClassificationUsingBERT-Japanese", "stargazers_count": 40, "source": "GitHub", "score": -0.21491929359716394, "first_commit": "2019-12-16 00:55:48", "latest_commit": "2021-01-29 10:59:18", "languages": [ "Jupyter Notebook", "Python" ], "model_or_dataset": null }, { "description": "Japanese-BPEEncoder", "url": "https://github.com/tanreinama/Japanese-BPEEncoder", "project_name": "Japanese-BPEEncoder", "stargazers_count": 39, "source": "GitHub", "score": -0.2178899413728109, "first_commit": "2020-10-03 12:21:03", "latest_commit": "2021-09-12 09:58:42", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "A web-app displaying the 2200 kanji characters taught in James Heisig's \"Remembering the Kanji\", 6th edition.", "url": "https://github.com/minosvasilias/kanjigrid", "project_name": "kanjigrid", "stargazers_count": 39, "source": "GitHub", "score": -0.2178899413728109, "first_commit": "2018-10-26 15:47:29", "latest_commit": "2018-11-19 14:14:00", "languages": [ "JavaScript" ], "model_or_dataset": null }, { "description": "Codes to pre-train Japanese T5 models", "url": "https://github.com/megagonlabs/t5-japanese", "project_name": "t5-japanese", "stargazers_count": 39, "source": "GitHub", "score": -0.2178899413728109, "first_commit": "2021-08-25 09:55:16", "latest_commit": "2021-09-07 14:11:02", "languages": [ "Python" ], "model_or_dataset": "model" }, { "description": "Google日本語入力の顔文字辞書∩(,,Ò‿Ó,,)∩", "url": "https://github.com/tiwanari/emoticon", "project_name": "emoticon", "stargazers_count": 39, "source": "GitHub", "score": -0.2178899413728109, "first_commit": "2013-12-29 17:16:16", "latest_commit": "2020-05-07 13:36:42", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "This repository contains scripts to reproduce the LLM-jp corpus.", "url": "https://github.com/llm-jp/llm-jp-corpus", "project_name": "llm-jp-corpus", "stargazers_count": 39, "source": "GitHub", "score": -0.2178899413728109, "first_commit": "2023-06-14 13:21:33", "latest_commit": "2023-10-23 10:04:18", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "xvector model on jtubespeech", "url": "https://github.com/sarulab-speech/xvector_jtubespeech", "project_name": "xvector_jtubespeech", "stargazers_count": 38, "source": "GitHub", "score": -0.22086058914845788, "first_commit": "2022-03-08 11:00:20", "latest_commit": "2023-11-05 14:48:26", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Evaluating GPT-4 and ChatGPT on Japanese Medical Licensing Examinations", "url": "https://github.com/jungokasai/IgakuQA", "project_name": "IgakuQA", "stargazers_count": 38, "source": "GitHub", "score": -0.22086058914845788, "first_commit": "2023-03-31 10:29:20", "latest_commit": "2023-03-31 10:29:20", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Japanese-BPEEncoder Version 2", "url": "https://github.com/tanreinama/Japanese-BPEEncoder_V2", "project_name": "Japanese-BPEEncoder_V2", "stargazers_count": 37, "source": "GitHub", "score": -0.22383123692410484, "first_commit": "2021-08-23 19:09:11", "latest_commit": "2023-01-15 12:43:44", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Python wrapper for KyTea", "url": "https://github.com/chezou/Mykytea-python", "project_name": "Mykytea-python", "stargazers_count": 36, "source": "GitHub", "score": -0.22680188469975182, "first_commit": "2011-07-15 11:45:10", "latest_commit": "2024-01-15 17:30:17", "languages": [ "Python", "C++" ], "model_or_dataset": null }, { "description": "Handwritten Japanese OCR demo using touch panel to draw the input text using Intel OpenVINO toolkit", "url": "https://github.com/yas-sim/handwritten-japanese-ocr", "project_name": "handwritten-japanese-ocr", "stargazers_count": 36, "source": "GitHub", "score": -0.22680188469975182, "first_commit": "2020-05-01 17:27:13", "latest_commit": "2022-04-05 11:16:31", "languages": [ "Python", "C++" ], "model_or_dataset": null }, { "description": "このリポジトリは日本語LLMのキャラクターロールプレイに関する性能を評価するために作成しました。", "url": "https://github.com/oshizo/japanese-llm-roleplay-benchmark", "project_name": "japanese-llm-roleplay-benchmark", "stargazers_count": 36, "source": "GitHub", "score": -0.22680188469975182, "first_commit": "2023-09-15 23:52:27", "latest_commit": "2023-11-03 22:01:24", "languages": [ "Jupyter Notebook", "Python" ], "model_or_dataset": null }, { "description": "Viterbi-based accelerated tokenizer (Python wrapper)", "url": "https://github.com/daac-tools/python-vibrato", "project_name": "python-vibrato", "stargazers_count": 35, "source": "GitHub", "score": -0.2297725324753988, "first_commit": "2022-12-08 12:29:38", "latest_commit": "2023-09-05 22:16:59", "languages": [ "Python", "Rust" ], "model_or_dataset": null }, { "description": "Mozc for Python: Kana-Kanji converter", "url": "https://github.com/ikegami-yukino/mozcpy", "project_name": "mozcpy", "stargazers_count": 35, "source": "GitHub", "score": -0.2297725324753988, "first_commit": "2022-08-21 02:20:37", "latest_commit": "2023-12-12 00:56:24", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Mecab + NEologd + Docker + Python3", "url": "https://github.com/p-geon/ja-tokenizer-docker-py", "project_name": "ja-tokenizer-docker-py", "stargazers_count": 35, "source": "GitHub", "score": -0.2297725324753988, "first_commit": "2022-05-08 13:45:30", "latest_commit": "2022-05-10 16:55:48", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "dictionary to find emotion related to text", "url": "https://github.com/sociocom/JIWC-Dictionary", "project_name": "JIWC-Dictionary", "stargazers_count": 35, "source": "GitHub", "score": -0.2297725324753988, "first_commit": "2020-07-31 16:28:26", "latest_commit": "2021-01-27 17:39:40", "languages": [], "model_or_dataset": "dataset" }, { "description": "wikiHow dataset (Japanese version)", "url": "https://github.com/Katsumata420/wikihow_japanese", "project_name": "wikihow_japanese", "stargazers_count": 35, "source": "GitHub", "score": -0.2297725324753988, "first_commit": "2020-06-29 03:11:23", "latest_commit": "2020-12-18 03:54:55", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "Async Japanese Tokenizer Native Plugin for React Native for iOS and Android", "url": "https://github.com/craftzdog/react-native-japanese-tokenizer", "project_name": "react-native-japanese-tokenizer", "stargazers_count": 34, "source": "GitHub", "score": -0.23274318025104576, "first_commit": "2018-03-14 15:07:47", "latest_commit": "2023-06-19 09:34:01", "languages": [ "Java", "JavaScript" ], "model_or_dataset": null }, { "description": "PytorchでBERTの日本語学習済みモデルを利用する", "url": "https://github.com/yagays/pytorch_bert_japanese", "project_name": "pytorch_bert_japanese", "stargazers_count": 34, "source": "GitHub", "score": -0.23274318025104576, "first_commit": "2019-06-05 21:42:27", "latest_commit": "2019-06-07 14:27:41", "languages": [ "Python" ], "model_or_dataset": "model" }, { "description": "Wikipediaから作成した日本語名寄せデータセット", "url": "https://github.com/yagays/nayose-wikipedia-ja", "project_name": "nayose-wikipedia-ja", "stargazers_count": 34, "source": "GitHub", "score": -0.23274318025104576, "first_commit": "2020-03-09 09:16:39", "latest_commit": "2020-03-10 11:04:36", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "ベイズ階層言語モデルによる教師なし形態素解析", "url": "https://github.com/musyoku/python-npylm", "project_name": "python-npylm", "stargazers_count": 33, "source": "GitHub", "score": -0.23571382802669275, "first_commit": "2016-12-18 21:03:18", "latest_commit": "2019-01-30 16:35:14", "languages": [ "C++", "Python" ], "model_or_dataset": null }, { "description": "This is a English-Japanese lexicon for Machine Learning and Deep Learning terminology.", "url": "https://github.com/Machine-Learning-Tokyo/EN-JP-ML-Lexicon", "project_name": "EN-JP-ML-Lexicon", "stargazers_count": 33, "source": "GitHub", "score": -0.23571382802669275, "first_commit": "2019-05-27 16:29:35", "latest_commit": "2021-03-13 09:19:56", "languages": [], "model_or_dataset": null }, { "description": "BERT with SentencePiece for Japanese text.", "url": "https://github.com/alinear-corp/albert-japanese", "project_name": "albert-japanese", "stargazers_count": 33, "source": "GitHub", "score": -0.23571382802669275, "first_commit": "2018-12-27 20:05:33", "latest_commit": "2021-10-28 19:57:23", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": "model" }, { "description": "このリポジトリは、神戸市役所でのChatGPTの試行利用に向けて作成したフロー等をソリューション化し公開するものです。", "url": "https://github.com/City-of-Kobe/pva-aoai-integration-solution", "project_name": "pva-aoai-integration-solution", "stargazers_count": 33, "source": "GitHub", "score": -0.23571382802669275, "first_commit": "2023-06-20 13:50:54", "latest_commit": "2023-08-14 18:03:34", "languages": [], "model_or_dataset": null }, { "description": "A small version of UniDic for easy pip installs.", "url": "https://github.com/polm/unidic-lite", "project_name": "unidic-lite", "stargazers_count": 33, "source": "GitHub", "score": -0.23571382802669275, "first_commit": "2020-04-07 16:24:18", "latest_commit": "2020-09-01 22:50:07", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "Tokenizer POS-tagger Lemmatizer and Dependency-parser for modern and contemporary Japanese", "url": "https://github.com/KoichiYasuoka/UniDic2UD", "project_name": "UniDic2UD", "stargazers_count": 32, "source": "GitHub", "score": -0.2386844758023397, "first_commit": "2019-08-27 00:45:01", "latest_commit": "2024-01-31 23:53:51", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "Japanese Vicuna QA Benchmark", "url": "https://github.com/ku-nlp/ja-vicuna-qa-benchmark", "project_name": "ja-vicuna-qa-benchmark", "stargazers_count": 32, "source": "GitHub", "score": -0.2386844758023397, "first_commit": "2023-08-11 15:38:05", "latest_commit": "2024-06-11 16:24:06", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "gpt-2 based text2text conversion model", "url": "https://github.com/tanreinama/text2text-japanese", "project_name": "text2text-japanese", "stargazers_count": 32, "source": "GitHub", "score": -0.2386844758023397, "first_commit": "2021-02-11 12:28:53", "latest_commit": "2021-07-22 14:26:45", "languages": [ "Python" ], "model_or_dataset": "model" }, { "description": "A paraphrase database for Japanese text simplification", "url": "https://github.com/tmu-nlp/simple-jppdb", "project_name": "simple-jppdb", "stargazers_count": 32, "source": "GitHub", "score": -0.2386844758023397, "first_commit": "2017-03-09 19:29:19", "latest_commit": "2017-03-13 00:01:48", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "Finetuning Whisper ASR model", "url": "https://github.com/sarulab-speech/whisper-asr-finetune", "project_name": "whisper-asr-finetune", "stargazers_count": 31, "source": "GitHub", "score": -0.24165512357798669, "first_commit": "2022-10-27 20:22:00", "latest_commit": "2022-12-04 21:29:47", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "A Java library to converts between Japanese Hiragana, Katakana, and Romaji scripts.", "url": "https://github.com/andree-surya/moji4j", "project_name": "moji4j", "stargazers_count": 31, "source": "GitHub", "score": -0.24165512357798669, "first_commit": "2016-07-08 09:13:16", "latest_commit": "2022-06-24 09:00:44", "languages": [ "Java", "Ruby" ], "model_or_dataset": null }, { "description": "JVS (Japanese versatile speech) コーパスの自作のラベル", "url": "https://github.com/Hiroshiba/jvs_hiho", "project_name": "jvs_hiho", "stargazers_count": 31, "source": "GitHub", "score": -0.24165512357798669, "first_commit": "2020-02-02 01:24:43", "latest_commit": "2021-02-11 15:29:19", "languages": [], "model_or_dataset": "dataset" }, { "description": "Janome を使ったテキストマイニング入門チュートリアルです。", "url": "https://github.com/mocobeta/janome-tutorial", "project_name": "janome-tutorial", "stargazers_count": 31, "source": "GitHub", "score": -0.24165512357798669, "first_commit": "2019-03-03 15:25:08", "latest_commit": "2019-03-03 16:46:00", "languages": [ "Jupyter Notebook", "JavaScript", "Perl" ], "model_or_dataset": null }, { "description": "MARINE : Multi-task leaRnIng-based JapaNese accent Estimation", "url": "https://github.com/6gsn/marine", "project_name": "marine", "stargazers_count": 30, "source": "GitHub", "score": -0.24462577135363364, "first_commit": "2022-09-05 20:23:40", "latest_commit": "2022-09-20 10:26:24", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "3行要約データセット", "url": "https://github.com/KodairaTomonori/ThreeLineSummaryDataset", "project_name": "ThreeLineSummaryDataset", "stargazers_count": 30, "source": "GitHub", "score": -0.24462577135363364, "first_commit": "2018-01-25 02:27:22", "latest_commit": "2018-04-04 22:24:47", "languages": [], "model_or_dataset": "dataset" }, { "description": "法律・判例関係のデータセット", "url": "https://github.com/japanese-law-analysis/data_set", "project_name": "data_set", "stargazers_count": 30, "source": "GitHub", "score": -0.24462577135363364, "first_commit": "2023-01-15 08:39:44", "latest_commit": "2024-05-14 17:37:05", "languages": [], "model_or_dataset": "dataset" }, { "description": "Yet another Python binding for Juman++/KNP", "url": "https://github.com/ku-nlp/rhoknp", "project_name": "rhoknp", "stargazers_count": 29, "source": "GitHub", "score": -0.24759641912928063, "first_commit": "2021-08-03 14:50:30", "latest_commit": "2024-07-16 09:26:45", "languages": [ "Python", "JavaScript" ], "model_or_dataset": null }, { "description": "A Japanese Parser", "url": "https://github.com/ku-nlp/knp", "project_name": "knp", "stargazers_count": 29, "source": "GitHub", "score": -0.24759641912928063, "first_commit": "1999-03-12 09:09:08", "latest_commit": "2023-11-01 21:02:34", "languages": [ "C", "C++", "Perl", "Java", "Python", "Ruby", "JavaScript" ], "model_or_dataset": null }, { "description": "Japanese analyzer uses kuromoji japanese tokenizer for ElasticSearch", "url": "https://github.com/suguru/elasticsearch-analysis-japanese", "project_name": "elasticsearch-analysis-japanese", "stargazers_count": 29, "source": "GitHub", "score": -0.24759641912928063, "first_commit": "2012-01-11 13:32:35", "latest_commit": "2012-03-06 23:36:46", "languages": [ "Java" ], "model_or_dataset": null }, { "description": "Microsoft IMEなどで利用することを想定した「にじさんじ」関連用語の用語辞書です。", "url": "https://github.com/Umichang/nijisanji-ime-dic", "project_name": "nijisanji-ime-dic", "stargazers_count": 29, "source": "GitHub", "score": -0.24759641912928063, "first_commit": "2019-12-30 01:37:16", "latest_commit": "2024-08-14 12:35:13", "languages": [], "model_or_dataset": "dataset" }, { "description": "Another Anthy", "url": "https://github.com/fujiwarat/anthy-unicode", "project_name": "anthy-unicode", "stargazers_count": 29, "source": "GitHub", "score": -0.24759641912928063, "first_commit": "2013-06-30 11:09:24", "latest_commit": "2024-05-02 14:12:10", "languages": [ "C" ], "model_or_dataset": "dataset" }, { "description": "Google Colaboratoryで日本語のBERTを動かす方法です。", "url": "https://github.com/YutaroOgawa/BERT_Japanese_Google_Colaboratory", "project_name": "BERT_Japanese_Google_Colaboratory", "stargazers_count": 29, "source": "GitHub", "score": -0.24759641912928063, "first_commit": "2020-05-14 14:53:17", "latest_commit": "2022-01-25 11:58:44", "languages": [ "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "This repository provides snippets to use RoBERTa pre-trained on Japanese corpus. Our dataset consists of Japanese Wikipedia and web-scrolled articles, 25GB in total. The released model is built based on that from HuggingFace.", "url": "https://github.com/informatix-inc/bert", "project_name": "bert", "stargazers_count": 28, "source": "GitHub", "score": -0.2505670669049276, "first_commit": "2022-04-05 17:03:10", "latest_commit": "2022-04-05 17:03:55", "languages": [ "Python" ], "model_or_dataset": "model" }, { "description": "Japanese data from the Google UDT 2.0.", "url": "https://github.com/megagonlabs/UD_Japanese-GSD", "project_name": "UD_Japanese-GSD", "stargazers_count": 28, "source": "GitHub", "score": -0.2505670669049276, "first_commit": "2016-10-12 10:33:42", "latest_commit": "2022-05-29 11:52:21", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "COMET-ATOMIC ja", "url": "https://github.com/nlp-waseda/comet-atomic-ja", "project_name": "comet-atomic-ja", "stargazers_count": 28, "source": "GitHub", "score": -0.2505670669049276, "first_commit": "2023-02-04 10:39:44", "latest_commit": "2024-03-08 22:31:37", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "日本語文法誤り訂正ツール", "url": "https://github.com/youichiro/transformer-copy", "project_name": "transformer-copy", "stargazers_count": 27, "source": "GitHub", "score": -0.25353771468057457, "first_commit": "2019-08-24 23:18:00", "latest_commit": "2020-09-25 23:12:54", "languages": [ "Python", "Lua", "C++" ], "model_or_dataset": null }, { "description": "Rust library of natural language dictionaries using character-wise double-array tries.", "url": "https://github.com/daac-tools/crawdad", "project_name": "crawdad", "stargazers_count": 27, "source": "GitHub", "score": -0.25353771468057457, "first_commit": "2022-03-20 23:22:50", "latest_commit": "2023-02-20 22:23:22", "languages": [ "Rust" ], "model_or_dataset": null }, { "description": "Japanese text preprocessor for Text-to-Speech applications (OpenJTalk rewrite in rust language)", "url": "https://github.com/jpreprocess/jpreprocess", "project_name": "jpreprocess", "stargazers_count": 27, "source": "GitHub", "score": -0.25353771468057457, "first_commit": "2022-12-18 17:32:12", "latest_commit": "2024-08-14 09:22:07", "languages": [ "Rust", "Python", "TypeScript" ], "model_or_dataset": null }, { "description": "Small example scripts for working with Japanese texts in Python", "url": "https://github.com/olsgaard/Japanese_nlp_scripts", "project_name": "Japanese_nlp_scripts", "stargazers_count": 26, "source": "GitHub", "score": -0.25650836245622155, "first_commit": "2015-05-18 17:15:00", "latest_commit": "2019-06-30 18:33:13", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "A Japanese Morphological Analyzer written in pure Rust", "url": "https://github.com/agatan/yoin", "project_name": "yoin", "stargazers_count": 26, "source": "GitHub", "score": -0.25650836245622155, "first_commit": "2017-01-15 23:54:52", "latest_commit": "2017-10-27 17:44:55", "languages": [ "Rust" ], "model_or_dataset": null }, { "description": "Convert romaji into hiragana", "url": "https://github.com/koozaki/romaji-conv", "project_name": "romaji-conv", "stargazers_count": 26, "source": "GitHub", "score": -0.25650836245622155, "first_commit": "2020-07-05 01:29:36", "latest_commit": "2024-09-01 19:50:38", "languages": [ "JavaScript" ], "model_or_dataset": null }, { "description": "THE IDOLM@STER words dictionary for Japanese IME (by imas-db.jp)", "url": "https://github.com/maruamyu/imas-ime-dic", "project_name": "imas-ime-dic", "stargazers_count": 26, "source": "GitHub", "score": -0.25650836245622155, "first_commit": "2018-04-25 02:07:26", "latest_commit": "2024-04-19 01:05:34", "languages": [ "Go" ], "model_or_dataset": null }, { "description": "サイバーセキュリティに関連する公的な組織の日英対応", "url": "https://github.com/SaitoLab/security_words", "project_name": "security_words", "stargazers_count": 26, "source": "GitHub", "score": -0.25650836245622155, "first_commit": "2020-04-27 12:23:28", "latest_commit": "2023-08-18 10:02:08", "languages": [], "model_or_dataset": "dataset" }, { "description": "Heteronym disambiguation library using a fine-tuned BERT model.", "url": "https://github.com/passaglia/yomikata", "project_name": "yomikata", "stargazers_count": 25, "source": "GitHub", "score": -0.2594790102318685, "first_commit": "2023-02-21 01:43:00", "latest_commit": "2023-10-03 09:10:45", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "New kana-kanji conversion engine", "url": "https://github.com/yoriyuki/nksnd", "project_name": "nksnd", "stargazers_count": 25, "source": "GitHub", "score": -0.2594790102318685, "first_commit": "2016-05-24 18:52:03", "latest_commit": "2018-05-17 08:54:24", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "東日本大震災発生から24時間以内につぶやかれたジオタグ付きツイートのデジタルアーカイブです。", "url": "https://github.com/wtnv-lab/tweetMapping", "project_name": "tweetMapping", "stargazers_count": 25, "source": "GitHub", "score": -0.2594790102318685, "first_commit": "2021-02-17 07:55:27", "latest_commit": "2023-12-08 21:42:44", "languages": [ "JavaScript" ], "model_or_dataset": null }, { "description": "IPAdic packaged for easy use from Python.", "url": "https://github.com/polm/ipadic-py", "project_name": "ipadic-py", "stargazers_count": 25, "source": "GitHub", "score": -0.2594790102318685, "first_commit": "2020-07-16 16:19:26", "latest_commit": "2021-10-31 04:47:19", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "Japanese Movie Recommendation Dialogue dataset", "url": "https://github.com/ku-nlp/JMRD", "project_name": "JMRD", "stargazers_count": 25, "source": "GitHub", "score": -0.2594790102318685, "first_commit": "2022-07-15 09:43:44", "latest_commit": "2022-07-19 10:02:29", "languages": [], "model_or_dataset": "dataset" }, { "description": "VRChatにAI Botを作るためのリポジトリ", "url": "https://github.com/Geson-anko/vrchatbot", "project_name": "vrchatbot", "stargazers_count": 24, "source": "GitHub", "score": -0.26244965800751546, "first_commit": "2022-12-10 22:48:39", "latest_commit": "2022-12-20 17:18:44", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "LLM勉強会(LLM-jp)で開発しているLLM用のトークナイザー関連をまとめたリポジトリです.", "url": "https://github.com/llm-jp/llm-jp-tokenizer", "project_name": "llm-jp-tokenizer", "stargazers_count": 23, "source": "GitHub", "score": -0.26542030578316245, "first_commit": "2023-07-13 12:52:22", "latest_commit": "2024-07-05 13:56:54", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "python版日本語意味役割付与システム(ASA)", "url": "https://github.com/Takeuchi-Lab-LM/python_asa", "project_name": "python_asa", "stargazers_count": 23, "source": "GitHub", "score": -0.26542030578316245, "first_commit": "2018-07-25 04:13:21", "latest_commit": "2020-01-14 17:09:11", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Japanese pitch accent utils", "url": "https://github.com/DJTB/hatsuon", "project_name": "hatsuon", "stargazers_count": 23, "source": "GitHub", "score": -0.26542030578316245, "first_commit": "2018-04-06 12:06:12", "latest_commit": "2022-03-14 18:06:39", "languages": [ "JavaScript" ], "model_or_dataset": null }, { "description": "ChatGPT3.5を搭載した伺かゴースト「AI妹アイちゃん」です。利用には別途ChatGPTのAPIキーが必要です。", "url": "https://github.com/manju-summoner/AISisterAIChan", "project_name": "AISisterAIChan", "stargazers_count": 23, "source": "GitHub", "score": -0.26542030578316245, "first_commit": "2023-03-30 18:40:44", "latest_commit": "2023-05-18 22:14:24", "languages": [], "model_or_dataset": null }, { "description": "Common LispによるSKK辞書サーバーとその拡張", "url": "https://github.com/tani/cl-skkserv", "project_name": "cl-skkserv", "stargazers_count": 23, "source": "GitHub", "score": -0.26542030578316245, "first_commit": "2017-12-23 10:20:54", "latest_commit": "2024-02-11 10:42:23", "languages": [], "model_or_dataset": "dataset" }, { "description": "Accommodation Search Dialog Corpus (宿泊施設探索対話コーパス)", "url": "https://github.com/megagonlabs/asdc", "project_name": "asdc", "stargazers_count": 23, "source": "GitHub", "score": -0.26542030578316245, "first_commit": "2022-06-16 09:43:01", "latest_commit": "2023-08-09 12:03:48", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "Rakuten MA (Python version)", "url": "https://github.com/ikegami-yukino/rakutenma-python", "project_name": "rakutenma-python", "stargazers_count": 22, "source": "GitHub", "score": -0.26839095355880943, "first_commit": "2015-01-02 06:52:27", "latest_commit": "2017-05-22 16:45:10", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "A Japanese dependency parser based on BERT", "url": "https://github.com/ku-nlp/bertknp", "project_name": "bertknp", "stargazers_count": 22, "source": "GitHub", "score": -0.26839095355880943, "first_commit": "2021-02-10 09:20:17", "latest_commit": "2021-10-02 14:45:12", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "ILYS-aoba-chatbot", "url": "https://github.com/cl-tohoku/ILYS-aoba-chatbot", "project_name": "ILYS-aoba-chatbot", "stargazers_count": 22, "source": "GitHub", "score": -0.26839095355880943, "first_commit": "2020-10-26 13:13:33", "latest_commit": "2021-10-01 22:55:52", "languages": [ "Python", "C++", "Lua" ], "model_or_dataset": "model" }, { "description": "Japanese BERT Pretrained Model", "url": "https://github.com/tanreinama/RoBERTa-japanese", "project_name": "RoBERTa-japanese", "stargazers_count": 22, "source": "GitHub", "score": -0.26839095355880943, "first_commit": "2020-11-28 17:36:50", "latest_commit": "2021-11-13 10:37:23", "languages": [ "Python" ], "model_or_dataset": "model" }, { "description": "かな漢字変換エンジン SKKのための新しい辞書形式", "url": "https://github.com/skk-dict/jisyo", "project_name": "jisyo", "stargazers_count": 22, "source": "GitHub", "score": -0.26839095355880943, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset" }, { "description": "Japanese instruction data (日本語指示データ)", "url": "https://github.com/megagonlabs/instruction_ja", "project_name": "instruction_ja", "stargazers_count": 22, "source": "GitHub", "score": -0.26839095355880943, "first_commit": "2023-06-22 15:52:12", "latest_commit": "2023-07-13 16:02:15", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "Yet another sentence-level tokenizer for the Japanese text", "url": "https://github.com/ikegami-yukino/sengiri", "project_name": "sengiri", "stargazers_count": 21, "source": "GitHub", "score": -0.27136160133445636, "first_commit": "2019-10-05 03:46:43", "latest_commit": "2022-08-10 20:45:00", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Rapid Automatic Keyword Extraction algorithm for Japanese", "url": "https://github.com/kanjirz50/rake-ja", "project_name": "rake-ja", "stargazers_count": 21, "source": "GitHub", "score": -0.27136160133445636, "first_commit": "2018-10-11 19:07:50", "latest_commit": "2018-10-11 19:27:37", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Japanese rōmaji input schema for Rime IME", "url": "https://github.com/lazyfoxchan/rime-jaroomaji", "project_name": "rime-jaroomaji", "stargazers_count": 21, "source": "GitHub", "score": -0.27136160133445636, "first_commit": "2023-03-21 21:07:42", "latest_commit": "2024-08-15 11:40:00", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "首都大日本語 Twitter コーパス", "url": "https://github.com/tmu-nlp/TwitterCorpus", "project_name": "TwitterCorpus", "stargazers_count": 21, "source": "GitHub", "score": -0.27136160133445636, "first_commit": "2016-03-05 19:20:15", "latest_commit": "2016-03-14 19:55:35", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "2023年8月にメルボルン大学から公開された安全性評価データセット『Do-Not-Answer』を日本語LLMの評価においても使用できるように日本語に自動翻訳し、さらに日本文化も考慮して修正したデータセット。", "url": "https://github.com/kunishou/do-not-answer-ja", "project_name": "do-not-answer-ja", "stargazers_count": 21, "source": "GitHub", "score": -0.27136160133445636, "first_commit": "2023-09-09 08:21:26", "latest_commit": "2023-12-16 02:34:12", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": "dataset" }, { "description": "作って学ぶ正規表現エンジン", "url": "https://github.com/makenowjust/kantan-regex-book", "project_name": "kantan-regex-book", "stargazers_count": 21, "source": "GitHub", "score": -0.27136160133445636, "first_commit": "2024-01-28 10:37:12", "latest_commit": "2024-03-23 21:55:27", "languages": [ "Ruby", "JavaScript" ], "model_or_dataset": null }, { "description": "Vaporetto is a fast and lightweight pointwise prediction based tokenizer. This is a Python wrapper for Vaporetto.", "url": "https://github.com/daac-tools/python-vaporetto", "project_name": "python-vaporetto", "stargazers_count": 20, "source": "GitHub", "score": -0.27433224911010334, "first_commit": "2022-06-09 13:37:22", "latest_commit": "2023-09-05 22:15:48", "languages": [ "Python", "Rust" ], "model_or_dataset": null }, { "description": "Darts-clone python binding", "url": "https://github.com/rixwew/darts-clone-python", "project_name": "darts-clone-python", "stargazers_count": 20, "source": "GitHub", "score": -0.27433224911010334, "first_commit": "2018-11-17 00:57:52", "latest_commit": "2022-04-05 21:28:21", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Easy-to-use scripts to fine-tune GPT-2-JA with your own texts, to generate sentences, and to tweet them automatically.", "url": "https://github.com/discus0434/text-generation", "project_name": "text-generation", "stargazers_count": 20, "source": "GitHub", "score": -0.27433224911010334, "first_commit": "2022-07-17 11:55:29", "latest_commit": "2022-07-24 13:48:32", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "敬語変換タスクにおける評価用データセット", "url": "https://github.com/cl-tohoku/keigo_transfer_task", "project_name": "keigo_transfer_task", "stargazers_count": 20, "source": "GitHub", "score": -0.27433224911010334, "first_commit": "2022-06-14 23:06:57", "latest_commit": "2022-11-24 13:01:06", "languages": [], "model_or_dataset": "dataset" }, { "description": "日本語マルチタスク言語理解ベンチマーク Japanese Massive Multitask Language Understanding Benchmark", "url": "https://github.com/nlp-waseda/JMMLU", "project_name": "JMMLU", "stargazers_count": 20, "source": "GitHub", "score": -0.27433224911010334, "first_commit": "2024-01-09 16:43:45", "latest_commit": "2024-02-27 14:19:14", "languages": [], "model_or_dataset": "dataset" }, { "description": "A fast character conversion and transliteration library based on the scheme defined for Japan National Tax Agency (国税庁) 's", "url": "https://github.com/opencollector/jntajis-python", "project_name": "jntajis-python", "stargazers_count": 19, "source": "GitHub", "score": -0.2773028968857503, "first_commit": "2021-08-20 17:32:20", "latest_commit": "2023-06-16 22:43:41", "languages": [ "Python", "C" ], "model_or_dataset": null }, { "description": "pygeonlp, A python module for geotagging Japanese texts.", "url": "https://github.com/geonlp-platform/pygeonlp", "project_name": "pygeonlp", "stargazers_count": 19, "source": "GitHub", "score": -0.2773028968857503, "first_commit": "2021-06-26 17:45:22", "latest_commit": "2024-07-24 01:39:27", "languages": [ "Python", "C++" ], "model_or_dataset": null }, { "description": "A Rust library to convert Japanese Half-width-kana[半角カナ] and Wide-alphanumeric[全角英数] into normal ones", "url": "https://github.com/gemmarx/unicode-jp-rs", "project_name": "unicode-jp-rs", "stargazers_count": 19, "source": "GitHub", "score": -0.2773028968857503, "first_commit": "2016-05-21 16:39:49", "latest_commit": "2020-04-11 12:01:21", "languages": [ "Rust" ], "model_or_dataset": null }, { "description": "Utility collections for making Japanese text old-fashioned", "url": "https://github.com/hakatashi/kyujitai.js", "project_name": "kyujitai.js", "stargazers_count": 19, "source": "GitHub", "score": -0.2773028968857503, "first_commit": "2014-09-06 17:03:02", "latest_commit": "2020-08-30 23:28:58", "languages": [ "JavaScript" ], "model_or_dataset": null }, { "description": "A Go library for Zenkaku/Hankaku conversion", "url": "https://github.com/ktnyt/go-moji", "project_name": "go-moji", "stargazers_count": 19, "source": "GitHub", "score": -0.2773028968857503, "first_commit": "2018-01-15 23:57:06", "latest_commit": "2019-04-17 10:29:44", "languages": [ "Go" ], "model_or_dataset": null }, { "description": "全国書誌データから作成した振り仮名のデータセット", "url": "https://github.com/ndl-lab/huriganacorpus-ndlbib", "project_name": "huriganacorpus-ndlbib", "stargazers_count": 19, "source": "GitHub", "score": -0.2773028968857503, "first_commit": "2021-09-01 13:36:53", "latest_commit": "2021-09-21 14:20:03", "languages": [], "model_or_dataset": "dataset" }, { "description": "\"Proposal and Evaluation of Japanese Toxicity Schema\" provides a schema and dataset for toxicity in the Japanese language.", "url": "https://github.com/inspection-ai/japanese-toxic-dataset", "project_name": "japanese-toxic-dataset", "stargazers_count": 19, "source": "GitHub", "score": -0.2773028968857503, "first_commit": "2023-01-10 17:19:58", "latest_commit": "2023-01-11 13:55:08", "languages": [], "model_or_dataset": "dataset" }, { "description": "CAMERA (CyberAgent Multimodal Evaluation for Ad Text GeneRAtion) is the Japanese ad text generation dataset.", "url": "https://github.com/CyberAgentAILab/camera", "project_name": "camera", "stargazers_count": 19, "source": "GitHub", "score": -0.2773028968857503, "first_commit": "2023-02-22 10:33:39", "latest_commit": "2024-08-13 09:20:33", "languages": [], "model_or_dataset": "dataset" }, { "description": "日本語フェイクニュースデータセット", "url": "https://github.com/tanreinama/Japanese-Fakenews-Dataset", "project_name": "Japanese-Fakenews-Dataset", "stargazers_count": 19, "source": "GitHub", "score": -0.2773028968857503, "first_commit": "2021-05-02 15:40:10", "latest_commit": "2021-05-02 15:40:10", "languages": [], "model_or_dataset": "dataset" }, { "description": "The full-text search system for Aozora Bunko by Groonga. 青空文庫全文検索ライブラリ兼Webアプリ。", "url": "https://github.com/myokoym/aozorasearch", "project_name": "aozorasearch", "stargazers_count": 19, "source": "GitHub", "score": -0.2773028968857503, "first_commit": "2016-10-15 17:22:45", "latest_commit": "2020-09-04 14:28:15", "languages": [ "Ruby", "JavaScript" ], "model_or_dataset": "dataset" }, { "description": "検索拡張(RAG)評価のための日本語Q&Aデータセット", "url": "https://github.com/hotchpotch/jqara", "project_name": "jqara", "stargazers_count": 19, "source": "GitHub", "score": -0.2773028968857503, "first_commit": "2024-03-02 09:09:08", "latest_commit": "2024-08-10 11:54:50", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "日本語情報検索評価のための小規模でカジュアルなWebタイトルと概要のデータセット", "url": "https://github.com/hotchpotch/jacwir", "project_name": "jacwir", "stargazers_count": 19, "source": "GitHub", "score": -0.2773028968857503, "first_commit": "2024-03-19 15:31:13", "latest_commit": "2024-07-24 11:09:03", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "A parser for Japanese number (Kanji, arabic) in the natural language.", "url": "https://github.com/takumakanari/japanese-numbers-python", "project_name": "japanese-numbers-python", "stargazers_count": 18, "source": "GitHub", "score": -0.2802735446613973, "first_commit": "2016-07-29 09:20:05", "latest_commit": "2020-04-04 10:36:27", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "日本語とグロンギ語の相互変換スクリプト", "url": "https://github.com/shogo82148/Grongish", "project_name": "Grongish", "stargazers_count": 18, "source": "GitHub", "score": -0.2802735446613973, "first_commit": "2012-02-10 22:28:59", "latest_commit": "2022-04-21 21:31:37", "languages": [ "Python", "JavaScript" ], "model_or_dataset": null }, { "description": "専門用語抽出アルゴリズムの実装の練習", "url": "https://github.com/kanjirz50/termextract", "project_name": "termextract", "stargazers_count": 18, "source": "GitHub", "score": -0.2802735446613973, "first_commit": "2018-09-26 22:20:04", "latest_commit": "2018-09-26 23:01:36", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "最小のサーチエンジン/PageRank/tf-idf", "url": "https://github.com/GINK03/minimal-search-engine", "project_name": "minimal-search-engine", "stargazers_count": 18, "source": "GitHub", "score": -0.2802735446613973, "first_commit": "2019-06-25 01:58:26", "latest_commit": "2019-07-06 01:26:57", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Mozc UT Jawiki Dictionary is a dictionary generated from the Japanese Wikipedia for Mozc.", "url": "https://github.com/utuhiro78/mozcdic-ut-jawiki", "project_name": "mozcdic-ut-jawiki", "stargazers_count": 18, "source": "GitHub", "score": -0.2802735446613973, "first_commit": "2023-01-15 17:43:20", "latest_commit": "2024-07-29 01:27:13", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "文の敬体(ですます調)、常体(である調)を解析するJavaScriptライブラリ", "url": "https://github.com/textlint-ja/analyze-desumasu-dearu", "project_name": "analyze-desumasu-dearu", "stargazers_count": 18, "source": "GitHub", "score": -0.2802735446613973, "first_commit": "2015-10-10 01:42:33", "latest_commit": "2021-12-10 12:01:50", "languages": [ "JavaScript" ], "model_or_dataset": null }, { "description": "Japanese Adversarial Natural Language Inference Dataset", "url": "https://github.com/verypluming/JaNLI", "project_name": "JaNLI", "stargazers_count": 18, "source": "GitHub", "score": -0.2802735446613973, "first_commit": "2021-11-02 20:26:27", "latest_commit": "2023-05-31 17:50:04", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "日本語で書かれた技術書を収集した生コーパス/ツール", "url": "https://github.com/textlint-ja/technological-book-corpus-ja", "project_name": "technological-book-corpus-ja", "stargazers_count": 18, "source": "GitHub", "score": -0.2802735446613973, "first_commit": "2017-03-26 13:28:13", "latest_commit": "2023-07-12 03:26:27", "languages": [ "JavaScript" ], "model_or_dataset": "dataset" }, { "description": "The repository contains scripts and merge scripts that have been modified to adapt an Alpaca-Lora adapter for LoRA tuning when assuming the use of the \"rinna/japanese-gpt-neox...\" [gpt-neox] model converted to ggml.", "url": "https://github.com/yukaryavka/rinna_gpt-neox_ggml-lora", "project_name": "rinna_gpt-neox_ggml-lora", "stargazers_count": 17, "source": "GitHub", "score": -0.28324419243704424, "first_commit": "2023-05-24 03:21:40", "latest_commit": "2023-05-25 05:38:04", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Neural Image Caption (NIC) on chainer, its pretrained models on English and Japanese image caption datasets.", "url": "https://github.com/yuyay/chainer_nic", "project_name": "chainer_nic", "stargazers_count": 17, "source": "GitHub", "score": -0.28324419243704424, "first_commit": "2018-07-02 19:57:17", "latest_commit": "2018-12-14 17:26:49", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Make learning Japanese easier by adding readings for every kanji in the eBook", "url": "https://github.com/rabbit19981023/yomigana-ebook", "project_name": "yomigana-ebook", "stargazers_count": 17, "source": "GitHub", "score": -0.28324419243704424, "first_commit": "2023-04-25 18:21:37", "latest_commit": "2024-02-21 01:18:17", "languages": [ "Python", "TypeScript" ], "model_or_dataset": null }, { "description": "The evaluation scripts of JMTEB (Japanese Massive Text Embedding Benchmark)", "url": "https://github.com/sbintuitions/jmteb", "project_name": "jmteb", "stargazers_count": 17, "source": "GitHub", "score": -0.28324419243704424, "first_commit": "2024-03-15 10:28:52", "latest_commit": "2024-06-20 20:58:44", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Annotated Fuman Kaitori Center Corpus", "url": "https://github.com/ku-nlp/AnnotatedFKCCorpus", "project_name": "AnnotatedFKCCorpus", "stargazers_count": 17, "source": "GitHub", "score": -0.28324419243704424, "first_commit": "2021-01-18 19:32:38", "latest_commit": "2023-12-18 14:27:29", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "Evidence-based Explanation Dataset (AACL-IJCNLP 2020)", "url": "https://github.com/megagonlabs/ebe-dataset", "project_name": "ebe-dataset", "stargazers_count": 17, "source": "GitHub", "score": -0.28324419243704424, "first_commit": "2020-10-14 08:15:37", "latest_commit": "2020-12-17 13:39:55", "languages": [], "model_or_dataset": "dataset" }, { "description": "English loanwords in Japanese", "url": "https://github.com/jamesohortle/loanwords_gairaigo", "project_name": "loanwords_gairaigo", "stargazers_count": 17, "source": "GitHub", "score": -0.28324419243704424, "first_commit": "2019-10-21 16:49:45", "latest_commit": "2021-01-08 12:40:02", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "青空文庫振り仮名注釈付き音声コーパスのデータセット", "url": "https://github.com/ndl-lab/hurigana-speech-corpus-aozora", "project_name": "hurigana-speech-corpus-aozora", "stargazers_count": 17, "source": "GitHub", "score": -0.28324419243704424, "first_commit": "2024-01-16 11:30:12", "latest_commit": "2024-01-31 16:04:24", "languages": [], "model_or_dataset": "dataset" }, { "description": "NIILC QA data", "url": "https://github.com/mynlp/niilc-qa", "project_name": "niilc-qa", "stargazers_count": 17, "source": "GitHub", "score": -0.28324419243704424, "first_commit": "2015-10-13 11:42:53", "latest_commit": "2015-11-20 10:35:39", "languages": [], "model_or_dataset": "dataset" }, { "description": "JASS: Japanese-specific Sequence to Sequence Pre-training for Neural Machine Translation (LREC2020) & Linguistically Driven Multi-Task Pre-Training for Low-Resource Neural Machine Translation (ACM TALLIP)", "url": "https://github.com/Mao-KU/JASS", "project_name": "JASS", "stargazers_count": 16, "source": "GitHub", "score": -0.2862148402126912, "first_commit": "2020-02-19 11:19:42", "latest_commit": "2022-01-25 15:24:53", "languages": [], "model_or_dataset": null }, { "description": "Use custom tokenizers in spacy-transformers", "url": "https://github.com/megagonlabs/ginza-transformers", "project_name": "ginza-transformers", "stargazers_count": 16, "source": "GitHub", "score": -0.2862148402126912, "first_commit": "2021-06-23 17:42:12", "latest_commit": "2022-08-09 18:19:33", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Latest version of MedEX/J (Japanese disease name extractor)", "url": "https://github.com/sociocom/MedNER-J", "project_name": "MedNER-J", "stargazers_count": 16, "source": "GitHub", "score": -0.2862148402126912, "first_commit": "2020-07-28 18:27:41", "latest_commit": "2022-05-17 20:01:05", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "https://www.nlp.ecei.tohoku.ac.jp/projects/aio/", "url": "https://github.com/cl-tohoku/AIO2_DPR_baseline", "project_name": "AIO2_DPR_baseline", "stargazers_count": 16, "source": "GitHub", "score": -0.2862148402126912, "first_commit": "2021-09-13 16:33:09", "latest_commit": "2022-01-08 23:15:11", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "教師なし品詞タグ推定", "url": "https://github.com/musyoku/unsupervised-pos-tagging", "project_name": "unsupervised-pos-tagging", "stargazers_count": 16, "source": "GitHub", "score": -0.2862148402126912, "first_commit": "2017-01-07 09:24:36", "latest_commit": "2017-10-11 23:23:04", "languages": [ "C++", "Python" ], "model_or_dataset": null }, { "description": "A Python script for adding furigana to Japanese epub books using Mecab and Unidic.", "url": "https://github.com/Mumumu4/furigana4epub", "project_name": "furigana4epub", "stargazers_count": 16, "source": "GitHub", "score": -0.2862148402126912, "first_commit": "2021-08-29 20:14:15", "latest_commit": "2021-09-11 14:03:11", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Laboro DistilBERT Japanese", "url": "https://github.com/laboroai/Laboro-DistilBERT-Japanese", "project_name": "Laboro-DistilBERT-Japanese", "stargazers_count": 16, "source": "GitHub", "score": -0.2862148402126912, "first_commit": "2020-11-30 16:10:16", "latest_commit": "2020-12-17 15:26:01", "languages": [ "Jupyter Notebook", "Python" ], "model_or_dataset": "model" }, { "description": "OpenAIのAPIを利用して、設定したキャラクターと日本語で会話するチャットスクリプトです。", "url": "https://github.com/mutaguchi/character_chat", "project_name": "character_chat", "stargazers_count": 16, "source": "GitHub", "score": -0.2862148402126912, "first_commit": "2023-03-19 03:36:06", "latest_commit": "2023-06-03 23:30:23", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "TEDxJP-10K ASR Evaluation Dataset", "url": "https://github.com/laboroai/TEDxJP-10K", "project_name": "TEDxJP-10K", "stargazers_count": 16, "source": "GitHub", "score": -0.2862148402126912, "first_commit": "2021-01-14 13:14:28", "latest_commit": "2021-01-14 15:33:14", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "Top 5000 Japanese family names, with readings, ordered by frequency.", "url": "https://github.com/siikamiika/japanese-family-names", "project_name": "japanese-family-names", "stargazers_count": 16, "source": "GitHub", "score": -0.2862148402126912, "first_commit": "2017-06-08 18:00:02", "latest_commit": "2017-06-09 01:50:19", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "Tokenizer POS-tagger Lemmatizer and Dependency-parser for modern and contemporary Japanese with BERT models", "url": "https://github.com/KoichiYasuoka/SuPar-UniDic", "project_name": "SuPar-UniDic", "stargazers_count": 15, "source": "GitHub", "score": -0.2891854879883382, "first_commit": "2021-02-21 09:42:27", "latest_commit": "2024-06-28 15:56:24", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "ふりがなパッド", "url": "https://github.com/esrille/furiganapad", "project_name": "furiganapad", "stargazers_count": 15, "source": "GitHub", "score": -0.2891854879883382, "first_commit": "2019-05-14 02:56:41", "latest_commit": "2024-05-23 04:55:05", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Rinna-3.6B、OpenCALM等の日本語対応LLM(大規模言語モデル)用の簡易Webインタフェースです", "url": "https://github.com/noir55/japanese_llm_simple_webui", "project_name": "japanese_llm_simple_webui", "stargazers_count": 15, "source": "GitHub", "score": -0.2891854879883382, "first_commit": "2023-06-04 23:06:13", "latest_commit": "2024-05-12 22:10:56", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "GPT-4 を用いて、言語モデルの応答を自動評価するスクリプト", "url": "https://github.com/northern-system-service/gpt4-autoeval", "project_name": "gpt4-autoeval", "stargazers_count": 15, "source": "GitHub", "score": -0.2891854879883382, "first_commit": "2023-12-18 13:38:36", "latest_commit": "2024-06-06 13:50:35", "languages": [ "Jupyter Notebook", "Python", "JavaScript" ], "model_or_dataset": null }, { "description": "Mixtral-based Ja-En (En-Ja) Translation model", "url": "https://github.com/hpprc/llm-translator", "project_name": "llm-translator", "stargazers_count": 15, "source": "GitHub", "score": -0.2891854879883382, "first_commit": "2023-12-31 00:47:57", "latest_commit": "2023-12-31 00:53:57", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "「ひらがな」または「カタカナ」を日本語で発音する際の音声記号(IPA)に変換するコマンド", "url": "https://github.com/amanoese/kana2ipa", "project_name": "kana2ipa", "stargazers_count": 15, "source": "GitHub", "score": -0.2891854879883382, "first_commit": "2019-12-23 00:07:17", "latest_commit": "2020-10-25 04:42:14", "languages": [ "JavaScript" ], "model_or_dataset": null }, { "description": "ホロライブ(ホロライブプロダクション)に関する辞書ファイルです。./dictionary フォルダ内のテキストファイルを使って、IMEに単語を追加できます。詳細はREADME.mdをご覧ください。", "url": "https://github.com/heppokofrontend/hololive-dictionary", "project_name": "hololive-dictionary", "stargazers_count": 15, "source": "GitHub", "score": -0.2891854879883382, "first_commit": "2021-06-13 09:49:48", "latest_commit": "2024-05-02 16:25:29", "languages": [ "TypeScript" ], "model_or_dataset": "dataset" }, { "description": "Pixiv Encyclopedia Dictionary for Yomitan", "url": "https://github.com/MarvNC/pixiv-yomitan", "project_name": "pixiv-yomitan", "stargazers_count": 15, "source": "GitHub", "score": -0.2891854879883382, "first_commit": "2024-02-09 19:59:45", "latest_commit": "2024-07-23 22:00:16", "languages": [ "TypeScript" ], "model_or_dataset": "dataset" }, { "description": "Scripts for creating a Japanese-English parallel corpus and training NMT models", "url": "https://github.com/laboroai/Laboro-ParaCorpus", "project_name": "Laboro-ParaCorpus", "stargazers_count": 15, "source": "GitHub", "score": -0.2891854879883382, "first_commit": "2021-09-29 18:37:50", "latest_commit": "2021-11-09 10:18:54", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": "dataset" }, { "description": "日本語NLPライブラリGiNZAのすゝめ", "url": "https://github.com/poyo46/ginza-examples", "project_name": "ginza-examples", "stargazers_count": 15, "source": "GitHub", "score": -0.2891854879883382, "first_commit": "2020-08-25 02:24:01", "latest_commit": "2021-01-29 00:04:15", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Japanese Sentence Breaker", "url": "https://github.com/hppRC/japanese-sentence-breaker", "project_name": "japanese-sentence-breaker", "stargazers_count": 14, "source": "GitHub", "score": -0.2921561357639852, "first_commit": "2021-02-28 21:40:27", "latest_commit": "2021-02-28 22:47:20", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Examples to finetune encoder-only and encoder-decoder transformers for Japanese language (Hugging Face) Resources", "url": "https://github.com/tsmatz/huggingface-finetune-japanese", "project_name": "huggingface-finetune-japanese", "stargazers_count": 14, "source": "GitHub", "score": -0.2921561357639852, "first_commit": "2022-10-24 18:13:22", "latest_commit": "2023-10-06 17:11:54", "languages": [ "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "Joint source channel model for Japanese Kana Kanji conversion, Chinese pinyin input and CJE mixed input.", "url": "https://github.com/yohokuno/jsc", "project_name": "jsc", "stargazers_count": 14, "source": "GitHub", "score": -0.2921561357639852, "first_commit": "2012-08-23 16:39:41", "latest_commit": "2012-12-19 18:36:09", "languages": [ "C++" ], "model_or_dataset": null }, { "description": "aMLP Transformer Model for Japanese", "url": "https://github.com/tanreinama/aMLP-japanese", "project_name": "aMLP-japanese", "stargazers_count": 14, "source": "GitHub", "score": -0.2921561357639852, "first_commit": "2021-11-13 16:17:10", "latest_commit": "2022-05-10 14:16:55", "languages": [ "Python" ], "model_or_dataset": "model" }, { "description": "A chatbox application built using Nuxt 3 powered by Open AI Text completion endpoint. You can select different personality of your AI friend. The default will respond in Japanese. You can use this app to practice your Nihongo skills!", "url": "https://github.com/supershaneski/openai-chatfriend", "project_name": "openai-chatfriend", "stargazers_count": 14, "source": "GitHub", "score": -0.2921561357639852, "first_commit": "2023-01-17 15:19:27", "latest_commit": "2023-04-03 10:19:35", "languages": [ "JavaScript", "TypeScript" ], "model_or_dataset": null }, { "description": "Mozc UT Place Name Dictionary is a dictionary converted from the Japan Post's ZIP code data for Mozc.", "url": "https://github.com/utuhiro78/mozcdic-ut-place-names", "project_name": "mozcdic-ut-place-names", "stargazers_count": 14, "source": "GitHub", "score": -0.2921561357639852, "first_commit": "2023-01-15 18:01:48", "latest_commit": "2024-08-07 03:05:50", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "OpenAssistant のオープンソースデータ OASST1 を日本語に翻訳したデータセットになります。", "url": "https://github.com/kunishou/oasst1-89k-ja", "project_name": "oasst1-89k-ja", "stargazers_count": 14, "source": "GitHub", "score": -0.2921561357639852, "first_commit": "2023-05-07 05:27:23", "latest_commit": "2023-11-20 00:23:10", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "An easy to use tokenizer for Japanese text, aimed at language learners and non-linguists", "url": "https://github.com/mkartawijaya/dango", "project_name": "dango", "stargazers_count": 13, "source": "GitHub", "score": -0.2951267835396322, "first_commit": "2021-06-20 13:18:31", "latest_commit": "2021-11-21 19:41:04", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "A phenomenon-wise evaluation dataset for Japanese-English machine translation robustness. The dataset is based on the MTNT dataset, with additional annotations of four linguistic phenomena; Proper Noun, Abbreviated Noun, Colloquial Expression, and Variant. COLING 2020.", "url": "https://github.com/cl-tohoku/PheMT", "project_name": "PheMT", "stargazers_count": 13, "source": "GitHub", "score": -0.2951267835396322, "first_commit": "2020-10-27 17:05:01", "latest_commit": "2021-02-18 14:05:26", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Entitypedia is an Extended Named Entity Dictionary from Wikipedia.", "url": "https://github.com/chakki-works/entitypedia", "project_name": "entitypedia", "stargazers_count": 13, "source": "GitHub", "score": -0.2951267835396322, "first_commit": "2017-11-08 13:54:21", "latest_commit": "2018-12-10 12:53:58", "languages": [ "Python", "Jupyter Notebook", "JavaScript" ], "model_or_dataset": null }, { "description": "BERT-based GEC tagging for Japanese", "url": "https://github.com/jonnyli1125/gector-ja", "project_name": "gector-ja", "stargazers_count": 13, "source": "GitHub", "score": -0.2951267835396322, "first_commit": "2021-05-11 04:51:42", "latest_commit": "2021-07-16 18:55:03", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Negima is a Python package to extract phrases in Japanese text by using the part-of-speeches based rules you defined.", "url": "https://github.com/cocodrips/negima", "project_name": "negima", "stargazers_count": 13, "source": "GitHub", "score": -0.2951267835396322, "first_commit": "2018-06-27 01:49:31", "latest_commit": "2018-08-20 12:31:43", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "日本語データセットでのqlora instruction tuning学習サンプルコード", "url": "https://github.com/sonoisa/clip-japanese", "project_name": "clip-japanese", "stargazers_count": 13, "source": "GitHub", "score": -0.2951267835396322, "first_commit": "2022-02-24 20:42:33", "latest_commit": "2023-03-12 18:43:17", "languages": [ "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "Japanese input method for fcitx5, powered by azooKey engine", "url": "https://github.com/7ka-hiira/fcitx5-hazkey", "project_name": "fcitx5-hazkey", "stargazers_count": 13, "source": "GitHub", "score": -0.2951267835396322, "first_commit": "2024-05-21 22:15:24", "latest_commit": "2024-08-01 12:39:04", "languages": [ "C++", "Swift" ], "model_or_dataset": "dataset" }, { "description": "デジタル化資料から作成したOCRテキストデータのngram頻度統計情報のデータセット", "url": "https://github.com/ndl-lab/ndlngramdata", "project_name": "ndlngramdata", "stargazers_count": 13, "source": "GitHub", "score": -0.2951267835396322, "first_commit": "2022-12-06 10:20:12", "latest_commit": "2023-01-10 10:38:37", "languages": [], "model_or_dataset": "dataset" }, { "description": "Convert English alphabet to Katakana", "url": "https://github.com/shihono/alphabet2kana", "project_name": "alphabet2kana", "stargazers_count": 12, "source": "GitHub", "score": -0.2980974313152791, "first_commit": "2021-03-21 12:29:41", "latest_commit": "2024-08-10 11:10:20", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "AllenNLP integration for Shiba: Japanese CANINE model", "url": "https://github.com/shunk031/allennlp-shiba-model", "project_name": "allennlp-shiba-model", "stargazers_count": 12, "source": "GitHub", "score": -0.2980974313152791, "first_commit": "2021-06-26 18:37:25", "latest_commit": "2021-06-27 00:42:45", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Testing of Neural Topic Modeling for Japanese articles", "url": "https://github.com/m3yrin/NTM", "project_name": "NTM", "stargazers_count": 12, "source": "GitHub", "score": -0.2980974313152791, "first_commit": "2019-07-24 20:14:25", "latest_commit": "2019-07-24 22:26:58", "languages": [ "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "[PyTorch] ClipCap for Japanese", "url": "https://github.com/Japanese-Image-Captioning/ClipCap-for-Japanese", "project_name": "ClipCap-for-Japanese", "stargazers_count": 12, "source": "GitHub", "score": -0.2980974313152791, "first_commit": "2022-10-05 00:44:54", "latest_commit": "2022-10-05 02:08:58", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "軽量で規制も検閲もない日本語ローカル LLM『LightChatAssistant-TypeB』による、簡単なノベル生成アシスタントです。ローカル特権の永続生成 Generate forever で、当たりガチャを積み上げます。読み上げにも対応。", "url": "https://github.com/zuntan03/easynovelassistant", "project_name": "easynovelassistant", "stargazers_count": 12, "source": "GitHub", "score": -0.2980974313152791, "first_commit": "2024-04-22 15:59:35", "latest_commit": "2024-07-05 17:29:36", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "深層学習を用いたクイズ自動生成(日本語T5モデル)", "url": "https://github.com/sonoisa/deep-question-generation", "project_name": "deep-question-generation", "stargazers_count": 12, "source": "GitHub", "score": -0.2980974313152791, "first_commit": "2021-03-07 17:01:23", "latest_commit": "2023-03-12 18:47:52", "languages": [ "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "このライブラリは、ひらがな・カタカナ、半角・全角の相互変換や判別を始めとした機能を提供します。", "url": "https://github.com/samunohito/kanaria", "project_name": "kanaria", "stargazers_count": 12, "source": "GitHub", "score": -0.2980974313152791, "first_commit": "2019-01-07 22:16:32", "latest_commit": "2024-06-28 09:48:18", "languages": [ "Rust", "C#", "Kotlin" ], "model_or_dataset": null }, { "description": "Trials of pre-trained BERT models for the medical domain in Japanese.", "url": "https://github.com/ou-medinfo/medbertjp", "project_name": "medbertjp", "stargazers_count": 12, "source": "GitHub", "score": -0.2980974313152791, "first_commit": "2020-10-23 03:07:15", "latest_commit": "2020-11-22 08:37:27", "languages": [ "Jupyter Notebook", "Python" ], "model_or_dataset": "model" }, { "description": "Pretrained models, codes and guidances to pretrain official ALBERT(https://github.com/google-research/albert) on Japanese Wikipedia Resources", "url": "https://github.com/nknytk/albert-japanese-tinysegmenter", "project_name": "albert-japanese-tinysegmenter", "stargazers_count": 12, "source": "GitHub", "score": -0.2980974313152791, "first_commit": "2023-06-04 15:21:32", "latest_commit": "2023-09-26 23:07:55", "languages": [ "Python" ], "model_or_dataset": "model" }, { "description": "EasyLightChatAssistant は軽量で検閲や規制のないローカル日本語モデルのLightChatAssistant を、KoboldCpp で簡単にお試しする環境です。", "url": "https://github.com/zuntan03/easylightchatassistant", "project_name": "easylightchatassistant", "stargazers_count": 12, "source": "GitHub", "score": -0.2980974313152791, "first_commit": "2024-04-06 17:40:16", "latest_commit": "2024-04-24 00:49:12", "languages": [], "model_or_dataset": "model" }, { "description": "贵樣ばこゐ辞畫を使て正レい日本语を使ラことが出來ゑ。", "url": "https://github.com/Rinrin0413/Ayashiy-Nipongo-Dic", "project_name": "Ayashiy-Nipongo-Dic", "stargazers_count": 12, "source": "GitHub", "score": -0.2980974313152791, "first_commit": "2021-12-14 11:50:25", "latest_commit": "2024-05-01 15:23:24", "languages": [], "model_or_dataset": "dataset" }, { "description": "A Web Crawled Corpus for Japanese-Chinese NMT", "url": "https://github.com/zhang-jinyi/Web-Crawled-Corpus-for-Japanese-Chinese-NMT", "project_name": "Web-Crawled-Corpus-for-Japanese-Chinese-NMT", "stargazers_count": 12, "source": "GitHub", "score": -0.2980974313152791, "first_commit": "2021-11-12 23:19:26", "latest_commit": "2023-10-01 03:13:18", "languages": [], "model_or_dataset": "dataset" }, { "description": "Coursera Corpus Mining and Multistage Fine-Tuning for Improving Lectures Translation", "url": "https://github.com/shyyhs/CourseraParallelCorpusMining", "project_name": "CourseraParallelCorpusMining", "stargazers_count": 12, "source": "GitHub", "score": -0.2980974313152791, "first_commit": "2020-01-09 18:09:50", "latest_commit": "2022-06-14 14:18:29", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": "dataset" }, { "description": "歴史資料の市民参加型翻刻プラットフォーム「みんなで翻刻」のテキストデータ置き場です。 / Transcription texts created on Minna de Honkoku (https://honkoku.org), a crowdsourced transcription platform for historical Japanese documents.", "url": "https://github.com/yuta1984/honkoku-data", "project_name": "honkoku-data", "stargazers_count": 12, "source": "GitHub", "score": -0.2980974313152791, "first_commit": "2020-02-23 16:47:55", "latest_commit": "2024-06-14 17:43:19", "languages": [], "model_or_dataset": "dataset" }, { "description": "JCommonsenseMorality is a dataset created through crowdsourcing that reflects the commonsense morality of Japanese annotators.", "url": "https://github.com/Language-Media-Lab/commonsense-moral-ja", "project_name": "commonsense-moral-ja", "stargazers_count": 12, "source": "GitHub", "score": -0.2980974313152791, "first_commit": "2022-12-29 13:18:05", "latest_commit": "2023-12-18 11:41:00", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "Japanese semantic test suite (FraCaS counterpart and extensions)", "url": "https://github.com/DaisukeBekki/JSeM", "project_name": "JSeM", "stargazers_count": 12, "source": "GitHub", "score": -0.2980974313152791, "first_commit": "2019-09-08 18:33:50", "latest_commit": "2023-05-05 14:03:37", "languages": [ "Haskell", "JavaScript" ], "model_or_dataset": "dataset" }, { "description": "A powerful text cleaner for Japanese web texts", "url": "https://github.com/ku-nlp/text-cleaning", "project_name": "text-cleaning", "stargazers_count": 11, "source": "GitHub", "score": -0.3010680790909261, "first_commit": "2020-02-10 15:31:27", "latest_commit": "2022-11-21 10:21:56", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "An ambiguous subtitles dataset for visual scene-aware machine translation", "url": "https://github.com/ku-nlp/VISA", "project_name": "VISA", "stargazers_count": 11, "source": "GitHub", "score": -0.3010680790909261, "first_commit": "2022-03-25 17:15:42", "latest_commit": "2022-10-17 10:34:12", "languages": [], "model_or_dataset": null }, { "description": "Code to perform finetuning of the BERT model. BERTモデルのファインチューニングで固有表現抽出用タスクのモデルを作成・使用するサンプルです", "url": "https://github.com/ken11/bert-japanese-ner-finetuning", "project_name": "bert-japanese-ner-finetuning", "stargazers_count": 11, "source": "GitHub", "score": -0.3010680790909261, "first_commit": "2021-11-14 22:57:16", "latest_commit": "2022-06-19 16:24:15", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "script to evaluate pre-trained Japanese word2vec model on Japanese similarity dataset", "url": "https://github.com/shihono/evaluate_japanese_w2v", "project_name": "evaluate_japanese_w2v", "stargazers_count": 11, "source": "GitHub", "score": -0.3010680790909261, "first_commit": "2020-02-07 08:02:49", "latest_commit": "2023-11-03 21:09:04", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Example code for prefix-tuning GPT/GPT-NeoX models and for inference with trained prefixes", "url": "https://github.com/rinnakk/prefix-tuning-gpt", "project_name": "prefix-tuning-gpt", "stargazers_count": 11, "source": "GitHub", "score": -0.3010680790909261, "first_commit": "2022-09-05 16:56:23", "latest_commit": "2023-03-22 14:13:22", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "A curated list of resources dedicated to Python libraries of NLP for Japanese", "url": "https://github.com/ikegami-yukino/asa-python", "project_name": "asa-python", "stargazers_count": 11, "source": "GitHub", "score": -0.3010680790909261, "first_commit": "2017-09-13 22:02:47", "latest_commit": "2019-02-16 23:52:13", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Japanese Entity Linker.", "url": "https://github.com/izuna385/jel", "project_name": "jel", "stargazers_count": 11, "source": "GitHub", "score": -0.3010680790909261, "first_commit": "2021-05-01 15:53:49", "latest_commit": "2021-07-25 13:01:46", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "条件付確率場とベイズ階層言語モデルの統合による半教師あり形態素解析", "url": "https://github.com/musyoku/python-npycrf", "project_name": "python-npycrf", "stargazers_count": 11, "source": "GitHub", "score": -0.3010680790909261, "first_commit": "2017-07-01 18:00:52", "latest_commit": "2018-03-23 00:04:16", "languages": [ "C++", "Python" ], "model_or_dataset": null }, { "description": "About", "url": "https://github.com/shibuiwilliam/aozora_classification", "project_name": "aozora_classification", "stargazers_count": 11, "source": "GitHub", "score": -0.3010680790909261, "first_commit": "2017-03-05 12:30:50", "latest_commit": "2017-09-03 12:01:40", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "A Japanese law parser", "url": "https://github.com/takuyaa/ja-law-parser", "project_name": "ja-law-parser", "stargazers_count": 11, "source": "GitHub", "score": -0.3010680790909261, "first_commit": "2023-11-26 19:38:36", "latest_commit": "2024-01-14 19:59:45", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "最新の日本語Wikipediaのダンプデータから,MeCabを用いてIPA辞書と最新のNeologd辞書の両方で形態素解析を実施し,その結果に基づいた word2vec,fastText,GloVeの単語分散表現を学習するためのスクリプト", "url": "https://github.com/kamigaito/jawiki_word_vector_updater", "project_name": "jawiki_word_vector_updater", "stargazers_count": 11, "source": "GitHub", "score": -0.3010680790909261, "first_commit": "2019-02-13 16:18:58", "latest_commit": "2020-05-07 02:25:19", "languages": [], "model_or_dataset": "model" }, { "description": "Code to train Sentence BERT Japanese model for Hugging Face Model Hub", "url": "https://github.com/colorfulscoop/sbert-ja", "project_name": "sbert-ja", "stargazers_count": 11, "source": "GitHub", "score": -0.3010680790909261, "first_commit": "2021-07-31 01:11:17", "latest_commit": "2021-08-08 15:47:30", "languages": [ "Python" ], "model_or_dataset": "model" }, { "description": "This is a Japanese text corpus that consists of Wikipedia articles with various linguistic annotations.", "url": "https://github.com/ku-nlp/WikipediaAnnotatedCorpus", "project_name": "WikipediaAnnotatedCorpus", "stargazers_count": 11, "source": "GitHub", "score": -0.3010680790909261, "first_commit": "2022-09-20 18:33:20", "latest_commit": "2024-09-06 23:21:41", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "A processor for KyotoCorpus, KWDLC, and AnnotatedFKCCorpus", "url": "https://github.com/ku-nlp/kyoto-reader", "project_name": "kyoto-reader", "stargazers_count": 10, "source": "GitHub", "score": -0.3040387268665731, "first_commit": "2019-11-15 20:46:54", "latest_commit": "2024-06-26 12:45:20", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Zunda: Japanese Enhanced Modality Analyzer client for Python.", "url": "https://github.com/ikegami-yukino/zunda-python", "project_name": "zunda-python", "stargazers_count": 10, "source": "GitHub", "score": -0.3040387268665731, "first_commit": "2019-02-24 01:08:40", "latest_commit": "2019-11-30 18:44:15", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "wikipedia 日本語の文を、各種日本語の embeddings や faiss index へと変換するスクリプト等。", "url": "https://github.com/hotchpotch/wikipedia-passages-jawiki-embeddings-utils", "project_name": "wikipedia-passages-jawiki-embeddings-utils", "stargazers_count": 10, "source": "GitHub", "score": -0.3040387268665731, "first_commit": "2023-11-14 11:34:23", "latest_commit": "2024-03-29 08:18:44", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "Jisho is a CLI tool & Rust library that provides a Japanese-English dictionary.", "url": "https://github.com/eagleflo/jisho", "project_name": "jisho", "stargazers_count": 10, "source": "GitHub", "score": -0.3040387268665731, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null }, { "description": "Japanese version of LIWC2015", "url": "https://github.com/tasukuigarashi/j-liwc2015", "project_name": "j-liwc2015", "stargazers_count": 10, "source": "GitHub", "score": -0.3040387268665731, "first_commit": "2021-09-04 09:11:35", "latest_commit": "2022-11-15 15:37:27", "languages": [], "model_or_dataset": "dataset" }, { "description": "青空文庫及びサピエの点字データから作成した振り仮名のデータセット", "url": "https://github.com/ndl-lab/huriganacorpus-aozora", "project_name": "huriganacorpus-aozora", "stargazers_count": 10, "source": "GitHub", "score": -0.3040387268665731, "first_commit": "2021-08-31 16:37:20", "latest_commit": "2024-01-17 18:05:54", "languages": [], "model_or_dataset": "dataset" }, { "description": "ディープラーニングによる自然言語処理(共立出版)のサポートページです", "url": "https://github.com/python-nlp-book/python-nlp-book", "project_name": "python-nlp-book", "stargazers_count": 10, "source": "GitHub", "score": -0.3040387268665731, "first_commit": "2023-01-28 18:34:33", "latest_commit": "2023-05-07 23:55:27", "languages": [ "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "Easy wrapper for the postal code data of Japan", "url": "https://github.com/nagataaaas/Jusho", "project_name": "Jusho", "stargazers_count": 9, "source": "GitHub", "score": -0.30700937464222006, "first_commit": "2020-11-12 03:39:08", "latest_commit": "2024-06-04 16:03:08", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Comparison of Japanese Sentence Segmentation Tools", "url": "https://github.com/hkiyomaru/ja-senter-benchmark", "project_name": "ja-senter-benchmark", "stargazers_count": 9, "source": "GitHub", "score": -0.30700937464222006, "first_commit": "2023-02-10 12:58:42", "latest_commit": "2023-02-27 17:58:52", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "To investigate various DNN text classifiers including MLP, CNN, RNN, BERT approaches.", "url": "https://github.com/Masao-Taketani/japanese_text_classification", "project_name": "japanese_text_classification", "stargazers_count": 9, "source": "GitHub", "score": -0.30700937464222006, "first_commit": "2019-07-01 18:33:17", "latest_commit": "2020-01-15 19:12:53", "languages": [ "Jupyter Notebook", "Python" ], "model_or_dataset": null }, { "description": "EventGraph is a development platform for high-level NLP applications in Japanese.", "url": "https://github.com/ku-nlp/pyknp-eventgraph", "project_name": "pyknp-eventgraph", "stargazers_count": 9, "source": "GitHub", "score": -0.30700937464222006, "first_commit": "2019-11-21 21:46:46", "latest_commit": "2022-09-26 12:21:44", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "A summarizer for Japanese articles.", "url": "https://github.com/ryuryukke/japanese_summarizer", "project_name": "japanese_summarizer", "stargazers_count": 9, "source": "GitHub", "score": -0.30700937464222006, "first_commit": "2020-08-18 20:44:35", "latest_commit": "2022-08-01 20:28:21", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "WordCloudでの日本語文章をMecab(形態素解析エンジン)を使用せずに形態素解析チックな表示を実現するスクリプト", "url": "https://github.com/aocattleya/WordCloud-Japanese", "project_name": "WordCloud-Japanese", "stargazers_count": 9, "source": "GitHub", "score": -0.30700937464222006, "first_commit": "2019-05-11 13:59:53", "latest_commit": "2020-01-02 06:45:49", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "日本語ワードネットを利用したDBアクセスライブラリ", "url": "https://github.com/hiraokusky/snark", "project_name": "snark", "stargazers_count": 9, "source": "GitHub", "score": -0.30700937464222006, "first_commit": "2018-12-22 18:37:03", "latest_commit": "2020-03-11 22:01:51", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Japanese verb/adjective inflections tool", "url": "https://github.com/SmashinFries/PyKatsuyou", "project_name": "PyKatsuyou", "stargazers_count": 9, "source": "GitHub", "score": -0.30700937464222006, "first_commit": "2022-01-01 01:25:25", "latest_commit": "2023-09-04 17:26:36", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "JaSPICE: Automatic Evaluation Metric Using Predicate-Argument Structures for Image Captioning Models", "url": "https://github.com/keio-smilab23/JaSPICE", "project_name": "JaSPICE", "stargazers_count": 9, "source": "GitHub", "score": -0.30700937464222006, "first_commit": "2023-03-01 19:14:54", "latest_commit": "2023-11-08 21:16:53", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Japanese Spelling Error Corrector using BERT(Masked-Language Model). BERTに基づいて日本語校正", "url": "https://github.com/er-ri/bertjsc", "project_name": "bertjsc", "stargazers_count": 9, "source": "GitHub", "score": -0.30700937464222006, "first_commit": "2023-03-16 11:05:59", "latest_commit": "2024-08-03 12:15:17", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "A morphological analyzer using mecab dictionary", "url": "https://github.com/nakagami/awabi", "project_name": "awabi", "stargazers_count": 9, "source": "GitHub", "score": -0.30700937464222006, "first_commit": "2020-03-13 08:08:26", "latest_commit": "2024-07-01 16:35:36", "languages": [ "Rust" ], "model_or_dataset": null }, { "description": "This is a repository for the AI LAB article \"係り受けに基づく日本語単語埋込 (Dependency-based Japanese Word Embeddings)\" ( Article URL https://ai-lab.lapras.com/nlp/japanese-word-embedding/)", "url": "https://github.com/lapras-inc/dependency-based-japanese-word-embeddings", "project_name": "dependency-based-japanese-word-embeddings", "stargazers_count": 9, "source": "GitHub", "score": -0.30700937464222006, "first_commit": "2019-07-31 15:42:14", "latest_commit": "2019-08-14 11:39:35", "languages": [], "model_or_dataset": "model" }, { "description": "AMI Meeting Parallel Corpus", "url": "https://github.com/tsuruoka-lab/AMI-Meeting-Parallel-Corpus", "project_name": "AMI-Meeting-Parallel-Corpus", "stargazers_count": 9, "source": "GitHub", "score": -0.30700937464222006, "first_commit": "2020-12-11 14:22:19", "latest_commit": "2020-12-11 16:41:42", "languages": [], "model_or_dataset": "dataset" }, { "description": "JGLUE: Japanese General Language Understanding Evaluation for huggingface datasets", "url": "https://github.com/shunk031/huggingface-datasets_JGLUE", "project_name": "huggingface-datasets_JGLUE", "stargazers_count": 9, "source": "GitHub", "score": -0.30700937464222006, "first_commit": "2023-02-25 13:33:13", "latest_commit": "2024-05-21 11:23:51", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "Mecon Audio(Medical Conference Audio)は厚生労働省主催の先進医療会議の議事録の読み上げデータセットです。", "url": "https://github.com/elith-co-jp/meconaudio", "project_name": "meconaudio", "stargazers_count": 9, "source": "GitHub", "score": -0.30700937464222006, "first_commit": "2023-04-12 13:43:26", "latest_commit": "2023-10-26 08:40:27", "languages": [], "model_or_dataset": "dataset" }, { "description": "文法誤り訂正に関する日本語文献を収集・分類するためのリポジトリ", "url": "https://github.com/gotutiyan/GEC-Info-ja", "project_name": "GEC-Info-ja", "stargazers_count": 9, "source": "GitHub", "score": -0.30700937464222006, "first_commit": "2022-07-02 01:07:27", "latest_commit": "2024-03-27 01:15:21", "languages": [], "model_or_dataset": null }, { "description": "Python binding for Jagger(C++ implementation of Pattern-based Japanese Morphological Analyzer)", "url": "https://github.com/lighttransport/jagger-python", "project_name": "jagger-python", "stargazers_count": 8, "source": "GitHub", "score": -0.309980022417867, "first_commit": "2023-12-27 22:09:07", "latest_commit": "2024-03-12 02:15:59", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "日本語文を音素列へ変換するスクリプト", "url": "https://github.com/korguchi/text2phoneme", "project_name": "text2phoneme", "stargazers_count": 8, "source": "GitHub", "score": -0.309980022417867, "first_commit": "2023-04-25 22:53:08", "latest_commit": "2023-05-17 00:44:01", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Japanese version of DNorm", "url": "https://github.com/sociocom/DNorm-J", "project_name": "DNorm-J", "stargazers_count": 8, "source": "GitHub", "score": -0.309980022417867, "first_commit": "2020-05-07 13:47:43", "latest_commit": "2022-06-30 12:09:11", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "A Japanese Medical Information Extraction Toolkit", "url": "https://github.com/racerandom/JaMIE", "project_name": "JaMIE", "stargazers_count": 8, "source": "GitHub", "score": -0.309980022417867, "first_commit": "2019-10-09 11:44:21", "latest_commit": "2023-05-18 05:19:18", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Modifying LAVIS' BLIP2 Q-former with models pretrained on Japanese datasets.", "url": "https://github.com/ZhaoPeiduo/BLIP2-Japanese", "project_name": "BLIP2-Japanese", "stargazers_count": 8, "source": "GitHub", "score": -0.309980022417867, "first_commit": "2023-05-31 22:24:24", "latest_commit": "2024-01-16 08:54:50", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "Japanese / English Bilingual LLM", "url": "https://github.com/shisa-ai/shisa-v2", "project_name": "shisa-v2", "stargazers_count": 8, "source": "GitHub", "score": -0.309980022417867, "first_commit": "2024-03-29 15:12:36", "latest_commit": "2024-07-30 00:26:10", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "mecab-ipadic seed dictionary reader", "url": "https://github.com/takuyaa/mecab-ipadic-seed", "project_name": "mecab-ipadic-seed", "stargazers_count": 8, "source": "GitHub", "score": -0.309980022417867, "first_commit": "2016-07-18 01:26:14", "latest_commit": "2016-07-30 19:09:57", "languages": [ "JavaScript" ], "model_or_dataset": null }, { "description": "We pretrained a RoBERTa-based Japanese masked language model on paper abstracts from the academic database CiNii Articles.", "url": "https://github.com/EhimeNLP/AcademicRoBERTa", "project_name": "AcademicRoBERTa", "stargazers_count": 8, "source": "GitHub", "score": -0.309980022417867, "first_commit": "2022-09-05 15:58:57", "latest_commit": "2023-05-16 13:48:22", "languages": [ "Python" ], "model_or_dataset": "model" }, { "description": "open source mozc dictionaryをMeCab辞書のフォーマットに変換したものです。", "url": "https://github.com/akirakubo/mecab-mozcdic", "project_name": "mecab-mozcdic", "stargazers_count": 8, "source": "GitHub", "score": -0.309980022417867, "first_commit": "2017-07-21 20:37:42", "latest_commit": "2018-01-12 16:07:57", "languages": [], "model_or_dataset": "dataset" }, { "description": "Anthy is a kana-kanji conversion engine for Japanese. It converts roma-ji to kana, and the kana text to a mixed kana and kanji.", "url": "https://github.com/netsphere-labs/anthy", "project_name": "anthy", "stargazers_count": 8, "source": "GitHub", "score": -0.309980022417867, "first_commit": "2010-05-14 14:19:43", "latest_commit": "2023-02-25 21:37:47", "languages": [ "C", "C++" ], "model_or_dataset": "dataset" }, { "description": "A Japanese Corpus of Many Specialized Domains (JCMS)", "url": "https://github.com/shigashiyama/jcms", "project_name": "jcms", "stargazers_count": 8, "source": "GitHub", "score": -0.309980022417867, "first_commit": "2022-11-07 15:44:38", "latest_commit": "2022-11-07 16:40:03", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "NDL古典籍OCR学習用データセット(みんなで翻刻加工データ)", "url": "https://github.com/ndl-lab/ndl-minhon-ocrdataset", "project_name": "ndl-minhon-ocrdataset", "stargazers_count": 8, "source": "GitHub", "score": -0.309980022417867, "first_commit": "2023-01-19 15:22:00", "latest_commit": "2024-02-07 14:16:11", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "Word Familiarity Rate for 'Word List by Semantic Principles (WLSP)'", "url": "https://github.com/masayu-a/WLSP-familiarity", "project_name": "WLSP-familiarity", "stargazers_count": 8, "source": "GitHub", "score": -0.309980022417867, "first_commit": "2019-03-13 10:26:42", "latest_commit": "2024-06-30 15:05:18", "languages": [], "model_or_dataset": "dataset" }, { "description": "Japanese text parser to separate Hiragana/Katakana string into morae (syllables).", "url": "https://github.com/tachi-hi/jamorasep", "project_name": "jamorasep", "stargazers_count": 7, "source": "GitHub", "score": -0.31295067019351397, "first_commit": "2023-03-08 22:54:34", "latest_commit": "2023-09-09 01:14:14", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "NDLOCR用テキスト認識モジュール", "url": "https://github.com/ndl-lab/text_recognition", "project_name": "text_recognition", "stargazers_count": 7, "source": "GitHub", "score": -0.31295067019351397, "first_commit": "2022-03-30 13:32:26", "latest_commit": "2023-07-10 18:03:39", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Lookup japanese words by radical patterns", "url": "https://github.com/itayperl/kantan", "project_name": "kantan", "stargazers_count": 7, "source": "GitHub", "score": -0.31295067019351397, "first_commit": "2015-07-15 12:45:17", "latest_commit": "2022-07-20 20:10:04", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Generates plain or tokenized text files from the Aozora Bunko", "url": "https://github.com/borh/aozora-corpus-generator", "project_name": "aozora-corpus-generator", "stargazers_count": 7, "source": "GitHub", "score": -0.31295067019351397, "first_commit": "2017-10-09 16:11:26", "latest_commit": "2022-04-04 13:15:59", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Data Augmentation for Japanese Text on AugLy", "url": "https://github.com/chck/AugLy-jp", "project_name": "AugLy-jp", "stargazers_count": 7, "source": "GitHub", "score": -0.31295067019351397, "first_commit": "2021-06-13 18:48:45", "latest_commit": "2021-09-30 13:16:24", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "Juliusを使ったセグメンテーション支援ツール", "url": "https://github.com/Hiroshiba/julius4seg", "project_name": "julius4seg", "stargazers_count": 7, "source": "GitHub", "score": -0.31295067019351397, "first_commit": "2018-02-20 23:19:39", "latest_commit": "2021-08-22 18:57:51", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Sentiment Analysis in Japanese. sentiment_ja with JavaScript", "url": "https://github.com/otodn/sentiment_ja_js", "project_name": "sentiment_ja_js", "stargazers_count": 7, "source": "GitHub", "score": -0.31295067019351397, "first_commit": "2021-12-08 22:48:44", "latest_commit": "2021-12-10 00:56:29", "languages": [ "JavaScript" ], "model_or_dataset": null }, { "description": "Sudachi向け万病辞書", "url": "https://github.com/yagays/manbyo-sudachi", "project_name": "manbyo-sudachi", "stargazers_count": 7, "source": "GitHub", "score": -0.31295067019351397, "first_commit": "2021-04-05 15:59:36", "latest_commit": "2021-04-06 18:04:00", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "AWSサービス名のGoogle日本語入力向けの辞書", "url": "https://github.com/konyu/aws_dic_for_google_ime", "project_name": "aws_dic_for_google_ime", "stargazers_count": 7, "source": "GitHub", "score": -0.31295067019351397, "first_commit": "2019-11-24 03:47:55", "latest_commit": "2019-11-30 09:28:33", "languages": [ "JavaScript", "Ruby" ], "model_or_dataset": "dataset" }, { "description": "Allows Sudachi to normalize from hiragana to katakana from any compound word list", "url": "https://github.com/po3rin/hirakanadic", "project_name": "hirakanadic", "stargazers_count": 7, "source": "GitHub", "score": -0.31295067019351397, "first_commit": "2021-06-22 21:38:13", "latest_commit": "2023-07-08 17:22:15", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "Dataset for the LREC-COLING 2024 paper \"A Gaze-grounded Visual Question Answering Dataset for Clarifying Ambiguous Japanese Questions\"", "url": "https://github.com/riken-grp/GazeVQA", "project_name": "GazeVQA", "stargazers_count": 7, "source": "GitHub", "score": -0.31295067019351397, "first_commit": "2024-02-22 06:53:00", "latest_commit": "2024-05-20 11:17:25", "languages": [], "model_or_dataset": "dataset" }, { "description": "Google Colabで日本語テキスト推論を試す", "url": "https://github.com/verypluming/JapaneseNLI", "project_name": "JapaneseNLI", "stargazers_count": 7, "source": "GitHub", "score": -0.31295067019351397, "first_commit": "2020-03-10 22:43:24", "latest_commit": "2021-06-08 23:48:55", "languages": [ "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "Yet Another Japanese Dependency Structure Analyzer", "url": "https://github.com/ikegami-yukino/cabocha", "project_name": "cabocha", "stargazers_count": 6, "source": "GitHub", "score": -0.31592131796916095, "first_commit": "2011-07-29 04:08:14", "latest_commit": "2022-08-17 22:24:13", "languages": [ "C++", "Java", "Perl", "Python", "Ruby", "C" ], "model_or_dataset": null }, { "description": "Converts character span label information to tokenized text-based label information.", "url": "https://github.com/ken11/noyaki", "project_name": "noyaki", "stargazers_count": 6, "source": "GitHub", "score": -0.31592131796916095, "first_commit": "2021-05-23 18:41:17", "latest_commit": "2022-08-25 18:42:55", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "BERTによる日本語固有表現抽出のファインチューニング用プログラム", "url": "https://github.com/jurabiinc/bert-ner-japanese", "project_name": "bert-ner-japanese", "stargazers_count": 6, "source": "GitHub", "score": -0.31592131796916095, "first_commit": "2022-09-26 18:20:34", "latest_commit": "2022-09-26 21:44:38", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Masked Language Model-based Scoring for Japanese and Vietnamese", "url": "https://github.com/minhpqn/jmlm_scoring", "project_name": "jmlm_scoring", "stargazers_count": 6, "source": "GitHub", "score": -0.31592131796916095, "first_commit": "2021-08-13 09:18:33", "latest_commit": "2022-02-20 22:39:25", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Swallowプロジェクト 大規模言語モデル 評価スクリプト", "url": "https://github.com/swallow-llm/swallow-evaluation", "project_name": "swallow-evaluation", "stargazers_count": 6, "source": "GitHub", "score": -0.31592131796916095, "first_commit": "2023-10-30 14:34:19", "latest_commit": "2024-07-13 21:29:54", "languages": [ "Python", "Jupyter Notebook", "C++" ], "model_or_dataset": null }, { "description": "A PyTorch implementation of the Japanese Predicate-Argument Structure (PAS) analyser presented in the paper of Matsubayashi & Inui (2018) with some improvements.", "url": "https://github.com/cl-tohoku/showcase", "project_name": "showcase", "stargazers_count": 6, "source": "GitHub", "score": -0.31592131796916095, "first_commit": "2018-06-26 13:19:53", "latest_commit": "2018-06-26 16:53:20", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Feature annotator to morphemes and phrases based on KNP rule files (pure-Python)", "url": "https://github.com/megagonlabs/desuwa", "project_name": "desuwa", "stargazers_count": 6, "source": "GitHub", "score": -0.31592131796916095, "first_commit": "2021-04-20 16:37:43", "latest_commit": "2022-05-23 12:27:37", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "ChirpText is a collection of text processing tools for Python.", "url": "https://github.com/letuananh/chirptext", "project_name": "chirptext", "stargazers_count": 6, "source": "GitHub", "score": -0.31592131796916095, "first_commit": "2014-12-01 20:39:06", "latest_commit": "2022-10-04 21:57:00", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Mixture of Unigram Model and Infinite Mixture of Unigram Model in Python. (混合ユニグラムモデルと無限混合ユニグラムモデル)", "url": "https://github.com/KentoW/mixture-of-unigram-model", "project_name": "mixture-of-unigram-model", "stargazers_count": 6, "source": "GitHub", "score": -0.31592131796916095, "first_commit": "2016-01-05 15:33:24", "latest_commit": "2017-06-16 13:33:57", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Reproduced package based on Masked Language Model Scoring (ACL2020).", "url": "https://github.com/Ryutaro-A/mlm-scoring-transformers", "project_name": "mlm-scoring-transformers", "stargazers_count": 6, "source": "GitHub", "score": -0.31592131796916095, "first_commit": "2022-10-03 14:27:09", "latest_commit": "2022-12-14 20:07:24", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "A tool for Japanese-English translation and English-Japanese translation by using FuguMT", "url": "https://github.com/tos-kamiya/tra-fugu", "project_name": "tra-fugu", "stargazers_count": 6, "source": "GitHub", "score": -0.31592131796916095, "first_commit": "2023-02-28 21:41:49", "latest_commit": "2023-03-02 03:10:38", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Wikipediaの日本語記事を元に、ユーザの質問に回答するGradioベースのRAGのサンプル", "url": "https://github.com/lawofcycles/wikipedia-japanese-open-rag", "project_name": "wikipedia-japanese-open-rag", "stargazers_count": 6, "source": "GitHub", "score": -0.31592131796916095, "first_commit": "2023-12-28 07:55:40", "latest_commit": "2024-01-06 19:46:00", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "日本語音声に対して音素ラベルをアラインメントするためのツールです", "url": "https://github.com/dwangomediavillage/pydomino", "project_name": "pydomino", "stargazers_count": 6, "source": "GitHub", "score": -0.31592131796916095, "first_commit": "2024-05-27 10:52:07", "latest_commit": "2024-07-23 11:16:10", "languages": [ "JavaScript", "C++", "Python" ], "model_or_dataset": null }, { "description": "Magpieという手法とNemotron-4-340B-Instructを用いて合成対話データセットを作るコード", "url": "https://github.com/aratako/magpie-nemotron", "project_name": "magpie-nemotron", "stargazers_count": 6, "source": "GitHub", "score": -0.31592131796916095, "first_commit": "2024-07-05 21:07:54", "latest_commit": "2024-07-05 23:35:16", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "[Mirror] CLI program for transliterating romaji text to either hiragana or katakana", "url": "https://github.com/gbrlsnchs/kana", "project_name": "kana", "stargazers_count": 6, "source": "GitHub", "score": -0.31592131796916095, "first_commit": "2022-06-28 22:39:35", "latest_commit": "2023-02-10 19:03:27", "languages": [], "model_or_dataset": null }, { "description": "Japanese glossator for assisted reading of text using Ichiran", "url": "https://github.com/Netdex/niinii", "project_name": "niinii", "stargazers_count": 6, "source": "GitHub", "score": -0.31592131796916095, "first_commit": "2021-06-27 18:31:51", "latest_commit": "2024-08-11 11:54:57", "languages": [ "Rust" ], "model_or_dataset": null }, { "description": "AITuberの基礎となる部分を開発しています", "url": "https://github.com/M-gen/AITuberDegikkoMirii", "project_name": "AITuberDegikkoMirii", "stargazers_count": 6, "source": "GitHub", "score": -0.31592131796916095, "first_commit": "2023-03-07 21:35:01", "latest_commit": "2023-03-18 06:59:43", "languages": [ "C#" ], "model_or_dataset": null }, { "description": "Japanese Translation of Winograd Schema Challenge", "url": "https://github.com/ku-nlp/Winograd-Schema-Challenge-Ja", "project_name": "Winograd-Schema-Challenge-Ja", "stargazers_count": 6, "source": "GitHub", "score": -0.31592131796916095, "first_commit": "2019-01-25 19:14:29", "latest_commit": "2019-01-25 23:52:18", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "This is the corpus of Japanese Text that general policy speech of prime minister of Japan", "url": "https://github.com/yuukimiyo/GeneralPolicySpeechOfPrimeMinisterOfJapan", "project_name": "GeneralPolicySpeechOfPrimeMinisterOfJapan", "stargazers_count": 6, "source": "GitHub", "score": -0.31592131796916095, "first_commit": "2014-10-28 00:56:59", "latest_commit": "2020-01-14 09:53:30", "languages": [], "model_or_dataset": "dataset" }, { "description": "Dialogue Commonsense Graph in Japanese", "url": "https://github.com/nlp-waseda/dcsg-ja", "project_name": "dcsg-ja", "stargazers_count": 6, "source": "GitHub", "score": -0.31592131796916095, "first_commit": "2023-01-04 17:36:44", "latest_commit": "2023-03-10 17:29:26", "languages": [], "model_or_dataset": "dataset" }, { "description": "なんとかデータベース( https://supleks.jp/ )からのスクレイピングツールと収集データ", "url": "https://github.com/nuko-yokohama/ramendb", "project_name": "ramendb", "stargazers_count": 6, "source": "GitHub", "score": -0.31592131796916095, "first_commit": "2018-03-24 21:21:59", "latest_commit": "2024-08-06 10:47:04", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "Code for J-CRe3 experiments (Ueda et al., LREC-COLING, 2024)", "url": "https://github.com/riken-grp/J-CRe3", "project_name": "J-CRe3", "stargazers_count": 6, "source": "GitHub", "score": -0.31592131796916095, "first_commit": "2024-03-04 01:40:27", "latest_commit": "2024-06-14 18:24:05", "languages": [], "model_or_dataset": "dataset" }, { "description": "OCR処理プログラム研究開発事業において作成されたOCR学習用データセット", "url": "https://github.com/ndl-lab/pdmocrdataset-part2", "project_name": "pdmocrdataset-part2", "stargazers_count": 6, "source": "GitHub", "score": -0.31592131796916095, "first_commit": "2022-05-24 10:37:30", "latest_commit": "2024-06-26 16:10:28", "languages": [], "model_or_dataset": "dataset" }, { "description": "「自然言語処理の教科書」サポートサイト", "url": "https://github.com/mamorlis/nlpbook", "project_name": "nlpbook", "stargazers_count": 6, "source": "GitHub", "score": -0.31592131796916095, "first_commit": "2024-05-16 08:44:03", "latest_commit": "2024-07-03 14:25:21", "languages": [], "model_or_dataset": null }, { "description": "Japanese Tokenizer for transformers library", "url": "https://github.com/retarfi/jptranstokenizer", "project_name": "jptranstokenizer", "stargazers_count": 5, "source": "GitHub", "score": -0.31889196574480794, "first_commit": "2022-08-24 19:35:03", "latest_commit": "2024-02-03 03:07:59", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "How do different tokenizers perform on downstream tasks in scriptio continua languages?: A case study in Japanese-ACL SRW 2023", "url": "https://github.com/hitachi-nlp/compare-ja-tokenizer", "project_name": "compare-ja-tokenizer", "stargazers_count": 5, "source": "GitHub", "score": -0.31889196574480794, "first_commit": "2023-06-13 18:08:18", "latest_commit": "2023-06-16 10:18:55", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Japanese Wikipedia Cleaner", "url": "https://github.com/hppRC/jawiki-cleaner", "project_name": "jawiki-cleaner", "stargazers_count": 5, "source": "GitHub", "score": -0.31889196574480794, "first_commit": "2021-02-21 20:04:59", "latest_commit": "2021-02-21 21:41:44", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Hidden Markov Model (HMM) and Infinite Hidden Markov Model (iHMM) in Python. (隠れマルコフモデルと無限隠れマルコフモデル)", "url": "https://github.com/KentoW/hidden-markov-model", "project_name": "hidden-markov-model", "stargazers_count": 5, "source": "GitHub", "score": -0.31889196574480794, "first_commit": "2016-10-17 14:33:56", "latest_commit": "2017-06-16 13:33:24", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Ngram language model in Python. (Nグラム言語モデル)", "url": "https://github.com/KentoW/Ngram-language-model", "project_name": "Ngram-language-model", "stargazers_count": 5, "source": "GitHub", "score": -0.31889196574480794, "first_commit": "2017-12-05 14:45:11", "latest_commit": "2017-12-05 15:03:52", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Yet another Japanese IME for IBus/Linux", "url": "https://github.com/tokuhirom/akaza", "project_name": "akaza", "stargazers_count": 5, "source": "GitHub", "score": -0.31889196574480794, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null }, { "description": "Whisperのデコーダをllm-jp-1.3b-v1.0に置き換えた音声認識モデルを学習させるためのコード", "url": "https://github.com/tosiyuki/llm-jp-asr", "project_name": "llm-jp-asr", "stargazers_count": 5, "source": "GitHub", "score": -0.31889196574480794, "first_commit": "2024-09-07 22:07:43", "latest_commit": "2024-09-07 22:57:13", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "elmo-japanese", "url": "https://github.com/cl-tohoku/elmo-japanese", "project_name": "elmo-japanese", "stargazers_count": 5, "source": "GitHub", "score": -0.31889196574480794, "first_commit": "2019-10-01 12:16:29", "latest_commit": "2019-10-07 10:37:31", "languages": [ "Python" ], "model_or_dataset": "model" }, { "description": "日本語タスクにおけるLLMを用いた疑似学習データ生成の検討", "url": "https://github.com/retrieva/japagen", "project_name": "japagen", "stargazers_count": 5, "source": "GitHub", "score": -0.31889196574480794, "first_commit": "2024-01-12 10:53:54", "latest_commit": "2024-08-09 17:41:21", "languages": [], "model_or_dataset": null }, { "description": "This directory includes a giant Japanese-English subtitle corpus. The raw data comes from the Stanford’s JESC project.", "url": "https://github.com/DayuanJiang/giant_ja-en_parallel_corpus", "project_name": "giant_ja-en_parallel_corpus", "stargazers_count": 5, "source": "GitHub", "score": -0.31889196574480794, "first_commit": "2019-08-04 12:01:19", "latest_commit": "2019-08-04 17:40:02", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "This repository contains the Japanese–English bilingual chat corpus BPersona-chat published in the paper Chat Translation Error Detection for Assisting Cross-lingual Communications at AACL-IJCNLP 2022's Workshop Eval4NLP 2022.", "url": "https://github.com/cl-tohoku/BPersona-chat", "project_name": "BPersona-chat", "stargazers_count": 5, "source": "GitHub", "score": -0.31889196574480794, "first_commit": "2022-10-10 14:15:10", "latest_commit": "2023-01-12 17:39:24", "languages": [], "model_or_dataset": "dataset" }, { "description": "Chunked word annotation for ITA corpus", "url": "https://github.com/shirayu/ita-corpus-chuwa", "project_name": "ita-corpus-chuwa", "stargazers_count": 5, "source": "GitHub", "score": -0.31889196574480794, "first_commit": "2021-07-16 21:19:53", "latest_commit": "2021-08-25 12:22:06", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "A cross-linguistic study of pronoun substitutes and address terms", "url": "https://github.com/matbahasa/ProSub", "project_name": "ProSub", "stargazers_count": 5, "source": "GitHub", "score": -0.31889196574480794, "first_commit": "2021-09-12 18:55:13", "latest_commit": "2024-06-02 19:06:13", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "AllenNLP-NER-ja: AllenNLP による日本語を対象とした固有表現抽出", "url": "https://github.com/shunk031/allennlp-NER-ja", "project_name": "allennlp-NER-ja", "stargazers_count": 5, "source": "GitHub", "score": -0.31889196574480794, "first_commit": "2022-05-05 10:28:23", "latest_commit": "2022-05-10 00:52:35", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Experiment for Japanese Text classification using chariot and PyTorch", "url": "https://github.com/ymym3412/chariot-PyTorch-Japanese-text-classification", "project_name": "chariot-PyTorch-Japanese-text-classification", "stargazers_count": 5, "source": "GitHub", "score": -0.31889196574480794, "first_commit": "2019-03-02 15:04:41", "latest_commit": "2019-03-19 02:51:30", "languages": [ "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "A tool to perform sentence segmentation on Japanese text", "url": "https://github.com/mkartawijaya/hasami", "project_name": "hasami", "stargazers_count": 4, "source": "GitHub", "score": -0.32186261352045487, "first_commit": "2020-12-30 21:32:13", "latest_commit": "2021-02-21 14:39:11", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "A BERT model for nagisa", "url": "https://github.com/taishi-i/nagisa_bert", "project_name": "nagisa_bert", "stargazers_count": 4, "source": "GitHub", "score": -0.32186261352045487, "first_commit": "2022-09-26 03:45:52", "latest_commit": "2023-12-23 16:14:09", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "日本語文を絵文字だけの文に変換するなにか", "url": "https://github.com/mkan0141/toEmoji", "project_name": "toEmoji", "stargazers_count": 4, "source": "GitHub", "score": -0.32186261352045487, "first_commit": "2018-02-25 14:52:07", "latest_commit": "2018-04-16 00:59:20", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "A Japanese inflection converter", "url": "https://github.com/hkiyomaru/jinf", "project_name": "jinf", "stargazers_count": 4, "source": "GitHub", "score": -0.32186261352045487, "first_commit": "2022-02-03 17:59:01", "latest_commit": "2022-12-27 10:28:22", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "This repository provides the code for Japanese NLI model, a fine-tuned masked language model.", "url": "https://github.com/CyberAgentAILab/japanese-nli-model", "project_name": "japanese-nli-model", "stargazers_count": 4, "source": "GitHub", "score": -0.32186261352045487, "first_commit": "2022-10-26 17:42:42", "latest_commit": "2022-10-26 17:42:42", "languages": [ "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "Kanji transliteration to hiragana/katakana/romaji, in Go", "url": "https://github.com/sarumaj/go-kakasi", "project_name": "go-kakasi", "stargazers_count": 4, "source": "GitHub", "score": -0.32186261352045487, "first_commit": "2024-02-11 19:08:03", "latest_commit": "2024-03-03 18:23:56", "languages": [ "Go" ], "model_or_dataset": null }, { "description": "Japanese text normalizer for mecab-neologd", "url": "https://github.com/ikegami-yukino/neologdn-java", "project_name": "neologdn-java", "stargazers_count": 4, "source": "GitHub", "score": -0.32186261352045487, "first_commit": "2017-02-20 18:07:05", "latest_commit": "2021-10-11 22:35:59", "languages": [ "Java" ], "model_or_dataset": null }, { "description": "Azure OpenAIを活用したアプリケーション実装のリファレンスを目的として、アプリのサンプル(リファレンスアーキテクチャ、サンプルコードとデプロイ手順)を無償提供しています。", "url": "https://github.com/azure-samples/jp-azureopenai-samples", "project_name": "jp-azureopenai-samples", "stargazers_count": 4, "source": "GitHub", "score": -0.32186261352045487, "first_commit": "2023-06-21 21:27:33", "latest_commit": "2024-08-16 11:31:48", "languages": [ "Python", "JavaScript", "TypeScript" ], "model_or_dataset": null }, { "description": "A Japanese Kanji Flashcard App built using Python and Langchain, enhanced with the intelligence of GPT-4.", "url": "https://github.com/adilmoujahid/kanji-flashcard-app-gpt4", "project_name": "kanji-flashcard-app-gpt4", "stargazers_count": 4, "source": "GitHub", "score": -0.32186261352045487, "first_commit": "2023-10-17 23:33:19", "latest_commit": "2023-10-17 23:41:17", "languages": [ "Python", "JavaScript" ], "model_or_dataset": null }, { "description": "google-vs-deepl-je", "url": "https://github.com/Tzawa/google-vs-deepl-je", "project_name": "google-vs-deepl-je", "stargazers_count": 4, "source": "GitHub", "score": -0.32186261352045487, "first_commit": "2020-03-22 19:45:11", "latest_commit": "2020-03-22 23:27:00", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "Japanese-Wikipedia Wikification Corpus", "url": "https://github.com/wikiwikification/jawikicorpus", "project_name": "jawikicorpus", "stargazers_count": 4, "source": "GitHub", "score": -0.32186261352045487, "first_commit": "2018-04-05 01:07:48", "latest_commit": "2018-11-24 16:44:02", "languages": [], "model_or_dataset": "dataset" }, { "description": "WRIME for huggingface datasets", "url": "https://github.com/shunk031/huggingface-datasets_wrime", "project_name": "huggingface-datasets_wrime", "stargazers_count": 4, "source": "GitHub", "score": -0.32186261352045487, "first_commit": "2023-01-12 10:43:54", "latest_commit": "2023-01-15 12:39:01", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "日本語学習者のための科学技術業界でよく使われる片仮名と元の単語対照表", "url": "https://github.com/laoshubaby/japanese-technical-dict", "project_name": "japanese-technical-dict", "stargazers_count": 4, "source": "GitHub", "score": -0.32186261352045487, "first_commit": "2024-01-08 14:44:52", "latest_commit": "2024-06-18 00:54:03", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "Simple Python package (CLI/Python API) for getting japanese readings (yomigana) and accents using MeCab.", "url": "https://github.com/34j/mecab-text-cleaner", "project_name": "mecab-text-cleaner", "stargazers_count": 3, "source": "GitHub", "score": -0.32483326129610185, "first_commit": "2023-09-01 16:18:34", "latest_commit": "2024-03-29 00:06:20", "languages": [ "Python", "JavaScript" ], "model_or_dataset": null }, { "description": "Yet Another Fast Japanese String Converter", "url": "https://github.com/Hizuru3/python-habachen", "project_name": "python-habachen", "stargazers_count": 3, "source": "GitHub", "score": -0.32483326129610185, "first_commit": "2023-10-04 07:40:00", "latest_commit": "2024-01-21 10:29:31", "languages": [ "JavaScript", "Python", "Jupyter Notebook", "C" ], "model_or_dataset": null }, { "description": "Japanese Text Segmenter for Python written in Rust", "url": "https://github.com/alinear-corp/kuzukiri", "project_name": "kuzukiri", "stargazers_count": 3, "source": "GitHub", "score": -0.32483326129610185, "first_commit": "2021-11-20 00:05:51", "latest_commit": "2024-06-11 16:43:31", "languages": [ "Python", "Rust", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "Example codes for Japanese Realistic Textual Entailment Corpus", "url": "https://github.com/megagonlabs/jrte-corpus_example", "project_name": "jrte-corpus_example", "stargazers_count": 3, "source": "GitHub", "score": -0.32483326129610185, "first_commit": "2021-08-23 09:46:30", "latest_commit": "2021-11-19 13:11:47", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Code to perform finetuning of the mBART model.", "url": "https://github.com/ken11/mbart-finetuning", "project_name": "mbart-finetuning", "stargazers_count": 3, "source": "GitHub", "score": -0.32483326129610185, "first_commit": "2021-10-14 00:05:39", "latest_commit": "2021-10-14 00:16:10", "languages": [ "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "Twitter日本語評判分析データセットのためのツイートダウンローダ", "url": "https://github.com/tatHi/tweet_extructor", "project_name": "tweet_extructor", "stargazers_count": 3, "source": "GitHub", "score": -0.32483326129610185, "first_commit": "2018-06-20 16:23:11", "latest_commit": "2022-08-28 13:30:18", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "A PyTorch Implementation of japanese chatbot using BERT and Transformer's decoder", "url": "https://github.com/CjangCjengh/japanese_chatbot", "project_name": "japanese_chatbot", "stargazers_count": 3, "source": "GitHub", "score": -0.32483326129610185, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null }, { "description": "A state-of-the-art open-source Japanese <--> English machine translation system based on the latest NMT research.", "url": "https://github.com/matthewbieda/jp-translate.cloud", "project_name": "jp-translate.cloud", "stargazers_count": 3, "source": "GitHub", "score": -0.32483326129610185, "first_commit": "2022-02-21 12:23:42", "latest_commit": "2024-09-05 09:24:02", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Comparison code of various tokenizers", "url": "https://github.com/legalforce-research/tokenizer-speed-bench", "project_name": "tokenizer-speed-bench", "stargazers_count": 3, "source": "GitHub", "score": -0.32483326129610185, "first_commit": "2021-10-28 21:38:49", "latest_commit": "2023-03-01 14:07:29", "languages": [ "Rust", "Java", "Python", "JavaScript" ], "model_or_dataset": null }, { "description": "裁判例のデータ一覧を裁判所のホームページ(https://www.courts.go.jp/index.html) をスクレイピングして生成するソフトウェア", "url": "https://github.com/japanese-law-analysis/listup_precedent", "project_name": "listup_precedent", "stargazers_count": 3, "source": "GitHub", "score": -0.32483326129610185, "first_commit": "2023-01-15 08:01:44", "latest_commit": "2024-05-11 22:30:27", "languages": [ "Rust" ], "model_or_dataset": null }, { "description": "a User-Extensible Morphological Analyzer for Japanese. 日本語形態素解析システム", "url": "https://github.com/thammin/juman-bin", "project_name": "juman-bin", "stargazers_count": 3, "source": "GitHub", "score": -0.32483326129610185, "first_commit": "2017-05-11 13:18:41", "latest_commit": "2017-05-11 13:53:25", "languages": [ "JavaScript" ], "model_or_dataset": null }, { "description": "Well, a different Japanese word everyday.", "url": "https://github.com/LuanRT/Japanese-Word-Of-The-Day", "project_name": "Japanese-Word-Of-The-Day", "stargazers_count": 3, "source": "GitHub", "score": -0.32483326129610185, "first_commit": "2021-07-10 00:03:05", "latest_commit": "2021-08-11 03:03:38", "languages": [ "JavaScript" ], "model_or_dataset": null }, { "description": "Yet Another Japanese-Wikipedia Entity Vectors", "url": "https://github.com/wikiwikification/jawikivec", "project_name": "jawikivec", "stargazers_count": 3, "source": "GitHub", "score": -0.32483326129610185, "first_commit": "2018-06-04 01:22:54", "latest_commit": "2018-11-24 17:10:01", "languages": [], "model_or_dataset": "model" }, { "description": "MonsterHunterのユーザー辞書を作りたい…", "url": "https://github.com/utubo/mh-dict-jp", "project_name": "mh-dict-jp", "stargazers_count": 3, "source": "GitHub", "score": -0.32483326129610185, "first_commit": "2023-04-27 20:01:36", "latest_commit": "2023-11-27 13:46:48", "languages": [ "JavaScript" ], "model_or_dataset": "dataset" }, { "description": "Small Japanese-English Subtitle Corpus", "url": "https://github.com/yusugomori/jesc_small", "project_name": "jesc_small", "stargazers_count": 3, "source": "GitHub", "score": -0.32483326129610185, "first_commit": "2019-07-06 12:36:45", "latest_commit": "2019-07-06 12:49:24", "languages": [], "model_or_dataset": "dataset" }, { "description": "日本語の対話データ for seq2seq etc", "url": "https://github.com/MokkeMeguru/japanese-corpus", "project_name": "japanese-corpus", "stargazers_count": 3, "source": "GitHub", "score": -0.32483326129610185, "first_commit": "2018-08-19 04:18:55", "latest_commit": "2018-10-09 19:08:10", "languages": [], "model_or_dataset": "dataset" }, { "description": "An extension of the BSD corpus with audio and speaker attribute information", "url": "https://github.com/ku-nlp/speechBSD", "project_name": "speechBSD", "stargazers_count": 3, "source": "GitHub", "score": -0.32483326129610185, "first_commit": "2022-05-30 11:17:29", "latest_commit": "2024-02-07 22:15:37", "languages": [], "model_or_dataset": "dataset" }, { "description": "言語処理学会年次大会講演の全リスト・機械可読版など", "url": "https://github.com/whym/anlp-jp-history", "project_name": "anlp-jp-history", "stargazers_count": 3, "source": "GitHub", "score": -0.32483326129610185, "first_commit": "2015-08-12 17:40:32", "latest_commit": "2024-04-05 19:33:15", "languages": [ "Jupyter Notebook" ], "model_or_dataset": "dataset" }, { "description": "日本語の言語モデルのハンズオン資料です", "url": "https://github.com/hnishi/handson-language-models", "project_name": "handson-language-models", "stargazers_count": 3, "source": "GitHub", "score": -0.32483326129610185, "first_commit": "2021-03-13 15:29:37", "latest_commit": "2021-03-18 19:33:34", "languages": [ "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "Python binding for J.DepP(C++ implementation of Japanese Dependency Parsers)", "url": "https://github.com/lighttransport/jdepp-python", "project_name": "jdepp-python", "stargazers_count": 2, "source": "GitHub", "score": -0.32780390907174883, "first_commit": "2024-01-01 23:34:44", "latest_commit": "2024-02-14 22:09:31", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Convert japanese kana from ba-bi-bu-be-bo into va-vi-vu-ve-vo", "url": "https://github.com/eggplants/wiredify", "project_name": "wiredify", "stargazers_count": 2, "source": "GitHub", "score": -0.32780390907174883, "first_commit": "2023-09-03 09:17:14", "latest_commit": "2023-12-19 01:01:46", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Utsuho is a Python module that facilitates bidirectional conversion between half-width katakana and full-width katakana in Japanese.", "url": "https://github.com/juno-rmks/utsuho", "project_name": "utsuho", "stargazers_count": 2, "source": "GitHub", "score": -0.32780390907174883, "first_commit": "2023-03-27 11:07:55", "latest_commit": "2023-11-20 08:53:43", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Deploying sentiment analysis server with FastAPI and BERT", "url": "https://github.com/izuna385/Japanese-BERT-Sentiment-Analyzer", "project_name": "Japanese-BERT-Sentiment-Analyzer", "stargazers_count": 2, "source": "GitHub", "score": -0.32780390907174883, "first_commit": "2021-03-17 22:22:31", "latest_commit": "2021-04-20 00:41:29", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Ishi: A volition classifier for Japanese", "url": "https://github.com/ku-nlp/ishi", "project_name": "ishi", "stargazers_count": 2, "source": "GitHub", "score": -0.32780390907174883, "first_commit": "2019-12-21 17:24:25", "latest_commit": "2020-05-15 22:18:26", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "The official repository for \"UnihanLM: Coarse-to-Fine Chinese-Japanese Language Model Pretraining with the Unihan Database\", AACL-IJCNLP 2020", "url": "https://github.com/JetRunner/unihan-lm", "project_name": "unihan-lm", "stargazers_count": 2, "source": "GitHub", "score": -0.32780390907174883, "first_commit": "2020-09-14 16:41:10", "latest_commit": "2020-11-06 10:12:50", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Haystack + Elasticsearch + wikipedia(ja) を用いた、日本語の質問応答システムのサンプル", "url": "https://github.com/Shingo-Kamata/japanese_qa_demo_with_haystack_and_es", "project_name": "japanese_qa_demo_with_haystack_and_es", "stargazers_count": 2, "source": "GitHub", "score": -0.32780390907174883, "first_commit": "2022-12-18 19:21:06", "latest_commit": "2022-12-19 03:57:34", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Trimatch: An (Exact|Prefix|Approximate) String Matching Library", "url": "https://github.com/tuem/trimatch", "project_name": "trimatch", "stargazers_count": 2, "source": "GitHub", "score": -0.32780390907174883, "first_commit": "2021-08-30 17:26:15", "latest_commit": "2024-02-08 10:06:06", "languages": [ "C++" ], "model_or_dataset": null }, { "description": "A Tantivy tokenizer using Vibrato.", "url": "https://github.com/akr4/tantivy-vibrato", "project_name": "tantivy-vibrato", "stargazers_count": 2, "source": "GitHub", "score": -0.32780390907174883, "first_commit": "2022-08-25 13:31:30", "latest_commit": "2023-01-19 10:12:17", "languages": [ "Rust" ], "model_or_dataset": null }, { "description": "Finding all pairs of similar documents time- and memory-efficiently", "url": "https://github.com/legalforce-research/find-simdoc", "project_name": "find-simdoc", "stargazers_count": 2, "source": "GitHub", "score": -0.32780390907174883, "first_commit": "2022-08-31 12:56:10", "latest_commit": "2022-09-27 11:39:27", "languages": [ "Rust", "Python" ], "model_or_dataset": null }, { "description": "Here provides benchmark tools to compare the performance of data structures for string matching.", "url": "https://github.com/legalforce-research/stringmatch-bench", "project_name": "stringmatch-bench", "stargazers_count": 2, "source": "GitHub", "score": -0.32780390907174883, "first_commit": "2022-09-29 12:24:29", "latest_commit": "2022-09-30 11:36:55", "languages": [ "Python", "Rust" ], "model_or_dataset": null }, { "description": "Learn Japanese vocabs 🇯🇵 by doing quizzes on CLI!", "url": "https://github.com/tysonwu/japanki", "project_name": "japanki", "stargazers_count": 2, "source": "GitHub", "score": -0.32780390907174883, "first_commit": "2023-10-04 17:43:27", "latest_commit": "2023-10-17 01:00:00", "languages": [ "Rust" ], "model_or_dataset": null }, { "description": "Predict pitch accent in Japanese", "url": "https://github.com/shirakaba/pitch-accent", "project_name": "pitch-accent", "stargazers_count": 2, "source": "GitHub", "score": -0.32780390907174883, "first_commit": "2023-08-09 13:36:52", "latest_commit": "2023-09-08 08:19:43", "languages": [ "TypeScript", "Python", "JavaScript" ], "model_or_dataset": null }, { "description": "GPT-2 Japanese model for HuggingFace's transformers", "url": "https://github.com/colorfulscoop/gpt-ja", "project_name": "gpt-ja", "stargazers_count": 2, "source": "GitHub", "score": -0.32780390907174883, "first_commit": "2021-06-13 13:14:18", "latest_commit": "2021-09-27 21:08:39", "languages": [ "Python" ], "model_or_dataset": "model" }, { "description": "We pretrained a BART-based Japanese masked language model on paper abstracts from the academic database CiNii Articles", "url": "https://github.com/EhimeNLP/AcademicBART", "project_name": "AcademicBART", "stargazers_count": 2, "source": "GitHub", "score": -0.32780390907174883, "first_commit": "2023-01-11 16:19:21", "latest_commit": "2024-07-11 22:09:11", "languages": [ "Python" ], "model_or_dataset": "model" }, { "description": "電音IME: Microsoft IMEなどで利用することを想定した「電音部」関連用語の辞書", "url": "https://github.com/albno273/denonbu-ime-dic", "project_name": "denonbu-ime-dic", "stargazers_count": 2, "source": "GitHub", "score": -0.32780390907174883, "first_commit": "2019-12-30 01:37:16", "latest_commit": "2022-11-13 23:09:25", "languages": [], "model_or_dataset": "dataset" }, { "description": "Anthy maintenance", "url": "https://github.com/xorgy/anthy", "project_name": "anthy", "stargazers_count": 2, "source": "GitHub", "score": -0.32780390907174883, "first_commit": "2013-06-30 11:09:24", "latest_commit": "2013-07-27 22:45:26", "languages": [ "C" ], "model_or_dataset": "dataset" }, { "description": "日本の国会議員のデータ", "url": "https://github.com/sugi2000/Data-on-Japanese-Diet-Members", "project_name": "Data-on-Japanese-Diet-Members", "stargazers_count": 2, "source": "GitHub", "score": -0.32780390907174883, "first_commit": "2022-07-02 22:58:11", "latest_commit": "2022-09-29 13:37:11", "languages": [], "model_or_dataset": "dataset" }, { "description": "2023年1月にリニューアルしたNDL Ngram Viewerのソースコード等一式", "url": "https://github.com/ndl-lab/ndlngramviewer_v2", "project_name": "ndlngramviewer_v2", "stargazers_count": 2, "source": "GitHub", "score": -0.32780390907174883, "first_commit": "2022-12-26 13:29:58", "latest_commit": "2023-07-20 11:05:53", "languages": [ "Java", "TypeScript", "JavaScript", "Jupyter Notebook", "C++" ], "model_or_dataset": "dataset" }, { "description": "Japanese Livedoor news corpus for huggingface datasets", "url": "https://github.com/shunk031/huggingface-datasets_livedoor-news-corpus", "project_name": "huggingface-datasets_livedoor-news-corpus", "stargazers_count": 2, "source": "GitHub", "score": -0.32780390907174883, "first_commit": "2023-01-17 23:16:18", "latest_commit": "2023-10-28 14:40:17", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "jpn_explainable_qa_dataset", "url": "https://github.com/aiishii/jpn_explainable_qa_dataset", "project_name": "jpn_explainable_qa_dataset", "stargazers_count": 2, "source": "GitHub", "score": -0.32780390907174883, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset" }, { "description": "JEMHopQA (Japanese Explainable Multi-hop Question Answering) is a Japanese multi-hop QA dataset that can evaluate internal reasoning.", "url": "https://github.com/aiishii/jemhopqa", "project_name": "jemhopqa", "stargazers_count": 2, "source": "GitHub", "score": -0.32780390907174883, "first_commit": "2023-08-01 02:07:45", "latest_commit": "2024-06-05 14:54:08", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "Dataset of paper \"Verification of Chain-of-Thought Prompting in Japanese\"", "url": "https://github.com/nlp-waseda/chain-of-thought-ja-dataset", "project_name": "chain-of-thought-ja-dataset", "stargazers_count": 2, "source": "GitHub", "score": -0.32780390907174883, "first_commit": "2023-03-03 13:17:07", "latest_commit": "2023-09-14 10:14:39", "languages": [], "model_or_dataset": "dataset" }, { "description": "【2024年版】BERTによるテキスト分類", "url": "https://github.com/hpprc/bert-classification-tutorial-2024", "project_name": "bert-classification-tutorial-2024", "stargazers_count": 2, "source": "GitHub", "score": -0.32780390907174883, "first_commit": "2024-05-28 10:50:41", "latest_commit": "2024-07-08 17:10:46", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Fine-Tuning Google Gemma for Japanese Instructions", "url": "https://github.com/qianniu95/gemma2_2b_finetune_jp_tutorial/blob/main/Gemma2_2b_Japanese_finetuning_colab.ipynb", "project_name": "Gemma2_2b_Japanese_finetuning_colab.ipynb", "stargazers_count": 2, "source": "GitHub", "score": -0.32780390907174883, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null }, { "description": "詳細なアノテーション基準に基づく症例報告コーパスからの固有表現及び関係の抽出精度の推論を行うコード", "url": "https://github.com/aih-uth/joint-information-extraction-hs", "project_name": "joint-information-extraction-hs", "stargazers_count": 1, "source": "GitHub", "score": -0.3307745568473958, "first_commit": "2021-11-05 12:32:31", "latest_commit": "2021-11-17 12:29:39", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "T5による会話生成", "url": "https://github.com/Jinyamyzk/t5_japanese_dialogue_generation", "project_name": "t5_japanese_dialogue_generation", "stargazers_count": 1, "source": "GitHub", "score": -0.3307745568473958, "first_commit": "2021-11-19 18:32:46", "latest_commit": "2021-11-28 10:48:04", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "A framework for few-shot evaluation of autoregressive language models.", "url": "https://github.com/tdc-yamada-ya/lm-evaluation-harness-jp-stable", "project_name": "lm-evaluation-harness-jp-stable", "stargazers_count": 1, "source": "GitHub", "score": -0.3307745568473958, "first_commit": "2020-08-27 18:08:04", "latest_commit": "2023-06-19 10:48:40", "languages": [ "Python", "C++" ], "model_or_dataset": null }, { "description": "A python library to convert Japanese to phoneme.", "url": "https://github.com/iory/japanese2phoneme", "project_name": "japanese2phoneme", "stargazers_count": 1, "source": "GitHub", "score": -0.3307745568473958, "first_commit": "2021-02-09 16:27:24", "latest_commit": "2022-02-24 16:11:04", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "This repository contains codes related to the experiments in \"An Experimental Evaluation of Japanese Tokenizers for Sentiment-Based Text Classification\"", "url": "https://github.com/arusl/anlp_nlp2021_d3-1", "project_name": "anlp_nlp2021_d3-1", "stargazers_count": 1, "source": "GitHub", "score": -0.3307745568473958, "first_commit": "2021-03-12 02:34:47", "latest_commit": "2022-03-08 13:40:28", "languages": [ "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "Japanese-Dialog-Transformerの応答候補に対して、KenLMによるN-gram言語モデルでスコアリングし、フィルタリング若しくはリランキングを行う。", "url": "https://github.com/TUT-SLP-lab/JDT-with-KenLM-scoring", "project_name": "JDT-with-KenLM-scoring", "stargazers_count": 1, "source": "GitHub", "score": -0.3307745568473958, "first_commit": "2022-06-28 09:22:47", "latest_commit": "2022-07-01 21:29:00", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Aggregating Japanese words based on Juman++ and ConceptNet5.5", "url": "https://github.com/hkiyomaru/japanese-word-aggregation", "project_name": "japanese-word-aggregation", "stargazers_count": 1, "source": "GitHub", "score": -0.3307745568473958, "first_commit": "2018-08-08 13:39:47", "latest_commit": "2018-08-20 07:51:01", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "[PyTorch] Show, Attend and Tell for Japanese", "url": "https://github.com/Japanese-Image-Captioning/SAT-for-Japanese", "project_name": "SAT-for-Japanese", "stargazers_count": 1, "source": "GitHub", "score": -0.3307745568473958, "first_commit": "2022-10-04 23:48:47", "latest_commit": "2022-10-05 00:40:09", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "A repo for evaluating Japanese LLMs ・ 日本語LLMを評価するレポ", "url": "https://github.com/lightblue-tech/japanese_llm_eval", "project_name": "japanese_llm_eval", "stargazers_count": 1, "source": "GitHub", "score": -0.3307745568473958, "first_commit": "2024-03-28 14:56:01", "latest_commit": "2024-04-22 07:39:03", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "日本語データセットでのqlora instruction tuning学習サンプルコード", "url": "https://github.com/sosuke115/qlora_ja", "project_name": "qlora_ja", "stargazers_count": 1, "source": "GitHub", "score": -0.3307745568473958, "first_commit": "2023-09-10 15:04:15", "latest_commit": "2024-07-13 10:01:12", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "A Japanese Parser (including historical Japanese)", "url": "https://github.com/komiya-lab/monaka", "project_name": "monaka", "stargazers_count": 1, "source": "GitHub", "score": -0.3307745568473958, "first_commit": "2024-01-24 10:26:06", "latest_commit": "2024-07-16 14:16:00", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "日本の住所を都道府県/市区町村/町名/その他に分割するライブラリです", "url": "https://github.com/yuukitoriyama/japanese-address-parser", "project_name": "japanese-address-parser", "stargazers_count": 1, "source": "GitHub", "score": -0.3307745568473958, "first_commit": "2023-11-17 23:16:38", "latest_commit": "2024-08-12 18:23:09", "languages": [ "Rust", "JavaScript" ], "model_or_dataset": null }, { "description": "MT model trained using the friendly_JA Corpus attempting to make Japanese easier/more accessible to occidental people by using the Latin/English derived katakana lexicon instead of the standard Sino-Japanese lexicon", "url": "https://github.com/astremo/friendly_JA-Model", "project_name": "friendly_JA-Model", "stargazers_count": 1, "source": "GitHub", "score": -0.3307745568473958, "first_commit": "2022-01-10 12:03:59", "latest_commit": "2022-05-22 14:42:46", "languages": [], "model_or_dataset": "model" }, { "description": "GUI for ChatGPT API For Japanese", "url": "https://github.com/gyokuro33/ChuanhuChatGPTJapanese", "project_name": "ChuanhuChatGPTJapanese", "stargazers_count": 1, "source": "GitHub", "score": -0.3307745568473958, "first_commit": "2023-03-02 21:37:13", "latest_commit": "2023-03-07 14:10:10", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "This Chrome extension can translate selected Japanese text to Hiragana by using ChatGPT.", "url": "https://github.com/franzwong/chrome-ext-translate-to-hiragana-with-chatgpt", "project_name": "chrome-ext-translate-to-hiragana-with-chatgpt", "stargazers_count": 1, "source": "GitHub", "score": -0.3307745568473958, "first_commit": "2023-03-25 20:09:31", "latest_commit": "2023-04-01 16:05:53", "languages": [ "JavaScript" ], "model_or_dataset": null }, { "description": "ChatVRMはブラウザで簡単に3Dキャラクターと会話ができるデモアプリケーションです。", "url": "https://github.com/pixiv/chatvrm", "project_name": "chatvrm", "stargazers_count": 1, "source": "GitHub", "score": -0.3307745568473958, "first_commit": "2023-04-28 17:25:29", "latest_commit": "2024-07-18 13:49:25", "languages": [ "TypeScript", "JavaScript" ], "model_or_dataset": null }, { "description": "A Chrome extention to replace the selected text softly", "url": "https://github.com/kmizu/sftly-replace", "project_name": "sftly-replace", "stargazers_count": 1, "source": "GitHub", "score": -0.3307745568473958, "first_commit": "2023-05-04 20:51:22", "latest_commit": "2023-05-24 02:03:16", "languages": [ "JavaScript" ], "model_or_dataset": null }, { "description": "This repository contains source dictionary files to build dictionaries for JUMAN and Juman++.", "url": "https://github.com/ku-nlp/JumanDIC", "project_name": "JumanDIC", "stargazers_count": 1, "source": "GitHub", "score": -0.3307745568473958, "first_commit": "2015-12-07 17:42:10", "latest_commit": "2022-08-18 19:01:36", "languages": [ "Python", "Ruby", "Perl" ], "model_or_dataset": "dataset" }, { "description": "うちなーぐち辞典(沖縄語辞典)", "url": "https://github.com/nanjakkun/uchinaaguchi_dict", "project_name": "uchinaaguchi_dict", "stargazers_count": 1, "source": "GitHub", "score": -0.3307745568473958, "first_commit": "2024-03-12 06:53:05", "latest_commit": "2024-08-12 17:05:46", "languages": [ "Ruby", "TypeScript" ], "model_or_dataset": "dataset" }, { "description": "日本語の読みから Emoji に変換するための SKK 辞書 😂", "url": "https://github.com/ymrl/skk-jisyo.emoji-ja", "project_name": "skk-jisyo.emoji-ja", "stargazers_count": 1, "source": "GitHub", "score": -0.3307745568473958, "first_commit": "2018-03-13 01:04:56", "latest_commit": "2018-03-13 02:01:32", "languages": [ "Ruby" ], "model_or_dataset": "dataset" }, { "description": "車両不具合情報に関するデータセット", "url": "https://github.com/rindybell/kokkosho_data", "project_name": "kokkosho_data", "stargazers_count": 1, "source": "GitHub", "score": -0.3307745568473958, "first_commit": "2017-04-16 15:04:26", "latest_commit": "2019-07-09 23:36:27", "languages": [], "model_or_dataset": "dataset" }, { "description": "ISBN-13における日本語での出版物 (978-4-XXXXXXXXX) に関するデータ等", "url": "https://github.com/uribo/isbn4groups", "project_name": "isbn4groups", "stargazers_count": 1, "source": "GitHub", "score": -0.3307745568473958, "first_commit": "2022-08-03 16:31:28", "latest_commit": "2024-06-25 14:11:40", "languages": [], "model_or_dataset": "dataset" }, { "description": "Repository for Japanese Document-level Relation Extraction Dataset (plan to be released in March).", "url": "https://github.com/youmima/jacred", "project_name": "jacred", "stargazers_count": 1, "source": "GitHub", "score": -0.3307745568473958, "first_commit": "2024-01-11 16:26:25", "latest_commit": "2024-03-08 17:58:20", "languages": [], "model_or_dataset": "dataset" }, { "description": "JADES is a dataset for text simplification in Japanese, described in \"JADES: New Text Simplification Dataset in Japanese Targeted at Non-Native Speakers\" (the paper will be available soon).", "url": "https://github.com/naist-nlp/jades", "project_name": "jades", "stargazers_count": 1, "source": "GitHub", "score": -0.3307745568473958, "first_commit": "2022-10-15 09:32:35", "latest_commit": "2022-12-13 15:57:29", "languages": [], "model_or_dataset": "dataset" }, { "description": "日本語情報検索チュートリアル", "url": "https://github.com/mpkato/japanese-ir-tutorial", "project_name": "japanese-ir-tutorial", "stargazers_count": 1, "source": "GitHub", "score": -0.3307745568473958, "first_commit": "2024-04-29 10:52:11", "latest_commit": "2024-06-05 18:56:44", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "数量表現や時間表現の抽出・正規化を行うNormalizeNumexpのPython実装", "url": "https://github.com/tkscode/pynormalizenumexp", "project_name": "pynormalizenumexp", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2021-10-11 21:02:26", "latest_commit": "2024-04-28 19:55:49", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "OCR system for recognizing modern Japanese magazines", "url": "https://github.com/ducanh841988/Kindai-OCR", "project_name": "Kindai-OCR", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2020-07-08 10:12:27", "latest_commit": "2023-07-12 12:14:52", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Training and evaluation scripts for JGLUE, a Japanese language understanding benchmark", "url": "https://github.com/nobu-g/JGLUE-benchmark", "project_name": "JGLUE-benchmark", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2023-03-18 01:19:37", "latest_commit": "2024-08-09 18:31:33", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Japanese Address Munger", "url": "https://github.com/alvations/yubin", "project_name": "yubin", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2019-10-28 07:11:54", "latest_commit": "2019-10-28 07:20:26", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Automatic Speech Recognition with deepspeech2 model in pytorch with support from Zakuro AI.", "url": "https://github.com/JeanMaximilienCadic/ASRDeepSpeech", "project_name": "ASRDeepSpeech", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2020-03-03 15:08:25", "latest_commit": "2022-09-26 00:11:29", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "部首文字正規化ライブラリ", "url": "https://github.com/yamamaya/radicalchar", "project_name": "radicalchar", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2022-11-29 19:17:38", "latest_commit": "2022-12-30 01:40:44", "languages": [ "Python", "C#", "JavaScript" ], "model_or_dataset": null }, { "description": "A Japanese text frontend processing toolkit", "url": "https://github.com/faruzan0820/natsume", "project_name": "natsume", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": null }, { "description": "Open source RAG with Llama Index for Japanese LLM in low resource settting", "url": "https://github.com/AkimParis/RAG-Japanese", "project_name": "RAG-Japanese", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2023-11-14 14:06:31", "latest_commit": "2023-11-29 19:47:20", "languages": [ "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "About", "url": "https://github.com/nobu-g/jglue-evaluation-scripts", "project_name": "jglue-evaluation-scripts", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2023-03-18 01:19:37", "latest_commit": "2024-08-09 18:31:33", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Open source RAG with Llama Index for Japanese LLM in low resource settting", "url": "https://github.com/akimfromparis/rag-japanese", "project_name": "rag-japanese", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2023-11-14 14:06:31", "latest_commit": "2023-11-29 19:47:20", "languages": [ "Jupyter Notebook" ], "model_or_dataset": null }, { "description": "連続部分文字列の単語判定を行います", "url": "https://github.com/toufu-24/substring-word-finder", "project_name": "substring-word-finder", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2023-05-08 01:40:50", "latest_commit": "2024-09-15 02:45:58", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "This project is a benchmarking tool for evaluating and comparing the performance of various Vision Language Models (VLMs). It uses two datasets: LLaVA-Bench-In-the-Wild and Japanese HERON Bench to measure model performance.", "url": "https://github.com/wandb/heron-vlm-leaderboard", "project_name": "heron-vlm-leaderboard", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2023-08-22 20:07:55", "latest_commit": "2024-09-29 02:35:38", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "Extend GNOME On-Screen Keyboard for Input Methods", "url": "https://github.com/esrille/oskim", "project_name": "oskim", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2023-02-24 15:08:36", "latest_commit": "2023-02-24 15:43:20", "languages": [ "JavaScript", "Python" ], "model_or_dataset": null }, { "description": "形態素解析器性能評価システム MevAL", "url": "https://github.com/teru-oka-1933/meval", "project_name": "meval", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2016-12-01 20:14:08", "latest_commit": "2019-08-14 08:37:48", "languages": [ "Java" ], "model_or_dataset": null }, { "description": "青空文庫全書籍のWord2Vecビルダー+構築済みモデル", "url": "https://github.com/eggplants/aovec", "project_name": "aovec", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2021-06-22 16:51:15", "latest_commit": "2023-02-01 02:27:34", "languages": [ "Python" ], "model_or_dataset": "model" }, { "description": "Official fine-tuning code for \"Emotion Analysis of Japanese Tweets and Comparison to Vaccinations in Japan\"", "url": "https://github.com/PatrickJohnRamos/BERT-Japan-vaccination", "project_name": "BERT-Japan-vaccination", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2022-04-05 16:46:38", "latest_commit": "2022-05-23 00:47:45", "languages": [ "Jupyter Notebook" ], "model_or_dataset": "model" }, { "description": "ChatGPTを使ってVRChat上でお喋り出来るようにするプログラム。", "url": "https://github.com/Yuchi-Games/VRChatGPT", "project_name": "VRChatGPT", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2023-03-21 19:53:54", "latest_commit": "2023-03-22 21:04:37", "languages": [ "Python" ], "model_or_dataset": null }, { "description": "ChatGPT の Prompt のサンプルです。", "url": "https://github.com/dahatake/chatgpt-prompt-sample-japanese", "project_name": "chatgpt-prompt-sample-japanese", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2023-04-13 14:22:50", "latest_commit": "2024-08-09 15:15:17", "languages": [], "model_or_dataset": null }, { "description": "Microsoft IMEなどで利用することを想定した、現状判明している全てのポケモンの名前を網羅した用語辞書です。", "url": "https://github.com/Umichang/pokemon-ime-dic", "project_name": "pokemon-ime-dic", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2020-01-10 23:13:19", "latest_commit": "2020-01-10 23:25:48", "languages": [], "model_or_dataset": "dataset" }, { "description": "Convert data from Japanese dictionary websites and applications into portable file formats", "url": "https://github.com/stephenmk/jitenbot", "project_name": "jitenbot", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset" }, { "description": "Japanese Input Method \"azooKey\" for Desktop, supporting macOS", "url": "https://github.com/ensan-hcl/azookey-desktop", "project_name": "azookey-desktop", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2021-09-07 21:50:12", "latest_commit": "2024-08-13 22:08:17", "languages": [ "Swift" ], "model_or_dataset": "dataset" }, { "description": "Kana-Kanji Conversion Module written in Swift", "url": "https://github.com/ensan-hcl/azookeykanakanjiconverter", "project_name": "azookeykanakanjiconverter", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2023-05-28 11:51:25", "latest_commit": "2024-08-11 00:53:16", "languages": [ "Swift" ], "model_or_dataset": "dataset" }, { "description": "openjtalk形式のユーザー辞書", "url": "https://github.com/warihima/kanayomi-dict", "project_name": "kanayomi-dict", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset" }, { "description": "漢字データベースの辞書関連データ", "url": "https://github.com/cjkvi/cjkvi-dict", "project_name": "cjkvi-dict", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2011-01-28 01:11:01", "latest_commit": "2017-09-20 21:50:09", "languages": [], "model_or_dataset": "dataset" }, { "description": "Parallel Universal Dependencies.", "url": "https://github.com/megagonlabs/UD_Japanese-PUD", "project_name": "UD_Japanese-PUD", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2017-05-23 10:31:45", "latest_commit": "2020-05-16 10:57:47", "languages": [], "model_or_dataset": "dataset" }, { "description": "禁止用語や単語レベルを考慮した日英対訳コーパスです。", "url": "https://github.com/marmooo/graded-enja-corpus", "project_name": "graded-enja-corpus", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2021-06-05 11:55:23", "latest_commit": "2023-03-14 00:24:51", "languages": [ "JavaScript" ], "model_or_dataset": "dataset" }, { "description": "日本語Wikipediaで使用される頻出単語のリスト", "url": "https://github.com/maeda6uiui-backup/WikipediaWordFrequencyList", "project_name": "WikipediaWordFrequencyList", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2022-04-17 16:35:32", "latest_commit": "2022-04-17 16:44:19", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "friendly_JA is a parallel Japanese-to-Japanese corpus aimed at making Japanese easier by using the Latin/English derived katakana lexicon instead of the standard Sino-Japanese lexicon", "url": "https://github.com/astremo/friendly_JA-Corpus", "project_name": "friendly_JA-Corpus", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset" }, { "description": "COPA Dataset in Japanese", "url": "https://github.com/nlp-titech/copa-japanese", "project_name": "copa-japanese", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2023-01-13 09:04:08", "latest_commit": "2023-02-24 11:28:31", "languages": [], "model_or_dataset": "dataset" }, { "description": "CAMERA (CyberAgent Multimodal Evaluation for Ad Text GeneRAtion) for huggingface datasets", "url": "https://github.com/shunk031/huggingface-datasets_CAMERA", "project_name": "huggingface-datasets_CAMERA", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2023-03-17 23:02:32", "latest_commit": "2023-03-17 23:49:35", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "FactCheckSentenceNLIデータセット", "url": "https://github.com/nlp-waseda/FactCheckSentenceNLI-FCSNLI-", "project_name": "FactCheckSentenceNLI-FCSNLI-", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2021-02-26 14:08:54", "latest_commit": "2021-03-03 11:15:47", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "EaST-MELD is an English-Japanese dataset for emotion-aware speech translation based on MELD.", "url": "https://github.com/ku-nlp/EaST-MELD", "project_name": "EaST-MELD", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2023-04-12 00:16:46", "latest_commit": "2023-06-23 11:09:20", "languages": [], "model_or_dataset": "dataset" }, { "description": "Construct large-scale Japanese audio corpus at home", "url": "https://github.com/reazon-research/reazonspeech", "project_name": "reazonspeech", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2022-10-19 10:08:01", "latest_commit": "2024-08-01 17:38:15", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": "dataset" }, { "description": "Dataset of UniMorph in Japanese", "url": "https://github.com/cl-tohoku/j-unimorph", "project_name": "j-unimorph", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2024-01-10 20:05:15", "latest_commit": "2024-05-12 20:42:38", "languages": [ "JavaScript" ], "model_or_dataset": "dataset" }, { "description": "JMED-LLM: Japanese Medical Evaluation Dataset for Large Language Models", "url": "https://github.com/sociocom/jmed-llm", "project_name": "jmed-llm", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2024-07-11 20:01:36", "latest_commit": "2024-08-10 08:49:25", "languages": [ "Python" ], "model_or_dataset": "dataset" }, { "description": "Plain text format for Japanese law", "url": "https://github.com/yamachig/lawtext", "project_name": "lawtext", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2017-12-06 23:09:02", "latest_commit": "2024-08-10 23:37:12", "languages": [ "TypeScript", "JavaScript" ], "model_or_dataset": "dataset" }, { "description": "話題に基づく語義曖昧性解消評価セット", "url": "https://github.com/nut-jnlp/japanesetopicwsd", "project_name": "japanesetopicwsd", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2018-09-10 16:40:34", "latest_commit": "2018-09-27 13:45:10", "languages": [], "model_or_dataset": "dataset" }, { "description": "Jamp: Controlled Japanese Temporal Inference Dataset for Evaluating Generalization Capacity of Language Models", "url": "https://github.com/tomo-vv/temporalNLI_dataset", "project_name": "temporalNLI_dataset", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2022-05-13 20:55:04", "latest_commit": "2023-07-22 20:27:45", "languages": [], "model_or_dataset": "dataset" }, { "description": "e-Gov 法令検索で配布されている「全ての法令データ」を定期的にダウンロードし、アーカイブしています", "url": "https://github.com/kissge/elaws-history", "project_name": "elaws-history", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": null, "latest_commit": null, "languages": [], "model_or_dataset": "dataset" }, { "description": "Japanese-RP-BenchはLLMの日本語ロールプレイ能力を測定するためのベンチマークです。", "url": "https://github.com/Aratako/Japanese-RP-Bench", "project_name": "Japanese-RP-Bench", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2024-09-21 18:07:41", "latest_commit": "2024-09-29 14:57:44", "languages": [ "Python", "Jupyter Notebook" ], "model_or_dataset": "dataset" }, { "description": "環境構築手順とソースコード", "url": "https://github.com/hiroshi-matsuda-rit/nlp2024-tutorial-3", "project_name": "nlp2024-tutorial-3", "stargazers_count": 0, "source": "GitHub", "score": -0.33374520462304275, "first_commit": "2024-03-05 09:03:21", "latest_commit": "2024-04-02 14:38:06", "languages": [], "model_or_dataset": null } ]