michaelfeil
/

ct2fast-open-llama-7b-open-instruct

@@ -21,7 +21,7 @@ pip install hf-hub-ctranslate2>=2.0.8 ctranslate2>=3.16.0
 ```
 Converted on 2023-06-15 using
 ```
-ct2-transformers-converter --model VMware/open-llama-7b-open-instruct --output_dir /home/michael/tmp-ct2fast-open-llama-7b-open-instruct --force --copy_files README.md tokenizer_config.json generation_config.json special_tokens_map.json .gitattributes --quantization int8_float16 --trust_remote_code
 ```
 Checkpoint compatible with [ctranslate2>=3.16.0](https://github.com/OpenNMT/CTranslate2)
@@ -30,17 +30,17 @@ and [hf-hub-ctranslate2>=2.0.8](https://github.com/michaelfeil/hf-hub-ctranslate
 - `compute_type=int8`  for `device="cpu"`
 ```python
-from hf_hub_ctranslate2 import TranslatorCT2fromHfHub, GeneratorCT2fromHfHub
 from transformers import AutoTokenizer
 model_name = "michaelfeil/ct2fast-open-llama-7b-open-instruct"
-# use either TranslatorCT2fromHfHub or GeneratorCT2fromHfHub here, depending on model.
 model = GeneratorCT2fromHfHub(
         # load in int8 on CUDA
         model_name_or_path=model_name,
         device="cuda",
         compute_type="int8_float16",
-        # tokenizer=AutoTokenizer.from_pretrained("VMware/open-llama-7b-open-instruct")
 )
 outputs = model.generate(
     text=["def fibonnaci(", "User: How are you doing? Bot:"],

 ```
 Converted on 2023-06-15 using
 ```
+ct2-transformers-converter --model VMware/open-llama-7b-open-instruct --output_dir ~/tmp-ct2fast-open-llama-7b-open-instruct --force --copy_files README.md tokenizer_config.json generation_config.json special_tokens_map.json .gitattributes --quantization int8_float16 --trust_remote_code
 ```
 Checkpoint compatible with [ctranslate2>=3.16.0](https://github.com/OpenNMT/CTranslate2)
 - `compute_type=int8`  for `device="cpu"`
 ```python
 from transformers import AutoTokenizer
 model_name = "michaelfeil/ct2fast-open-llama-7b-open-instruct"
+from hf_hub_ctranslate2 import GeneratorCT2fromHfHub
 model = GeneratorCT2fromHfHub(
         # load in int8 on CUDA
         model_name_or_path=model_name,
         device="cuda",
         compute_type="int8_float16",
+        # tokenizer=AutoTokenizer.from_pretrained("VMware/open-llama-7b-open-instruct")
 )
 outputs = model.generate(
     text=["def fibonnaci(", "User: How are you doing? Bot:"],