Added example to run on smaller GPUs (#1)

Commit 81517c5df8b8c846eae830f9935111aee3ec5608
Co-authored-by: Aleksander Strand <[email protected]>
README.md CHANGED

```diff
@@ -296,6 +296,53 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 tokenizer = AutoTokenizer.from_pretrained("norallm/normistral-7b-warm")
 model = AutoModelForCausalLM.from_pretrained("norallm/normistral-7b-warm").cuda().eval()
 
+# Now we will define the zero-shot prompt template
+prompt = """Engelsk: {0}
+Bokmål:"""
+
+# A function that will take care of generating the output
+@torch.no_grad()
+def generate(text):
+    text = prompt.format(text)
+    input_ids = tokenizer(text, return_tensors='pt').input_ids.cuda()
+    prediction = model.generate(
+        input_ids,
+        max_new_tokens=64,
+        do_sample=False,
+        eos_token_id=tokenizer('\n').input_ids
+    )
+    return tokenizer.decode(prediction[0, input_ids.size(1):]).strip()
+
+# Now you can simply call the generate function with an English text you want to translate:
+generate("I'm super excited about this Norwegian NORA model! Can it translate these sentences?")
+# > this should output: 'Jeg er super spent på denne norske NORA modellen! Kan den oversette disse setningene?'
+```
+
+_____
+## Example usage with low GPU usage
+Install bitsandbytes and accelerate if you want to load the model in 8-bit:
+
+```bash
+pip install bitsandbytes
+pip install accelerate
+```
+
+
+```python
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+
+# First, we will have to import the tokenizer and the language model
+tokenizer = AutoTokenizer.from_pretrained("norallm/normistral-7b-warm")
+model = AutoModelForCausalLM.from_pretrained("norallm/normistral-7b-warm",
+                                             device_map='auto',
+                                             load_in_8bit=True,
+                                             torch_dtype=torch.float16)
+# This setup needs about 8 GB of VRAM
+# Setting load_in_8bit=False instead needs about 15 GB of VRAM
+# Using torch.float32 and load_in_8bit=False needs about 21 GB of VRAM
+
+
 # Now we will define the zero-shot prompt template
 prompt = """Engelsk: {0}
 Bokmål:"""
```
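
A note on the added example: newer transformers releases deprecate passing `load_in_8bit=True` directly to `from_pretrained` in favour of a `BitsAndBytesConfig` object. Below is a minimal sketch of the equivalent load, assuming a transformers version recent enough to accept `quantization_config` (bitsandbytes and accelerate are still required, as in the diff):

```python
# Sketch only: the same 8-bit load expressed with BitsAndBytesConfig,
# which newer transformers versions expect instead of a bare load_in_8bit flag.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

tokenizer = AutoTokenizer.from_pretrained("norallm/normistral-7b-warm")
model = AutoModelForCausalLM.from_pretrained(
    "norallm/normistral-7b-warm",
    device_map="auto",                                    # let accelerate place the layers
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    torch_dtype=torch.float16,                            # dtype of the non-quantized modules
)
```

The rest of the example (prompt template and `generate` function) works unchanged with a model loaded this way.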
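The VRAM figures in the comments can be sanity-checked with back-of-the-envelope arithmetic: weights alone take roughly parameter count times bytes per parameter. A small check, assuming an approximate 7e9 parameter count (the exact count is not given in the diff):

```python
# Rough weight-memory estimate for a ~7B-parameter model (weights only;
# activations, KV cache, and framework overhead come on top).
N_PARAMS = 7e9  # approximate parameter count, assumed for illustration

for dtype, bytes_per_param in [("int8", 1), ("float16", 2), ("float32", 4)]:
    print(f"{dtype}: ~{N_PARAMS * bytes_per_param / 2**30:.0f} GiB")

# int8: ~7 GiB, float16: ~13 GiB, float32: ~26 GiB -- roughly consistent with
# the quoted ~8 GB and ~15 GB; the ~21 GB float32 figure may indicate that
# device_map='auto' placed part of the model off-GPU on the measured setup.
```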