davda54 and buzzcraft committed
Commit 6623d6f
1 Parent(s): bc98a54

Added example to run on smaller GPUs (#1)


- Added example to run on smaller GPUs (81517c5df8b8c846eae830f9935111aee3ec5608)


Co-authored-by: Aleksander Strand <[email protected]>

Files changed (1)
  1. README.md +47 -0
README.md CHANGED
@@ -296,6 +296,53 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 tokenizer = AutoTokenizer.from_pretrained("norallm/normistral-7b-warm")
 model = AutoModelForCausalLM.from_pretrained("norallm/normistral-7b-warm").cuda().eval()
 
+# Now we will define the zero-shot prompt template
+prompt = """Engelsk: {0}
+Bokmål:"""
+
+# A function that takes care of generating the output
+@torch.no_grad()
+def generate(text):
+    text = prompt.format(text)
+    input_ids = tokenizer(text, return_tensors='pt').input_ids.cuda()
+    prediction = model.generate(
+        input_ids,
+        max_new_tokens=64,
+        do_sample=False,
+        eos_token_id=tokenizer('\n').input_ids
+    )
+    return tokenizer.decode(prediction[0, input_ids.size(1):]).strip()
+
+# Now you can simply call the generate function with an English text you want to translate:
+generate("I'm super excited about this Norwegian NORA model! Can it translate these sentences?")
+# > This should output: 'Jeg er super spent på denne norske NORA modellen! Kan den oversette disse setningene?'
+```
+
+_____
+## Example usage with low GPU memory
+Install bitsandbytes and accelerate if you want to load the model in 8-bit:
+
+```bash
+pip install bitsandbytes
+pip install accelerate
+```
+
+```python
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+
+# First, we import the tokenizer and the language model
+tokenizer = AutoTokenizer.from_pretrained("norallm/normistral-7b-warm")
+model = AutoModelForCausalLM.from_pretrained(
+    "norallm/normistral-7b-warm",
+    device_map='auto',
+    load_in_8bit=True,
+    torch_dtype=torch.float16
+)
+# This setup needs about 8 GB of VRAM
+# With load_in_8bit=False, it needs about 15 GB
+# With torch.float32 and load_in_8bit=False, about 21 GB
+
 # Now we will define the zero-shot prompt template
 prompt = """Engelsk: {0}
 Bokmål:"""
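
A note for anyone reusing this snippet: newer transformers releases deprecate passing `load_in_8bit=True` straight to `from_pretrained` in favour of a `BitsAndBytesConfig` object. Below is a minimal sketch of the equivalent loading code; the version bound is an assumption, and the memory figure is simply carried over from the comments in the diff above.

```python
# Equivalent 8-bit loading via BitsAndBytesConfig -- a sketch, assuming
# transformers >= 4.30 with bitsandbytes and accelerate installed.
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

quantization_config = BitsAndBytesConfig(load_in_8bit=True)

tokenizer = AutoTokenizer.from_pretrained("norallm/normistral-7b-warm")
model = AutoModelForCausalLM.from_pretrained(
    "norallm/normistral-7b-warm",
    device_map='auto',
    quantization_config=quantization_config,
    torch_dtype=torch.float16,
)
# Should land at roughly the same ~8 GB VRAM footprint quoted in the diff above
```

Generation then works the same way as with the `generate` helper defined earlier, since the quantized model exposes the same `generate` API.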