Pankaj Mathur committed
Commit 45eb5a1
Parent(s): bb961f4
Update README.md

README.md CHANGED
@@ -52,38 +52,41 @@ tokenizer = LlamaTokenizer.from_pretrained(model_path)
 model = LlamaForCausalLM.from_pretrained(
     model_path, torch_dtype=torch.float16, device_map='auto',
 )
-# check more details here https://github.com/openlm-research/open_llama
-tokenizer.bos_token_id, tokenizer.eos_token_id = 1,2
 
-# same prompt as provided by Orca Research Paper
-system = 'You are an AI assistant. User will you give you a task. Your goal is to complete the task as faithfully as you can. While performing the task think step-by-step and justify your steps.'
-instruction = 'Use the given data to calculate the median.'
-input = '[7, 3, 8, 2, 10]'
-
-tokens = torch.LongTensor(tokens).unsqueeze(0)
-tokens = tokens.to('cuda')
-
-with torch.no_grad():
-    rest = model.generate(
-        input_ids=tokens,
-        max_length=length+instance['generate_len'],
-        use_cache=True,
-        do_sample=True,
-        top_p=instance['top_p'],
+#generate text function
+def generate_text(system, instruction, input=None):
+    if input:
+        prompt = f"### System:\n{system}\n\n#\n\n### User:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n"
+    else:
+        prompt = f"### System:\n{system}\n\n#\n\n### User:\n{instruction}\n\n### Response:\n"
+
+    tokens = tokenizer.encode(prompt)
+    tokens = torch.LongTensor(tokens).unsqueeze(0)
+    tokens = tokens.to('cuda')
+
+    instance = {'input_ids': tokens, 'top_p': 1.0, 'temperature': 0.7, 'generate_len': 1024}
+
+    length = len(tokens[0])
+    with torch.no_grad():
+        rest = model.generate(
+            input_ids=tokens,
+            max_length=length+instance['generate_len'],
+            use_cache=True,
+            do_sample=True,
+            top_p=instance['top_p'],
+            temperature=instance['temperature']
+        )
+    output = rest[0][length:]
+    string = tokenizer.decode(output, skip_special_tokens=True)
+    print(f'[!] Response: {string}')
+
+# same prompt as provided by Orca Research Paper
+system = 'You are an AI assistant. User will you give you a task. Your goal is to complete the task as faithfully as you can. While performing the task think step-by-step and justify your steps.'
+instruction = 'Use the given data to calculate the median.'
+input = '[7, 3, 8, 2, 10]'
+generate_text(system, instruction, input)
 
 ```
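The new `generate_text` helper is the substance of this commit: it assembles an Alpaca-style prompt, tokenizes it, samples with the stored `top_p`/`temperature` settings, and decodes only the tokens generated after the prompt. As a quick illustration (not part of the commit), the sketch below renders the prompt template for the README's example task so you can see the exact string the model receives; names mirror the README, except `task_input`, renamed to avoid shadowing Python's `input` builtin.

```python
# Illustration only: render the README's prompt template for its example task.
system = ('You are an AI assistant. User will you give you a task. Your goal is to '
          'complete the task as faithfully as you can. While performing the task '
          'think step-by-step and justify your steps.')
instruction = 'Use the given data to calculate the median.'
task_input = '[7, 3, 8, 2, 10]'  # the README binds this to `input`

# Same f-string generate_text uses when an input is given.
prompt = f"### System:\n{system}\n\n#\n\n### User:\n{instruction}\n\n### Input:\n{task_input}\n\n### Response:\n"
print(prompt)

# Sanity check of the task itself: the median of [7, 3, 8, 2, 10] is 7,
# since the sorted data [2, 3, 7, 8, 10] has 7 as its middle value.
```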
@@ -93,3 +96,19 @@ Next Goals:
 3) Try better GPU for training, couldn't get 8xA100 (40GB), I guess they are in hot demand now.
 4) Provide more options for Text generation UI (maybe https://github.com/oobabooga/text-generation-webui).
 6) Provide 4bit GGML/GPTQ quantized model (maybe [TheBloke](https://huggingface.co/TheBloke) can help here).
+
+**P.S. I am #opentowork and #collaboration, please reach out to me at [email protected]**
+
+Reference:
+If you found [alpaca_orca_open_llama_3b](psmathur/alpaca_orca_open_llama_3b) useful in your research or applications, please kindly cite using the following BibTeX:
+
+@misc{alpaca_orca_open_llama_3b,
+  author = {Pankaj Mathur},
+  title = {alpaca_orca_open_llama_3b: A custom explain tuned Alpaca Model Based On OpenLLaMA},
+  year = {2023},
+  publisher = {GitHub},
+  journal = {GitHub repository},
+  howpublished = {\url{https://github.com/pankajarm/alpaca_orca_open_llama_3b}},
+}
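On goal 6 above: GGML files target llama.cpp and GPTQ files target AutoGPTQ, and neither is published for this model yet. Purely as a hedged sketch of a related option, recent transformers releases can quantize the existing fp16 checkpoint to 4-bit at load time through bitsandbytes; the snippet below is an assumption about that path, not something this repo documents.

```python
# Hypothetical sketch: on-the-fly 4-bit loading via bitsandbytes
# (needs transformers >= 4.30 plus the bitsandbytes and accelerate packages).
from transformers import LlamaForCausalLM, LlamaTokenizer

model_path = 'psmathur/alpaca_orca_open_llama_3b'
tokenizer = LlamaTokenizer.from_pretrained(model_path)
model = LlamaForCausalLM.from_pretrained(
    model_path,
    load_in_4bit=True,   # quantize linear-layer weights to 4-bit at load time
    device_map='auto',
)
```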
|