macadeliccc committed on
Commit 2a74d95
1 Parent(s): 97061bc

Update README.md

Files changed (1)
  1. README.md +20 -61
README.md CHANGED
@@ -51,87 +51,46 @@ Please give ideas and a detailed plan about how to assemble and train an army of
 Switch the commented model definition to use in 4-bit. Should work with 9GB and still exceed the single 7B model by 5-6 points roughly

 ```python
-# Import necessary libraries
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import AutoModelForCausalLM, AutoTokenizer

-# Load tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained("macadeliccc/laser-dolphin-mixtral-2x7b-dpo")
-model = AutoModelForCausalLM.from_pretrained("macadeliccc/laser-dolphin-mixtral-2x7b-dpo")
-
-# Define a function to generate responses with adjustable hyperparameters
-def generate_response(messages, max_length=50, num_return_sequences=1, temperature=1.0, top_k=50, top_p=1.0):
+def generate_response(prompt):
     """
-    Generate a response from the model based on the input chat messages and hyperparameters.
+    Generate a response from the model based on the input prompt.

     Args:
-    messages (list): List of message dictionaries with 'role' and 'content'.
-    max_length (int): Maximum length of the model's response.
-    num_return_sequences (int): Number of response sequences to generate.
-    temperature (float): Sampling temperature for model generation.
-    top_k (int): The number of highest probability vocabulary tokens to keep for top-k filtering.
-    top_p (float): If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation.
+    prompt (str): Prompt for the model.

     Returns:
     str: The generated response from the model.
     """
-    # Apply chat template to input messages
-    gen_input = tokenizer.apply_chat_template(messages, return_tensors="pt")
+    # Tokenize the input prompt
+    inputs = tokenizer(prompt, return_tensors="pt")

-    # Generate a response
-    output = model.generate(**gen_input,
-                            max_length=max_length,
-                            num_return_sequences=num_return_sequences,
-                            temperature=temperature,
-                            top_k=top_k,
-                            top_p=top_p)
+    # Generate output tokens
+    outputs = model.generate(**inputs, max_new_tokens=256, eos_token_id=tokenizer.eos_token_id, pad_token_id=tokenizer.pad_token_id)

     # Decode the generated tokens to a string
-    response = tokenizer.decode(output[0], skip_special_tokens=True)
-
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
     return response

-# Example chat messages
-messages = [
-    {"role": "system", "content": "You are Dolphin, an AI assistant."},
-    {"role": "user", "content": "Write a quicksort algorithm in python"}
-]
+# Load the model and tokenizer
+model_id = "macadeliccc/piccolo-2x7b"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True)
+
+prompt = "Write a quicksort algorithm in python"

-# Generate and print the response
-response = generate_response(messages, max_length=100, temperature=0.8)
-print("Response:\n", response)
+# Generate and print responses for each language
+print("Response:")
+print(generate_response(prompt), "\n")
 ```

 [colab](https://colab.research.google.com/drive/1cmRhAkDWItV7utHNqNANVZnqDqQNsTUr?usp=sharing) with usage example

 ## Eval

-**Full Precision**
-
-| Tasks    |Version|Filter|n-shot| Metric |Value | |Stderr|
-|----------|-------|------|-----:|--------|-----:|---|-----:|
-|arc_easy  |Yaml   |none  |     0|acc     |0.8413|± |0.0075|
-|          |       |none  |     0|acc_norm|0.8056|± |0.0081|
-|boolq     |Yaml   |none  |     0|acc     |0.8694|± |0.0059|
-|hellaswag |Yaml   |none  |     0|acc     |0.6484|± |0.0048|
-|          |       |none  |     0|acc_norm|0.8354|± |0.0037|
-|openbookqa|Yaml   |none  |     0|acc     |0.3500|± |0.0214|
-|          |       |none  |     0|acc_norm|0.4660|± |0.0223|
-|piqa      |Yaml   |none  |     0|acc     |0.8210|± |0.0089|
-|          |       |none  |     0|acc_norm|0.8303|± |0.0088|
-|winogrande|Yaml   |none  |     0|acc     |0.7577|± |0.0120|
-
-**4-bit (bnb)**
-
-| Tasks    |Version|Filter|n-shot| Metric |Value | |Stderr|
-|----------|-------|------|-----:|--------|-----:|---|-----:|
-|boolq     |Yaml   |none  |     0|acc     |0.8700|± |0.0059|
-|hellaswag |Yaml   |none  |     0|acc     |0.6356|± |0.0048|
-|          |       |none  |     0|acc_norm|0.8270|± |0.0038|
-|openbookqa|Yaml   |none  |     0|acc     |0.3320|± |0.0211|
-|          |       |none  |     0|acc_norm|0.4620|± |0.0223|
-|piqa      |Yaml   |none  |     0|acc     |0.8123|± |0.0091|
-|          |       |none  |     0|acc_norm|0.8259|± |0.0088|
-|winogrande|Yaml   |none  |     0|acc     |0.7490|± |0.0122|
+TODO


 evaluation [colab](https://colab.research.google.com/drive/1FpwgsGzCR4tORTxAwUxpN3PcP22En2xk?usp=sharing)
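The note at the top of the hunk says switching the model definition to 4-bit should fit in roughly 9 GB, and the new snippet relies on the `load_in_4bit=True` shortcut in `from_pretrained`. A minimal sketch of the same load with an explicit `BitsAndBytesConfig`, assuming `bitsandbytes` and a CUDA GPU are available; the NF4/bfloat16 settings are illustrative choices, not part of the commit:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "macadeliccc/piccolo-2x7b"  # repo id taken from the diff above

# Illustrative 4-bit settings: NF4 weights with bfloat16 compute
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",  # place the quantized weights on the available GPU(s)
)

# Rough check against the ~9 GB figure quoted in the README
print(f"weights: {model.get_memory_footprint() / 1e9:.1f} GB")
```

`get_memory_footprint()` counts only parameters and buffers, so actual usage will sit somewhat above it once the KV cache and activations are included.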
 
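The removed version of the snippet formatted its input with `tokenizer.apply_chat_template` and a system/user message list, whereas the new one passes a raw string. If `piccolo-2x7b` also ships a chat template in its tokenizer config (an assumption, not something the diff states), the same prompt could be wrapped that way; the system message below is illustrative:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "macadeliccc/piccolo-2x7b"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True)

# System/user pair mirroring the example removed by this commit
messages = [
    {"role": "system", "content": "You are a helpful AI assistant."},
    {"role": "user", "content": "Write a quicksort algorithm in python"},
]

# Render the conversation with the tokenizer's chat template (assumed to exist)
input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")

outputs = model.generate(input_ids, max_new_tokens=256, pad_token_id=tokenizer.eos_token_id)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```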
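The Eval section is left as TODO, and the tables this commit removes are in lm-evaluation-harness output format. A sketch of how comparable zero-shot numbers could be collected for the new model with the harness's Python API, assuming the `lm_eval` package (lm-evaluation-harness v0.4+) is installed; the task list simply mirrors the removed tables:

```python
import lm_eval

# Evaluate the new model on the same zero-shot tasks the old tables reported
results = lm_eval.simple_evaluate(
    model="hf",
    model_args="pretrained=macadeliccc/piccolo-2x7b,load_in_4bit=True",
    tasks=["arc_easy", "boolq", "hellaswag", "openbookqa", "piqa", "winogrande"],
    num_fewshot=0,
    batch_size=8,
)

# Per-task metrics (acc / acc_norm with stderr), the same fields as the old tables
for task, metrics in results["results"].items():
    print(task, metrics)
```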