macadeliccc committed on
Commit 1ed3a5f
1 Parent(s): 8242848

Update README.md

Files changed (1)
  1. README.md +40 -15
README.md CHANGED
@@ -46,29 +46,54 @@ Please give ideas and a detailed plan about how to assemble and train an army of
 Switch the commented model definition to use in 4-bit. Should work with 9GB and still exceed the single 7B model by 5-6 points roughly

 ```python
+# Import necessary libraries
 from transformers import AutoTokenizer, AutoModelForCausalLM

 # Load tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained("macadeliccc/laser-dolphin-mixtral-2x7b-dpo")
 model = AutoModelForCausalLM.from_pretrained("macadeliccc/laser-dolphin-mixtral-2x7b-dpo")
-# model = AutoModelForCausalLM.from_pretrained("macadeliccc/laser-dolphin-mixtral-2x7b-dpo", load_in_4bit=True)
-# Define the chat messages
+
+# Define a function to generate responses with adjustable hyperparameters
+def generate_response(messages, max_length=50, num_return_sequences=1, temperature=1.0, top_k=50, top_p=1.0):
+    """
+    Generate a response from the model based on the input chat messages and hyperparameters.
+
+    Args:
+        messages (list): List of message dictionaries with 'role' and 'content'.
+        max_length (int): Maximum length of the model's response.
+        num_return_sequences (int): Number of response sequences to generate.
+        temperature (float): Sampling temperature for model generation.
+        top_k (int): The number of highest-probability vocabulary tokens to keep for top-k filtering.
+        top_p (float): If set to a float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation.
+
+    Returns:
+        str: The generated response from the model.
+    """
+    # Apply chat template to input messages
+    gen_input = tokenizer.apply_chat_template(messages, return_tensors="pt")
+
+    # Generate a response
+    output = model.generate(**gen_input,
+                            max_length=max_length,
+                            num_return_sequences=num_return_sequences,
+                            temperature=temperature,
+                            top_k=top_k,
+                            top_p=top_p)
+
+    # Decode the generated tokens to a string
+    response = tokenizer.decode(output[0], skip_special_tokens=True)
+
+    return response
+
+# Example chat messages
 messages = [
-    {"role": "system", "content": "You are Dolphin, an AI assistant"},
-    {"role": "user", "content": "Hello, who are you?"}
+    {"role": "system", "content": "You are Dolphin, an AI assistant."},
+    {"role": "user", "content": "Write a quicksort algorithm in python"}
 ]

-# Apply chat template to input messages
-gen_input = tokenizer.apply_chat_template(messages, return_tensors="pt")
-
-# Generate a response
-output = model.generate(**gen_input)
-
-# Decode the generated tokens to a string
-response = tokenizer.decode(output[0], skip_special_tokens=True)
-
-# Print the response
-print("Response:", response)
+# Generate and print the response
+response = generate_response(messages, max_length=100, temperature=0.8)
+print("Response:\n", response)
 ```

 [colab](https://colab.research.google.com/drive/1cmRhAkDWItV7utHNqNANVZnqDqQNsTUr?usp=sharing) with usage example
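
For reference, the 4-bit variant mentioned in the README text ("Switch the commented model definition to use in 4-bit") could look roughly like the sketch below. This is not part of the commit: the `BitsAndBytesConfig`/`device_map="auto"` route and the generation settings are assumptions (they require `bitsandbytes` and `accelerate` plus a CUDA GPU); only the model id and the `load_in_4bit` idea come from the diff above.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "macadeliccc/laser-dolphin-mixtral-2x7b-dpo"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Load the model in 4-bit (same idea as the commented-out `load_in_4bit=True`
# line in the diff above); requires bitsandbytes and accelerate.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map="auto",
)

messages = [
    {"role": "system", "content": "You are Dolphin, an AI assistant."},
    {"role": "user", "content": "Write a quicksort algorithm in python"},
]

# apply_chat_template with return_tensors="pt" returns a tensor of input ids,
# which generate() accepts as its first positional argument.
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

output = model.generate(input_ids, max_new_tokens=256, do_sample=True, temperature=0.8)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```

Passing `quantization_config=BitsAndBytesConfig(load_in_4bit=True)` should be equivalent to the older `load_in_4bit=True` shortcut shown in the removed comment line.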