File size: 4,506 Bytes
f277a4e c99dd0b 7e020ce f277a4e 5cd6a92 2fead36 5cd6a92 2fead36 7e020ce c99dd0b 5cd6a92 2fead36 d6cca84 2fead36 d6cca84 2fead36 d6cca84 2fead36 d6cca84 2fead36 d6cca84 2fead36 d6cca84 2fead36 5cd6a92 2fead36 d6cca84 2fead36 fc774f1 29f7911 c99dd0b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
---
library_name: transformers
tags:
- biology
- chemistry
- biological materials
- materials science
- engineering
- materials informatics
- scientific AI
- AI4science
---
## Inference example
```
# Imports required by this snippet; without them the names below are undefined.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = 'lamm-mit/BioinspiredLlama-3-1-8B-128k'

# Load the model weights in bfloat16 and let `device_map="auto"` decide placement.
# NOTE: attn_implementation="flash_attention_2" presumably requires the
# flash-attn package and a compatible GPU -- confirm for your environment.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
)
# Enable the cache flag on the model config for generation.
model.config.use_cache = True

# Tokenizer matching the model checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)
```
See https://huggingface.co/lamm-mit/BioinspiredLlama-3-1-8B-128k-dominant-protein-SS-structure for further examples of using the model to predict protein structural features, including a fine-tuning script.
#### Function to interact with the model
```
def generate_response(text_input="What is spider silk?",
                      system_prompt='',
                      num_return_sequences=1,
                      temperature=1.,  # the higher the temperature, the more creative the model becomes
                      max_new_tokens=127, device='cuda',
                      add_special_tokens=False,  # tokenizer.apply_chat_template already adds <|begin_of_text|>, so keep False
                      num_beams=1, eos_token_id=[
                          128001,
                          128008,
                          128009
                      ], verbatim=False,
                      top_k=50,
                      top_p=0.9,
                      repetition_penalty=1.1,
                      messages=None,
                      ):
    """Generate one or more sampled chat completions for ``text_input``.

    Relies on the module-level ``model`` and ``tokenizer`` objects.

    Args:
        text_input: User message for this turn.
        system_prompt: Optional system message; only used when a new
            conversation is started (``messages`` is ``None`` or empty).
        num_return_sequences: Number of completions to sample.
        temperature: Sampling temperature passed to ``model.generate``.
        max_new_tokens: Maximum number of newly generated tokens.
        device: Device the tokenized inputs are moved to.
        add_special_tokens: Forwarded to the tokenizer call.
        num_beams: Forwarded to ``model.generate``.
        eos_token_id: Token ids that stop generation. The default list is
            only read, never modified.
        verbatim: If true, print the tokenized inputs before generating.
        top_k: Forwarded to ``model.generate``.
        top_p: Forwarded to ``model.generate``.
        repetition_penalty: Forwarded to ``model.generate``.
        messages: Conversation history as a list of ``{"role", "content"}``
            dicts. ``None`` (the default) or an empty list starts a new
            conversation. A list passed by the caller is extended in place
            with the new user message.

    Returns:
        A tuple ``(responses, messages)``: the decoded completions (prompt
        tokens stripped) and the message list including the new user turn.
        The caller is expected to append the chosen response as an
        ``"assistant"`` message before the next turn.
    """
    # A fresh list per call: a shared `[]` default would carry the previous
    # conversation over into every later call that omits `messages`.
    if messages is None:
        messages = []

    # The system prompt is only meaningful at the start of a conversation.
    if not messages and system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": text_input})

    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    inputs = tokenizer(
        [prompt],
        add_special_tokens=add_special_tokens,
        return_tensors='pt',
    ).to(device)
    if verbatim:
        print(inputs)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            num_beams=num_beams,
            top_k=top_k,
            eos_token_id=eos_token_id,
            top_p=top_p,
            num_return_sequences=num_return_sequences,
            do_sample=True,
            repetition_penalty=repetition_penalty,
        )

    # Drop the prompt tokens so that only the newly generated part is decoded.
    prompt_length = inputs["input_ids"].shape[1]
    outputs = outputs[:, prompt_length:]
    responses = tokenizer.batch_decode(
        outputs.detach().cpu().numpy(), skip_special_tokens=True
    )
    return responses, messages
```
Usage:
```
# Sampling settings for this call, gathered in one place.
sampling_options = dict(
    num_return_sequences=1,
    temperature=1.,  # the higher the temperature, the more creative the model becomes
    max_new_tokens=127,
    num_beams=1,
    top_k=50,
    top_p=0.9,
    repetition_penalty=1.1,
)

# Single-turn query; the returned message history is not needed here.
res, _ = generate_response(
    text_input="What is collagen?",
    system_prompt='You are a materials scientist.',
    **sampling_options,
)
print(res[0])
```
For multi-turn interactions, append each response to `messages` as an assistant message and pass the list back in on the next call, as in this example:
```
# Multi-turn conversation: every turn reuses and extends the same history.
messages = []
for prompt in (
    "What is spider silk?",
    "Explain this result in detail.",
    "Provide this in JSON format.",
):
    res, messages = generate_response(text_input=prompt, messages=messages)
    # Record the model's answer so the next turn sees it as context.
    messages.append({"role": "assistant", "content": res[0]})
    print(res)
```
## Fine-tuning script
See https://huggingface.co/lamm-mit/BioinspiredLlama-3-1-8B-128k-dominant-protein-SS-structure for a Colab fine-tuning script.