Error: `rope_scaling` validation fails when loading Llama-3.2-1B-AQLM-PV-2Bit-2x8, and generation output is garbled
from transformers import AutoTokenizer, AutoModelForCausalLM, LlamaConfig

# Load the configuration
config = LlamaConfig.from_pretrained("ISTA-DASLab/Llama-3.2-1B-AQLM-PV-2Bit-2x8")

# Adjust the rope_scaling to match the expected format
config.rope_scaling = {
    "type": "dynamic",  # Define the type appropriately
    "factor": 32.0,     # Retain the factor value if it applies
}

# Load the model with the adjusted configuration
quantized_model = AutoModelForCausalLM.from_pretrained(
    "ISTA-DASLab/Llama-3.2-1B-AQLM-PV-2Bit-2x8",
    config=config,
    torch_dtype="float16",
    device_map="auto",
    low_cpu_mem_usage=True,
)

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("ISTA-DASLab/Llama-3.2-1B-AQLM-PV-2Bit-2x8")

print("Model and tokenizer loaded successfully.")
ValueError                                Traceback (most recent call last)
in <cell line: 4>()
      2
      3 # Load the configuration
----> 4 config = LlamaConfig.from_pretrained("ISTA-DASLab/Llama-3.2-1B-AQLM-PV-2Bit-2x8")
      5
      6 # Adjust the rope_scaling to match the expected format

3 frames
/usr/local/lib/python3.10/dist-packages/transformers/models/llama/configuration_llama.py in _rope_scaling_validation(self)
    178
    179         if not isinstance(self.rope_scaling, dict) or len(self.rope_scaling) != 2:
--> 180             raise ValueError(
    181                 "`rope_scaling` must be a dictionary with two fields, `type` and `factor`, "
    182                 f"got {self.rope_scaling}"

ValueError: `rope_scaling` must be a dictionary with two fields, `type` and `factor`, got {'factor': 32.0, 'high_freq_factor': 4.0, 'low_freq_factor': 1.0, 'original_max_position_embeddings': 8192, 'rope_type': 'llama3'}
/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:797: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
  warnings.warn(
/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning:
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
  warnings.warn(
ValueError                                Traceback (most recent call last)
in <cell line: 3>()
      1 from transformers import AutoTokenizer, AutoModelForCausalLM
      2
----> 3 quantized_model = AutoModelForCausalLM.from_pretrained(
      4     "ISTA-DASLab/Llama-3.2-1B-AQLM-PV-2Bit-2x8",
      5     torch_dtype="auto", device_map="auto", low_cpu_mem_usage=True,

4 frames
/usr/local/lib/python3.10/dist-packages/transformers/models/llama/configuration_llama.py in _rope_scaling_validation(self)
    178
    179         if not isinstance(self.rope_scaling, dict) or len(self.rope_scaling) != 2:
--> 180             raise ValueError(
    181                 "`rope_scaling` must be a dictionary with two fields, `type` and `factor`, "
    182                 f"got {self.rope_scaling}"

ValueError: `rope_scaling` must be a dictionary with two fields, `type` and `factor`, got {'factor': 32.0, 'high_freq_factor': 4.0, 'low_freq_factor': 1.0, 'original_max_position_embeddings': 8192, 'rope_type': 'llama3'}
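This error usually means the installed transformers release predates support for the `llama3` rope_type that this checkpoint's config uses, so the old validator rejects the extra fields. The commonly recommended fix is upgrading the library rather than overriding rope_scaling. A minimal sketch, assuming a recent transformers release (support for the `llama3` rope_type landed around v4.43, with v4.45+ commonly cited for Llama 3.2 checkpoints) can be installed in the Colab runtime:

# Upgrade sketch (run in its own cell, then restart the runtime):
# !pip install -U "transformers>=4.45" "aqlm[gpu]" accelerate

from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "ISTA-DASLab/Llama-3.2-1B-AQLM-PV-2Bit-2x8"

# With a recent transformers, the checkpoint's rope_scaling parses as-is,
# so no manual config override is needed.
quantized_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype="float16",
    device_map="auto",
    low_cpu_mem_usage=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)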
ISTA-DASLab/Meta-Llama-3-8B-Instruct-AQLM-2Bit-1x16 runs, but ISTA-DASLab/Llama-3.2-1B-AQLM-PV-2Bit-2x8 does not.
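This difference is consistent with the configs (an assumption from the error message, not checked against both repos): the older Llama-3 8B checkpoint uses a rope_scaling format the old validator accepts, while the Llama-3.2 checkpoint ships rope_type='llama3'. A quick check of the installed library version:

import transformers

# The 'llama3' rope_type is only understood by recent releases
# (roughly v4.43+; v4.45+ is commonly cited for Llama 3.2 checkpoints).
print(transformers.__version__)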
The 8B model runs in Colab on a T4:
from transformers import AutoTokenizer, AutoModelForCausalLM

quantized_model = AutoModelForCausalLM.from_pretrained(
    "ISTA-DASLab/Meta-Llama-3-8B-Instruct-AQLM-2Bit-1x16",
    torch_dtype="auto", device_map="auto", low_cpu_mem_usage=True,
)
tokenizer = AutoTokenizer.from_pretrained("ISTA-DASLab/Meta-Llama-3-8B-Instruct-AQLM-2Bit-1x16")
%%time
output = quantized_model.generate(tokenizer("The relationship between humans and AI ", return_tensors="pt")["input_ids"].cuda(), min_new_tokens=128, max_new_tokens=128)
print(tokenizer.decode(output[0]))
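As an aside, tokenizer.decode keeps special tokens such as <|begin_of_text|> by default; the standard skip_special_tokens flag drops them:

print(tokenizer.decode(output[0], skip_special_tokens=True))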
import json
import textwrap

system_prompt = "A chat between a curious user and a blog writing assistant. "

def get_prompt(human_prompt):
    prompt_template = f"{system_prompt}\n\nUSER: {human_prompt} \nASSISTANT: "
    return prompt_template

def remove_human_text(text):
    return text.split('USER:', 1)[0]

def parse_text(data):
    for item in data:
        text = item['generated_text']
        assistant_text_index = text.find('ASSISTANT:')
        if assistant_text_index != -1:
            assistant_text = text[assistant_text_index + len('ASSISTANT:'):].strip()
            assistant_text = remove_human_text(assistant_text)
            wrapped_text = textwrap.fill(assistant_text, width=100)
            print("#####", wrapped_text)
            # return assistant_text
from transformers import GenerationConfig, pipeline

pipe = pipeline(
    "text-generation",
    model=quantized_model,
    tokenizer=tokenizer,
    max_length=1200,
    temperature=0.7,
    top_p=0.95,
    do_sample=True,
)
prompt = '''Write a short and engaging blog post about travelling in Bohol Island.
'''

raw_output = pipe(get_prompt(prompt))
parse_text(raw_output)
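One small note on the pipeline settings: max_length counts the prompt tokens as well, so long prompts eat into the generation budget. The standard max_new_tokens parameter bounds only the generated text; a sketch of the same pipeline with it:

from transformers import pipeline

pipe = pipeline(
    "text-generation",
    model=quantized_model,
    tokenizer=tokenizer,
    max_new_tokens=1024,  # bounds generated tokens only, unlike max_length
    temperature=0.7,
    top_p=0.95,
    do_sample=True,
)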
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Here an more the an the Angeles T States Angeles the� the areas� States as� that that men:// more
one important M and the an and G R important States the the an the president an of that on P by
States of a-old a more the� and as States America on on on-old also the president Angeles American
[... the output continues in the same incoherent fashion for roughly 1,000 more tokens ...]
from transformers import AutoTokenizer, AutoModelForCausalLM

quantized_model = AutoModelForCausalLM.from_pretrained(
    "ISTA-DASLab/Meta-Llama-3-8B-Instruct-AQLM-2Bit-1x16",
    torch_dtype="auto", device_map="auto", low_cpu_mem_usage=True, force_download=True,
)
tokenizer = AutoTokenizer.from_pretrained("ISTA-DASLab/Meta-Llama-3-8B-Instruct-AQLM-2Bit-1x16")

quantized_model.generation_config.pad_token_id = quantized_model.generation_config.eos_token_id
%%time
output = quantized_model.generate(tokenizer("The relationship between humans and AI ", return_tensors="pt")["input_ids"].cuda(), min_new_tokens=128, max_new_tokens=128)
print(tokenizer.decode(output[0]))
<|begin_of_text|>The relationship between humans and AI Thedef solve49. Ã0Question: Ã0Question: Ã0Question: Ã0Question: Ã0Question: Ã0Question: Ã0Question: Ã0Question: Ã0Question: Ã0Question: Ã0Question: Ã0Question: Ã0Question: Ã0Question: Ã0Question: Ã0Question: Ã0Question: Ã0Question: Ã0Question: Ã0Question: Ã0Question: Ã0Question: Ã0Question: Ã0Question: Ã0Question: Ã0Question: Ã0Question: Ã0Question: Ã0Question: Ã0Question: Ã0Question
CPU times: user 9.53 s, sys: 175 ms, total: 9.71 s
Wall time: 9.8 s
How can I fix this?
%%time
output = quantized_model.generate(
    tokenizer("The relationship between humans and AI ", return_tensors="pt")["input_ids"].cuda(),
    min_new_tokens=128,
    max_new_tokens=128,
    temperature=0.3,         # lower the temperature
    do_sample=True,          # enable sampling (with a lower top_p)
    repetition_penalty=1.2   # increase the repetition penalty
)
print(tokenizer.decode(output[0]))
<|begin_of_text|>The relationship between humans and AI Thedef solve49. Ã0
president Is (…Question: approve4251Question:
Ã3 def solve56,RNA polymerxinomn =def create aRN Ã5 the Ã4Question: hearigans not def sorter todef solve89%9 ( ( (implify |def find. Ã7
wasdef my. Ã8…Question: realizeuta-Ranges4 (.
States - Ã2Question:;amp5116 ( ( ( ( ( ( ( ( ( ( ( ( ( ( ( What IWhat’sهماذاriel isDesigned withdef make06000 Let ( ( ( (
CPU times: user 11.4 s, sys: 153 ms, total: 11.6 s
Wall time: 12.7 s
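Tweaking the sampling parameters does not change the picture, which points at a numeric or kernel issue rather than a decoding one. One commonly suggested check on a T4 (an assumption, not verified here): torch_dtype="auto" takes the dtype from the checkpoint config, and T4 GPUs lack native bfloat16 support, so forcing float16 is worth ruling out:

import torch
from transformers import AutoModelForCausalLM

# Sketch: reload the model with an explicit float16 dtype on T4-class GPUs.
quantized_model = AutoModelForCausalLM.from_pretrained(
    "ISTA-DASLab/Meta-Llama-3-8B-Instruct-AQLM-2Bit-1x16",
    torch_dtype=torch.float16,
    device_map="auto",
    low_cpu_mem_usage=True,
)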
What is the solution to this garbled text output?
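A few hedged suggestions, none verified against this exact setup: (1) upgrading the aqlm package (with its GPU kernels, aqlm[gpu]) alongside transformers is often the first advice for garbage output from AQLM checkpoints, since kernel/library mismatches are a commonly reported cause; (2) force float16 as sketched above; (3) since this is an Instruct checkpoint, format the prompt with its chat template rather than passing a raw string. A sketch of (3) using the standard apply_chat_template API:

messages = [
    {"role": "user",
     "content": "Write a short and engaging blog post about travelling in Bohol Island."},
]

# Build the properly templated prompt for the Instruct model.
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_tensors="pt",
).to(quantized_model.device)

output = quantized_model.generate(
    input_ids,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    pad_token_id=tokenizer.eos_token_id,
)

# Decode only the newly generated tokens.
print(tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True))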