linglingdan committed on
Commit
a44f087
1 Parent(s): 1e50f4b

Upload run_language_modeling_task.py

Files changed (1)
  1. run_language_modeling_task.py +83 -0
run_language_modeling_task.py ADDED
@@ -0,0 +1,83 @@
"""Evaluate a base model and its GPTQ-quantized counterpart on the same
language modeling task, so the two results can be compared side by side."""

from argparse import ArgumentParser

import datasets
import torch
from transformers import AutoTokenizer

from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
from auto_gptq.eval_tasks import LanguageModelingTask


DATASET = "/root/ld/ld_project/AutoGPTQ/examples/quantization/dataset/raw_datasets"
# Alpaca-style template for samples that carry an "input" field; a
# MiniCPM-style chat template ("<用户>" means "user") for those that do not.
WITH_INPUT_TEMPLATE = "Instruction:\n{instruction}\n\nInput:\n{input}\n\nOutput:\n"
WITHOUT_INPUT_TEMPLATE = "<s><用户>{instruction}<AI>Output:"


def ds_refactor_fn(samples):
    """Batched preprocessing: turn instruction/input/output columns into
    prompt/output columns using the templates above."""
    instruction_data = samples["instruction"]
    input_data = samples["input"]
    output_data = samples["output"]

    new_samples = {"prompt": [], "output": []}
    for instruction_txt, input_txt, output_txt in zip(instruction_data, input_data, output_data):
        if input_txt:
            prompt = WITH_INPUT_TEMPLATE.format(instruction=instruction_txt, input=input_txt)
        else:
            prompt = WITHOUT_INPUT_TEMPLATE.format(instruction=instruction_txt)
        new_samples["prompt"].append(prompt)
        new_samples["output"].append(output_txt)

    return new_samples


def main():
    parser = ArgumentParser()
    parser.add_argument("--base_model_dir", type=str, default="/root/ld/ld_model_pretrained/miniCPM-bf16")
    parser.add_argument(
        "--quantized_model_dir",
        type=str,
        default="/root/ld/ld_project/AutoGPTQ/examples/quantization/minicpm_2b_4bit",
    )
    parser.add_argument(
        "--num_samples",
        type=int,
        default=200,
        help="how many samples will be drawn for evaluation",
    )
    parser.add_argument("--sample_max_len", type=int, default=1024, help="max tokens for each sample")
    parser.add_argument("--block_max_len", type=int, default=2048, help="max tokens for each data block")
    parser.add_argument("--use_triton", action="store_true")
    args = parser.parse_args()

    tokenizer = AutoTokenizer.from_pretrained(args.base_model_dir)

    # Evaluate the unquantized base model first.
    model = AutoGPTQForCausalLM.from_pretrained(args.base_model_dir, BaseQuantizeConfig(), trust_remote_code=True)
    model.to("cuda:0")

    task = LanguageModelingTask(
        model=model,
        tokenizer=tokenizer,
        data_name_or_path=DATASET,
        prompt_col_name="prompt",
        label_col_name="output",
        num_samples=args.num_samples,  # how many samples will be drawn for evaluation
        sample_max_len=args.sample_max_len,  # max tokens for each sample
        block_max_len=args.block_max_len,  # max tokens for each data block
        load_fn=datasets.load_dataset,  # function used to load the dataset
        preprocess_fn=ds_refactor_fn,  # function used to preprocess the dataset
        truncate_prompt=False,  # truncate the label, not the prompt, when a sample exceeds sample_max_len
    )

    print(f"eval result for base model: {task.run()}")

    # Release the base model from GPU memory before loading the quantized one.
    task.model = None
    model.cpu()
    del model
    torch.cuda.empty_cache()

    # Re-run the same task with the GPTQ-quantized model for comparison.
    model = AutoGPTQForCausalLM.from_quantized(
        args.quantized_model_dir, device="cuda:0", use_triton=args.use_triton, trust_remote_code=True
    )
    task.model = model
    task.device = model.device
    print(f"eval result for quantized model: {task.run()}")


if __name__ == "__main__":
    main()
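
For reference, a minimal sketch of how ds_refactor_fn reshapes one batched slice of an alpaca-style dataset. The texts below are made-up placeholders, and ds_refactor_fn plus the two templates are assumed to come from the script above:

# A toy batch in the datasets "batched" format (column name -> list of values);
# ds_refactor_fn, WITH_INPUT_TEMPLATE and WITHOUT_INPUT_TEMPLATE are assumed
# to be in scope from run_language_modeling_task.py.
batch = {
    "instruction": ["Summarize the passage.", "Say hello."],
    "input": ["AutoGPTQ quantizes causal language models.", ""],
    "output": ["AutoGPTQ does low-bit quantization.", "Hello!"],
}
refactored = ds_refactor_fn(batch)
assert refactored["prompt"][0].startswith("Instruction:")  # non-empty input -> WITH_INPUT_TEMPLATE
assert refactored["prompt"][1].startswith("<s><用户>")      # empty input -> WITHOUT_INPUT_TEMPLATE
assert refactored["output"] == batch["output"]             # labels pass through unchanged

The script itself is run as a CLI, e.g. python run_language_modeling_task.py --num_samples 100 --use_triton, with the two model directories overridable via --base_model_dir and --quantized_model_dir.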