AIDXteam committed on
Commit
ca800c4
•
1 Parent(s): 00fc7e4

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +87 -10
README.md CHANGED
@@ -61,13 +61,52 @@ tags:
61
 
62
  # ❺ 사용 방법
63
  <pre><code>
64
- from transformers import AutoModel, AutoTokenizer
 
 
 
 
 
 
 
 
65
 
66
- tokenizer = AutoTokenizer.from_pretrained("")
67
- model = AutoModel.from_pretrained("")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
- inputs = tokenizer("안녕하세요", return_tensors="pt")
70
- outputs = model(**inputs)
71
  </code></pre>
72
 
73
  ## ✅ ktds는 openchat 외에도 LlaMA, Polyglot, EEVE 등 대표적인 LLM에 다양한 영역의 한국의 문화와 지식을 파인튜닝한 LLM을 제공할 예정입니다.
@@ -130,13 +169,51 @@ may produce biased responses if trained on biased data.
130
 
131
  # ❺ Usage Instructions
132
  <pre><code>
133
- from transformers import AutoModel, AutoTokenizer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
 
135
- tokenizer = AutoTokenizer.from_pretrained("")
136
- model = AutoModel.from_pretrained("")
137
 
138
- inputs = tokenizer("Hello?", return_tensors="pt")
139
- outputs = model(**inputs)
140
  </code></pre>
141
 
142
  ## KTDS plans to provide fine-tuned LLMs (Large Language Models) across various domains of Korean culture and knowledge,
 
61
 
62
  # ❺ 사용 방법
63
  <pre><code>
64
+ import os
65
+ import os.path as osp
66
+ import sys
67
+ import fire
68
+ import json
69
+ from typing import List, Union
70
+ import pandas as pd
71
+ import torch
72
+ from torch.nn import functional as F
73
 
74
+ import transformers
75
+ from transformers import TrainerCallback, TrainingArguments, TrainerState, TrainerControl, BitsAndBytesConfig
76
+ from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
77
+ from transformers import LlamaForCausalLM, LlamaTokenizer
78
+ from transformers import AutoModelForCausalLM, AutoTokenizer
79
+
80
+ from datasets import load_dataset
81
+
82
+ from peft import (
83
+ LoraConfig,
84
+ get_peft_model,
85
+ set_peft_model_state_dict
86
+ )
87
+ from peft import PeftModel
88
+ import re
89
+ import ast
90
+
91
+ device = 'auto' #@param {type: "string"}
92
+ model = '' #@param {type: "string"}
93
+ model = AutoModelForCausalLM.from_pretrained(
94
+ model,
95
+ quantization_config=bnb_config,
96
+ #load_in_4bit=True, # Quantization Load
97
+ device_map=device)
98
+
99
+ tokenizer = AutoTokenizer.from_pretrained(base_LLM_model)
100
+
101
+ input_text = "안녕하세요."
102
+ inputs = tokenizer(input_text, return_tensors="pt")
103
+ inputs = inputs.to("cuda:0")
104
+
105
+ with torch.no_grad():
106
+ outputs = model.generate(**inputs, max_length=1024)
107
+
108
+ result = tokenizer.decode(outputs[0], skip_special_tokens=True)
109
 
 
 
110
  </code></pre>
111
 
112
  ## ✅ ktds는 openchat 외에도 LlaMA, Polyglot, EEVE 등 대표적인 LLM에 다양한 영역의 한국의 문화와 지식을 파인튜닝한 LLM을 제공할 예정입니다.
 
169
 
170
  # ❺ Usage Instructions
171
  <pre><code>
172
+ import os
173
+ import os.path as osp
174
+ import sys
175
+ import fire
176
+ import json
177
+ from typing import List, Union
178
+ import pandas as pd
179
+ import torch
180
+ from torch.nn import functional as F
181
+
182
+ import transformers
183
+ from transformers import TrainerCallback, TrainingArguments, TrainerState, TrainerControl, BitsAndBytesConfig
184
+ from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
185
+ from transformers import LlamaForCausalLM, LlamaTokenizer
186
+ from transformers import AutoModelForCausalLM, AutoTokenizer
187
+
188
+ from datasets import load_dataset
189
+
190
+ from peft import (
191
+ LoraConfig,
192
+ get_peft_model,
193
+ set_peft_model_state_dict
194
+ )
195
+ from peft import PeftModel
196
+ import re
197
+ import ast
198
+
199
+ device = 'auto' #@param {type: "string"}
200
+ model = '' #@param {type: "string"}
201
+ model = AutoModelForCausalLM.from_pretrained(
202
+ model,
203
+ quantization_config=bnb_config,
204
+ #load_in_4bit=True, # Quantization Load
205
+ device_map=device)
206
+
207
+ tokenizer = AutoTokenizer.from_pretrained(base_LLM_model)
208
+
209
+ input_text = "안녕하세요."
210
+ inputs = tokenizer(input_text, return_tensors="pt")
211
+ inputs = inputs.to("cuda:0")
212
 
213
+ with torch.no_grad():
214
+ outputs = model.generate(**inputs, max_length=1024)
215
 
216
+ result = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
217
  </code></pre>
218
 
219
  ## KTDS plans to provide fine-tuned LLMs (Large Language Models) across various domains of Korean culture and knowledge,