# Spaces status: Runtime error
import os | |
from pathlib import Path | |
from dataclasses import dataclass, asdict | |
from ctransformers import AutoModelForCausalLM, AutoConfig | |
@dataclass
class GenerationConfig:
    """Keyword arguments for one text-generation call.

    The instance is flattened with ``dataclasses.asdict`` and splatted into
    the model call as keyword arguments, so the field names must match the
    parameter names the model callable accepts.

    It has to be a dataclass: both ``asdict`` and the keyword-argument
    constructor used elsewhere in this file depend on it.
    """

    # Sampling controls, forwarded unchanged to the model call.
    temperature: float
    top_k: int
    top_p: float
    repetition_penalty: float
    max_new_tokens: int
    reset: bool  # reset history (cache)
    stream: bool  # streaming per word/token
    threads: int  # adjust for your CPU
    stop: list[str]  # stop strings, e.g. "<|endoftext|>"
def format_prompt(user_prompt: str) -> str:
    """Wrap *user_prompt* in the instruction/response prompt template.

    Args:
        user_prompt: The raw instruction text supplied by the caller.

    Returns:
        The template text with ``user_prompt`` placed between the
        ``### Instruction:`` header and the ``### Response:`` header.
    """
    # NOTE(review): this file's formatting was mangled in extraction; if the
    # original template had a blank line before "### Response:", restore it.
    return f"""### Instruction:
{user_prompt}
### Response:"""
def generate(
    llm: AutoModelForCausalLM,
    generation_config: GenerationConfig,
    prompt: str,
):
    """Run *llm* on the templated prompt using the given generation settings.

    Args:
        llm: The loaded model; it is invoked as a callable.
        generation_config: Settings that are expanded into keyword arguments
            for the model call.
        prompt: Raw user instruction; it is passed through ``format_prompt``
            before being handed to the model.

    Returns:
        Whatever the model call returns.
    """
    templated = format_prompt(prompt)
    call_kwargs = asdict(generation_config)
    return llm(templated, **call_kwargs)
def generate_code(prompt, model_name, max_tokens, temperature, *, from_local=False):
    """Load the model, generate a completion for *prompt*, and return it.

    Args:
        prompt: Raw user instruction to complete.
        model_name: Model identifier. When ``from_local`` is false it is used
            directly as both the config path and the model path.
        max_tokens: Passed to the model as ``max_new_tokens``.
        temperature: Sampling temperature passed to the model.
        from_local: When true, resolve the config from
            ``models/<first path component of model_name>`` and the weights
            from ``models/<model_name>.bin`` (both made absolute). Defaults
            to ``False``, which matches the previous hard-coded behavior.

    Returns:
        The concatenation of every piece yielded by the model call.
    """
    if from_local:
        config_folder = model_name.split("/")[0]
        config_path = os.path.abspath(f"models/{config_folder}")
        model_path = os.path.abspath(f"models/{model_name}.bin")
    else:
        config_path = model_name
        model_path = model_name

    config = AutoConfig.from_pretrained(config_path)
    llm = AutoModelForCausalLM.from_pretrained(
        model_path,
        model_type="replit",
        config=config,
    )

    generation_config = GenerationConfig(
        temperature=temperature,
        top_k=50,
        top_p=0.9,
        repetition_penalty=1.0,
        max_new_tokens=max_tokens,  # adjust as needed
        reset=True,  # reset history (cache)
        stream=True,  # streaming per word/token
        threads=os.cpu_count(),  # adjust for your CPU
        stop=["<|endoftext|>"],
    )

    # Echo each piece as it arrives and collect them, joining once at the end
    # rather than growing a string with repeated "+=".
    pieces = []
    for piece in generate(llm, generation_config, prompt):
        print(piece)
        pieces.append(piece)
    return "".join(pieces)