# Scraped page header (not source code), preserved as comments so the module parses:
#   Spaces: Runtime error / Runtime error
#   File size: 1,731 Bytes
#   Commits: c82eb8a 6cd98fb
# (line-number gutter 1-71 removed)
import os
from pathlib import Path
from dataclasses import dataclass, asdict
from ctransformers import AutoModelForCausalLM, AutoConfig
@dataclass
class GenerationConfig:
    """Decoding/sampling parameters forwarded verbatim (via ``asdict``) to the
    ctransformers LLM call — see ``generate`` below."""
    temperature: float          # sampling temperature; lower = more deterministic
    top_k: int                  # keep only the k most likely next tokens
    top_p: float                # nucleus sampling probability mass cutoff
    repetition_penalty: float   # >1.0 penalizes repeating earlier tokens
    max_new_tokens: int         # hard cap on generated token count
    reset: bool                 # reset the model's history/cache before generating
    stream: bool                # yield tokens incrementally instead of one string
    threads: int                # CPU threads used for inference
    stop: list[str]             # strings that terminate generation when produced
def format_prompt(user_prompt: str):
    """Wrap *user_prompt* in the instruction/response template the model expects."""
    header = "### Instruction:\n"
    footer = "\n### Response:"
    return header + user_prompt + footer
def generate(llm: AutoModelForCausalLM,
             generation_config: GenerationConfig,
             prompt: str):
    """Run *llm* on the instruction-formatted *prompt*, expanding
    *generation_config* into keyword arguments for the model call."""
    formatted_prompt = format_prompt(prompt)
    options = asdict(generation_config)
    return llm(formatted_prompt, **options)
def generate_code(prompt, model_name, max_tokens, temperature, from_local=False):
    """Generate code from *prompt* with a replit-type ctransformers model.

    Streams tokens to stdout as they are produced and returns the full
    generated text.

    Parameters:
        prompt: instruction text for the model.
        model_name: Hub id ("org/name") or, when ``from_local`` is True, the
            relative path stem under ``models/`` (weights at
            ``models/<model_name>.bin``, config at ``models/<org>/``).
        max_tokens: cap on the number of newly generated tokens.
        temperature: sampling temperature.
        from_local: load model/config from the local ``models/`` directory
            instead of the Hub. Defaults to False (previously a hard-coded
            local flag).

    Returns:
        The generated text as a single string.
    """
    model_path = model_name
    config_path = model_name
    if from_local:
        # Local layout: models/<org>/ holds the config, models/<model_name>.bin the weights.
        config_folder = model_name.split("/")[0]
        config_path = os.path.abspath(f"models/{config_folder}")
        model_path = os.path.abspath(f"models/{model_name}.bin")
    config = AutoConfig.from_pretrained(
        config_path,
    )
    llm = AutoModelForCausalLM.from_pretrained(
        model_path,
        model_type="replit",
        config=config,
    )
    generation_config = GenerationConfig(
        temperature=temperature,
        top_k=50,
        top_p=0.9,
        repetition_penalty=1.0,
        max_new_tokens=max_tokens,  # adjust as needed
        reset=True,                 # reset history (cache)
        stream=True,                # streaming per word/token
        threads=os.cpu_count(),     # adjust for your CPU
        stop=["<|endoftext|>"],
    )
    # Accumulate tokens in a list and join once — avoids quadratic str +=.
    pieces = []
    for token in generate(llm, generation_config, prompt):
        # end="" so the stream renders as continuous text rather than one token per line
        print(token, end="", flush=True)
        pieces.append(token)
    return "".join(pieces)
|