Ventsislav Muchinov committed
Commit df914b7
Parent: 1f2e6c9

Upload app.py

Files changed (1): app.py (+19, -34)
app.py CHANGED
@@ -12,30 +12,30 @@ DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 ACCESS_TOKEN = os.getenv("HF_TOKEN", "")
 
-model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
-model = AutoModelForCausalLM.from_pretrained(
-    model_id,
-    torch_dtype=torch.float16,
-    device_map="auto",
-    trust_remote_code=True,
-    token=ACCESS_TOKEN)
-tokenizer = AutoTokenizer.from_pretrained(
-    model_id,
-    trust_remote_code=True,
-    token=ACCESS_TOKEN)
-tokenizer.use_default_system_prompt = False
-
-
 @spaces.GPU
 def generate(
+    model: str,
     message: str,
     system_prompt: str,
     max_new_tokens: int = 1024,
     temperature: float = 0.01,
     top_p: float = 0.01,
-    top_k: int = 50,
-    repetition_penalty: float = 1.0,
 ) -> Iterator[str]:
+
+    model_id = model
+    model = AutoModelForCausalLM.from_pretrained(
+        model_id,
+        torch_dtype=torch.float16,
+        device_map="auto",
+        trust_remote_code=True,
+        token=ACCESS_TOKEN)
+    tokenizer = AutoTokenizer.from_pretrained(
+        model_id,
+        trust_remote_code=True,
+        token=ACCESS_TOKEN)
+    tokenizer.use_default_system_prompt = False
+
+
     conversation = []
     if system_prompt:
         conversation.append({"role": "system", "content": system_prompt})
@@ -60,10 +60,8 @@ def generate(
         eos_token_id=terminators,
         do_sample=True,
         top_p=top_p,
-        top_k=top_k,
         temperature=temperature,
         num_beams=1,
-        repetition_penalty=repetition_penalty,
     )
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
@@ -77,6 +75,7 @@ def generate(
 chat_interface = gr.Interface(
     fn=generate,
     inputs=[
+        gr.Textbox(lines=1, placeholder="Model", label="Model name"),
         gr.Textbox(lines=2, placeholder="Prompt", label="Prompt"),
     ],
     outputs="text",
@@ -102,24 +101,10 @@ chat_interface = gr.Interface(
             maximum=1.0,
            step=0.01,
            value=0.01,
-        ),
-        gr.Slider(
-            label="Top-k",
-            minimum=1,
-            maximum=1000,
-            step=1,
-            value=50,
-        ),
-        gr.Slider(
-            label="Repetition penalty",
-            minimum=1.0,
-            maximum=2.0,
-            step=0.05,
-            value=1.0,
-        ),
+        ),
     ],
     title="Model testing",
    description="Provide system settings and a prompt to interact with the model.",
 )
 
-chat_interface.queue(max_size=20).launch(share = True)
+chat_interface.queue(max_size=20).launch()
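
Taken together, the hunks move model loading out of module scope and into the request handler, so the checkpoint is chosen at request time from the new "Model name" textbox. For orientation, below is a minimal sketch of how the revised generate() plausibly fits together. The diff does not show the file's imports, the tokenization step, the streamer, or where terminators comes from, so those parts (TextIteratorStreamer, apply_chat_template, and the streamer/input_ids names) are assumptions based on the standard transformers streaming pattern, not the file's actual code.

    # Sketch only: everything not shown in the diff is assumed, following the
    # usual transformers streaming pattern.
    import os
    from threading import Thread
    from typing import Iterator

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

    ACCESS_TOKEN = os.getenv("HF_TOKEN", "")


    def generate(
        model: str,
        message: str,
        system_prompt: str,
        max_new_tokens: int = 1024,
        temperature: float = 0.01,
        top_p: float = 0.01,
    ) -> Iterator[str]:
        # After this commit the checkpoint is loaded per request, from whatever
        # repo id the user typed into the "Model name" textbox. That makes the
        # app model-agnostic, at the cost of a full load on every call.
        model_id = model
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,
            token=ACCESS_TOKEN)
        tokenizer = AutoTokenizer.from_pretrained(
            model_id,
            trust_remote_code=True,
            token=ACCESS_TOKEN)
        tokenizer.use_default_system_prompt = False

        conversation = []
        if system_prompt:
            conversation.append({"role": "system", "content": system_prompt})
        conversation.append({"role": "user", "content": message})

        # Assumed: the hidden middle of the file tokenizes the conversation and
        # builds `terminators`; a typical version looks like this.
        input_ids = tokenizer.apply_chat_template(
            conversation, add_generation_prompt=True, return_tensors="pt"
        ).to(model.device)
        terminators = [tokenizer.eos_token_id]

        streamer = TextIteratorStreamer(
            tokenizer, skip_prompt=True, skip_special_tokens=True)
        generate_kwargs = dict(
            input_ids=input_ids,
            streamer=streamer,
            max_new_tokens=max_new_tokens,
            eos_token_id=terminators,
            do_sample=True,
            top_p=top_p,
            temperature=temperature,
            num_beams=1,
        )
        # model.generate runs on a worker thread; the streamer yields text
        # chunks so the Gradio UI can render partial output as it arrives.
        t = Thread(target=model.generate, kwargs=generate_kwargs)
        t.start()

        outputs = []
        for text in streamer:
            outputs.append(text)
            yield "".join(outputs)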
 
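One note on the final hunk: queue(max_size=20) caps the pending-request queue at 20 events, and dropping share=True is reasonable for an app running on Spaces, where the interface is already served at a public URL; the flag only exists to open a temporary gradio.live tunnel from a locally running app. The resulting launch call, with that reasoning inline:

    # Bound the request queue at 20; on Spaces the app is already publicly
    # reachable, so the temporary share link that share=True would create
    # is unnecessary and a plain launch() suffices.
    chat_interface.queue(max_size=20).launch()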