xiaoheiqaq committed on
Commit
f18e470
1 Parent(s): e615317

initial commit

Browse files
Files changed (2) hide show
  1. .gitignore +3 -0
  2. app.py +87 -59
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ .venv
2
+ .env
3
+ prompt.txt
app.py CHANGED
@@ -1,63 +1,91 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
-
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
-
9
-
10
- def respond(
11
- message,
12
- history: list[tuple[str, str]],
13
- system_message,
14
- max_tokens,
15
- temperature,
16
- top_p,
17
- ):
18
- messages = [{"role": "system", "content": system_message}]
19
-
20
- for val in history:
21
- if val[0]:
22
- messages.append({"role": "user", "content": val[0]})
23
- if val[1]:
24
- messages.append({"role": "assistant", "content": val[1]})
25
-
26
- messages.append({"role": "user", "content": message})
27
-
28
- response = ""
29
-
30
- for message in client.chat_completion(
31
- messages,
32
- max_tokens=max_tokens,
33
- stream=True,
34
- temperature=temperature,
35
- top_p=top_p,
36
- ):
37
- token = message.choices[0].delta.content
38
-
39
- response += token
40
- yield response
41
-
42
- """
43
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
44
- """
45
- demo = gr.ChatInterface(
46
- respond,
47
- additional_inputs=[
48
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
49
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
50
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
51
- gr.Slider(
52
- minimum=0.1,
53
- maximum=1.0,
54
- value=0.95,
55
- step=0.05,
56
- label="Top-p (nucleus sampling)",
57
- ),
58
- ],
59
- )
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
  if __name__ == "__main__":
63
- demo.launch()
 
1
  import gradio as gr
2
+ import os
3
+ import requests
4
+ import dotenv
5
+ dotenv.load_dotenv()
6
+ from openai import OpenAI
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
# Runtime configuration, read once at import time from the process
# environment. Any variable that is not set yields None.
model = os.environ.get('MODEL')
api_key = os.environ.get('API_KEY')
username = os.environ.get('USERNAME')
password = os.environ.get('PASSWORD')
system_prompt_text = os.environ.get('SYSTEM_PROMPT')

# Single shared API client used by every chat request.
client = OpenAI(api_key=api_key)
15
+
16
def predict(message, history, max_new_tokens, top_k, top_p, temperature):
    """Stream a chat completion for ``message`` given the earlier ``history``.

    Args:
        message: The latest user message.
        history: Earlier turns as ``(user, assistant)`` pairs.
        max_new_tokens: Upper bound on generated tokens; forwarded to the API
            as ``max_tokens`` when it is not ``None``.
        top_k: Accepted so the signature matches the UI inputs, but not
            forwarded: the OpenAI chat completions endpoint has no top-k
            parameter.
        top_p: Nucleus-sampling threshold, forwarded unchanged.
        temperature: Sampling temperature, forwarded unchanged.

    Yields:
        The response text accumulated so far, once per streamed chunk that
        carries content.
    """
    # Reads the module-level system prompt and model; no `global` statement
    # is needed because neither name is assigned here.
    history_openai_format = [{"role": "system", "content": system_prompt_text}]
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": f"instruction: {human}"})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": f"instruction: {message}"})

    print(history_openai_format)

    request_kwargs = {
        "model": model,
        "messages": history_openai_format,
        "top_p": top_p,
        "temperature": temperature,
        "stream": True,
    }
    # Previously the slider value was silently ignored; the API expects an
    # integer token count.
    if max_new_tokens is not None:
        request_kwargs["max_tokens"] = int(max_new_tokens)

    response = client.chat.completions.create(**request_kwargs)

    partial_message = ""
    for chunk in response:
        # Some stream chunks carry no choices at all; skip them instead of
        # failing on choices[0].
        if not chunk.choices:
            continue
        delta_text = chunk.choices[0].delta.content
        if delta_text is not None:
            partial_message += delta_text
            yield partial_message
37
+
38
+
39
+
40
def update_system_prompt(new_content):
    """Replace the module-level system prompt with ``new_content``.

    Args:
        new_content: The new system prompt text.
    """
    # Equivalent to `global system_prompt_text; system_prompt_text = ...`.
    globals()["system_prompt_text"] = new_content
43
+
44
# Build the UI: four sampling sliders (created with render=False and handed
# to the chat interface as additional inputs), the chat interface itself,
# and an editable system-prompt textbox.
with gr.Blocks(fill_height=True) as demo:
    max_new_tokens_slider = gr.Slider(
        minimum=1,
        maximum=500,
        value=50,
        step=1,
        label="Max New Tokens (The maximum number of tokens to generate in the response. This limits the length of the generated text.)",
        render=False,
    )

    top_k_slider = gr.Slider(
        minimum=0,
        maximum=100,
        value=50,
        step=1,
        label="Top K (The number of highest probability vocabulary tokens to keep for top-k filtering. This controls the diversity of the generated text by limiting the number of token options at each step.)",
        render=False,
    )

    top_p_slider = gr.Slider(
        minimum=0.0,
        maximum=1.0,
        value=1.0,
        step=0.01,
        label="Top P (The cumulative probability threshold for nucleus sampling. This controls the diversity of the generated text by sampling tokens from the smallest possible set whose cumulative probability is above the threshold.)",
        render=False,
    )

    temperature_slider = gr.Slider(
        minimum=0.0,
        maximum=2.0,
        value=0.9,
        step=0.01,
        label="Temperature (The sampling temperature to use. This controls the randomness of predictions by scaling the logits before applying softmax. Lower values make the model more deterministic, while higher values increase diversity.)",
        render=False,
    )

    gr.ChatInterface(
        predict,
        cache_examples=False,
        additional_inputs=[
            max_new_tokens_slider,
            top_k_slider,
            top_p_slider,
            temperature_slider,
        ],
        # Each example row is the prompt followed by one None per
        # additional input.
        examples=[
            [question, None, None, None, None]
            for question in (
                "I'm in a bad mood.",
                "Do you have any hobbies or interests outside of work?",
                "Who created you?",
                "Please introduce yourself.",
                "Do you have any plans for the future?",
                "Does Emi play the piano?",
                "Can you feel pain?",
                "Do you feel like AI?",
                "Can you work 24/7?",
                "Do you ever update?",
            )
        ],
    )

    # Editing this textbox pushes the new text into the module-level prompt.
    system_prompt = gr.Textbox(
        value=system_prompt_text,
        info="System Message:",
        placeholder="你是Emi",
        interactive=True,
        lines=5,
    )
    system_prompt.change(fn=update_system_prompt, inputs=system_prompt)

if __name__ == "__main__":
    # The login credentials come from the USERNAME / PASSWORD settings.
    demo.launch(auth=(username, password))