kanhatakeyama misdelivery committed on
Commit d00978d
1 parent: f5bd293

Add features for saving inputs/outputs, rating responses, and uploading the dataset (#1)


- Update app.py (e9a3cc77b668e43928dda52e52c45b92ee5cba05)
- Update app.py (102675c1baee0e0c9c538b32adc079e5de4d56a9)


Co-authored-by: Kanta Hayashi <[email protected]>
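
For context on the logging format: each chat turn is appended as one JSON Lines record by `save_or_update_conversation` (see the diff below), and the local folder is mirrored to the dataset repo by `CommitScheduler`. The snippet below is a minimal sketch, not part of this commit, showing how such records could be read back locally; the `user_feedback/` path and the field names come from the diff, everything else is illustrative.

```python
# Minimal sketch (not part of this commit): reading back the JSONL records
# written by save_or_update_conversation. The glob pattern matches the
# user_feedback/data_<uuid>.json files that CommitScheduler mirrors to the
# dataset repo under data/.
import json
from pathlib import Path

records = []
for path in Path("user_feedback").glob("data_*.json"):
    with path.open("r", encoding="utf-8") as f:
        for line in f:
            if line.strip():
                records.append(json.loads(line))

# Each record carries: id (conversation UUID), timestamp (Asia/Tokyo, ISO 8601),
# prompt, completion, message_index, and label (True/False from like/dislike, else None).
liked = [r for r in records if r.get("label") is True]
print(f"{len(records)} turns logged, {len(liked)} liked")
```

Records with `label` set come from the like/dislike buttons handled by `vote`; records written during generation leave `label` as null.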

Files changed (1)
  1. app.py +85 -21
app.py CHANGED
@@ -1,7 +1,13 @@
 import gradio as gr
-# from huggingface_hub import InferenceClient
 from openai import OpenAI
 import os
+import json
+from datetime import datetime
+from zoneinfo import ZoneInfo
+import uuid
+from pathlib import Path
+from huggingface_hub import CommitScheduler
+
 openai_api_key = os.getenv('api_key')
 openai_api_base = os.getenv('url')
 model_name = "weblab-GENIAC/Tanuki-8x8B-dpo-v1.0"
@@ -14,6 +20,54 @@ client = OpenAI(
     base_url=openai_api_base,
 )
 
+# Define the file where to save the data. Use UUID to make sure not to overwrite existing data from a previous run.
+feedback_file = Path("user_feedback/") / f"data_{uuid.uuid4()}.json"
+feedback_folder = feedback_file.parent
+
+# Schedule regular uploads. Remote repo and local folder are created if they don't already exist.
+scheduler = CommitScheduler(
+    repo_id="kanhatakeyama/TanukiChat",  # Replace with your actual repo ID
+    repo_type="dataset",
+    folder_path=feedback_folder,
+    path_in_repo="data",
+    every=1,  # Upload every 1 minutes
+)
+
+def save_or_update_conversation(conversation_id, message, response, message_index, liked=None):
+    """
+    Save or update conversation data in a JSON Lines file.
+    If the entry already exists (same id and message_index), update the 'label' field.
+    Otherwise, append a new entry.
+    """
+    with scheduler.lock:
+        # Read existing data
+        data = []
+        if feedback_file.exists():
+            with feedback_file.open("r") as f:
+                data = [json.loads(line) for line in f if line.strip()]
+
+        # Find if an entry with the same id and message_index exists
+        entry_index = next((i for i, entry in enumerate(data) if entry['id'] == conversation_id and entry['message_index'] == message_index), None)
+
+        if entry_index is not None:
+            # Update existing entry
+            data[entry_index]['label'] = liked
+        else:
+            # Append new entry
+            data.append({
+                "id": conversation_id,
+                "timestamp": datetime.now(ZoneInfo("Asia/Tokyo")).isoformat(),
+                "prompt": message,
+                "completion": response,
+                "message_index": message_index,
+                "label": liked
+            })
+
+        # Write updated data back to file
+        with feedback_file.open("w") as f:
+            for entry in data:
+                f.write(json.dumps(entry) + "\n")
+
 
 def respond(
     message,
@@ -35,8 +89,7 @@ def respond(
     messages.append({"role": "user", "content": message})
 
     response = ""
-
-    for message in client.chat.completions.create(
+    for chunk in client.chat.completions.create(
         model=model_name,
         messages=messages,
         max_tokens=max_tokens,
@@ -44,24 +97,33 @@
         temperature=temperature,
         top_p=top_p,
     ):
-        token = message.choices[0].delta.content
-
-        # response += token
-        if token is not None:
-            response += (token)
-            if response.find("### 指示:")>0:
-                response=response.replace("### 指示:","")
-                break
+        if chunk.choices[0].delta.content is not None:
+            response += chunk.choices[0].delta.content
         yield response
-
-
+
+    # Save conversation after the full response is generated
+    message_index = len(history)
+    save_or_update_conversation(conversation_id, message, response, message_index)
+
+def vote(data: gr.LikeData, history, conversation_id):
+    """
+    Update user feedback (like/dislike) in the local file.
+    """
+    message_index = data.index[0]
+    liked = data.liked
+    save_or_update_conversation(conversation_id, None, None, message_index, liked)
+
+def create_conversation_id():
+    return str(uuid.uuid4())
+
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 """
 
 description = """
 ### [Tanuki-8x8B-dpo-v1.0](https://huggingface.co/weblab-GENIAC/Tanuki-8x8B-dpo-v1.0)との会話(期間限定での公開)
-- 人工知能開発のため、原則として**このChatBotの入出力データは全て著作権フリー(CC0)で公開予定です**ので、ご注意ください。著作物、個人情報、機密情報、誹謗中傷などのデータを入力しないでください。
+- 人工知能開発のため、原則として**このChatBotの入出力データは全て著作権フリー(CC0)で公開する**ため、ご注意ください。著作物、個人情報、機密情報、誹謗中傷などのデータを入力しないでください。
+- データセットはこちらで公開しています。 https://huggingface.co/datasets/kanhatakeyama/TanukiChat
 - **上記の条件に同意する場合のみ**、以下のChatbotを利用してください。
 """
@@ -71,8 +133,8 @@ FOOTER = """### 注意
 - コンテクスト長が4096までなので、あまり会話が長くなると、エラーで停止します。ページを再読み込みしてください。
 - GPUサーバーが不安定なので、応答しないことがあるかもしれません。"""
 
-
 def run():
+    conversation_id = gr.State(create_conversation_id)
     chatbot = gr.Chatbot(
         elem_id="chatbot",
         scale=1,
@@ -82,7 +144,7 @@ def run():
     )
    with gr.Blocks(fill_height=True) as demo:
         gr.Markdown(HEADER)
-        gr.ChatInterface(
+        chat_interface = gr.ChatInterface(
             fn=respond,
             stop_btn="Stop Generation",
             cache_examples=False,
@@ -92,9 +154,11 @@ def run():
                 label="Parameters", open=False, render=False
             ),
             additional_inputs=[
-                gr.Textbox(value="以下は、タスクを説明する指示です。要求を適切に満たす応答を書きなさい。",
-                           label="System message(試験用: 変えると出力が壊れる可能性)",
+            additional_inputs=[
+                gr.Textbox(value="以下は、タスクを説明する指示です。要求を適切に満たす応答を書きなさい。",
+                           label="System message(試験用: 変えると性能が低下する可能性があります。)",
                            render=False,),
+                conversation_id,
                 gr.Slider(
                     minimum=1,
                     maximum=4096,
@@ -125,10 +189,10 @@ def run():
             ],
            analytics_enabled=False,
         )
+        chatbot.like(vote, [chatbot, conversation_id], None)
         gr.Markdown(FOOTER)
-    demo.queue(max_size=256, api_open=False)
-    demo.launch(share=False, quiet=True)
-
+    demo.queue(max_size=256, api_open=True)
+    demo.launch(share=True, quiet=True)
 
 if __name__ == "__main__":
     run()
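
Taken together, the upload side of this change follows the standard `huggingface_hub.CommitScheduler` pattern: write JSON Lines files into a local folder while holding `scheduler.lock`, and let the scheduler commit that folder to a dataset repo in the background. Below is a minimal, self-contained sketch of that pattern, not taken from the commit; the repo id is a placeholder and running it requires being logged in to the Hub.

```python
# Illustrative sketch of the CommitScheduler + JSONL logging pattern used above.
# "your-username/your-dataset" is a placeholder repo id.
import json
import uuid
from pathlib import Path

from huggingface_hub import CommitScheduler

log_file = Path("demo_logs") / f"data_{uuid.uuid4()}.json"
log_file.parent.mkdir(parents=True, exist_ok=True)

# Pushes the contents of demo_logs/ to the dataset repo (under data/) every minute.
scheduler = CommitScheduler(
    repo_id="your-username/your-dataset",  # placeholder
    repo_type="dataset",
    folder_path=log_file.parent,
    path_in_repo="data",
    every=1,  # minutes
)

def log_turn(prompt: str, completion: str) -> None:
    # Append one JSON Lines record per chat turn, guarded by the scheduler's lock
    # so a background commit never uploads a half-written line.
    with scheduler.lock:
        with log_file.open("a", encoding="utf-8") as f:
            f.write(json.dumps({"prompt": prompt, "completion": completion}, ensure_ascii=False) + "\n")

log_turn("こんにちは", "こんにちは!今日はどんなご用件ですか?")
```

Holding `scheduler.lock` while writing is the same reason `save_or_update_conversation` takes the lock in the diff above: it keeps the periodic upload from reading the feedback file mid-write.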