support gemma
main.py CHANGED

```diff
@@ -48,6 +48,14 @@ def proxy():
     model = json_data['model']
     chat_api = f"https://api-inference.huggingface.co/models/{model}/v1/chat/completions"
 
+    # gemma does not support system prompt
+    # add system prompt before user message
+    if model.startswith('google/gemma') and json_data["messages"][0]['role']=='system':
+        system_prompt = json_data["messages"][0]['content']
+        first_user_content = json_data["messages"][1]['content']
+        json_data["messages"][1]['content'] = f'System: {system_prompt}\n\n---\n\n{first_user_content}'
+        json_data["messages"] = json_data["messages"][1:]
+
     # Try to use the largest ctx
     if not 'max_tokens' in json_data:
         json_data['max_tokens'] = 2**32-1
@@ -59,7 +67,7 @@ def proxy():
         inputs = int(info.split("Given: ")[1].split("`")[0])
         json_data['max_tokens'] = max_ctx - inputs - 1
     except Exception as e:
-        print(
+        print(info)
 
     if not 'seed' in json_data:
         json_data['seed'] = random.randint(1,2**32)
```
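The gemma workaround in the first hunk is self-contained enough to lift out: when the first message is a system prompt, fold it into the first user message and drop it, since gemma's chat template rejects the `system` role. Below is a minimal standalone sketch of that step, assuming OpenAI-style message dicts; `fold_system_prompt` is a hypothetical helper name, not part of the Space's code.

```python
def fold_system_prompt(messages, prefix='System: '):
    """Merge a leading system message into the first user message.

    Mirrors the gemma workaround in the diff above. Like the original
    code, it assumes that if messages[0] is a system prompt, a second
    message exists to fold it into.
    """
    if not messages or messages[0]['role'] != 'system':
        return messages  # nothing to fold
    system_prompt = messages[0]['content']
    folded = dict(messages[1])  # copy so the caller's list is untouched
    folded['content'] = f"{prefix}{system_prompt}\n\n---\n\n{folded['content']}"
    return [folded] + messages[2:]

if __name__ == '__main__':
    msgs = [
        {'role': 'system', 'content': 'You are terse.'},
        {'role': 'user', 'content': 'Hi!'},
    ]
    print(fold_system_prompt(msgs))
    # [{'role': 'user', 'content': 'System: You are terse.\n\n---\n\nHi!'}]
```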
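The second hunk depends on a context-probing trick that is only partly visible here: the proxy first requests an oversized `max_tokens` (`2**32-1`), lets the backend reject it, then parses the prompt length out of the validation error and clamps `max_tokens` to the remaining context. The sketch below shows just that parsing step; the sample `info` string and `max_ctx` value are assumptions for illustration, not output captured from the real API.

```python
# Sketch of the error-parsing step from the second hunk. The error
# text below imitates a typical inference-backend validation message;
# the real format may differ, and max_ctx is assumed to have been
# recovered from the same error elsewhere in main.py.
info = ("Input validation error: `inputs` tokens + `max_new_tokens` "
        "must be <= 8192. Given: 57 `inputs` tokens and 4294967295 "
        "`max_new_tokens`")
max_ctx = 8192

# Take the number between "Given: " and the next backtick, as the
# diff does, then leave max_ctx - inputs - 1 tokens for generation.
inputs = int(info.split("Given: ")[1].split("`")[0])  # -> 57
max_tokens = max_ctx - inputs - 1                     # -> 8134
print(inputs, max_tokens)
```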