Uhhy committed on
Commit 3eeafd2
1 Parent(s): 0692f71

Update app.py

Files changed (1)
  1. app.py +35 -29
app.py CHANGED
@@ -1,12 +1,10 @@
-from fastapi import FastAPI, HTTPException, Request
 from pydantic import BaseModel
 from llama_cpp import Llama
 from concurrent.futures import ThreadPoolExecutor, as_completed
-import uvicorn
 import re
+import httpx
 from spaces import GPU
+import asyncio
-
-app = FastAPI()
 
 global_data = {
     'models': {},
@@ -44,6 +42,7 @@ model_configs = [
     {"repo_id": "Ffftdtd5dtft/Mistral-Nemo-Instruct-2407-Q2_K-GGUF", "filename": "mistral-nemo-instruct-2407-q2_k.gguf", "name": "Mistral Nemo Instruct 2407"}
 ]
 
+
 class ModelManager:
     def __init__(self):
         self.models = {}
@@ -83,7 +82,7 @@ def remove_duplicates(text):
             seen_lines.add(line)
     return '\n'.join(unique_lines)
 
-@GPU(duration=0)
+@GPU(duration=0)
 def generate_model_response(model, inputs):
     try:
         response = model(inputs)
@@ -92,30 +91,36 @@ def generate_model_response(model, inputs):
         print(f"Error generating model response: {e}")
         return ""
 
-@app.post("/generate")
-async def generate(request: ChatRequest):
-    try:
-        inputs = normalize_input(request.message)
-        with ThreadPoolExecutor() as executor:
-            futures = [
-                executor.submit(generate_model_response, model, inputs)
-                for model in global_data['models'].values()
-            ]
-            responses = [{'model': model_name, 'response': future.result()} for model_name, future in zip(global_data['models'].keys(), as_completed(futures))]
-        unique_responses = remove_repetitive_responses(responses)
-        return unique_responses
-    except Exception as e:
-        print(f"Error generating responses: {e}")
-        raise HTTPException(status_code=500, detail="Error generating responses")
+async def handle_request(request):
+    if request.method == "POST" and request.url.path == "/generate":
+        try:
+            chat_request = ChatRequest(**request.json())
+            inputs = normalize_input(chat_request.message)
+            with ThreadPoolExecutor() as executor:
+                futures = [
+                    executor.submit(generate_model_response, model, inputs)
+                    for model in global_data['models'].values()
+                ]
+                responses = [{'model': model_name, 'response': future.result()} for model_name, future in zip(global_data['models'].keys(), as_completed(futures))]
+            unique_responses = remove_repetitive_responses(responses)
+            return httpx.Response(status_code=200, json=unique_responses)
+        except Exception as e:
+            print(f"Error handling request: {e}")
+            return httpx.Response(status_code=500, json={"error": f"Error handling request: {e}"})
+
+    else:
+        return httpx.Response(status_code=404, text="Not Found")
+
+
+async def run_server(port: int):
+    async with httpx.AsyncClient(base_url=f"http://localhost:{port}") as client:
+        while True:
+            request = await client.get("/")  # You might need to adjust this based on your expected requests
+            response = await handle_request(request)
+            print(f"Received request: {request}")
+            print(f"Sending response: {response}")
+            await asyncio.sleep(1)  # Adjust the sleep duration as needed
 
-@app.middleware("http")
-async def process_request(request: Request, call_next):
-    try:
-        response = await call_next(request)
-        return response
-    except Exception as e:
-        print(f"Request error: {e}")
-        raise HTTPException(status_code=500, detail="Internal Server Error")
 
 def remove_repetitive_responses(responses):
     unique_responses = {}
@@ -125,4 +130,5 @@ def remove_repetitive_responses(responses):
     return unique_responses
 
 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=7860)
+    port = 7860
+    asyncio.run(run_server(port))
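
Note on the fan-out kept by this commit: in both the old generate endpoint and the new handle_request, zip(global_data['models'].keys(), as_completed(futures)) pairs model names with futures in completion order rather than submission order, so a fast model's response can be attributed to the wrong name. A minimal sketch of the conventional pairing pattern, reusing the repo's generate_model_response and global_data (the fan_out helper name is hypothetical):

from concurrent.futures import ThreadPoolExecutor, as_completed

def fan_out(inputs):
    # Key each future by the model name it was submitted for, so the
    # completion order cannot scramble the name/response pairing.
    with ThreadPoolExecutor() as executor:
        future_to_name = {
            executor.submit(generate_model_response, model, inputs): name
            for name, model in global_data['models'].items()
        }
        return [
            {'model': future_to_name[future], 'response': future.result()}
            for future in as_completed(future_to_name)
        ]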
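
Since httpx is an HTTP client library, the run_server loop issues outgoing requests rather than listening for incoming ones, so /generate is not served by this change alone. A minimal client-side sketch for exercising the endpoint once something is listening on port 7860, assuming ChatRequest carries the single message field used by normalize_input:

import httpx

# Hypothetical smoke test; the payload shape mirrors ChatRequest.message.
resp = httpx.post("http://localhost:7860/generate", json={"message": "Hello"}, timeout=60)
print(resp.status_code)
print(resp.json())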