Uhhy committed
Commit 14c6d65 • 1 Parent(s): f31f86e

Update app.py

Add @spaces.GPU(duration=0) to the model-loading methods, drop the unused difflib import, remove the debug print statements and the tqdm progress bar, silence per-task exceptions, and move the uvicorn server from port 7860 to port 8000.

Files changed (1)
  1. app.py +6 -14
app.py CHANGED
@@ -5,7 +5,6 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 from tqdm import tqdm
 import uvicorn
 from dotenv import load_dotenv
-from difflib import SequenceMatcher
 import re
 import spaces

@@ -44,30 +43,28 @@ class ModelManager:
         self.models = []
         self.loaded = False

+    @spaces.GPU(duration=0)
     def load_model(self, model_config):
         print(f"Cargando modelo: {model_config['name']}...")
         return {"model": Llama.from_pretrained(repo_id=model_config['repo_id'], filename=model_config['filename']), "name": model_config['name']}

+    @spaces.GPU(duration=0)
     def load_all_models(self):
         if self.loaded:
-            print("Modelos ya están cargados. No es necesario volver a cargarlos.")
             return self.models

-        print("Iniciando carga de modelos...")
         with ThreadPoolExecutor() as executor:
             futures = [executor.submit(self.load_model, config) for config in model_configs]
             models = []
-            for future in tqdm(as_completed(futures), total=len(model_configs), desc="Cargando modelos", unit="modelo"):
+            for future in as_completed(futures):
                 try:
                     model = future.result()
                     models.append(model)
-                    print(f"Modelo cargado exitosamente: {model['name']}")
                 except Exception as e:
-                    print(f"Error al cargar el modelo: {e}")
+                    pass

         self.models = models
         self.loaded = True
-        print("Todos los modelos han sido cargados.")
         return self.models

 model_manager = ModelManager()
@@ -118,7 +115,6 @@ def remove_repetitive_responses(responses):
     return unique_responses

 def select_best_response(responses):
-    print("Filtrando respuestas...")
     responses = remove_repetitive_responses(responses)
     responses = [remove_duplicates(response['response']) for response in responses]
     unique_responses = list(dict.fromkeys(responses))
@@ -130,8 +126,6 @@ async def generate_chat(request: ChatRequest):
     if not request.message.strip():
         raise HTTPException(status_code=400, detail="The message cannot be empty.")

-    print(f"Procesando solicitud: {request.message}")
-
     responses = []
     num_models = len(global_data['models'])

@@ -142,14 +136,12 @@ async def generate_chat(request: ChatRequest):
                 response = future.result()
                 responses.append(response)
             except Exception as exc:
-                print(f"Error en la generación de respuesta: {exc}")
+                pass

     if not responses:
         raise HTTPException(status_code=500, detail="Error: No se generaron respuestas.")

     best_response = select_best_response(responses)
-
-    print(f"Mejor respuesta seleccionada: {best_response}")

     return {
         "best_response": best_response,
@@ -157,4 +149,4 @@ async def generate_chat(request: ChatRequest):
     }

 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=7860)
+    uvicorn.run(app, host="0.0.0.0", port=8000)
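For context on the new decorators: spaces.GPU comes from Hugging Face's spaces package and, on ZeroGPU Spaces, requests a GPU slot for the duration of the decorated call. A minimal sketch of the pattern, assuming a ZeroGPU Space (duration is normally the expected runtime in seconds; the duration=0 used in this commit is an unusual value):

import spaces  # Hugging Face ZeroGPU helper; only takes effect inside a Space

@spaces.GPU(duration=60)  # request a GPU slot for up to ~60 seconds per call
def run_inference(prompt: str) -> str:
    # GPU-backed work (e.g. a llama.cpp generation) would run here
    return prompt.upper()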
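With the port change, clients now target 8000 instead of 7860. A minimal sketch of a request to the updated server, assuming the generate_chat handler is mounted at /chat (the route path is not visible in this diff) and that ChatRequest takes a single message field:

import requests

# "/chat" is a hypothetical path; only the handler name generate_chat,
# the message field, and the new port 8000 appear in the diff.
resp = requests.post("http://localhost:8000/chat", json={"message": "Hola"})
resp.raise_for_status()
print(resp.json()["best_response"])  # the handler returns {"best_response": ...}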