Tonic committed on
Commit
b3be2a9
•
1 Parent(s): 3aba01d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -3
app.py CHANGED
@@ -3,6 +3,8 @@ import torch
3
  import torch.nn.functional as F
4
  from torch import Tensor
5
  from transformers import AutoTokenizer, AutoModel
 
 
6
  import gradio as gr
7
  import os
8
 
@@ -33,6 +35,12 @@ tasks = {
33
  'TRECCOVID': 'Given a query on COVID-19, retrieve documents that answer the query',
34
  }
35
 
 
 
 
 
 
 
36
  tokenizer = AutoTokenizer.from_pretrained('intfloat/e5-mistral-7b-instruct')
37
  model = AutoModel.from_pretrained('intfloat/e5-mistral-7b-instruct', torch_dtype=torch.float16, device_map=device)
38
 
@@ -56,7 +64,24 @@ def load_corpus_from_json(file_path):
56
  with open(file_path, 'r') as file:
57
  data = json.load(file)
58
  return data
59
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  @spaces.GPU
61
  def compute_embeddings(selected_task, input_text):
62
  try:
@@ -160,8 +185,11 @@ def format_response(embeddings):
160
  }
161
 
162
  def generate_and_format_embeddings(selected_task, input_text):
163
- embeddings = compute_embeddings(selected_task, input_text)
164
- return format_response(embeddings)
 
 
 
165
 
166
  def app_interface():
167
  corpus_sentences = []
 
3
  import torch.nn.functional as F
4
  from torch import Tensor
5
  from transformers import AutoTokenizer, AutoModel
6
+ import threading
7
+ import queue
8
  import gradio as gr
9
  import os
10
 
 
35
  'TRECCOVID': 'Given a query on COVID-19, retrieve documents that answer the query',
36
  }
37
 
38
+
39
+ # Global queues: one for incoming embedding requests, one for their formatted responses
40
+ embedding_request_queue = queue.Queue()
41
+ embedding_response_queue = queue.Queue()
42
+
43
+
44
  tokenizer = AutoTokenizer.from_pretrained('intfloat/e5-mistral-7b-instruct')
45
  model = AutoModel.from_pretrained('intfloat/e5-mistral-7b-instruct', torch_dtype=torch.float16, device_map=device)
46
 
 
64
  with open(file_path, 'r') as file:
65
  data = json.load(file)
66
  return data
67
+
68
+
69
def embedding_worker():
    """Serve embedding requests from ``embedding_request_queue`` until stopped.

    Each queue item is a ``(selected_task, input_text)`` tuple. The worker
    computes the embeddings, formats them with ``format_response`` and puts the
    result on ``embedding_response_queue``. A ``None`` item is the shutdown
    sentinel and ends the loop.

    Exactly one response is posted for every non-sentinel request, even when
    processing fails; otherwise the caller blocked on the response queue would
    wait forever and the worker thread would die, stalling all later requests.
    """
    import logging  # local import: the file's top-level import block is not fully visible here

    log = logging.getLogger(__name__)

    while True:
        # Block until a request (or the shutdown sentinel) arrives.
        item = embedding_request_queue.get()
        try:
            if item is None:
                break
            try:
                selected_task, input_text = item
                embeddings = compute_embeddings(selected_task, input_text)
                formatted_response = format_response(embeddings)
            except Exception as exc:  # worker boundary: keep the thread alive
                log.exception("Embedding request failed")
                # NOTE(review): the shape returned by format_response is not
                # visible from here; confirm the UI can render this fallback.
                formatted_response = {"error": f"Embedding request failed: {exc}"}
            embedding_response_queue.put(formatted_response)
        finally:
            # Always balance the get() above (including for the sentinel) so
            # that embedding_request_queue.join() cannot hang.
            embedding_request_queue.task_done()
81
+
82
+ threading.Thread(target=embedding_worker, daemon=True).start()
83
+
84
+
85
  @spaces.GPU
86
  def compute_embeddings(selected_task, input_text):
87
  try:
 
185
  }
186
 
187
# Serializes one full request/response round trip. The response queue carries
# no request identifier, so without this two concurrent callers could each
# pick up the other's result.
_embedding_roundtrip_lock = threading.Lock()


def generate_and_format_embeddings(selected_task, input_text):
    """Submit an embedding request to the worker and return its response.

    Args:
        selected_task: Task value forwarded unchanged to the worker.
        input_text: Text forwarded unchanged to the worker.

    Returns:
        Whatever object the worker placed on ``embedding_response_queue`` for
        this request.
    """
    # Holding the lock across put() and get() guarantees at most one request
    # is outstanding, so the next response on the queue belongs to this call.
    with _embedding_roundtrip_lock:
        embedding_request_queue.put((selected_task, input_text))
        response = embedding_response_queue.get()
        # Balance the get() so join() on the response queue stays accurate.
        embedding_response_queue.task_done()
    return response
192
+
193
 
194
  def app_interface():
195
  corpus_sentences = []