Spaces:
Build error
Build error
Upload 16 files (#4)
Browse files
- Upload 16 files (dfc16db4714fd788e32c96d47a067edfc4a6b547)
- app.py +11 -1
- requirements.txt +1 -0
- utils/__pycache__/__init__.cpython-38.pyc +0 -0
- utils/__pycache__/entity_extraction.cpython-38.pyc +0 -0
- utils/__pycache__/models.cpython-38.pyc +0 -0
- utils/__pycache__/prompts.cpython-38.pyc +0 -0
- utils/__pycache__/retriever.cpython-38.pyc +0 -0
- utils/__pycache__/transcript_retrieval.cpython-38.pyc +0 -0
- utils/__pycache__/vector_index.cpython-38.pyc +0 -0
- utils/models.py +8 -0
app.py
CHANGED
@@ -24,6 +24,7 @@ from utils.models import (
|
|
24 |
get_flan_t5_model,
|
25 |
get_mpnet_embedding_model,
|
26 |
get_sgpt_embedding_model,
|
|
|
27 |
get_spacy_model,
|
28 |
get_splade_sparse_embedding_model,
|
29 |
get_t5_model,
|
@@ -247,7 +248,7 @@ with st.sidebar:
|
|
247 |
|
248 |
# Choose encoder model
|
249 |
|
250 |
-
encoder_models_choice = ["MPNET", "SGPT", "Hybrid MPNET - SPLADE"]
|
251 |
with st.sidebar:
|
252 |
encoder_model = st.selectbox("Select Encoder Model", encoder_models_choice)
|
253 |
|
@@ -281,6 +282,15 @@ elif encoder_model == "SGPT":
|
|
281 |
pinecone_index = pinecone.Index(pinecone_index_name)
|
282 |
retriever_model = get_sgpt_embedding_model()
|
283 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
284 |
elif encoder_model == "Hybrid MPNET - SPLADE":
|
285 |
pinecone.init(
|
286 |
api_key=st.secrets["pinecone_hybrid_splade_mpnet"],
|
|
|
24 |
get_flan_t5_model,
|
25 |
get_mpnet_embedding_model,
|
26 |
get_sgpt_embedding_model,
|
27 |
+
get_instructor_embedding_model,
|
28 |
get_spacy_model,
|
29 |
get_splade_sparse_embedding_model,
|
30 |
get_t5_model,
|
|
|
248 |
|
249 |
# Choose encoder model
|
250 |
|
251 |
+
encoder_models_choice = ["MPNET", "Instructor", "SGPT", "Hybrid MPNET - SPLADE"]
|
252 |
with st.sidebar:
|
253 |
encoder_model = st.selectbox("Select Encoder Model", encoder_models_choice)
|
254 |
|
|
|
282 |
pinecone_index = pinecone.Index(pinecone_index_name)
|
283 |
retriever_model = get_sgpt_embedding_model()
|
284 |
|
285 |
+
elif encoder_model == "Instructor":
|
286 |
+
# Connect to pinecone environment
|
287 |
+
pinecone.init(
|
288 |
+
api_key=st.secrets["pinecone_instructor"], environment="us-west4-gcp-free"
|
289 |
+
)
|
290 |
+
pinecone_index_name = "week13-instructor-xl"
|
291 |
+
pinecone_index = pinecone.Index(pinecone_index_name)
|
292 |
+
retriever_model = get_instructor_embedding_model()
|
293 |
+
|
294 |
elif encoder_model == "Hybrid MPNET - SPLADE":
|
295 |
pinecone.init(
|
296 |
api_key=st.secrets["pinecone_hybrid_splade_mpnet"],
|
requirements.txt
CHANGED
@@ -11,3 +11,4 @@ transformers
|
|
11 |
streamlit
|
12 |
streamlit-scrollable-textbox
|
13 |
openai
|
|
|
|
11 |
streamlit
|
12 |
streamlit-scrollable-textbox
|
13 |
openai
|
14 |
+
InstructorEmbedding
|
utils/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (181 Bytes). View file
|
|
utils/__pycache__/entity_extraction.cpython-38.pyc
ADDED
Binary file (4.04 kB). View file
|
|
utils/__pycache__/models.cpython-38.pyc
ADDED
Binary file (4.28 kB). View file
|
|
utils/__pycache__/prompts.cpython-38.pyc
ADDED
Binary file (16.1 kB). View file
|
|
utils/__pycache__/retriever.cpython-38.pyc
ADDED
Binary file (4.27 kB). View file
|
|
utils/__pycache__/transcript_retrieval.cpython-38.pyc
ADDED
Binary file (658 Bytes). View file
|
|
utils/__pycache__/vector_index.cpython-38.pyc
ADDED
Binary file (1.77 kB). View file
|
|
utils/models.py
CHANGED
@@ -9,6 +9,7 @@ import spacy_transformers
|
|
9 |
import streamlit_scrollable_textbox as stx
|
10 |
import torch
|
11 |
from sentence_transformers import SentenceTransformer
|
|
|
12 |
from tqdm import tqdm
|
13 |
from transformers import (
|
14 |
AutoModelForMaskedLM,
|
@@ -95,6 +96,13 @@ def get_sgpt_embedding_model():
|
|
95 |
return model
|
96 |
|
97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
@st.experimental_memo
def save_key(api_key):
    """Return ``api_key`` unchanged.

    The function body is an identity; the only effect comes from the
    ``st.experimental_memo`` decorator, which memoizes the call.
    NOTE(review): presumably used to keep a user-supplied key available
    across Streamlit reruns -- confirm against the callers.

    Args:
        api_key: The key value to hand back.

    Returns:
        The same ``api_key`` object that was passed in.
    """
    return api_key
|
|
|
9 |
import streamlit_scrollable_textbox as stx
|
10 |
import torch
|
11 |
from sentence_transformers import SentenceTransformer
|
12 |
+
from InstructorEmbedding import INSTRUCTOR
|
13 |
from tqdm import tqdm
|
14 |
from transformers import (
|
15 |
AutoModelForMaskedLM,
|
|
|
96 |
return model
|
97 |
|
98 |
|
99 |
+
@st.experimental_singleton
def get_instructor_embedding_model():
    """Load the ``hkunlp/instructor-large`` INSTRUCTOR embedding model.

    The model is created on the GPU when CUDA is available and on the CPU
    otherwise. The ``st.experimental_singleton`` decorator wraps the loader
    so the model object is shared rather than rebuilt on every call.

    NOTE(review): the app connects this encoder to a Pinecone index named
    "week13-instructor-xl", while the checkpoint loaded here is
    "instructor-large" -- confirm the index was built with this model.

    Returns:
        The loaded ``INSTRUCTOR`` model instance.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # The device was previously computed but never used; pass it explicitly
    # so the placement decision made above is the one that takes effect.
    model = INSTRUCTOR("hkunlp/instructor-large", device=device)
    return model
|
104 |
+
|
105 |
+
|
106 |
@st.experimental_memo
def save_key(api_key):
    """Return ``api_key`` unchanged.

    The function body is an identity; the only effect comes from the
    ``st.experimental_memo`` decorator, which memoizes the call.
    NOTE(review): presumably used to keep a user-supplied key available
    across Streamlit reruns -- confirm against the callers.

    Args:
        api_key: The key value to hand back.

    Returns:
        The same ``api_key`` object that was passed in.
    """
    return api_key
|