Spaces:
Paused
Paused
isimorfizam
commited on
Commit
•
f710ff8
1
Parent(s):
a452220
Add vector database
Browse files- .gitattributes +4 -0
- README.md +3 -12
- app.py +3 -308
- chroma/chroma.sqlite3 +3 -0
- chroma/f7fa132c-535b-432c-a08e-0733244a743d/data_level0.bin +3 -0
- chroma/f7fa132c-535b-432c-a08e-0733244a743d/header.bin +3 -0
- chroma/f7fa132c-535b-432c-a08e-0733244a743d/index_metadata.pickle +3 -0
- chroma/f7fa132c-535b-432c-a08e-0733244a743d/length.bin +3 -0
- chroma/f7fa132c-535b-432c-a08e-0733244a743d/link_lists.bin +3 -0
- requirements.txt +3 -186
.gitattributes
CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.md filter=lfs diff=lfs merge=lfs -text
|
37 |
+
*.py filter=lfs diff=lfs merge=lfs -text
|
38 |
+
*.sqlite3 filter=lfs diff=lfs merge=lfs -text
|
39 |
+
*.txt filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
@@ -1,12 +1,3 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
colorFrom: purple
|
5 |
-
colorTo: gray
|
6 |
-
sdk: streamlit
|
7 |
-
sdk_version: 1.33.0
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
-
---
|
11 |
-
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:58993ad738efab98d5036aa7ac4ced3eef97a74f064ee15c44f7c7ad766e6fa2
|
3 |
+
size 239
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
@@ -1,308 +1,3 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
from transformers.utils import is_flash_attn_2_available
|
5 |
-
from transformers import BitsAndBytesConfig
|
6 |
-
import pandas as pd
|
7 |
-
import os
|
8 |
-
import torch
|
9 |
-
import numpy as np
|
10 |
-
from scipy import sparse
|
11 |
-
from sklearn.metrics.pairwise import cosine_similarity
|
12 |
-
from scipy import sparse
|
13 |
-
from langchain_community.vectorstores import Chroma
|
14 |
-
from langchain_community.embeddings.sentence_transformer import (
|
15 |
-
SentenceTransformerEmbeddings,
|
16 |
-
)
|
17 |
-
|
18 |
-
# SET TO WIDE LAYOUT
|
19 |
-
st.set_page_config(layout="wide")
|
20 |
-
|
21 |
-
#_______________________________________________SET VARIABLES_____________________________________________________
|
22 |
-
|
23 |
-
MODEL_ID = 'google/gemma-2b-it'
|
24 |
-
CHUNK_SIZE = 1000
|
25 |
-
OVERLAP_SIZE = 100
|
26 |
-
EMBEDDING = "all-MiniLM-L6-v2"
|
27 |
-
COLLECTION_NAME = f'vb_summarizer_{EMBEDDING}_test'
|
28 |
-
CHROMA_DATA_PATH = 'feedback_360'
|
29 |
-
|
30 |
-
#_______________________________________________LOAD MODELS_____________________________________________________
|
31 |
-
# LOAD MODEL
|
32 |
-
@st.cache_resource
|
33 |
-
def load_model(model_id) :
|
34 |
-
|
35 |
-
HF_TOKEN = os.environ['HF_TOKEN']
|
36 |
-
print(torch.backends.mps.is_available())
|
37 |
-
#device = torch.device("mps") if torch.backends.mps.is_available() else "cpu"
|
38 |
-
device = 'cpu'
|
39 |
-
print(device)
|
40 |
-
|
41 |
-
if device=='cpu' :
|
42 |
-
print('Warning! No GPU available')
|
43 |
-
|
44 |
-
# IMPORT MODEL
|
45 |
-
|
46 |
-
print(model_id)
|
47 |
-
|
48 |
-
quantization_config = BitsAndBytesConfig(load_in_4bit=True,
|
49 |
-
bnb_4bit_compute_dtype=torch.float16)
|
50 |
-
|
51 |
-
# if (is_flash_attn_2_available()) and (torch.cuda.get_device_capability(0)[0] >= 8):
|
52 |
-
# attn_implementation = "flash_attention_2"
|
53 |
-
# else:
|
54 |
-
# attn_implementation = "sdpa"
|
55 |
-
# print(f"[INFO] Using attention implementation: {attn_implementation}")
|
56 |
-
|
57 |
-
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_id, token=HF_TOKEN)
|
58 |
-
|
59 |
-
llm_model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path=model_id,
|
60 |
-
token=HF_TOKEN,
|
61 |
-
torch_dtype=torch.float16,
|
62 |
-
#quantization_config=quantization_config if quantization_config else None,
|
63 |
-
low_cpu_mem_usage=False,) # use full memory
|
64 |
-
#attn_implementation=attn_implementation) # which attention version to use
|
65 |
-
llm_model.to(device)
|
66 |
-
return llm_model, tokenizer, device
|
67 |
-
|
68 |
-
# LOAD VECTORSTORE
|
69 |
-
@st.cache_resource
|
70 |
-
def load_data(embedding) :
|
71 |
-
# CREATE EMBEDDING
|
72 |
-
embedding_function = SentenceTransformerEmbeddings(model_name=embedding)
|
73 |
-
db3 = Chroma(collection_name = COLLECTION_NAME, persist_directory="./chroma", embedding_function = embedding_function)
|
74 |
-
return db3
|
75 |
-
|
76 |
-
|
77 |
-
# Create a text element and let the reader know the data is loading.
|
78 |
-
model_load_state = st.text('Loading model...')
|
79 |
-
# Load 10,000 rows of data into the dataframe.
|
80 |
-
llm_model, tokenizer, device = load_model(MODEL_ID)
|
81 |
-
# Notify the reader that the data was successfully loaded.
|
82 |
-
model_load_state.text('Loading model...done!')
|
83 |
-
|
84 |
-
# Create a text element and let the reader know the data is loading.
|
85 |
-
data_load_state = st.text('Loading data...')
|
86 |
-
# Load 10,000 rows of data into the dataframe.
|
87 |
-
vectorstore = load_data(EMBEDDING)
|
88 |
-
# Notify the reader that the data was successfully loaded.
|
89 |
-
data_load_state.text('Loading data...done!')
|
90 |
-
|
91 |
-
|
92 |
-
#_______________________________________________SUMMARIZATION_____________________________________________________
|
93 |
-
# INFERENCE
|
94 |
-
# def prompt_formatter(reviews, type_of_doc):
|
95 |
-
# return f"""You are a summarization bot.
|
96 |
-
# You will receive {type_of_doc} and you will extract all relevant information from {type_of_doc} and return one paragraph in which you will summarize what was said.
|
97 |
-
# {type_of_doc} are listed below under inputs.
|
98 |
-
# Inputs: {reviews}
|
99 |
-
# Answer :
|
100 |
-
# """
|
101 |
-
# def prompt_formatter(reviews, type_of_doc):
|
102 |
-
# return f"""You are a summarization bot.
|
103 |
-
# You will receive {type_of_doc} and you will summarize what was said in the input.
|
104 |
-
# {type_of_doc} are listed below under inputs.
|
105 |
-
# Inputs: {reviews}
|
106 |
-
# Answer :
|
107 |
-
# """
|
108 |
-
def prompt_formatter(reviews):
|
109 |
-
return f"""You are a summarization bot.
|
110 |
-
You will receive reviews of Clockify from different users.
|
111 |
-
You will summarize what these reviews said while keeping the information about each of the user.
|
112 |
-
Reviews are listed below.
|
113 |
-
Reviews: {reviews}
|
114 |
-
Answer :
|
115 |
-
"""
|
116 |
-
|
117 |
-
def mirror_mirror(inputs, prompt_formatter, tokenizer):
|
118 |
-
print('Mirror_mirror')
|
119 |
-
prompt = prompt_formatter(inputs)
|
120 |
-
input_ids = tokenizer(prompt, return_tensors="pt").to(device)
|
121 |
-
outputs = llm_model.generate(**input_ids,
|
122 |
-
temperature=0.3,
|
123 |
-
do_sample=True,
|
124 |
-
max_new_tokens=275)
|
125 |
-
output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
126 |
-
return prompt, output_text.replace(prompt, '')
|
127 |
-
|
128 |
-
|
129 |
-
def summarization(example : str, results_df : pd.DataFrame = pd.DataFrame()) -> pd.DataFrame :
|
130 |
-
|
131 |
-
# INFERENCE
|
132 |
-
results = []
|
133 |
-
for cnt in range(0,2) :
|
134 |
-
|
135 |
-
prompt, result = mirror_mirror(example, prompt_formatter, tokenizer)
|
136 |
-
list_temp = [result, example]
|
137 |
-
tokenized = tokenizer(list_temp, return_tensors="pt", padding = True)
|
138 |
-
A = tokenized.input_ids.numpy()
|
139 |
-
A = sparse.csr_matrix(A)
|
140 |
-
score = cosine_similarity(A)[0,1]
|
141 |
-
#print(cosine_similarity(A)[0,1])
|
142 |
-
#print(cosine_similarity(A)[1,0])
|
143 |
-
print(score)
|
144 |
-
if score>0.1 :
|
145 |
-
fin_result = result
|
146 |
-
max_score = score
|
147 |
-
break
|
148 |
-
|
149 |
-
results.append(result)
|
150 |
-
#print(result+'\n\n')
|
151 |
-
|
152 |
-
# tokenize results and example together
|
153 |
-
try :
|
154 |
-
fin_result
|
155 |
-
except :
|
156 |
-
# if fin_result not already defined, use the best of available results
|
157 |
-
# add example to results so tokenization is done together (due to padding limitations)
|
158 |
-
results.append(example)
|
159 |
-
tokenized = tokenizer(results, return_tensors="pt", padding = True)
|
160 |
-
A = tokenized.input_ids.numpy()
|
161 |
-
A = sparse.csr_matrix(A)
|
162 |
-
# calculate cosine similarity of each pair
|
163 |
-
# keep only example X result column
|
164 |
-
scores = cosine_similarity(A)[:,2]
|
165 |
-
# final result is the one with greaters cos_score
|
166 |
-
fin_result = results[np.argmax(scores)]
|
167 |
-
max_score = max(scores)
|
168 |
-
|
169 |
-
#print(fin_result)
|
170 |
-
# save final result and its attributes
|
171 |
-
row = pd.DataFrame({'model' : MODEL_ID, 'prompt' : prompt, 'reviews' : example, 'summarization' : fin_result, 'score' : [max_score] })
|
172 |
-
results_df = pd.concat([results_df,row], ignore_index = True)
|
173 |
-
|
174 |
-
return results_df
|
175 |
-
|
176 |
-
def create_filter(group:str=None, platform:str=None, ReviewerPosition:str=None, Industry:str=None, CompanySize:str=None,
|
177 |
-
UsagePeriod:str=None, LinkedinVerified:str=None, Date:str=None, Rating:str=None) :
|
178 |
-
keys = ['group', 'Platform', 'ReviewerPosition', 'Industry', 'CompanySize',
|
179 |
-
'UsagePeriod', 'LinkedinVerified', 'Date', 'Rating']
|
180 |
-
input_keys = [group,platform, ReviewerPosition, Industry, CompanySize, UsagePeriod, LinkedinVerified, Date, Rating]
|
181 |
-
|
182 |
-
# create filter dict
|
183 |
-
filter_dict = {}
|
184 |
-
for key, in_key in zip(keys, input_keys) :
|
185 |
-
if not in_key == None and not in_key == ' ':
|
186 |
-
filter_dict[key] = {'$eq' : in_key}
|
187 |
-
|
188 |
-
print(filter_dict)
|
189 |
-
return filter_dict
|
190 |
-
|
191 |
-
#_______________________________________________UI_____________________________________________________
|
192 |
-
|
193 |
-
st.title("Mirror, mirror, on the cloud, what do Clockify users say aloud?")
|
194 |
-
st.subheader("--Clockify review summarizer--")
|
195 |
-
|
196 |
-
col1, col2, col3 = st.columns(3, gap = 'small')
|
197 |
-
|
198 |
-
with col1:
|
199 |
-
platform = st.selectbox(label = 'Platform',
|
200 |
-
options = [' ', 'Capterra', 'Chrome Extension', 'GetApp', 'AppStore', 'GooglePlay',
|
201 |
-
'Firefox Extension', 'JIRA Plugin', 'Trustpilot', 'G2',
|
202 |
-
'TrustRadius']
|
203 |
-
)
|
204 |
-
|
205 |
-
with col2:
|
206 |
-
company_size = st.selectbox(label = 'Company Size',
|
207 |
-
options = [' ', '1-10 employees', 'Self-employed', 'self-employed',
|
208 |
-
'Small-Business(50 or fewer emp.)', '51-200 employees',
|
209 |
-
'Mid-Market(51-1000 emp.)', '11-50 employees',
|
210 |
-
'501-1,000 employees', '10,001+ employees', '201-500 employees',
|
211 |
-
'1,001-5,000 employees', '5,001-10,000 employees',
|
212 |
-
'Enterprise(> 1000 emp.)', 'Unknown', '1001-5000 employees']
|
213 |
-
)
|
214 |
-
|
215 |
-
with col3:
|
216 |
-
linkedin_verified = st.selectbox(label = 'Linkedin Verified',
|
217 |
-
options = [' ', 'True', 'False'],
|
218 |
-
placeholder = 'Choose an option'
|
219 |
-
)
|
220 |
-
|
221 |
-
num_to_return = int(st.number_input(label = 'Number of documents to return', min_value = 2, max_value = 50, step = 1))
|
222 |
-
|
223 |
-
# group = st.selectbox(label = 'Review Platform Group',
|
224 |
-
# options = ['Software Review Platforms', 'Browser Extension Stores', 'Mobile App Stores', 'Plugin Marketplace']
|
225 |
-
# )
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
default_value = "Clockify"
|
230 |
-
|
231 |
-
query = st.text_area("Query", default_value, height = 50)
|
232 |
-
#type_of_doc = st.text_area("Type of text", 'text', height = 25)
|
233 |
-
|
234 |
-
# result = ''
|
235 |
-
# score = ''
|
236 |
-
# reviews = ''
|
237 |
-
|
238 |
-
if 'result' not in st.session_state:
|
239 |
-
st.session_state['result'] = ''
|
240 |
-
|
241 |
-
if 'score' not in st.session_state:
|
242 |
-
st.session_state['score'] = ''
|
243 |
-
|
244 |
-
if 'reviews' not in st.session_state:
|
245 |
-
st.session_state['reviews'] = ''
|
246 |
-
|
247 |
-
col11, col21 = st.columns(2, gap = 'small')
|
248 |
-
|
249 |
-
with col11:
|
250 |
-
button_query = st.button('Conquer and query!')
|
251 |
-
with col21:
|
252 |
-
button_summarize = st.button('Summon the summarizer!')
|
253 |
-
|
254 |
-
|
255 |
-
if button_query :
|
256 |
-
print('Querying')
|
257 |
-
# create filter from drop-downs
|
258 |
-
filter_dict = create_filter(#group = group,
|
259 |
-
platform = platform,
|
260 |
-
CompanySize = company_size,
|
261 |
-
LinkedinVerified = linkedin_verified
|
262 |
-
)
|
263 |
-
# FILTER BY META
|
264 |
-
if filter_dict == {} :
|
265 |
-
retriever = vectorstore.as_retriever(search_kwargs = {"k": num_to_return})
|
266 |
-
|
267 |
-
elif len(filter_dict.keys()) == 1 :
|
268 |
-
retriever = vectorstore.as_retriever(search_kwargs = {"k": num_to_return,
|
269 |
-
"filter": filter_dict})
|
270 |
-
else :
|
271 |
-
retriever = vectorstore.as_retriever(search_kwargs = {"k": num_to_return,
|
272 |
-
"filter":{'$and': [{key : value} for key,value in filter_dict.items()]}
|
273 |
-
}
|
274 |
-
)
|
275 |
-
|
276 |
-
reviews = retriever.get_relevant_documents(query = query)
|
277 |
-
# only get page content
|
278 |
-
st.session_state['reviews'] = [review.page_content for review in reviews]
|
279 |
-
print(st.session_state['reviews'])
|
280 |
-
result = 'You may summarize now!'
|
281 |
-
|
282 |
-
if button_summarize :
|
283 |
-
print('Summarization in progress')
|
284 |
-
st.session_state['result'] = 'Summarization in progress'
|
285 |
-
results_df = summarization("\n".join(st.session_state['reviews']))
|
286 |
-
# only one input
|
287 |
-
st.session_state['result'] = results_df.summarization[0]
|
288 |
-
score = results_df.score[0]
|
289 |
-
|
290 |
-
|
291 |
-
col12, col22 = st.columns(2, gap = 'small')
|
292 |
-
|
293 |
-
with col12:
|
294 |
-
chosen_reviews = st.text_area("Reviews to be summarized", "\n".join(st.session_state['reviews']), height = 275)
|
295 |
-
with col22:
|
296 |
-
summarized_text = st.text_area("Summarized text", st.session_state['result'], height = 275)
|
297 |
-
|
298 |
-
score = st.text_area("Cosine similarity score", st.session_state['score'], height = 25)
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
# max_length = st.sidebar.slider("Max Length", min_value = 10, max_value=30)
|
305 |
-
# temperature = st.sidebar.slider("Temperature", value = 1.0, min_value = 0.0, max_value=1.0, step=0.05)
|
306 |
-
# top_k = st.sidebar.slider("Top-k", min_value = 0, max_value=5, value = 0)
|
307 |
-
# top_p = st.sidebar.slider("Top-p", min_value = 0.0, max_value=1.0, step = 0.05, value = 0.9)
|
308 |
-
# num_return_sequences = st.sidebar.number_input('Number of Return Sequences', min_value=1, max_value=5, value=1, step=1)s
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ec776a7d993d37a267ba115d1f7b6ed3f47543372183e0aba99d3ec5cdbf443d
|
3 |
+
size 12135
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
chroma/chroma.sqlite3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c75a8c4781833f9ff6631ce35b3afdf4f14fcfa96dbb89119f82e0ebede75538
|
3 |
+
size 94322688
|
chroma/f7fa132c-535b-432c-a08e-0733244a743d/data_level0.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a178127ae117d29013c884005a50856af4d162e58e633875f3646a206de6db42
|
3 |
+
size 18436000
|
chroma/f7fa132c-535b-432c-a08e-0733244a743d/header.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9d515f925f64647cf578e98b503441d063c2a4ff18e6cfce00074c7a9a7110c
|
3 |
+
size 100
|
chroma/f7fa132c-535b-432c-a08e-0733244a743d/index_metadata.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e46404ed29ee26c1c828c9efe6c3ff2914220029c9fdd37b782725aaf2a648e0
|
3 |
+
size 636115
|
chroma/f7fa132c-535b-432c-a08e-0733244a743d/length.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74161e53fe79a9fafa54ab0ca4401fa6ec057448b2b7e5e62a71fc3b442bb876
|
3 |
+
size 44000
|
chroma/f7fa132c-535b-432c-a08e-0733244a743d/link_lists.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd0a9b861ed1b179ba75a1a07bab17f04657fa9102271e900488b0f544c55b0e
|
3 |
+
size 97380
|
requirements.txt
CHANGED
@@ -1,186 +1,3 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
aiosignal==1.3.1
|
5 |
-
altair==5.3.0
|
6 |
-
annotated-types==0.6.0
|
7 |
-
anyio==4.3.0
|
8 |
-
appnope==0.1.4
|
9 |
-
asgiref==3.8.1
|
10 |
-
asttokens==2.4.1
|
11 |
-
async-timeout==4.0.3
|
12 |
-
attrs==23.2.0
|
13 |
-
backoff==2.2.1
|
14 |
-
bcrypt==4.1.2
|
15 |
-
bitsandbytes==0.42.0
|
16 |
-
blinker==1.7.0
|
17 |
-
build==1.2.1
|
18 |
-
cachetools==5.3.3
|
19 |
-
certifi==2024.2.2
|
20 |
-
charset-normalizer==3.3.2
|
21 |
-
chroma-hnswlib==0.7.3
|
22 |
-
chromadb==0.4.24
|
23 |
-
click==8.1.7
|
24 |
-
coloredlogs==15.0.1
|
25 |
-
comm==0.2.2
|
26 |
-
contourpy==1.2.1
|
27 |
-
cycler==0.12.1
|
28 |
-
dataclasses-json==0.6.4
|
29 |
-
debugpy==1.8.1
|
30 |
-
decorator==5.1.1
|
31 |
-
Deprecated==1.2.14
|
32 |
-
distro==1.9.0
|
33 |
-
exceptiongroup==1.2.0
|
34 |
-
executing==2.0.1
|
35 |
-
fastapi==0.110.1
|
36 |
-
ffmpy==0.3.2
|
37 |
-
filelock==3.13.4
|
38 |
-
flatbuffers==24.3.25
|
39 |
-
fonttools==4.51.0
|
40 |
-
frozenlist==1.4.1
|
41 |
-
fsspec==2024.3.1
|
42 |
-
gitdb==4.0.11
|
43 |
-
GitPython==3.1.43
|
44 |
-
google-auth==2.29.0
|
45 |
-
googleapis-common-protos==1.63.0
|
46 |
-
gradio==4.26.0
|
47 |
-
gradio_client==0.15.1
|
48 |
-
grpcio==1.62.1
|
49 |
-
h11==0.14.0
|
50 |
-
httpcore==1.0.5
|
51 |
-
httptools==0.6.1
|
52 |
-
httpx==0.27.0
|
53 |
-
huggingface-hub==0.22.2
|
54 |
-
humanfriendly==10.0
|
55 |
-
idna==3.7
|
56 |
-
importlib-metadata==7.0.0
|
57 |
-
importlib_resources==6.4.0
|
58 |
-
ipykernel==6.29.4
|
59 |
-
ipython==8.18.1
|
60 |
-
jedi==0.19.1
|
61 |
-
Jinja2==3.1.3
|
62 |
-
joblib==1.4.0
|
63 |
-
jsonpatch==1.33
|
64 |
-
jsonpointer==2.4
|
65 |
-
jsonschema==4.21.1
|
66 |
-
jsonschema-specifications==2023.12.1
|
67 |
-
jupyter_client==8.6.1
|
68 |
-
jupyter_core==5.7.2
|
69 |
-
kiwisolver==1.4.5
|
70 |
-
kubernetes==29.0.0
|
71 |
-
langchain==0.1.16
|
72 |
-
langchain-chroma==0.1.0
|
73 |
-
langchain-community==0.0.33
|
74 |
-
langchain-core==0.1.43
|
75 |
-
langchain-openai==0.1.3
|
76 |
-
langchain-text-splitters==0.0.1
|
77 |
-
langsmith==0.1.48
|
78 |
-
lark==1.1.9
|
79 |
-
markdown-it-py==3.0.0
|
80 |
-
MarkupSafe==2.1.5
|
81 |
-
marshmallow==3.21.1
|
82 |
-
matplotlib==3.8.4
|
83 |
-
matplotlib-inline==0.1.7
|
84 |
-
mdurl==0.1.2
|
85 |
-
mmh3==4.1.0
|
86 |
-
monotonic==1.6
|
87 |
-
mpmath==1.3.0
|
88 |
-
multidict==6.0.5
|
89 |
-
mypy-extensions==1.0.0
|
90 |
-
nest-asyncio==1.6.0
|
91 |
-
networkx==3.2.1
|
92 |
-
numpy==1.26.4
|
93 |
-
oauthlib==3.2.2
|
94 |
-
onnxruntime==1.17.3
|
95 |
-
openai==1.19.0
|
96 |
-
opentelemetry-api==1.24.0
|
97 |
-
opentelemetry-exporter-otlp-proto-common==1.24.0
|
98 |
-
opentelemetry-exporter-otlp-proto-grpc==1.24.0
|
99 |
-
opentelemetry-instrumentation==0.45b0
|
100 |
-
opentelemetry-instrumentation-asgi==0.45b0
|
101 |
-
opentelemetry-instrumentation-fastapi==0.45b0
|
102 |
-
opentelemetry-proto==1.24.0
|
103 |
-
opentelemetry-sdk==1.24.0
|
104 |
-
opentelemetry-semantic-conventions==0.45b0
|
105 |
-
opentelemetry-util-http==0.45b0
|
106 |
-
orjson==3.10.1
|
107 |
-
overrides==7.7.0
|
108 |
-
packaging==23.2
|
109 |
-
pandas==2.2.2
|
110 |
-
parso==0.8.4
|
111 |
-
pexpect==4.9.0
|
112 |
-
pillow==10.3.0
|
113 |
-
platformdirs==4.2.0
|
114 |
-
posthog==3.5.0
|
115 |
-
prompt-toolkit==3.0.43
|
116 |
-
protobuf==4.25.3
|
117 |
-
psutil==5.9.8
|
118 |
-
ptyprocess==0.7.0
|
119 |
-
pulsar-client==3.5.0
|
120 |
-
pure-eval==0.2.2
|
121 |
-
pyarrow==15.0.2
|
122 |
-
pyasn1==0.6.0
|
123 |
-
pyasn1_modules==0.4.0
|
124 |
-
pydantic==2.7.0
|
125 |
-
pydantic_core==2.18.1
|
126 |
-
pydeck==0.8.1b0
|
127 |
-
pydub==0.25.1
|
128 |
-
Pygments==2.17.2
|
129 |
-
pyparsing==3.1.2
|
130 |
-
PyPika==0.48.9
|
131 |
-
pyproject_hooks==1.0.0
|
132 |
-
python-dateutil==2.9.0.post0
|
133 |
-
python-dotenv==1.0.1
|
134 |
-
python-multipart==0.0.9
|
135 |
-
pytz==2024.1
|
136 |
-
PyYAML==6.0.1
|
137 |
-
pyzmq==26.0.0
|
138 |
-
referencing==0.34.0
|
139 |
-
regex==2023.12.25
|
140 |
-
requests==2.31.0
|
141 |
-
requests-oauthlib==2.0.0
|
142 |
-
rich==13.7.1
|
143 |
-
rpds-py==0.18.0
|
144 |
-
rsa==4.9
|
145 |
-
ruff==0.3.7
|
146 |
-
safetensors==0.4.3
|
147 |
-
scikit-learn==1.4.2
|
148 |
-
scipy==1.13.0
|
149 |
-
semantic-version==2.10.0
|
150 |
-
sentence-transformers==2.7.0
|
151 |
-
shellingham==1.5.4
|
152 |
-
six==1.16.0
|
153 |
-
smmap==5.0.1
|
154 |
-
sniffio==1.3.1
|
155 |
-
SQLAlchemy==2.0.29
|
156 |
-
stack-data==0.6.3
|
157 |
-
starlette==0.37.2
|
158 |
-
streamlit==1.33.0
|
159 |
-
sympy==1.12
|
160 |
-
tenacity==8.2.3
|
161 |
-
threadpoolctl==3.4.0
|
162 |
-
tiktoken==0.6.0
|
163 |
-
tokenizers==0.15.2
|
164 |
-
toml==0.10.2
|
165 |
-
tomli==2.0.1
|
166 |
-
tomlkit==0.12.0
|
167 |
-
toolz==0.12.1
|
168 |
-
torch==2.2.2
|
169 |
-
tornado==6.4
|
170 |
-
tqdm==4.66.2
|
171 |
-
traitlets==5.14.2
|
172 |
-
transformers==4.39.3
|
173 |
-
typer==0.12.3
|
174 |
-
typing-inspect==0.9.0
|
175 |
-
typing_extensions==4.11.0
|
176 |
-
tzdata==2024.1
|
177 |
-
urllib3==2.2.1
|
178 |
-
uvicorn==0.29.0
|
179 |
-
uvloop==0.19.0
|
180 |
-
watchfiles==0.21.0
|
181 |
-
wcwidth==0.2.13
|
182 |
-
websocket-client==1.7.0
|
183 |
-
websockets==11.0.3
|
184 |
-
wrapt==1.16.0
|
185 |
-
yarl==1.9.4
|
186 |
-
zipp==3.18.1
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1df8136f3d57d74ec4a316080a4d6be792614a760dea6473ae9fe3f317c22d01
|
3 |
+
size 3490
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|