Spaces:
Running
Running
Vitomir Jovanović
committed on
Commit
•
e620120
1
Parent(s):
43e9781
Add all vector similarity feature
Browse files
- Procfile.yaml +1 -1
- main.py +11 -10
- models/Query.py +8 -1
- models/__pycache__/Query.cpython-312.pyc +0 -0
- models/__pycache__/data_reader.cpython-312.pyc +0 -0
- models/__pycache__/prompt_search_engine.cpython-312.pyc +0 -0
- models/data_reader.py +1 -1
- models/prompt_search_engine.py +21 -11
Procfile.yaml
CHANGED
@@ -1 +1 @@
|
|
1 |
-
web: gunicorn -w 1 -k uvicorn.workers.UvicornWorker main
|
|
|
1 |
+
web: gunicorn -w 1 -k uvicorn.workers.UvicornWorker main:app --bind 0.0.0.0:8000 & streamlit run app.py --server.port 7860
|
main.py
CHANGED
@@ -8,14 +8,14 @@ import datetime
|
|
8 |
from models.vectorizer import Vectorizer
|
9 |
from models.prompt_search_engine import PromptSearchEngine
|
10 |
from models.data_reader import load_prompts_from_jsonl
|
11 |
-
from models.Query import Query, Query_Multiple, SearchResponse, SimilarPrompt
|
12 |
from decouple import config
|
13 |
from fastapi import FastAPI, HTTPException, Depends, Body
|
14 |
from sentence_transformers import SentenceTransformer
|
15 |
|
16 |
|
17 |
|
18 |
-
prompt_path = r"C:\Users\jov2bg\Desktop\PromptSearch\models\prompts_data.jsonl"
|
19 |
|
20 |
|
21 |
app = FastAPI(title="Search Prompt Engine", description="API for prompt search", version="1.0")
|
@@ -46,16 +46,17 @@ async def search_prompts(query: Query, k: int = 3):
|
|
46 |
@app.post("/all_vectors_similarities/")
|
47 |
async def all_vectors(query: Query):
|
48 |
|
49 |
-
|
|
|
50 |
response = [
|
51 |
-
|
52 |
-
for
|
53 |
]
|
54 |
-
return
|
55 |
|
56 |
if __name__ == "__main__":
|
57 |
# Server Config
|
58 |
-
Search_SERVER_HOST_IP = socket.gethostbyname(socket.gethostname())
|
59 |
-
|
60 |
-
|
61 |
-
uvicorn.run(app, host=
|
|
|
8 |
from models.vectorizer import Vectorizer
|
9 |
from models.prompt_search_engine import PromptSearchEngine
|
10 |
from models.data_reader import load_prompts_from_jsonl
|
11 |
+
from models.Query import Query, Query_Multiple, SearchResponse, SimilarPrompt, PromptVector, VectorResponse
|
12 |
from decouple import config
|
13 |
from fastapi import FastAPI, HTTPException, Depends, Body
|
14 |
from sentence_transformers import SentenceTransformer
|
15 |
|
16 |
|
17 |
|
18 |
+
prompt_path = r"C:\Users\jov2bg\Desktop\PromptSearch\search_engine\models\prompts_data.jsonl"
|
19 |
|
20 |
|
21 |
app = FastAPI(title="Search Prompt Engine", description="API for prompt search", version="1.0")
|
|
|
46 |
@app.post("/all_vectors_similarities/")
|
47 |
async def all_vectors(query: Query):
|
48 |
|
49 |
+
query_embedding = search_engine.model.encode([query.prompt]) # Encode the prompt to a vector
|
50 |
+
all_similarities = search_engine.cosine_similarity(query_embedding, search_engine.index)
|
51 |
response = [
|
52 |
+
PromptVector(vector=index, distance=float(distance))
|
53 |
+
for index, distance in enumerate(all_similarities)
|
54 |
]
|
55 |
+
return VectorResponse(results=response)
|
56 |
|
57 |
if __name__ == "__main__":
|
58 |
# Server Config
|
59 |
+
# Search_SERVER_HOST_IP = socket.gethostbyname(socket.gethostname())
|
60 |
+
SERVER_HOST_IP = socket.gethostbyname("localhost") # for local deployment
|
61 |
+
SERVER_PORT = int(8084)
|
62 |
+
uvicorn.run(app, host=SERVER_HOST_IP, port=SERVER_PORT)
|
models/Query.py
CHANGED
@@ -17,4 +17,11 @@ class SimilarPrompt(BaseModel):
|
|
17 |
distance: float
|
18 |
|
19 |
class SearchResponse(BaseModel):
|
20 |
-
results: List[SimilarPrompt]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
distance: float
|
18 |
|
19 |
class SearchResponse(BaseModel):
|
20 |
+
results: List[SimilarPrompt]
|
21 |
+
|
22 |
+
class PromptVector(BaseModel):
|
23 |
+
vector: int
|
24 |
+
distance: float
|
25 |
+
|
26 |
+
class VectorResponse(BaseModel):
|
27 |
+
results: List[PromptVector]
|
models/__pycache__/Query.cpython-312.pyc
CHANGED
Binary files a/models/__pycache__/Query.cpython-312.pyc and b/models/__pycache__/Query.cpython-312.pyc differ
|
|
models/__pycache__/data_reader.cpython-312.pyc
CHANGED
Binary files a/models/__pycache__/data_reader.cpython-312.pyc and b/models/__pycache__/data_reader.cpython-312.pyc differ
|
|
models/__pycache__/prompt_search_engine.cpython-312.pyc
CHANGED
Binary files a/models/__pycache__/prompt_search_engine.cpython-312.pyc and b/models/__pycache__/prompt_search_engine.cpython-312.pyc differ
|
|
models/data_reader.py
CHANGED
@@ -41,7 +41,7 @@ def load_prompts_from_jsonl(file_path):
|
|
41 |
|
42 |
|
43 |
if __name__ == "__main__":
|
44 |
-
jsonl_file_path = r"C:\Users\jov2bg\Desktop\PromptSearch\models\prompts_data.jsonl"
|
45 |
num_shards = 1
|
46 |
dataset = download_data(num_shards, base_url)
|
47 |
extract_prompts(dataset, jsonl_file_path)
|
|
|
41 |
|
42 |
|
43 |
if __name__ == "__main__":
|
44 |
+
jsonl_file_path = r"C:\Users\jov2bg\Desktop\PromptSearch\search_engine\models\prompts_data.jsonl"
|
45 |
num_shards = 1
|
46 |
dataset = download_data(num_shards, base_url)
|
47 |
extract_prompts(dataset, jsonl_file_path)
|
models/prompt_search_engine.py
CHANGED
@@ -32,17 +32,27 @@ class PromptSearchEngine:
|
|
32 |
return similar_prompts, distances[0] # Return both the similar prompts and their distances
|
33 |
|
34 |
|
35 |
-
def cosine_similarity(query_vector
|
36 |
"""Compute the cosine similarity between a query vector and a set of corpus vectors.
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
|
|
|
32 |
return similar_prompts, distances[0] # Return both the similar prompts and their distances
|
33 |
|
34 |
|
35 |
+
def cosine_similarity(self, query_vector, index):
|
36 |
"""Compute the cosine similarity between a query vector and a set of corpus vectors.
|
37 |
+
Args: query_vector: The query vector to compare against the corpus vectors. index: The FAISS index whose stored vectors form the corpus compared against the query vector.
|
38 |
+
Returns: The cosine similarity between the query vector and the corpus vectors.
|
39 |
+
"""
|
40 |
+
|
41 |
+
query_vector = np.array(query_vector).astype('float32')
|
42 |
+
query_norm = query_vector / np.linalg.norm(query_vector)
|
43 |
+
|
44 |
+
# Get all vectors from FAISS
|
45 |
+
index_vectors = index.reconstruct_n(0, index.ntotal) # Reconstruct all vectors in the index
|
46 |
+
|
47 |
+
|
48 |
+
index_norms = np.linalg.norm(index_vectors, axis=1, keepdims=True)
|
49 |
+
normalized_index_vectors = index_vectors / index_norms
|
50 |
+
|
51 |
+
|
52 |
+
cosine_similarities = np.dot(normalized_index_vectors, query_norm.T)
|
53 |
+
|
54 |
+
return cosine_similarities
|
55 |
+
|
56 |
+
|
57 |
|
58 |
|