"""Compare sentence embeddings from several models using cosine similarity.

For each configured model the script fetches an embedding for every sentence
in ``texts`` and prints the pairwise cosine similarity between them.
"""

import itertools
import json
import os

import numpy as np
import openai
import requests
from scipy.spatial.distance import cosine

# Endpoint used for every model whose name does not contain "ada".
LOCAL_EMBEDDINGS_URL = "http://localhost:8000/v1/embeddings"

# Seconds to wait on the local endpoint before giving up; without a timeout
# ``requests`` would block forever on a stalled server.
DEFAULT_TIMEOUT = 60


def get_embedding_from_api(
    word,
    model="vicuna-7b-v1.5",
    *,
    url=LOCAL_EMBEDDINGS_URL,
    timeout=DEFAULT_TIMEOUT,
):
    """Fetch the embedding of *word* from the backend selected by *model*.

    Models whose name contains ``"ada"`` are requested through
    ``openai.Embedding.create``; every other model is requested with an HTTP
    POST to *url*.

    Args:
        word: Text to embed.
        model: Name of the embedding model.
        url: Embeddings endpoint used for non-"ada" models.
        timeout: Timeout in seconds passed to ``requests.post``.

    Returns:
        The embedding as a NumPy array, or ``None`` when the HTTP endpoint
        answers with a status code other than 200 (the error is printed).

    Raises:
        requests.RequestException: If the HTTP request itself fails, e.g. the
            connection is refused or the timeout expires.
    """
    if "ada" in model:
        resp = openai.Embedding.create(
            model=model,
            input=word,
        )
        return np.array(resp["data"][0]["embedding"])

    headers = {"Content-Type": "application/json"}
    payload = json.dumps({"model": model, "input": word})
    response = requests.post(url, headers=headers, data=payload, timeout=timeout)
    if response.status_code != 200:
        print(f"Error: {response.status_code} - {response.text}")
        return None
    return np.array(response.json()["data"][0]["embedding"])


def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of two vectors.

    SciPy's ``cosine`` returns the cosine *distance*, so the similarity is
    one minus that value.
    """
    return 1 - cosine(vec1, vec2)


def print_cosine_similarity(embeddings, texts):
    """Print the cosine similarity for every unordered pair in *texts*.

    Args:
        embeddings: Mapping from text to its embedding vector, or ``None``
            when the embedding could not be fetched.
        texts: Sequence of texts; pairs are reported in sequence order.
    """
    for first, second in itertools.combinations(texts, 2):
        vec1 = embeddings[first]
        vec2 = embeddings[second]
        if vec1 is None or vec2 is None:
            # A failed fetch is stored as None; report it instead of letting
            # the distance computation fail on a non-vector input.
            print(f"Skipping '{first}' and '{second}': embedding unavailable")
            continue
        sim = cosine_similarity(vec1, vec2)
        print(f"Cosine similarity between '{first}' and '{second}': {sim:.2f}")


texts = [
    "The quick brown fox",
    "The quick brown dog",
    "The fast brown fox",
    "A completely different sentence",
]

# (heading printed before the results, model name sent to the API)
MODELS = (
    ("Vicuna-7B", "vicuna-7b-v1.5"),
    ("text-similarity-ada-001", "text-similarity-ada-001"),
    ("text-embedding-ada-002", "text-embedding-ada-002"),
)

# The same dict is reused for every model; each pass overwrites all entries.
embeddings = {}
for label, model_name in MODELS:
    for text in texts:
        embeddings[text] = get_embedding_from_api(text, model=model_name)
    print(f"{label}:")
    print_cosine_similarity(embeddings, texts)