Spaces:
Sleeping
Sleeping
import io | |
import os | |
import warnings | |
import numpy as np | |
import time | |
from matplotlib import pyplot as plt | |
import math | |
from IPython.display import display | |
from PIL import Image, ImageDraw | |
import getpass | |
from transformers import AutoTokenizer, AutoModel | |
import langchain | |
from langchain_openai import OpenAIEmbeddings | |
from langchain.vectorstores import Pinecone | |
from pinecone import Pinecone, ServerlessSpec | |
from tqdm.notebook import tqdm | |
import openai | |
from openai import OpenAI | |
import string | |
import pandas as pd | |
import urllib.request | |
from io import BytesIO | |
import pillow_heif | |
from itertools import islice | |
from sklearn.metrics.pairwise import cosine_similarity | |
import gc | |
import ast | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from sentence_transformers import SentenceTransformer | |
import streamlit as st | |
import re | |
import Levenshtein | |
from tabulate import tabulate | |
#from stability_sdk import client | |
#import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation | |
# Path to a text file whose first line holds the OpenAI API key.
open_ai_key_file = "open_ai_key.txt"  # Your OPEN AI Key in this file

# Only the first line of the key file is used. OPEN_AI_API_KEY keeps the raw
# line (including any trailing newline); OPENAI_KEY is the stripped form that
# is handed to the OpenAI client.
with open(open_ai_key_file, "r") as f:
    OPEN_AI_API_KEY = f.readline()
OPENAI_KEY = OPEN_AI_API_KEY.strip()
if not OPENAI_KEY:
    # Fail here with a clear message instead of a NameError (empty file) or
    # an authentication failure on the first request (blank first line).
    raise ValueError(f"No OpenAI API key found in {open_ai_key_file!r}")

# GETTING OpenAI and Pinecone api key
# The module-level key and the shared client are set up exactly once.
openai.api_key = OPENAI_KEY
openai_client = OpenAI(api_key=openai.api_key)

# SECURITY: a Pinecone API key is committed in source. It is kept only as a
# fallback so existing deployments keep working; set PINECONE_API_KEY in the
# environment and rotate/remove the literal below.
pc_apikey = os.environ.get(
    "PINECONE_API_KEY", "959aded5-f2fe-4f9e-966c-3c7bd5907202"
)
# Function to get the embeddings of the text using OpenAI text-embedding-ada-002 model
def get_openai_embedding(openai_client, text, model="text-embedding-ada-002"):
    """Return the embedding vector for ``text``.

    Newlines in ``text`` are replaced with spaces, the result is sent as a
    single-element input to ``openai_client.embeddings.create``, and the
    ``embedding`` attribute of the first returned item is handed back.

    Args:
        openai_client: Client object exposing ``embeddings.create``.
        text: The string to embed.
        model: Name of the embedding model to request.
    """
    single_line = text.replace("\n", " ")
    response = openai_client.embeddings.create(input=[single_line], model=model)
    first_item = response.data[0]
    return first_item.embedding
def display_image_grid(image_caption_tuples, columns=5):
    """Show images with their captions in a grid of matplotlib subplots.

    Args:
        image_caption_tuples: Sized sequence of ``(image, caption)`` pairs.
            A ``str`` image is treated as a path and opened with
            ``Image.open``; any other value is passed to ``plt.imshow`` as is.
        columns: Number of grid columns. Defaults to 5, the value that was
            previously hard-coded.

    Returns:
        None. The figure is displayed via ``plt.show()``.

    Raises:
        ValueError: If ``columns`` is smaller than 1.
    """
    if columns < 1:
        raise ValueError(f"columns must be >= 1, got {columns}")

    n = len(image_caption_tuples)
    if n == 0:
        # Nothing to draw; avoid creating a zero-row, zero-height figure.
        return

    rows = math.ceil(n / columns)

    # Each grid row gets 4 inches of height; the width is fixed at 20 inches.
    plt.figure(figsize=(20, rows * 4))

    for i, (image_path, caption) in enumerate(image_caption_tuples, start=1):
        # Strings are paths to load; everything else is assumed to already be
        # something plt.imshow can render.
        if isinstance(image_path, str):
            image = Image.open(image_path)
        else:
            image = image_path

        plt.subplot(rows, columns, i)
        plt.imshow(image)
        plt.title(caption, fontsize=10)  # Show caption as title
        plt.axis('off')  # Hide axis

    plt.tight_layout()
    plt.show()
def get_completion(client, prompt, model="gpt-3.5-turbo"):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        client: Client object exposing ``chat.completions.create``. When it
            is ``None`` the module-level ``openai_client`` is used instead.
        prompt: Text placed in the ``content`` of the user message.
        model: Name of the chat model to request.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    # The original ignored ``client`` and always used the global client; the
    # caller's client is honoured now, with the global kept as a fallback.
    active_client = client if client is not None else openai_client
    message = {"role": "user", "content": prompt}
    # The request runs inside a Streamlit spinner labelled "Generating ...".
    with st.spinner("Generating ..."):
        response = active_client.chat.completions.create(
            model=model,
            messages=[message],
        )
    return response.choices[0].message.content
def query_pinecone_vector_store(index, query_embeddn, top_k=5):
    """Query ``index`` with an embedding and return the raw query result.

    The namespace is obtained from ``get_namespace(index)`` (defined
    elsewhere in this file). Both vector values and metadata are requested
    for the ``top_k`` results.

    Args:
        index: Object exposing a ``query`` method.
        query_embeddn: The query vector.
        top_k: Number of results to request.
    """
    query_args = {
        "namespace": get_namespace(index),
        "top_k": top_k,
        "vector": query_embeddn,
        "include_values": True,
        "include_metadata": True,
    }
    return index.query(**query_args)
def get_top_k_text(matches, top_k=5):
    """Join the ``text`` metadata of the leading matches into one string.

    Args:
        matches: Mapping-like query result whose ``'matches'`` entry is a
            list of items, each carrying ``['metadata']['text']``.
        top_k: Maximum number of matches to use. Defaults to 5, the count
            that was previously hard-coded.

    Returns:
        The selected texts separated by single spaces, or ``""`` when there
        are no matches.
    """
    # Slicing instead of indexing 0..4 avoids an IndexError when the query
    # returned fewer than ``top_k`` results; a missing list counts as empty.
    leading = (matches.get('matches') or [])[:top_k]
    return ' '.join(match['metadata']['text'] for match in leading)
def get_top_filename(matches):
    """Format the first match as ``"[<filename>]: <text>"``.

    Both values are read from the ``metadata`` of the first item in
    ``matches.get('matches')``.
    """
    best_metadata = matches.get('matches')[0]['metadata']
    filename = best_metadata['filename']
    text = best_metadata['text']
    return f"[{filename}]: {text}"
def is_Yes(response) -> bool:
    """Return True when ``response`` is closer to "Yes" than to "No".

    Closeness is measured with ``Levenshtein.ratio``; a tie counts as False.
    """
    yes_score, no_score = (
        Levenshtein.ratio(label, response) for label in ("Yes", "No")
    )
    return yes_score > no_score
def contains_py_filename(filename):
    """Return True when the substring ``.py`` occurs anywhere in ``filename``."""
    marker = '.py'
    return marker in filename
def contains_sorry(response) -> bool:
    """Return True when ``response`` contains the exact, case-sensitive word "Sorry"."""
    apology = "Sorry"
    return apology in response
# Integer ids for the query categories, numbered 0 through 7 in this order.
(
    general_greeting_num,
    general_question_num,
    machine_learning_num,
    python_code_num,
    obnoxious_num,
    progress_num,
    debug_num,
    default_num,
) = range(8)

# Maps each bracketed category label to its id. The two "Request for ..."
# labels share the same id; default_num has no label of its own.
query_classes = {
    '[General greeting]': general_greeting_num,
    '[General question]': general_question_num,
    '[Question about Machine Learning]': machine_learning_num,
    '[Question about Python programming]': python_code_num,
    '[Obnoxious statement]': obnoxious_num,
    '[Request for Progress]': progress_num,
    '[Request for Score]': progress_num,
    '[Debug statement]': debug_num,
}

# Comma-separated list of every label, in insertion order.
query_classes_text = ", ".join(query_classes)
class Classify_Agent:
    """Classifies a user query into one of the ``query_classes`` categories."""

    def __init__(self, openai_client) -> None:
        """Store the client that is passed to ``get_completion``."""
        self.openai_client = openai_client

    def classify_query(self, query):
        """Return the integer category id for ``query``.

        The model is asked to answer with one of the bracketed labels in
        ``query_classes_text``. The answer is looked up in ``query_classes``
        after surrounding whitespace is removed; a missing answer or an
        unknown label yields ``default_num``.
        """
        prompt = f"Please classify this query in angle brackets <{query}> as one of the following in square brackets only: {query_classes_text}."
        classification_response = get_completion(self.openai_client, prompt)
        if classification_response is None:
            return default_num
        # Strip so a reply such as "[General greeting]\n" still matches its
        # label instead of falling through to the default category.
        return query_classes.get(classification_response.strip(), default_num)
class Relevant_Documents_Agent:
    """Asks the model whether retrieved text is relevant to the conversation."""

    def __init__(self, openai_client) -> None:
        """Keep the client that is passed to ``get_completion``."""
        self.client = openai_client

    def is_relevant(self, matches_text, user_query_plus_conversation) -> bool:
        """Return True when the model's answer reads as "Yes".

        A Yes/No prompt is built from both texts and sent through
        ``get_completion``; the reply is then interpreted by ``is_Yes``.
        """
        relevance_prompt = f"Please confirm that the text in angle brackets: <{matches_text}>, is relevant to the text in double square brackets: [[{user_query_plus_conversation}]]. Return Yes or No"
        return is_Yes(get_completion(self.client, relevance_prompt))
class OpenAI_Agent:
    """Holds a model name and the OpenAI key read from a key file."""

    def __init__(self, model="gpt-3.5-turbo", key_filename="/content/gdrive/MyDrive/LLM_Winter2024/open_ai_key.txt"):
        """Read the first line of ``key_filename`` as the API key.

        ``OPEN_AI_API_KEY`` receives the raw first line and ``OPENAI_KEY``
        the same line with surrounding whitespace stripped; both are ``""``
        for an empty file. ``openai_client`` is left as ``None`` here.
        """
        self.model = model
        self.open_ai_key_file = key_filename
        self.openai_client = None

        with open(self.open_ai_key_file, "r") as key_file:
            # readline() yields "" at end of file, matching the defaults the
            # attributes would otherwise keep.
            first_line = key_file.readline()

        self.OPEN_AI_API_KEY = first_line
        self.OPENAI_KEY = first_line.strip()
class Pinecone_Agent:
    """Reads the Pinecone API key and opens the two indexes used by the app."""

    def __init__(self, key_filename="pc_api_key"):
        """Load the key from ``key_filename`` and connect to Pinecone.

        ``PC_API_KEY`` receives the raw first line of the file and ``PC_KEY``
        the same line with surrounding whitespace stripped; both stay ``""``
        for an empty file. Index handles are created for "index-600" and
        "index-python-files".
        """
        self.pc_api_key_file = key_filename
        self.PC_KEY = ""
        self.PC_API_KEY = ""
        # Open this class's own key file. The original opened
        # ``self.open_ai_key_file``, an attribute that is never set here.
        with open(self.pc_api_key_file, "r") as f:
            for line in f:
                self.PC_KEY = line.strip()
                self.PC_API_KEY = line
                break
        # Use the stripped key so a trailing newline from the key file does
        # not become part of the credential.
        self.pc = Pinecone(api_key=self.PC_KEY)
        self.ml_namespace = "ns-600"
        self.ml_index = self.pc.Index("index-600")
        self.python_namespace = "ns-python-files"
        self.python_index = self.pc.Index("index-python-files")