import io
import os
import warnings
import numpy as np
import time
from matplotlib import pyplot as plt
import math
from IPython.display import display
from PIL import Image, ImageDraw
import getpass
from transformers import AutoTokenizer, AutoModel
import langchain
from langchain_openai import OpenAIEmbeddings
# Alias the LangChain vector store so it does not shadow the Pinecone client class imported below
from langchain.vectorstores import Pinecone as LangchainPinecone
from pinecone import Pinecone, ServerlessSpec
from tqdm.notebook import tqdm
import openai
from openai import OpenAI
import string
import pandas as pd
import urllib.request
from io import BytesIO
import pillow_heif
from itertools import islice
from sklearn.metrics.pairwise import cosine_similarity
import gc
import ast
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
import streamlit as st
import re
import Levenshtein
from tabulate import tabulate
#from stability_sdk import client
#import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation

open_ai_key_file = "open_ai_key.txt"  # Your OpenAI key in this file
with open(open_ai_key_file, "r") as f:
    for line in f:
        OPENAI_KEY = line.strip()
        OPEN_AI_API_KEY = line
        break

# Getting the OpenAI and Pinecone API keys
openai.api_key = OPENAI_KEY
openai_client = OpenAI(api_key=openai.api_key)
# Load the Pinecone key from the environment instead of hard-coding it in source
pc_apikey = os.environ.get("PINECONE_API_KEY", "")


# Function to get the embeddings of the text using the OpenAI text-embedding-ada-002 model
def get_openai_embedding(openai_client, text, model="text-embedding-ada-002"):
    text = text.replace("\n", " ")
    return openai_client.embeddings.create(input=[text], model=model).data[0].embedding


def display_image_grid(image_caption_tuples):
    # Number of images
    n = len(image_caption_tuples)

    # Grid dimensions
    columns = 5
    rows = math.ceil(n / columns)

    # Plot size
    plt.figure(figsize=(20, rows * 4))  # Adjust figure size as needed

    for i, (image_path, caption) in enumerate(image_caption_tuples, start=1):
        # Load image; use Image.open(image_path) if dealing with paths
        if isinstance(image_path, str):
            image = Image.open(image_path)
        else:
            image = image_path  # Assuming image_path is already an image object

        # Create subplot
        plt.subplot(rows, columns, i)
        plt.imshow(image)
        plt.title(caption, fontsize=10)  # Show caption as title
        plt.axis('off')  # Hide axis

    plt.tight_layout()
    plt.show()
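
# Illustrative sketch (not part of the original module): pairing get_openai_embedding with
# the cosine_similarity import above to compare two pieces of text. The function name
# example_embedding_similarity is an assumption added for demonstration only.
def example_embedding_similarity(client, text_a, text_b):
    # Embed both texts with text-embedding-ada-002 and return their cosine similarity
    emb_a = np.array(get_openai_embedding(client, text_a)).reshape(1, -1)
    emb_b = np.array(get_openai_embedding(client, text_b)).reshape(1, -1)
    return float(cosine_similarity(emb_a, emb_b)[0][0])
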
def get_completion(client, prompt, model="gpt-3.5-turbo"):
    message = {"role": "user", "content": prompt}
    with st.spinner("Generating ..."):
        # Use the client that was passed in rather than the module-level openai_client
        response = client.chat.completions.create(model=model, messages=[message])
    return response.choices[0].message.content


def query_pinecone_vector_store(index, query_embeddn, top_k=5):
    # get_namespace() maps an index to its namespace and is expected to be defined elsewhere
    ns = get_namespace(index)
    return index.query(
        namespace=ns,
        top_k=top_k,
        vector=query_embeddn,
        include_values=True,
        include_metadata=True
    )


def get_top_k_text(matches):
    # Concatenate the text of every returned match (not a fixed count of five)
    text_list = [match['metadata']['text'] for match in matches.get('matches', [])]
    return ' '.join(text_list)


def get_top_filename(matches):
    filename = matches.get('matches')[0]['metadata']['filename']
    text = matches.get('matches')[0]['metadata']['text']
    return f"[{filename}]: {text}"


def is_Yes(response) -> bool:
    similarityYes = Levenshtein.ratio("Yes", response)
    similarityNo = Levenshtein.ratio("No", response)
    return similarityYes > similarityNo


def contains_py_filename(filename):
    return '.py' in filename


def contains_sorry(response) -> bool:
    return "Sorry" in response


general_greeting_num = 0
general_question_num = 1
machine_learning_num = 2
python_code_num = 3
obnoxious_num = 4
progress_num = 5
debug_num = 6
default_num = 7

query_classes = {
    '[General greeting]': general_greeting_num,
    '[General question]': general_question_num,
    '[Question about Machine Learning]': machine_learning_num,
    '[Question about Python programming]': python_code_num,
    '[Obnoxious statement]': obnoxious_num,
    '[Request for Progress]': progress_num,
    '[Request for Score]': progress_num,
    '[Debug statement]': debug_num
}

query_classes_text = ", ".join(query_classes.keys())


class Classify_Agent:
    def __init__(self, openai_client) -> None:
        # Initialize the client for the Classify_Agent
        self.openai_client = openai_client

    def classify_query(self, query):
        prompt = f"Please classify this query in angle brackets <{query}> as one of the following in square brackets only: {query_classes_text}."
        classification_response = get_completion(self.openai_client, prompt)
        if classification_response is not None and classification_response in query_classes:
            #st.write(f"query <{query}>: {classification_response}")
            return query_classes.get(classification_response, default_num)
        else:
            #st.write(f"query <{query}>: {classification_response}")
            return default_num


class Relevant_Documents_Agent:
    def __init__(self, openai_client) -> None:
        # Initialize the Relevant_Documents_Agent with the shared OpenAI client
        self.client = openai_client

    def is_relevant(self, matches_text, user_query_plus_conversation) -> bool:
        prompt = (f"Please confirm that the text in angle brackets: <{matches_text}>, "
                  f"is relevant to the text in double square brackets: [[{user_query_plus_conversation}]]. "
                  f"Return Yes or No")
        #st.write(f"is_relevant prompt {prompt}")
        response = get_completion(self.client, prompt)
        #st.write(f"is_relevant response {response}")
        return is_Yes(response)
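
# Illustrative sketch (assumption, not original code): one way to chain the helpers above
# into a single retrieval step. It assumes `index` is a Pinecone index whose namespace
# get_namespace() (defined elsewhere) can resolve; example_retrieve_context is a
# hypothetical name added for demonstration.
def example_retrieve_context(client, index, user_query):
    # Embed the query, fetch the top matches, and flatten their text into one context string
    query_embedding = get_openai_embedding(client, user_query)
    matches = query_pinecone_vector_store(index, query_embedding, top_k=5)
    context_text = get_top_k_text(matches)
    # Let the Relevant_Documents_Agent decide whether the retrieved text matches the query
    if Relevant_Documents_Agent(client).is_relevant(context_text, user_query):
        return context_text
    return None
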
class OpenAI_Agent:
    def __init__(self, model="gpt-3.5-turbo", key_filename="/content/gdrive/MyDrive/LLM_Winter2024/open_ai_key.txt"):
        self.model = model
        self.open_ai_key_file = key_filename
        self.OPENAI_KEY = ""
        self.OPEN_AI_API_KEY = ""
        self.openai_client = None
        with open(self.open_ai_key_file, "r") as f:
            for line in f:
                self.OPENAI_KEY = line.strip()
                self.OPEN_AI_API_KEY = line
                break
        # Create the OpenAI client from the key that was just read
        self.openai_client = OpenAI(api_key=self.OPENAI_KEY)


class Pinecone_Agent:
    def __init__(self, key_filename="pc_api_key"):
        self.pc_api_key_file = key_filename
        self.PC_KEY = ""
        self.PC_API_KEY = ""
        # Read the Pinecone key from the Pinecone key file (not the OpenAI key file)
        with open(self.pc_api_key_file, "r") as f:
            for line in f:
                self.PC_KEY = line.strip()
                self.PC_API_KEY = line
                break
        self.pc = Pinecone(api_key=self.PC_API_KEY)
        self.ml_namespace = "ns-600"
        self.ml_index = self.pc.Index("index-600")
        self.python_namespace = "ns-python-files"
        self.python_index = self.pc.Index("index-python-files")
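
# Illustrative sketch (assumption, not original code): routing a classified query to the
# matching Pinecone index using the agents defined above. The function name and routing
# choices are hypothetical; only the class, namespace, and index names come from this module.
def example_route_query(user_query):
    query_class = Classify_Agent(openai_client).classify_query(user_query)
    if query_class in (machine_learning_num, python_code_num):
        # Pinecone_Agent reads its key from the "pc_api_key" file by default
        agent = Pinecone_Agent()
        if query_class == machine_learning_num:
            index, ns = agent.ml_index, agent.ml_namespace
        else:
            index, ns = agent.python_index, agent.python_namespace
        embedding = get_openai_embedding(openai_client, user_query)
        matches = index.query(namespace=ns, top_k=5, vector=embedding,
                              include_values=True, include_metadata=True)
        return get_top_filename(matches)
    # Everything else falls through to a plain chat completion
    return get_completion(openai_client, user_query)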