|
import os
|
|
from commons.Configs import configs
|
|
from commons.File import file
|
|
import openai
|
|
from openai.embeddings_utils import cosine_similarity
|
|
import json
|
|
|
|
|
|
class OpenAIClient:
    """Thin wrapper around the (legacy, module-level) ``openai`` SDK calls used here.

    It builds prompts from template files, asks the chat-completion model for
    synthetic question/answer pairs, creates embeddings, and picks the
    embedding most similar to a query embedding.
    """

    def __init__(self, debug=False):
        """Configure the SDK from ``configs``.

        Args:
            debug: When true, ``generateSyntheticQuestions`` prints the
                sentence, the raw model reply and the parsed result.
        """
        self.debug = debug
        # NOTE: this sets the API key globally on the ``openai`` module,
        # not just for this instance.
        openai.api_key = configs.OPENAI_KEY
        self.embeddingsModel = configs.embeddingsModel

    def buildPrompt(self, name, variables):
        """Load the ``<name>.prompt.txt`` template and fill in its placeholders.

        Args:
            name: Template base name; the file is looked up as
                ``<configs.promptsDir>/<name>.prompt.txt``.
            variables: Mapping of placeholder name -> replacement. Every
                literal ``{key}`` occurrence in the template is replaced.
                Values are converted with ``str()`` so non-string values
                (ints, floats, ...) no longer raise ``TypeError``.

        Returns:
            The template text with all known placeholders substituted.
        """
        promptFilePath = os.path.join(configs.promptsDir, f"{name}.prompt.txt")
        # presumably returns the file content as a single string -- verify
        # against commons.File
        prompt = file.readFile(promptFilePath)
        for key, value in variables.items():
            prompt = prompt.replace(f"{{{key}}}", str(value))
        return prompt

    @staticmethod
    def _parseQuestionAnswers(responseText):
        """Parse ``(Q) ... (A) ...`` pairs out of a model reply.

        Newlines are removed first, everything before the first ``(Q)``
        marker is discarded, and each following ``(Q)`` chunk is split at its
        first ``(A)`` marker. Chunks without an ``(A)`` marker are skipped.

        Args:
            responseText: Raw text returned by the chat model.

        Returns:
            A list of ``{"question": ..., "answer": ...}`` dicts with
            surrounding whitespace stripped; an empty list when the text
            contains no ``(Q)`` marker at all.
        """
        flattened = responseText.replace("\n", "")
        _, marker, remainder = flattened.partition("(Q)")
        if not marker:
            # The reply did not follow the expected format; indexing the
            # split result here used to raise IndexError.
            return []

        pairs = []
        for chunk in remainder.split("(Q)"):
            question, separator, answer = chunk.partition("(A)")
            if separator:
                pairs.append(
                    {"question": question.strip(), "answer": answer.strip()}
                )
        return pairs

    def generateSyntheticQuestions(self, prompt, debugSentence=""):
        """Use the OpenAI chat-completion API to generate synthetic questions.

        Args:
            prompt: Full prompt text, sent as a single ``user`` message.
            debugSentence: Only printed when ``self.debug`` is set.

        Returns:
            A list of ``{"question": ..., "answer": ...}`` dicts parsed from
            the first choice of the reply (empty if the reply contains no
            ``(Q)`` marker).
        """
        response = openai.ChatCompletion.create(
            model=configs.chatCompletionModel,
            messages=[{"role": "user", "content": prompt}],
        )
        responseText = response['choices'][0]['message']['content']

        jsonData = self._parseQuestionAnswers(responseText)

        if self.debug:
            print("Sentence: ", debugSentence)
            print("Response text: ", responseText)
            print("jsonData: ", json.dumps(jsonData, indent=4))

        return jsonData

    def generateEmbeddings(self, sentences):
        """Create one embedding per entry of ``sentences``.

        Args:
            sentences: Inputs passed as ``input`` to the embeddings endpoint;
                expected to be a sized collection (``len()`` is taken).

        Returns:
            A list with the ``embedding`` field of every item in the
            response's ``data``, in the order the response lists them.

        Raises:
            AssertionError: If the number of returned embeddings differs
                from ``len(sentences)``.
        """
        response = openai.Embedding.create(
            input=sentences,
            model=self.embeddingsModel,
        )
        embeddings = [item['embedding'] for item in response['data']]
        assert len(embeddings) == len(sentences), (
            f"expected {len(sentences)} embeddings, got {len(embeddings)}"
        )
        return embeddings

    def searchBestEmbeddingIndex(self, embeddedQuestion, embeddingsToSearch):
        """Search for the index of the embedding most similar to the question.

        Args:
            embeddedQuestion: Embedding vector of the query.
            embeddingsToSearch: Iterable of candidate embedding vectors.

        Returns:
            Index of the candidate with the highest cosine similarity (the
            first one on ties), or ``0`` when there are no candidates.
        """
        bestIndex = 0
        # Start below any possible similarity: cosine similarity can be
        # negative, and starting from 0 made index 0 win whenever every
        # candidate scored <= 0.
        bestSimilarity = float("-inf")
        for i, embedding in enumerate(embeddingsToSearch):
            similarity = cosine_similarity(embeddedQuestion, embedding)
            if similarity > bestSimilarity:
                bestSimilarity = similarity
                bestIndex = i
        return bestIndex
|
|
|
|
|
|
|
|
|
|
# Module-level shared client, created at import time with the default
# settings (debug output disabled). Constructing it also sets
# ``openai.api_key`` from ``configs`` as a side effect of ``__init__``.
openaiClient = OpenAIClient()
|
|
|