jet-taekyo committed on
Commit ece0f5f • 1 Parent(s): ed83bfd

change into langchain style

aimakerspace/__init__.py DELETED
File without changes
aimakerspace/langchain_wrappers/__init__.py DELETED
File without changes
aimakerspace/openai_utils/__init__.py DELETED
File without changes
aimakerspace/openai_utils/chatmodel.py DELETED
@@ -1,45 +0,0 @@
-from openai import OpenAI, AsyncOpenAI
-from dotenv import load_dotenv
-import os
-
-load_dotenv()
-
-
-class ChatOpenAI:
-    def __init__(self, model_name: str = "gpt-4o-mini"):
-        self.model_name = model_name
-        self.openai_api_key = os.getenv("OPENAI_API_KEY")
-        if self.openai_api_key is None:
-            raise ValueError("OPENAI_API_KEY is not set")
-
-    def run(self, messages, text_only: bool = True, **kwargs):
-        if not isinstance(messages, list):
-            raise ValueError("messages must be a list")
-
-        client = OpenAI()
-        response = client.chat.completions.create(
-            model=self.model_name, messages=messages, **kwargs
-        )
-
-        if text_only:
-            return response.choices[0].message.content
-
-        return response
-
-    async def astream(self, messages, **kwargs):
-        if not isinstance(messages, list):
-            raise ValueError("messages must be a list")
-
-        client = AsyncOpenAI()
-
-        stream = await client.chat.completions.create(
-            model=self.model_name,
-            messages=messages,
-            stream=True,
-            **kwargs
-        )
-
-        async for chunk in stream:
-            content = chunk.choices[0].delta.content
-            if content is not None:
-                yield content
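
For reference, the deleted wrapper maps almost one-to-one onto langchain_openai.ChatOpenAI, which this commit adopts. A minimal sketch of the equivalent calls (assuming OPENAI_API_KEY is set in the environment, as the deleted class required):

import asyncio

from dotenv import load_dotenv
from langchain_openai import ChatOpenAI

load_dotenv()  # picks up OPENAI_API_KEY, as the deleted wrapper did

llm = ChatOpenAI(model="gpt-4o-mini")

# Synchronous call, analogous to the deleted run(..., text_only=True)
print(llm.invoke([("system", "Be terse."), ("human", "Say hello.")]).content)

# Async token streaming, analogous to the deleted astream(...)
async def stream_demo():
    async for chunk in llm.astream("Say hello."):
        print(chunk.content, end="", flush=True)

asyncio.run(stream_demo())
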
aimakerspace/openai_utils/embedding.py DELETED
@@ -1,59 +0,0 @@
-from dotenv import load_dotenv
-from openai import AsyncOpenAI, OpenAI
-import openai
-from typing import List
-import os
-import asyncio
-
-
-class EmbeddingModel:
-    def __init__(self, embeddings_model_name: str = "text-embedding-3-small"):
-        load_dotenv()
-        self.openai_api_key = os.getenv("OPENAI_API_KEY")
-        self.async_client = AsyncOpenAI()
-        self.client = OpenAI()
-
-        if self.openai_api_key is None:
-            raise ValueError(
-                "OPENAI_API_KEY environment variable is not set. Please set it to your OpenAI API key."
-            )
-        openai.api_key = self.openai_api_key
-        self.embeddings_model_name = embeddings_model_name
-
-    async def async_get_embeddings(self, list_of_text: List[str]) -> List[List[float]]:
-        embedding_response = await self.async_client.embeddings.create(
-            input=list_of_text, model=self.embeddings_model_name
-        )
-
-        return [embeddings.embedding for embeddings in embedding_response.data]
-
-    async def async_get_embedding(self, text: str) -> List[float]:
-        embedding = await self.async_client.embeddings.create(
-            input=text, model=self.embeddings_model_name
-        )
-
-        return embedding.data[0].embedding
-
-    def get_embeddings(self, list_of_text: List[str]) -> List[List[float]]:
-        embedding_response = self.client.embeddings.create(
-            input=list_of_text, model=self.embeddings_model_name
-        )
-
-        return [embeddings.embedding for embeddings in embedding_response.data]
-
-    def get_embedding(self, text: str) -> List[float]:
-        embedding = self.client.embeddings.create(
-            input=text, model=self.embeddings_model_name
-        )
-
-        return embedding.data[0].embedding
-
-
-if __name__ == "__main__":
-    embedding_model = EmbeddingModel()
-    print(asyncio.run(embedding_model.async_get_embedding("Hello, world!")))
-    print(
-        asyncio.run(
-            embedding_model.async_get_embeddings(["Hello, world!", "Goodbye, world!"])
-        )
-    )
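
Likewise, the deleted EmbeddingModel corresponds to OpenAIEmbeddings from langchain_openai; the async variants are covered by aembed_query/aembed_documents. A sketch (again assuming OPENAI_API_KEY is set):

from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings

load_dotenv()

embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# embed_query ~ get_embedding, embed_documents ~ get_embeddings
vector = embeddings.embed_query("Hello, world!")
vectors = embeddings.embed_documents(["Hello, world!", "Goodbye, world!"])
print(len(vector), len(vectors))
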
aimakerspace/openai_utils/prompts.py DELETED
@@ -1,78 +0,0 @@
-import re
-
-
-class BasePrompt:
-    def __init__(self, prompt):
-        """
-        Initializes the BasePrompt object with a prompt template.
-
-        :param prompt: A string that can contain placeholders within curly braces
-        """
-        self.prompt = prompt
-        self._pattern = re.compile(r"\{([^}]+)\}")
-
-    def format_prompt(self, **kwargs):
-        """
-        Formats the prompt string using the keyword arguments provided.
-
-        :param kwargs: The values to substitute into the prompt string
-        :return: The formatted prompt string
-        """
-        matches = self._pattern.findall(self.prompt)
-        return self.prompt.format(**{match: kwargs.get(match, "") for match in matches})
-
-    def get_input_variables(self):
-        """
-        Gets the list of input variable names from the prompt string.
-
-        :return: List of input variable names
-        """
-        return self._pattern.findall(self.prompt)
-
-
-class RolePrompt(BasePrompt):
-    def __init__(self, prompt, role: str):
-        """
-        Initializes the RolePrompt object with a prompt template and a role.
-
-        :param prompt: A string that can contain placeholders within curly braces
-        :param role: The role for the message ('system', 'user', or 'assistant')
-        """
-        super().__init__(prompt)
-        self.role = role
-
-    def create_message(self, format=True, **kwargs):
-        """
-        Creates a message dictionary with a role and a formatted message.
-
-        :param kwargs: The values to substitute into the prompt string
-        :return: Dictionary containing the role and the formatted message
-        """
-        if format:
-            return {"role": self.role, "content": self.format_prompt(**kwargs)}
-
-        return {"role": self.role, "content": self.prompt}
-
-
-class SystemRolePrompt(RolePrompt):
-    def __init__(self, prompt: str):
-        super().__init__(prompt, "system")
-
-
-class UserRolePrompt(RolePrompt):
-    def __init__(self, prompt: str):
-        super().__init__(prompt, "user")
-
-
-class AssistantRolePrompt(RolePrompt):
-    def __init__(self, prompt: str):
-        super().__init__(prompt, "assistant")
-
-
-if __name__ == "__main__":
-    prompt = BasePrompt("Hello {name}, you are {age} years old")
-    print(prompt.format_prompt(name="John", age=30))
-
-    prompt = SystemRolePrompt("Hello {name}, you are {age} years old")
-    print(prompt.create_message(name="John", age=30))
-    print(prompt.get_input_variables())
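
The prompt classes removed here are subsumed by langchain_core's ChatPromptTemplate, which the new app.py builds from (role, template) tuples. A minimal sketch of the same behavior:

from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages(
    [("system", "Hello {name}, you are {age} years old")]
)

print(prompt.input_variables)                       # ~ get_input_variables()
print(prompt.format_messages(name="John", age=30))  # ~ create_message(format=True, ...)
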
aimakerspace/text_utils.py DELETED
@@ -1,77 +0,0 @@
-import os
-from typing import List
-
-
-class TextFileLoader:
-    def __init__(self, path: str, encoding: str = "utf-8"):
-        self.documents = []
-        self.path = path
-        self.encoding = encoding
-
-    def load(self):
-        if os.path.isdir(self.path):
-            self.load_directory()
-        elif os.path.isfile(self.path) and self.path.endswith(".txt"):
-            self.load_file()
-        else:
-            raise ValueError(
-                "Provided path is neither a valid directory nor a .txt file."
-            )
-
-    def load_file(self):
-        with open(self.path, "r", encoding=self.encoding) as f:
-            self.documents.append(f.read())
-
-    def load_directory(self):
-        for root, _, files in os.walk(self.path):
-            for file in files:
-                if file.endswith(".txt"):
-                    with open(
-                        os.path.join(root, file), "r", encoding=self.encoding
-                    ) as f:
-                        self.documents.append(f.read())
-
-    def load_documents(self):
-        self.load()
-        return self.documents
-
-
-class CharacterTextSplitter:
-    def __init__(
-        self,
-        chunk_size: int = 1000,
-        chunk_overlap: int = 200,
-    ):
-        assert (
-            chunk_size > chunk_overlap
-        ), "Chunk size must be greater than chunk overlap"
-
-        self.chunk_size = chunk_size
-        self.chunk_overlap = chunk_overlap
-
-    def split(self, text: str) -> List[str]:
-        chunks = []
-        for i in range(0, len(text), self.chunk_size - self.chunk_overlap):
-            chunks.append(text[i : i + self.chunk_size])
-        return chunks
-
-    def split_texts(self, texts: List[str]) -> List[str]:
-        chunks = []
-        for text in texts:
-            chunks.extend(self.split(text))
-        return chunks
-
-
-if __name__ == "__main__":
-    loader = TextFileLoader("data/KingLear.txt")
-    loader.load()
-    splitter = CharacterTextSplitter()
-    chunks = splitter.split_texts(loader.documents)
-    print(len(chunks))
-    print(chunks[0])
-    print("--------")
-    print(chunks[1])
-    print("--------")
-    print(chunks[-2])
-    print("--------")
-    print(chunks[-1])
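
Loader and splitter are replaced by the LangChain counterparts imported in the new app.py. A sketch reusing this module's defaults (chunk_size=1000, chunk_overlap=200) and its sample path; note that RecursiveCharacterTextSplitter splits on separators rather than the fixed-stride slicing above:

from langchain_community.document_loaders.text import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

loader = TextLoader("data/KingLear.txt", autodetect_encoding=True)
documents = loader.load()

splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = splitter.split_documents(documents)
print(len(chunks))
print(chunks[0].page_content[:200])
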
aimakerspace/vectordatabase.py DELETED
@@ -1,81 +0,0 @@
-import numpy as np
-from collections import defaultdict
-from typing import List, Tuple, Callable
-from aimakerspace.openai_utils.embedding import EmbeddingModel
-import asyncio
-
-
-def cosine_similarity(vector_a: np.array, vector_b: np.array) -> float:
-    """Computes the cosine similarity between two vectors."""
-    dot_product = np.dot(vector_a, vector_b)
-    norm_a = np.linalg.norm(vector_a)
-    norm_b = np.linalg.norm(vector_b)
-    return dot_product / (norm_a * norm_b)
-
-
-class VectorDatabase:
-    def __init__(self, embedding_model: EmbeddingModel = None):
-        self.vectors = defaultdict(np.array)
-        self.embedding_model = embedding_model or EmbeddingModel()
-
-    def insert(self, key: str, vector: np.array) -> None:
-        self.vectors[key] = vector
-
-    def search(
-        self,
-        query_vector: np.array,
-        k: int,
-        distance_measure: Callable = cosine_similarity,
-    ) -> List[Tuple[str, float]]:
-        scores = [
-            (key, distance_measure(query_vector, vector))
-            for key, vector in self.vectors.items()
-        ]
-        return sorted(scores, key=lambda x: x[1], reverse=True)[:k]
-
-    def search_by_text(
-        self,
-        query_text: str,
-        k: int,
-        distance_measure: Callable = cosine_similarity,
-        return_as_text: bool = False,
-    ) -> List[Tuple[str, float]]:
-        query_vector = self.embedding_model.get_embedding(query_text)
-        results = self.search(query_vector, k, distance_measure)
-        return [result[0] for result in results] if return_as_text else results
-
-    def retrieve_from_key(self, key: str) -> np.array:
-        return self.vectors.get(key, None)
-
-    async def abuild_from_list(self, list_of_text: List[str]) -> "VectorDatabase":
-        embeddings = await self.embedding_model.async_get_embeddings(list_of_text)
-        for text, embedding in zip(list_of_text, embeddings):
-            self.insert(text, np.array(embedding))
-        return self
-
-
-if __name__ == "__main__":
-    list_of_text = [
-        "I like to eat broccoli and bananas.",
-        "I ate a banana and spinach smoothie for breakfast.",
-        "Chinchillas and kittens are cute.",
-        "My sister adopted a kitten yesterday.",
-        "Look at this cute hamster munching on a piece of broccoli.",
-    ]
-
-    vector_db = VectorDatabase()
-    vector_db = asyncio.run(vector_db.abuild_from_list(list_of_text))
-    k = 2
-
-    searched_vector = vector_db.search_by_text("I think fruit is awesome!", k=k)
-    print(f"Closest {k} vector(s):", searched_vector)
-
-    retrieved_vector = vector_db.retrieve_from_key(
-        "I like to eat broccoli and bananas."
-    )
-    print("Retrieved vector:", retrieved_vector)
-
-    relevant_texts = vector_db.search_by_text(
-        "I think fruit is awesome!", k=k, return_as_text=True
-    )
-    print(f"Closest {k} text(s):", relevant_texts)
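
The hand-rolled cosine-similarity store gives way to an in-memory Qdrant collection via langchain_qdrant, as the new app.py shows with afrom_texts. A synchronous sketch (assuming OPENAI_API_KEY is set):

from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings
from langchain_qdrant import QdrantVectorStore

load_dotenv()

list_of_text = [
    "I like to eat broccoli and bananas.",
    "Chinchillas and kittens are cute.",
]

vector_db = QdrantVectorStore.from_texts(
    list_of_text,
    OpenAIEmbeddings(model="text-embedding-3-small"),
    location=":memory:",
    collection_name="texts",
)

# search_by_text(...) ~ similarity_search_with_score(...)
for doc, score in vector_db.similarity_search_with_score("I think fruit is awesome!", k=2):
    print(score, doc.page_content)
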
app.py CHANGED
@@ -1,68 +1,93 @@
 import os
+
+# For type hints
 from typing import List
+from langchain_core.vectorstores import VectorStoreRetriever
+from langchain_openai import ChatOpenAI
 from chainlit.types import AskFileResponse
-from aimakerspace.text_utils import CharacterTextSplitter, TextFileLoader
-from aimakerspace.openai_utils.prompts import (
-    UserRolePrompt,
-    SystemRolePrompt,
-    AssistantRolePrompt,
-)
-from aimakerspace.openai_utils.embedding import EmbeddingModel
-from aimakerspace.vectordatabase import VectorDatabase
-from aimakerspace.openai_utils.chatmodel import ChatOpenAI
+from langchain_openai.embeddings import OpenAIEmbeddings
+
+# Libraries to be used
+from langchain_community.document_loaders.text import TextLoader
+from langchain_community.document_loaders.pdf import PyPDFLoader
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_wrappers.langchain_chat_models import MyChatOpenAI
+from langchain_wrappers.langchain_embedding_models import MyOpenAIEmbeddings
+from langchain_qdrant import QdrantVectorStore
+from langchain_core.runnables import RunnablePassthrough, RunnableParallel
 import chainlit as cl
+from dotenv import load_dotenv
+
+# Cache
+from langchain.globals import set_llm_cache, get_llm_cache
+from langchain_community.cache import InMemoryCache
+set_llm_cache(InMemoryCache())
 
 system_template = """\
-Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer."""
-system_role_prompt = SystemRolePrompt(system_template)
+Use the following context to answer a users question. If you cannot find the answer in the context, say you don't know the answer.\
 
-user_prompt_template = """\
 Context:
-{context}
-
+{context}
+"""
+human_template = """\
 Question:
 {question}
 """
-user_role_prompt = UserRolePrompt(user_prompt_template)
+system_msg = ('system', system_template)
+user_msg = ('human', human_template)
 
-class RetrievalAugmentedQAPipeline:
-    def __init__(self, llm: ChatOpenAI(), vector_db_retriever: VectorDatabase) -> None:
-        self.llm = llm
-        self.vector_db_retriever = vector_db_retriever
-
-    async def arun_pipeline(self, user_query: str):
-        context_list = self.vector_db_retriever.search_by_text(user_query, k=4)
-
-        context_prompt = ""
-        for context in context_list:
-            context_prompt += context[0] + "\n"
-
-        formatted_system_prompt = system_role_prompt.create_message()
-
-        formatted_user_prompt = user_role_prompt.create_message(question=user_query, context=context_prompt)
-
-        async def generate_response():
-            async for chunk in self.llm.astream([formatted_system_prompt, formatted_user_prompt]):
-                yield chunk
-
-        return {"response": generate_response(), "context": context_list}
-
-text_splitter = CharacterTextSplitter()
+text_splitter = RecursiveCharacterTextSplitter()
+
+load_dotenv()
+
+### RAG chain
+def Get_RAG_pipeline(retriever: VectorStoreRetriever, llm: ChatOpenAI):
+
+    retriever = retriever.with_config({'run_name': 'RAG: Retriever'})
+    prompt = ChatPromptTemplate([system_msg, user_msg]).with_config({'run_name': 'RAG Step2: Prompt (Augmented)'})
+    llm = llm.with_config({'run_name': 'RAG Step3: LLM (Generation)'})
+
+    def get_context(relevant_docs: List):
+        context = ""
+        for doc in relevant_docs:
+            context += doc.page_content + "\n"
+        return context
+
+    RAG_chain = RunnableParallel(
+        relevant_docs = retriever,
+        question = lambda x: x
+    ).with_config({'run_name':'RAG Step1-1: Get relevant docs (Retrieval)'}) | RunnablePassthrough.assign(
+        context = lambda x: get_context(x['relevant_docs'])
+    ).with_config({'run_name':'RAG Step1-2: Get context (Retrieval)'}) | prompt | llm
+    RAG_chain = RAG_chain.with_config({'run_name':'RAG pipeline'})
+
+    return RAG_chain
 
 
 def process_text_file(file: AskFileResponse):
     import tempfile
 
-    with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as temp_file:
-        temp_file_path = temp_file.name
-
-    with open(temp_file_path, "wb") as f:
-        f.write(file.content)
-
-    text_loader = TextFileLoader(temp_file_path)
-    documents = text_loader.load_documents()
-    texts = text_splitter.split_texts(documents)
-    return texts
+    if file.name.endswith('.pdf'):
+        print("PDF file detected")
+        with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".pdf") as temp_file:
+            temp_file_path = temp_file.name
+        with open(temp_file_path, "wb") as f:
+            f.write(file.content)
+        document_loader = PyPDFLoader(temp_file_path)
+    elif file.name.endswith('.txt'):
+        with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".txt") as temp_file:
+            temp_file_path = temp_file.name
+        with open(temp_file_path, "wb") as f:
+            f.write(file.content)
+        document_loader = TextLoader(temp_file_path, autodetect_encoding=True)
+
+    documents = document_loader.load()
+    splitted_documents = [x.page_content for x in text_splitter.transform_documents(documents)]
+
+    return splitted_documents
 
 
 @cl.on_chat_start
@@ -73,13 +98,13 @@ async def on_chat_start():
     while files == None:
         files = await cl.AskFileMessage(
            content="Please upload a Text File file to begin!",
-            accept=["text/plain"],
-            max_size_mb=2,
+            accept=["text/plain", "application/pdf"],
+            max_size_mb=5,
            timeout=180,
        ).send()
 
-    file = files[0]
 
+    file = files[0]
     msg = cl.Message(
        content=f"Processing `{file.name}`...", disable_human_feedback=True
    )
@@ -91,32 +116,34 @@ async def on_chat_start():
     print(f"Processing {len(texts)} text chunks")
 
     # Create a dict vector store
-    vector_db = VectorDatabase()
-    vector_db = await vector_db.abuild_from_list(texts)
+    vector_db = await QdrantVectorStore.afrom_texts(
+        texts, MyOpenAIEmbeddings.from_model('small'), location=":memory:", collection_name="texts"
+    )
 
-    chat_openai = ChatOpenAI()
 
     # Create a chain
-    retrieval_augmented_qa_pipeline = RetrievalAugmentedQAPipeline(
-        vector_db_retriever=vector_db,
-        llm=chat_openai
+    RAG_chain = Get_RAG_pipeline(
+        retriever=vector_db.as_retriever(search_kwargs = {'k':3}),
+        llm=MyChatOpenAI.from_model()
     )
 
     # Let the user know that the system is ready
-    msg.content = f"Processing `{file.name}` done. You can now ask questions!"
+    msg.content = f"Processing `{file.name}` done ({len(texts)} chunks in total). You can now ask questions!"
     await msg.update()
 
-    cl.user_session.set("chain", retrieval_augmented_qa_pipeline)
+    cl.user_session.set("chain", RAG_chain)
+
+
 
 
 @cl.on_message
 async def main(message):
+    os.environ['LANGSMITH_PROJECT'] = os.getenv('LANGCHAIN_PROJECT')
+
     chain = cl.user_session.get("chain")
 
     msg = cl.Message(content="")
-    result = await chain.arun_pipeline(message.content)
-
-    async for stream_resp in result["response"]:
-        await msg.stream_token(stream_resp)
+    async for stream_resp in chain.astream(message.content):
+        await msg.stream_token(stream_resp.content)
 
     await msg.send()
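
The RunnableParallel / RunnablePassthrough.assign plumbing in Get_RAG_pipeline is easier to see outside the diff. A self-contained sketch of the same LCEL pattern, with a lambda standing in for the Qdrant retriever:

from langchain_core.runnables import RunnableParallel, RunnablePassthrough

# Step 1-1: fan the raw question out into a dict, as Get_RAG_pipeline does
fan_out = RunnableParallel(
    relevant_docs=lambda x: [x.upper()],  # stand-in for the retriever
    question=lambda x: x,
)

# Step 1-2: derive a 'context' key from the retrieved docs
with_context = fan_out | RunnablePassthrough.assign(
    context=lambda x: "\n".join(x["relevant_docs"])
)

print(with_context.invoke("what is qdrant?"))
# -> {'relevant_docs': ['WHAT IS QDRANT?'], 'question': 'what is qdrant?',
#     'context': 'WHAT IS QDRANT?'}

The full chain then pipes this dict into the prompt and LLM; chain.astream(...) yields message chunks whose .content the on_message handler streams token by token.
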
{aimakerspace/langchain_wrappers → langchain_wrappers}/langchain_chat_models.py RENAMED
@@ -2,10 +2,12 @@ import os
 from dotenv import load_dotenv
 from typing import Optional
 from langchain_openai import ChatOpenAI
-import inspect
 
-load_dotenv(os.path.join(os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) , '.env'))
+# import inspect
+# load_dotenv(os.path.join(os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) , '.env'))
 
+load_dotenv()
 class MyChatOpenAI:
     @classmethod
     def from_model(
@@ -17,9 +19,8 @@ class MyChatOpenAI:
         max_tokens: Optional[int] = 4096,
         max_retries: int = 1,
         **kwargs
-    )-> ChatOpenAI:
-
-        os.environ['LANGCHAIN_PROJECT'] = langsmith_project
+    )-> ChatOpenAI:
+
         if model in ['gpt-4o', 'GPT-4o', 'GPT-4O', 'gpt-4O', 'gpt4o', 'GPT4o', 'GPT4O', 'gpt4O']:
             model = 'gpt-4o'
         elif model in ['gpt-4o-mini', 'GPT-4o-mini', 'GPT-4O-mini', 'gpt-4O-mini', 'gpt4o-mini', 'GPT4o-mini', 'GPT4O-mini', 'gpt4O-mini', 'gpt4omini', 'GPT4omini', 'GPT4Omini', 'gpt4Omini']:
@@ -38,13 +39,5 @@ class MyChatOpenAI:
         )
 
 
-    @classmethod
-    def get_model_price(cls)-> dict:
-        # Dictionary to store the cost of input and output tokens for each model
-        supported_models = {'gpt-4o' : (5, 15)} # gpt-4o model: input cost = $5 per 1M tokens, output cost = $15 per 1M tokens
-        supported_models.update({'gpt-4o-mini' : (0.15, 0.6)}) # gpt-4o-mini model: input cost = $0.15 per 1M tokens, output cost = $0.6 per 1M tokens
-
-        return supported_models
-
 
 
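
A hedged usage sketch for the renamed wrapper, matching the bare from_model() call in app.py; the model default is not visible in this hunk, so no alias is assumed:

from langchain_wrappers.langchain_chat_models import MyChatOpenAI

# The module itself calls load_dotenv() at import time, so OPENAI_API_KEY
# only needs to be present in the environment or a .env file.
llm = MyChatOpenAI.from_model()  # returns a configured langchain ChatOpenAI
print(llm.invoke("Hello!").content)
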
{aimakerspace/langchain_wrappers → langchain_wrappers}/langchain_embedding_models.py RENAMED
@@ -2,10 +2,11 @@ import os
 from dotenv import load_dotenv
 from typing import Optional
 from langchain_openai.embeddings import OpenAIEmbeddings
-import inspect
 
-load_dotenv(os.path.join(os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) , '.env'))
+# import inspect
+# load_dotenv(os.path.join(os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) , '.env'))
 
+load_dotenv()
 class MyOpenAIEmbeddings:
     @classmethod
     def from_model(
@@ -36,13 +37,6 @@ class MyOpenAIEmbeddings:
         )
 
 
-    @classmethod
-    def get_model_price(cls)-> dict:
-        # Dictionary to store the cost of input and output tokens for each model
-        supported_models = {'text-embedding-3-small' : 0.02} # text-embedding-3-small model: $0.02 per 1M tokens
-        supported_models.update({'text-embedding-3-large' : 0.13}) # text-embedding-3-large model: $0.13 per 1M tokens
-
-        return supported_models
 
 
 
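
And the matching sketch for the embeddings wrapper; 'small' is the alias app.py passes, which presumably resolves to text-embedding-3-small in the unshown body of from_model:

from langchain_wrappers.langchain_embedding_models import MyOpenAIEmbeddings

embeddings = MyOpenAIEmbeddings.from_model('small')  # returns an OpenAIEmbeddings
print(len(embeddings.embed_query("hello")))
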
requirements.txt CHANGED
@@ -1,3 +1,10 @@
 numpy
 chainlit==0.7.700
-openai
+openai==1.42.0
+langchain==0.2.14
+langchain-core==0.2.34
+langchain-community==0.2.12
+langchain-openai==0.1.22
+langchain-qdrant==0.1.3
+qdrant-client==1.11.0
+pypdf==4.3.1
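
A small standard-library check that the pins above are what actually got installed:

from importlib.metadata import version

for pkg in ["chainlit", "openai", "langchain", "langchain-openai",
            "langchain-qdrant", "qdrant-client", "pypdf"]:
    print(f"{pkg}=={version(pkg)}")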