# DentalGPT/aifunc.py
import os
import textwrap

# Set your HuggingFace Hub API token (never commit a real token to source control)
os.environ["HUGGINGFACEHUB_API_TOKEN"] = "hf_xxx"  # replace with your own token
#from langchain.vectorstores.weaviate import Weaviate
from langchain.document_loaders import TextLoader  # for text files
from langchain.text_splitter import CharacterTextSplitter  # text splitter
from langchain.embeddings import HuggingFaceEmbeddings  # for using HuggingFace models
# Vectorstore: https://python.langchain.com/en/latest/modules/indexes/vectorstores.html
from langchain.vectorstores import FAISS  # Facebook AI Similarity Search vector store
from langchain.chains.question_answering import load_qa_chain
from langchain import HuggingFaceHub
from langchain.document_loaders import UnstructuredPDFLoader  # load PDFs
from langchain.indexes import VectorstoreIndexCreator  # vectorized db index with chromadb
from langchain.chains import RetrievalQA
from langchain.document_loaders import UnstructuredURLLoader  # load URLs into a document loader
# Load the source knowledge base from a plain-text file
loader = TextLoader('./KS-all-info_rev1.txt')
documents = loader.load()
def wrap_text_preserve_newlines(text, width=110):
    # Split the input text into lines based on newline characters
    lines = text.split('\n')
    # Wrap each line individually
    wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
    # Join the wrapped lines back together using newline characters
    wrapped_text = '\n'.join(wrapped_lines)
    return wrapped_text
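# Illustration (hypothetical input, not from the original script): explicit
# newlines survive while long lines fold at the given width, e.g.
#   wrap_text_preserve_newlines("intro line\n" + "word " * 100, width=40)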
# Split the document into ~3000-character chunks with a small overlap
text_splitter = CharacterTextSplitter(chunk_size=3000, chunk_overlap=10)
docs = text_splitter.split_documents(documents)
# Embeddings (HuggingFaceEmbeddings defaults to sentence-transformers/all-mpnet-base-v2)
embeddings = HuggingFaceEmbeddings()
# Create the vectorized db
# Vectorstore: https://python.langchain.com/en/latest/modules/indexes/vectorstores.html
db = FAISS.from_documents(docs, embeddings)
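# A minimal sketch of querying the index directly (the query string is a
# hypothetical illustration); kept commented out so nothing runs at import time:
#   example_hits = db.similarity_search("What causes tooth decay?", k=2)
#   print(example_hits[0].page_content[:200])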
# LaMini-Flan-T5-783M hosted on the HuggingFace Hub
llm = HuggingFaceHub(repo_id="MBZUAI/LaMini-Flan-T5-783M", model_kwargs={"temperature": 0, "max_length": 512})
# "stuff" chain type: concatenates the supplied documents into a single prompt
chain = load_qa_chain(llm, chain_type="stuff")
def run_chain(query):
    # Retrieve only the chunks most similar to the query from the FAISS index;
    # stuffing every chunk into the prompt would overflow the model's context.
    relevant_docs = db.similarity_search(query)
    result = chain.run(input_documents=relevant_docs, question=query)
    return result
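# Example usage, guarded so it only runs when this file is executed directly
# (the query below is a hypothetical illustration, not from the original script):
if __name__ == "__main__":
    answer = run_chain("What are the common symptoms of gingivitis?")
    print(wrap_text_preserve_newlines(answer))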