Sk4467 commited on
Commit
d0fbfa7
1 Parent(s): 4f9bf7e

added application

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .vercel
Dockerfile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use the official Python 3.10.9 image
FROM python:3.10.9

# Set the working directory first so subsequent COPY/RUN paths resolve from /
WORKDIR /

# Install dependencies before copying the rest of the source so the pip
# layer is cached across code-only rebuilds.
COPY requirements.txt /requirements.txt
RUN pip install --no-cache-dir --upgrade -r /requirements.txt

# Copy the application code into the container at /
COPY . .

# Start the FastAPI app on port 7860, the default port expected by Spaces
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
__pycache__/RAG.cpython-310.pyc ADDED
Binary file (3.26 kB). View file
 
__pycache__/app.cpython-310.pyc ADDED
Binary file (2.14 kB). View file
 
__pycache__/app.cpython-39.pyc ADDED
Binary file (1.15 kB). View file
 
__pycache__/file_processing.cpython-310.pyc ADDED
Binary file (3.34 kB). View file
 
__pycache__/file_processing.cpython-39.pyc ADDED
Binary file (1.69 kB). View file
 
__pycache__/main.cpython-310.pyc ADDED
Binary file (1.47 kB). View file
 
__pycache__/query_processing.cpython-310.pyc ADDED
Binary file (1.18 kB). View file
 
__pycache__/query_processing.cpython-39.pyc ADDED
Binary file (995 Bytes). View file
 
app.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""FastAPI backend: upload a document, embed it into Chroma, and answer queries."""
from fastapi import FastAPI, File, UploadFile, Form
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from dotenv import load_dotenv
import os

from file_processing import load_documents, chunk_documents, create_embeddings
from query_processing import load_qa_chain, process_query

# Load environment variables from a local .env file (python-dotenv is a no-op
# when none exists, so the inherited process environment still applies).
# The previous hard-coded absolute Windows path made the app non-portable.
load_dotenv()

openai_api_key = os.environ.get('OPENAI_API_KEY')
# NOTE: never print the raw API key -- it would leak into server logs.

app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:3000"],  # only the local React dev server
    allow_credentials=True,
    allow_methods=["*"],  # allow all HTTP methods
    allow_headers=["*"],  # allow all headers
)


@app.post("/process-file")
async def process_file(collection_name: str = Form(...), file: UploadFile = File(...)):
    """Ingest an uploaded file: load, chunk, embed, and store it in Chroma.

    Returns a success message plus a short preview of each loaded document.
    """
    print("Received collection_name:", collection_name)
    print("Received file:", file.filename)

    # Load the raw document(s) from the upload.
    documents = await load_documents(file)

    # Split into overlapping chunks suitable for embedding.
    chunked_docs = chunk_documents(documents, chunk_size=500, chunk_overlap=100)

    # Create embeddings and persist them in the named Chroma collection.
    vector_store = create_embeddings(chunked_docs, collection_name)

    preview_length = 750  # leading characters of each document shown to the client
    document_previews = [doc.page_content[:preview_length] for doc in documents]

    return {"message": "File processed successfully", "document_preview": document_previews}


class QueryRequest(BaseModel):
    """Request body for the /query endpoint."""
    collection_name: str
    query: str


@app.post("/query")
async def query(request: QueryRequest):
    """Answer a user query against the named collection via RetrievalQA."""
    print(request.dict())

    # Build the RetrievalQA chain for the requested collection.
    qa_chain = load_qa_chain(request.collection_name)

    # Run the query through the chain.
    result = process_query(request.query, qa_chain)

    return {"result": result}
file_processing.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.document_loaders import PyPDFLoader, PDFMinerLoader, DirectoryLoader
2
+ from langchain.embeddings import SentenceTransformerEmbeddings
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain.vectorstores import Chroma
5
+ from os.path import join
6
+ import os
7
+ from dotenv import load_dotenv
8
+ load_dotenv(r'C:\Users\sksha\Desktop\llm-assignment-master\llm-assignment-master\llm-assignment-master_\backend\.env')
9
+ openai_api_key = os.environ.get('OPENAI_API_KEY')
10
+ from langchain.document_loaders import TextLoader, PDFMinerLoader, UnstructuredWordDocumentLoader, CSVLoader
11
+
12
+ # def load_documents(file_path):
13
+ # if file_path.endswith('.txt'):
14
+ # loader = TextLoader(file_path)
15
+ # elif file_path.endswith('.pdf'):
16
+ # loader = PyPDFLoader(file_path)
17
+ # elif file_path.endswith('.doc') or file_path.endswith('.docx'):
18
+ # loader = UnstructuredWordDocumentLoader(file_path)
19
+ # elif file_path.endswith('.csv'):
20
+ # loader = CSVLoader(file_path)
21
+ # else:
22
+ # raise ValueError(f"Unsupported file format: {file_path}")
23
+
24
+ # documents = loader.load()
25
+ # return documents
26
+ from fastapi import UploadFile
27
+ from typing import List
28
+ import fitz # PyMuPDF
29
+ import pandas as pd
30
+ import docx
31
+ from langchain.docstore.document import Document
32
def read_pdf(file_path: str) -> str:
    """Extract and concatenate the text of every page of a PDF.

    Args:
        file_path: Path to the PDF on disk.

    Returns:
        The full plain-text content of the document.
    """
    doc = fitz.open(file_path)
    try:
        # Join page texts in a single pass instead of quadratic += growth.
        return "".join(page.get_text() for page in doc)
    finally:
        doc.close()  # the original leaked the underlying file handle
38
+
39
def read_docx(file_path: str) -> str:
    """Return the text of a .docx file, paragraphs joined by newlines."""
    document = docx.Document(file_path)
    paragraphs = [paragraph.text for paragraph in document.paragraphs]
    return '\n'.join(paragraphs)
45
+
46
def read_csv(file_path: str) -> str:
    """Render the contents of a CSV file as a plain-text table."""
    frame = pd.read_csv(file_path)
    return frame.to_string()
49
+
50
def read_txt(file_path: str) -> str:
    """Read a UTF-8 text file and return its entire contents."""
    with open(file_path, 'r', encoding='utf-8') as handle:
        return handle.read()
53
+
54
async def load_documents(file: UploadFile) -> List[Document]:
    """Spool an uploaded file to disk, extract its text, and wrap it in a
    single LangChain Document.

    Supported extensions: .pdf, .docx, .csv, .txt.  Extraction failures are
    deliberately reported inside the document body instead of being raised,
    so the caller always receives a document.
    """
    temp_file_path = f"temp_{file.filename}"
    # Dispatch table: filename suffix -> reader function.
    readers = {
        '.pdf': read_pdf,
        '.docx': read_docx,
        '.csv': read_csv,
        '.txt': read_txt,
    }
    try:
        # Persist the upload so the path-based readers can open it.
        with open(temp_file_path, "wb") as temp_file:
            temp_file.write(await file.read())

        content = ""
        for suffix, reader in readers.items():
            if file.filename.endswith(suffix):
                content = reader(temp_file_path)
                break
        else:
            raise ValueError("Unsupported file format")
    except Exception as e:
        # Best-effort: surface the failure as document content, not an error.
        print(f"Error processing document: {e}")
        content = "Error processing document."
    finally:
        # Always remove the temporary spool file.
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)

    metadata = {'source': file.filename}
    return [Document(page_content=content, metadata=metadata)]
84
+
85
+
86
+ from langchain.text_splitter import CharacterTextSplitter
87
+
88
def chunk_documents(documents, chunk_size=1000, chunk_overlap=200):
    """Split documents into overlapping chunks for embedding.

    Args:
        documents: Iterable of LangChain Documents.
        chunk_size: Target number of characters per chunk.
        chunk_overlap: Characters shared between consecutive chunks.

    Returns:
        The list of chunked Documents produced by CharacterTextSplitter.
    """
    splitter = CharacterTextSplitter(chunk_size=chunk_size,
                                     chunk_overlap=chunk_overlap)
    return splitter.split_documents(documents)
95
+
96
+
97
+ from langchain.embeddings import OpenAIEmbeddings
98
+ from langchain.vectorstores import Chroma
99
+
100
def create_embeddings(chunked_docs, collection_name):
    """Embed the chunks with OpenAI and store them in a named Chroma collection.

    Returns the populated (and persisted) Chroma vector store.
    """
    vector_store = Chroma.from_documents(
        chunked_docs,
        OpenAIEmbeddings(openai_api_key=openai_api_key),
        collection_name=collection_name,
    )
    vector_store.persist()  # flush the collection to disk
    return vector_store
main.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from dotenv import load_dotenv
2
+ # from typing import Any
3
+ # from fastapi import FastAPI, HTTPException
4
+ # from fastapi.middleware.cors import CORSMiddleware
5
+ # from pydantic import BaseModel
6
+ # import RAG
7
+ # # Load environment variables from .env file (if any)
8
+ # load_dotenv()
9
+
10
+
11
+ # class Response(BaseModel):
12
+ # result: str | None
13
+
14
+ # class UserQuery(BaseModel):
15
+ # messages: str
16
+
17
+ # origins = [
18
+ # "http://localhost",
19
+ # "http://localhost:8080",
20
+ # "http://localhost:3000"
21
+ # ]
22
+
23
+ # app = FastAPI()
24
+ # app.add_middleware(
25
+ # CORSMiddleware,
26
+ # allow_origins=origins,
27
+ # allow_credentials=True,
28
+ # allow_methods=["*"],
29
+ # allow_headers=["*"],
30
+ # )
31
+
32
+ # initialize_model()
33
+ # # @app.post("/predict", response_model = Response)
34
+ # # def predict() -> Any:
35
+
36
+ # # #implement this code block
37
+
38
+ # # return {"result": "hello world!"}
39
+ # # @app.get("/hello")
40
+ # # async def hello():
41
+ # # return 'Hello World'
42
+ # @app.post("/home")
43
+ # def home_route(home: UserQuery):
44
+ # try:
45
+ # if not home.messages:
46
+ # raise HTTPException(status_code=400, detail="Empty value")
47
+
48
+ # # Call the custom function to generate a response using RetrievalQA
49
+ # answer, generation = generate_response(home.messages)
50
+
51
+ # return {"response": answer, "reasoning": generation}
52
+ # except Exception as e:
53
+ # print(f"An error occurred: {e}")
54
+ # raise HTTPException(status_code=500, detail="Internal Server Error")
55
+
56
+
57
from file_processing import load_documents, chunk_documents, create_embeddings
from query_processing import load_qa_chain, process_query
from dotenv import load_dotenv
import os


def main():
    """CLI driver: ingest a local document, then answer queries in a loop."""
    # Local imports keep these heavyweight deps out of module import time.
    import asyncio
    from fastapi import UploadFile

    load_dotenv()  # pick up OPENAI_API_KEY from a local .env, if present

    # TODO: take the input path as a CLI argument instead of hard-coding it.
    file_path = r'C:\Users\sksha\Desktop\llm-assignment-master\llm-assignment-master\backend\files\Option for Residence Accommodation.pdf'
    collection_name = 'my_collection'

    # BUG FIX: load_documents is an async coroutine that expects an
    # UploadFile.  The original called it synchronously with a bare path,
    # yielding an un-awaited coroutine that crashed in chunk_documents.
    with open(file_path, 'rb') as handle:
        upload = UploadFile(file=handle, filename=os.path.basename(file_path))
        documents = asyncio.run(load_documents(upload))

    # Chunk documents for embedding.
    chunked_docs = chunk_documents(documents, chunk_size=500, chunk_overlap=100)

    # Create embeddings and store them in Chroma.
    create_embeddings(chunked_docs, collection_name)

    # Build the RetrievalQA chain once, then answer queries interactively.
    qa_chain = load_qa_chain(collection_name)

    while True:
        query = input("Enter your query (or 'exit' to quit): ")
        if query.lower() == 'exit':
            break
        print(process_query(query, qa_chain))


if __name__ == '__main__':
    main()
query_processing.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.embeddings import OpenAIEmbeddings
2
+ from langchain.vectorstores import Chroma
3
+ from langchain.llms import OpenAI
4
+ from langchain.chains import RetrievalQA
5
+ import os
6
+ from dotenv import load_dotenv
7
+ load_dotenv(r'C:\Users\sksha\Desktop\llm-assignment-master\llm-assignment-master\llm-assignment-master_\backend\.env')
8
+ openai_api_key = os.environ.get('OPENAI_API_KEY')
9
def load_qa_chain(collection_name):
    """Build a RetrievalQA chain over an existing Chroma collection.

    Args:
        collection_name: Name of the persisted Chroma collection to query.

    Returns:
        A RetrievalQA chain that answers questions using the collection.
    """
    # Re-open the vector store; pass the API key explicitly, consistent with
    # the LLM below (OpenAIEmbeddings otherwise relies on the env var alone).
    vector_store = Chroma(
        collection_name=collection_name,
        embedding_function=OpenAIEmbeddings(openai_api_key=openai_api_key),
    )

    # Create an instance of the OpenAI language model.
    llm = OpenAI(openai_api_key=openai_api_key)

    # Retrieve only the 2 most similar chunks.  BUG FIX: the original built
    # this retriever and then ignored it, passing a fresh default retriever
    # to the chain, so the k=2 setting was dead code.
    retriever = vector_store.as_retriever(search_kwargs={"k": 2})

    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="map_reduce",
        retriever=retriever,
    )
24
+
25
def process_query(query, qa_chain):
    """Run *query* through the given RetrievalQA chain and return its answer."""
    return qa_chain.run(query)
requirements.txt ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # annotated-types==0.6.0
2
+ # anyio==4.3.0
3
+ # click==8.1.7
4
+ # colorama==0.4.6
5
+ # exceptiongroup==1.2.0
6
+ # fastapi==0.110.0
7
+ # h11==0.14.0
8
+ # httptools==0.6.1
9
+ # idna==3.6
10
+ # pydantic==2.6.3
11
+ # pydantic_core==2.16.3
12
+ # pyspark==3.3.1
13
+ # python-dotenv==1.0.1
14
+ # PyYAML==6.0.1
15
+ # sniffio==1.3.1
16
+ # starlette==0.36.3
17
+ # typing_extensions==4.10.0
18
+ # uvicorn==0.28.0
19
+ # watchfiles==0.21.0
20
+ # websockets==12.0
21
+ # langchain
22
+ # sentence-transformers
23
+ # chromadb
24
+ # torch==2.1.0
25
+ # accelerate==0.22.0
26
+ # bitsandbytes
27
+ langchain
28
+ openai
29
+ chromadb
30
+ tiktoken
31
+ PyPDF2
32
+ pypdf
33
+ python-docx
34
+ pandas
35
+ python-dotenv
36
+ fastapi
37
+ uvicorn
38
+ python-multipart
39
+ # chardet
40
+ PyMuPDF>=1.18.19