Spaces:

deepakMLKT
/

Biskane-RAG

Sleeping

App Files Files Community

root committed on Jun 13

Commit

b0ccf04

•

1 Parent(s): 08fd05e

files added

Browse files

Files changed (3) hide show

Dockerfile +16 -16
app.py +42 -42
utils.py +90 -90

Dockerfile CHANGED Viewed

@@ -1,16 +1,16 @@
-# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
-# you will also find guides on how best to write your Dockerfile
-FROM python:3.9
-RUN useradd -m -u 1000 user
-WORKDIR /app
-COPY --chown=user ./requirements.txt requirements.txt
-RUN pip install --no-cache-dir --upgrade -r requirements.txt
-COPY --chown=user . /app
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

+# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+# you will also find guides on how best to write your Dockerfile
+FROM python:3.9
+RUN useradd -m -u 1000 user
+WORKDIR /app
+COPY --chown=user ./requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+COPY --chown=user . /app
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

app.py CHANGED Viewed

@@ -1,42 +1,42 @@
-from fastapi import FastAPI, HTTPException
-from pydantic import BaseModel
-from utils import retrive_context, generate_response
-# Initialize FastAPI
-app = FastAPI()
-class QueryRequest(BaseModel):
-    # Asked query should be in string format
-    query: str
-class QueryResponse(BaseModel):
-    # Response should be in string format
-    response: str
-@app.post("/infer", response_model=QueryResponse)
-def infer(query_request: QueryRequest):
-    query = query_request.query
-    context = retrive_context(query)
-    if context == 500:
-        raise HTTPException(status_code=500, detail="Error retrieving context")
-    response = generate_response(query, context)
-    if response == 500:
-        raise HTTPException(status_code=500, detail="Error generating response")
-    return QueryResponse(response=response)
-# Root endpoint for testing
-@app.get("/")
-def read_root():
-    return {"message": "Inference API is running"}
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(app, host="127.0.0.1", port=8000, log_level="info")

+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from utils import retrive_context, generate_response
+# Initialize FastAPI
+app = FastAPI()
+class QueryRequest(BaseModel):
+    # Asked query should be in string format
+    query: str
+class QueryResponse(BaseModel):
+    # Response should be in string format
+    response: str
+@app.post("/infer", response_model=QueryResponse)
+def infer(query_request: QueryRequest):
+    query = query_request.query
+    context = retrive_context(query)
+    if context == 500:
+        raise HTTPException(status_code=500, detail="Error retrieving context")
+    response = generate_response(query, context)
+    if response == 500:
+        raise HTTPException(status_code=500, detail="Error generating response")
+    return QueryResponse(response=response)
+# Root endpoint for testing
+@app.get("/")
+def read_root():
+    return {"message": "Inference API is running"}
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="127.0.0.1", port=8000, log_level="info")

utils.py CHANGED Viewed

@@ -1,90 +1,90 @@
-# Required modules
-import os
-from pinecone import Pinecone
-from transformers import AutoModel
-from langchain_core.prompts import ChatPromptTemplate
-from langchain_groq import ChatGroq
-from dotenv import load_dotenv
-load_dotenv()
-# Initialize clients, indexes, models etc.
-pc_client = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
-pc_index = pc_client.Index(os.getenv("PINECONE_INDEX"))
-embedding_model = AutoModel.from_pretrained('jinaai/jina-embeddings-v2-base-en', trust_remote_code=True)
-groq_llm=ChatGroq(
-    groq_api_key=os.getenv("GROQ_API_KEY"),
-    model_name="Llama3-8b-8192"
-)
-# Context retrieval
-def retrive_context(user_query:str) -> str:
-    """Retrieves the context for the asked query from the vector database
-    Args:
-        user_query (str): Question asked by the user to the bot
-    Returns:
-        context (str): Question's context
-    """
-    context = ""
-    try:
-        embedded_query = embedding_model.encode(user_query).tolist()
-    except Exception as e:
-        return 500
-    try:
-        res = pc_index.query(
-            vector=embedded_query,
-            top_k=5,
-            include_values=True,
-            include_metadata = True
-        )
-    except Exception as e:
-        return 500
-    for match in res['matches']:
-        context = context + match['metadata']['text'] + " "
-    print(context)
-    return context
-# Prompt Engineering for LLM
-prompt = ChatPromptTemplate.from_template(
-    """
-    Hello! As a RAG agent for Biskane, your task is to answer the user's question using the provided context. Please keep your responses brief and straightforward.
-    <context>
-    {context}
-    <context>
-    Question: {query}
-    """
-)
-# Response generator
-def generate_response(query:str, context:str) -> str:
-    """Generates the response for asked question from given context
-    Args:
-        query (str): Query asked by user to bot
-        context (str): Context retrieved from the vector database
-    Returns:
-        answer (str): Generated response
-    """
-    try:
-        chain = prompt | groq_llm
-        llm_response = chain.invoke({
-            "context": context,
-            "query": query
-        })
-        return llm_response.content
-    except Exception as e:
-        return 500

+# Required modules
+import os
+from pinecone import Pinecone
+from transformers import AutoModel
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_groq import ChatGroq
+from dotenv import load_dotenv
+load_dotenv()
+# Initialize clients, indexes, models etc.
+pc_client = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
+pc_index = pc_client.Index(os.getenv("PINECONE_INDEX"))
+embedding_model = AutoModel.from_pretrained('jinaai/jina-embeddings-v2-base-en', trust_remote_code=True)
+groq_llm=ChatGroq(
+    groq_api_key=os.getenv("GROQ_API_KEY"),
+    model_name="Llama3-8b-8192"
+)
+# Context retrieval
+def retrive_context(user_query:str) -> str:
+    """Retrieves the context for the asked query from the vector database
+    Args:
+        user_query (str): Question asked by the user to the bot
+    Returns:
+        context (str): Question's context
+    """
+    context = ""
+    try:
+        embedded_query = embedding_model.encode(user_query).tolist()
+    except Exception as e:
+        return 500
+    try:
+        res = pc_index.query(
+            vector=embedded_query,
+            top_k=5,
+            include_values=True,
+            include_metadata = True
+        )
+    except Exception as e:
+        return 500
+    for match in res['matches']:
+        context = context + match['metadata']['text'] + " "
+    print(context)
+    return context
+# Prompt Engineering for LLM
+prompt = ChatPromptTemplate.from_template(
+    """
+    Hello! As a RAG agent for Biskane, your task is to answer the user's question using the provided context. Please keep your responses brief and straightforward.
+    <context>
+    {context}
+    <context>
+    Question: {query}
+    """
+)
+# Response generator
+def generate_response(query:str, context:str) -> str:
+    """Generates the response for asked question from given context
+    Args:
+        query (str): Query asked by user to bot
+        context (str): Context retrieved from the vector database
+    Returns:
+        answer (str): Generated response
+    """
+    try:
+        chain = prompt | groq_llm
+        llm_response = chain.invoke({
+            "context": context,
+            "query": query
+        })
+        return llm_response.content
+    except Exception as e:
+        return 500