Spaces:
Runtime error
Runtime error
2001muhammadumair
commited on
Commit
•
2da8c28
1
Parent(s):
8e2a8b6
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,252 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
import faiss
import numpy as np
import fitz  # PyMuPDF for PDF processing
from sentence_transformers import SentenceTransformer
from groq import Groq
import gradio as gr
import logging
import pickle

# Initialize logging to track events and errors.
logging.basicConfig(filename='query_logs.log', level=logging.INFO)

# Securely load the GROQ API key from the environment instead of hard-coding
# it in source control. (The previously committed literal key must be treated
# as compromised and rotated on the Groq dashboard.)
grog_api_key = os.environ.get("GROQ_API_KEY")
if not grog_api_key:
    raise ValueError("GROQ_API_KEY environment variable not set.")
client = Groq(api_key=grog_api_key)

# Path to the PDF file containing pharmaceutical content.
# NOTE(review): this is a Colab-style absolute path — confirm it exists in the
# Space's runtime filesystem.
book_path = '/content/martins-physical-pharmacy-6th-ed-2011-dr-murtadha-alshareifi.pdf'
# Function to read and extract text from the PDF
def read_pdf(file_path):
    """Extract the text of every page of a PDF.

    Parameters:
        file_path: path to the PDF file on disk.

    Returns:
        A list with one string per page (in page order), or an empty list
        if the file cannot be opened or parsed; the error is logged.
    """
    try:
        doc = fitz.open(file_path)
        try:
            # One plain-text entry per page.
            return [doc.load_page(page_num).get_text("text")
                    for page_num in range(doc.page_count)]
        finally:
            # Always release the underlying file handle — the original
            # implementation leaked the Document on every call.
            doc.close()
    except Exception as e:
        logging.error(f"Error reading PDF: {str(e)}")
        return []
# Function to split text into paragraphs
def split_text_into_paragraphs(text_pages, max_tokens=300):
    """Greedily pack paragraphs into chunks of at most ~max_tokens characters.

    Note: despite the parameter name, "tokens" are approximated by character
    count (`len`), not tokenizer tokens.

    Parameters:
        text_pages: list of page strings (as produced by read_pdf).
        max_tokens: soft character budget per chunk; a single paragraph
            longer than this still becomes its own chunk.

    Returns:
        List of non-empty, stripped chunk strings.
    """
    chunks = []
    for page in text_pages:
        paragraphs = page.split('\n\n')
        chunk = ""
        for para in paragraphs:
            if len(chunk) + len(para) <= max_tokens:
                chunk += para + "\n"
            else:
                # Flush the current chunk before starting a new one, but never
                # emit an empty chunk (the original appended "" whenever the
                # very first paragraph of a page exceeded the budget).
                if chunk.strip():
                    chunks.append(chunk.strip())
                chunk = para + "\n"
        if chunk.strip():
            chunks.append(chunk.strip())
    return chunks
# Function to vectorize text chunks and create a FAISS index
def vectorize_text(chunks, batch_size=100, save_path="embeddings.pkl"):
    """Embed text chunks with all-MiniLM-L6-v2 and build a FAISS L2 index.

    Parameters:
        chunks: list of text chunks to embed.
        batch_size: number of chunks encoded per model.encode call.
        save_path: pickle file used as an on-disk cache for the index.

    Returns:
        (index, chunks) on success, or (None, None) on failure (logged).
    """
    # Reuse a previously built index when available.
    # NOTE(review): pickle.load executes code from the file — only safe on a
    # cache this process wrote itself; never load an untrusted pickle.
    if os.path.exists(save_path):
        try:
            with open(save_path, "rb") as f:
                index = pickle.load(f)
            # NOTE(review): the cached index is not validated against the
            # current `chunks`; a stale cache may mismatch — confirm.
            return index, chunks
        except Exception as e:
            # A corrupt/stale cache must not be fatal at import time:
            # log it and fall through to rebuild from scratch.
            logging.error(f"Error loading cached embeddings: {str(e)}")
    try:
        model = SentenceTransformer('all-MiniLM-L6-v2')
        embeddings = []
        # 384 is the output dimension of all-MiniLM-L6-v2.
        index = faiss.IndexFlatL2(384)

        for i in range(0, len(chunks), batch_size):
            chunk_batch = chunks[i:i + batch_size]
            batch_embeddings = model.encode(chunk_batch, show_progress_bar=True)
            embeddings.append(batch_embeddings)
            index.add(np.array(batch_embeddings))

        with open(save_path, "wb") as f:
            pickle.dump(index, f)
        return index, chunks
    except Exception as e:
        logging.error(f"Error during vectorization: {str(e)}")
        return None, None
# Load and vectorize PDF content at import time so the index is ready before
# the Gradio app starts serving queries.
text_pages = read_pdf(book_path)
if not text_pages:
    # read_pdf already logged the underlying exception; fail fast here.
    raise RuntimeError("Failed to read PDF content. Check logs for details.")

chunks = split_text_into_paragraphs(text_pages)
# NOTE(review): vectorize_text may return a cached index built from an earlier
# run whose size does not match the freshly computed `chunks` — confirm the
# embeddings.pkl cache corresponds to this PDF.
vector_index, chunks = vectorize_text(chunks)
if vector_index is None or chunks is None:
    raise RuntimeError("Vectorization failed. Check logs for details.")
# Function to generate query embeddings
def generate_query_embedding(query, model):
    """Encode one query string into its embedding with the given model.

    Returns whatever model.encode yields for a single-element batch.
    """
    single_item_batch = [query]
    return model.encode(single_item_batch)
# Function to check relevancy based on distance threshold
def check_relevancy(distances, threshold=1):
    """Report whether the single nearest neighbour is close enough.

    `distances` is the FAISS-style 2-D result matrix; only the distance of
    the best match for the first query row is consulted.
    """
    nearest = distances[0][0]
    return nearest <= threshold
# System prompt defining the chatbot's attributes and response structure.
# Prepended verbatim to every Groq completion request in generate_answer.
system_prompt = """
You are **PharmaExpert Pro**, an advanced chatbot specialized in the field of pharmaceutical sciences. Your responses should be structured, concise, and informative, making complex topics accessible.

# Response Structure:
1. **Overview**: Start with a brief context to set the user’s expectations.
2. **Definition**: Clearly define the concept being queried.
3. **In-Depth Analysis**: Provide a detailed breakdown of concepts, including:
- Examples
- Relevant formulas (if applicable)
- Learning processes
- Working mechanisms
- Purpose
- Advantages and disadvantages
- Role in the broader topic
4. **Summary**: Conclude with a short summary of essential takeaways, ensuring clarity and retention.

# Communication Style:
- **Professional yet Accessible**: Keep language rigorous yet clear.
- **Concise and Informative**: Avoid excess details while covering the core information.
- **Encouraging Exploration**: Foster an environment for follow-up questions.

# Unique Qualities:
1. **Source-Specific Expertise**: Refer only to the provided PDF.
2. **Educational Tools**: Use summaries and key points.
3. **Adaptability**: Adjust responses based on the user’s expertise level.
"""
# Function to generate a single, comprehensive answer

# Lazily constructed, shared embedding model. The original rebuilt
# SentenceTransformer('all-MiniLM-L6-v2') on every query, reloading the model
# weights each call; caching it makes latency dominated by the LLM call.
_query_encoder = None


def _get_query_encoder():
    """Return the shared sentence-embedding model, creating it on first use."""
    global _query_encoder
    if _query_encoder is None:
        _query_encoder = SentenceTransformer('all-MiniLM-L6-v2')
    return _query_encoder


def generate_answer(query):
    """Answer `query` from the indexed PDF, or fall back to a general answer.

    Retrieves the 5 nearest chunks from the module-level FAISS index; if the
    best match is within the relevancy threshold, asks the Groq LLM with the
    retrieved context, otherwise asks for a general (context-free) answer.

    Returns:
        The LLM's answer as a stripped string.
    """
    model = _get_query_encoder()
    query_embedding = generate_query_embedding(query, model)
    # D: L2 distances, I: indices of the 5 nearest chunks for the query row.
    D, I = vector_index.search(np.array(query_embedding), k=5)

    if check_relevancy(D):
        relevant_chunks = [chunks[i] for i in I[0]]
        combined_text = " ".join(relevant_chunks)

        user_prompt = f"The user has inquired about a complex pharmaceutical topic. Query: {query}"

        assistant_prompt = f"""
Using the following context from the pharmacy PDF, respond with structured detail. **Avoid external citations in your answer.**

**Context:**
{combined_text}

**User's question:**
{query}

**Response Structure:**

- **Concept Overview**
- **Contextual Relevance**
- **Overview of the Concept**
- **Definition**
- **Foundations**
- **Examples** (including relevant case studies)
- **Formulas** (if available)
- **Key Terms and Definitions**
- **Key Vocabulary**
- **Historical Context**
- **Applications and Practical Uses**
- **Step-by-Step Explanation** of processes or calculations
- **Visual Aids** (suggestions for diagrams or graphs)
- **Visual Aids Explanation**
- **Purpose and Significance**
- **Common Misconceptions**
- **Key Challenges and Controversies** in the field
- **Practical Exercises**
- **Comparative Analysis**
- **Future Implications**
- **Future Directions** or potential advancements
- **Cultural Context**
- **Fun Activities**
- **Quiz Questions** 7 quiz
- **Step-by-Step Guide**
- **Interactive Elements**
- **Summative Table** for quick reference
- **Summative Review**
- **Final Summary**
- **Summary**
"""

        prompt = system_prompt + "\n\n" + user_prompt + "\n\n" + assistant_prompt

        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama3-8b-8192",
            temperature=0.7,
            top_p=0.9,
        )
        answer = response.choices[0].message.content.strip()
        return answer
    else:
        # Out-of-scope query: answer generically. Bug fix — the original
        # fallback prompt never included the user's question, so the model
        # was asked to answer a question it was never shown.
        fallback_prompt = (
            "The user's question is outside the scope of the PDF content. "
            "Provide a general answer without referencing external sources.\n\n"
            f"User's question: {query}"
        )
        fallback_response = client.chat.completions.create(
            messages=[{"role": "user", "content": fallback_prompt}],
            model="llama3-8b-8192",
            temperature=0.7,
            top_p=0.9
        )
        return fallback_response.choices[0].message.content.strip()
# Gradio app interface function
def gradio_interface(user_query):
    """Single-turn entry point: greet on blank input, otherwise answer.

    A query that is empty or whitespace-only gets the welcome banner;
    anything else is delegated to generate_answer.
    """
    if not user_query.strip():
        return ("Welcome to **Physical Pharmacy Book**! "
                "Ask me anything related to pharmaceutical sciences.")
    return generate_answer(user_query)
# Gradio interface setup.
# The inline CSS hides Gradio's default footer on the page.
with gr.Blocks(css=".footer {display: none;}") as iface:
    # Static page header rendered as HTML-in-Markdown.
    gr.Markdown(
        """
<h1 style='text-align: center; color: #4CAF50;'>PharmaExpert Pro</h1>
<p style='text-align: center; font-size: 18px; color: #333;'>
Your advanced chatbot for pharmaceutical sciences expertise!
</p>
        """,
        elem_id="header"
    )
    # Chat transcript using OpenAI-style {"role", "content"} message dicts.
    chatbot = gr.Chatbot(type="messages", elem_id="chatbot")
    msg = gr.Textbox(label="Enter your query", placeholder="Type your question here...", lines=2, max_lines=5)
    submit_btn = gr.Button("Submit", elem_id="submit-btn")

    def respond(message, chat_history):
        # Append the user turn, generate the assistant turn, then return
        # ("", history): the empty string clears the textbox and the
        # updated history refreshes the chatbot component.
        chat_history.append({"role": "user", "content": message})
        response = generate_answer(message)
        chat_history.append({"role": "assistant", "content": response})
        return "", chat_history

    # Both pressing Enter in the textbox and clicking the button submit.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    submit_btn.click(respond, [msg, chatbot], [msg, chatbot])

# Launch the Gradio app
if __name__ == "__main__":
    iface.launch()