# Source: Hugging Face Space "2001muhammadumair/Physical_Pharmacy_Bot"
# Space status when captured: Runtime error
# File size: 9,010 bytes

import os
import faiss
import numpy as np
import fitz  # PyMuPDF for PDF processing
from sentence_transformers import SentenceTransformer
from groq import Groq
import gradio as gr
import logging
import pickle

# Initialize logging to track events and errors
logging.basicConfig(filename='query_logs.log', level=logging.INFO)

# Load the GROQ API key from the environment so the secret never lives in the
# source file. NOTE: any key that was previously hard-coded here has been
# exposed and must be revoked and replaced.
grog_api_key = os.environ.get("GROQ_API_KEY")
if not grog_api_key:
    raise ValueError("GROQ_API_KEY environment variable not set.")
client = Groq(api_key=grog_api_key)

# Path to the PDF file containing pharmaceutical content
book_path = 'martins-physical-pharmacy-6th-ed-2011-dr-murtadha-alshareifi.pdf'

# Function to read and extract text from the PDF
def read_pdf(file_path):
    """Extract the plain text of every page of a PDF.

    Args:
        file_path: Path of the PDF file to open with PyMuPDF.

    Returns:
        A list with one text string per page, in page order. If the file
        cannot be opened or read, the error is logged and an empty list is
        returned instead of raising.
    """
    try:
        # The context manager closes the document even when a page fails to
        # extract, so the file handle is never leaked.
        with fitz.open(file_path) as doc:
            return [page.get_text("text") for page in doc]
    except Exception as e:
        logging.error(f"Error reading PDF: {str(e)}")
        return []

# Function to split text into paragraphs
def split_text_into_paragraphs(text_pages, max_tokens=300):
    """Pack each page's paragraphs into size-bounded text chunks.

    Paragraphs are the pieces of a page separated by a blank line. They are
    packed greedily, in order, into chunks; a chunk never spans two pages.

    Args:
        text_pages: Iterable of page texts (one string per page).
        max_tokens: Soft size limit of a chunk. Despite the name, it is
            measured in characters (``len``), not model tokens. A single
            paragraph longer than the limit is kept whole as its own chunk.

    Returns:
        A list of non-empty, whitespace-stripped chunk strings. Empty pages
        and blank paragraphs never produce an empty chunk.

    Note:
        Chunk positions are used as row ids of the vector index, so an index
        cached from a different chunking no longer lines up with this output.
    """
    chunks = []

    def flush(parts):
        # Skip whitespace-only chunks: they carry no content worth indexing.
        text = "\n".join(parts).strip()
        if text:
            chunks.append(text)

    for page in text_pages:
        parts = []
        size = 0  # running length, counting the "\n" that follows each paragraph
        for para in page.split('\n\n'):
            if size + len(para) > max_tokens:
                flush(parts)
                parts, size = [], 0
            parts.append(para)
            size += len(para) + 1
        flush(parts)
    return chunks

# Function to vectorize text chunks and create a FAISS index
def vectorize_text(chunks, batch_size=100, save_path="embeddings.pkl"):
    """Embed text chunks into a FAISS L2 index, reusing a cached index if valid.

    Args:
        chunks: List of text chunks; the position of a chunk is its row id in
            the index.
        batch_size: Number of chunks encoded per call to the embedding model.
        save_path: Pickle file used to cache the built index between runs.

    Returns:
        ``(index, chunks)`` on success, or ``(None, None)`` if building the
        index fails (the error is logged).
    """
    if os.path.exists(save_path):
        try:
            # NOTE(security): unpickling executes code stored in the file, so
            # the cache must only ever come from a trusted location.
            with open(save_path, "rb") as f:
                index = pickle.load(f)
        except Exception as e:
            logging.error(f"Error loading cached index, rebuilding: {str(e)}")
        else:
            # A cache built from a different set of chunks would map search
            # hits to the wrong text, so only reuse it when the sizes agree.
            if getattr(index, "ntotal", None) == len(chunks):
                return index, chunks
            logging.warning("Cached index does not match current chunks; rebuilding.")
    try:
        model = SentenceTransformer('all-MiniLM-L6-v2')
        # 384 is the vector width the index is created with; it has to equal
        # the embedding size produced by the model above.
        index = faiss.IndexFlatL2(384)

        for i in range(0, len(chunks), batch_size):
            chunk_batch = chunks[i:i + batch_size]
            batch_embeddings = model.encode(chunk_batch, show_progress_bar=True)
            # FAISS only accepts float32 matrices.
            index.add(np.asarray(batch_embeddings, dtype=np.float32))

        with open(save_path, "wb") as f:
            pickle.dump(index, f)
        return index, chunks
    except Exception as e:
        logging.error(f"Error during vectorization: {str(e)}")
        return None, None

# Build the retrieval corpus at import time: PDF pages -> chunks -> vector index.
# Either stage failing aborts start-up, since nothing can be answered without it.
text_pages = read_pdf(book_path)
if not text_pages:
    raise RuntimeError("Failed to read PDF content. Check logs for details.")

vector_index, chunks = vectorize_text(split_text_into_paragraphs(text_pages))
if chunks is None or vector_index is None:
    raise RuntimeError("Vectorization failed. Check logs for details.")

# Function to generate query embeddings
def generate_query_embedding(query, model):
    """Encode a single query with *model* and return the encoder's output.

    The query is wrapped in a one-element list, so the result is whatever
    ``model.encode`` returns for a batch containing just that query.
    """
    single_query_batch = [query]
    return model.encode(single_query_batch)

# Function to check relevancy based on distance threshold
def check_relevancy(distances, threshold=1):
    """Tell whether the first hit of the first query is within *threshold*.

    Args:
        distances: Two-level indexable of distances; only ``distances[0][0]``
            is inspected.
        threshold: Largest distance still considered relevant (inclusive).

    Returns:
        The result of comparing that first distance with ``threshold``.
    """
    first_row = distances[0]
    nearest_distance = first_row[0]
    return nearest_distance <= threshold

# System prompt defining the chatbot's persona, response structure and
# communication style. It is plain Markdown text; generate_answer() prepends it
# to the per-query prompt and sends the combined text as one user-role message.
system_prompt = """
You are **PharmaExpert Pro**, an advanced chatbot specialized in the field of pharmaceutical sciences. Your responses should be structured, concise, and informative, making complex topics accessible.

# Response Structure:
1. **Overview**: Start with a brief context to set the user’s expectations.
2. **Definition**: Clearly define the concept being queried.
3. **In-Depth Analysis**: Provide a detailed breakdown of concepts, including:
   - Examples
   - Relevant formulas (if applicable)
   - Learning processes
   - Working mechanisms
   - Purpose
   - Advantages and disadvantages
   - Role in the broader topic
4. **Summary**: Conclude with a short summary of essential takeaways, ensuring clarity and retention.

# Communication Style:
- **Professional yet Accessible**: Keep language rigorous yet clear.
- **Concise and Informative**: Avoid excess details while covering the core information.
- **Encouraging Exploration**: Foster an environment for follow-up questions.

# Unique Qualities:
1. **Source-Specific Expertise**: Refer only to the provided PDF.
2. **Educational Tools**: Use summaries and key points.
3. **Adaptability**: Adjust responses based on the user’s expertise level.
"""

# Query encoder shared by all requests; created on first use so that the
# model is loaded once instead of on every question.
_query_embedding_model = None


def _get_query_embedding_model():
    """Return the shared query encoder, loading it on the first call."""
    global _query_embedding_model
    if _query_embedding_model is None:
        _query_embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
    return _query_embedding_model


def _ask_llm(prompt):
    """Send *prompt* as a single user message and return the stripped reply."""
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama3-8b-8192",
        temperature=0.7,
        top_p=0.9,
    )
    return response.choices[0].message.content.strip()


# Function to generate a single, comprehensive answer
def generate_answer(query):
    """Answer *query*, grounding the reply in the indexed PDF when possible.

    The query is embedded and the five nearest chunks are looked up in
    ``vector_index``. If the nearest chunk passes ``check_relevancy``, the
    retrieved chunks are passed to the LLM as context together with the
    system prompt; otherwise the LLM is asked for a general answer.

    Args:
        query: The user's question as plain text.

    Returns:
        The LLM's reply text with surrounding whitespace removed.
    """
    query_embedding = generate_query_embedding(query, _get_query_embedding_model())
    D, I = vector_index.search(np.array(query_embedding), k=5)

    if not check_relevancy(D):
        # The question itself must be part of the prompt, otherwise the model
        # has nothing to answer.
        fallback_prompt = (
            "The user's question is outside the scope of the PDF content. "
            "Provide a general answer without referencing external sources.\n\n"
            f"User's question: {query}"
        )
        return _ask_llm(fallback_prompt)

    # FAISS fills missing neighbours with -1 when the index holds fewer than
    # k vectors; such a label must not be used as a (negative) list index.
    relevant_chunks = [chunks[i] for i in I[0] if i >= 0]
    combined_text = " ".join(relevant_chunks)

    user_prompt = f"The user has inquired about a complex pharmaceutical topic. Query: {query}"

    assistant_prompt = f"""
Using the following context from the pharmacy PDF, respond with structured detail. **Avoid external citations in your answer.**

**Context:**
{combined_text}

**User's question:**
{query}

**Response Structure:**

- **Concept Overview**
- **Contextual Relevance**
- **Overview of the Concept**
- **Definition**
- **Foundations**
- **Examples** (including relevant case studies)
- **Formulas** (if available)
- **Key Terms and Definitions**
- **Key Vocabulary**
- **Historical Context**
- **Applications and Practical Uses**
- **Step-by-Step Explanation** of processes or calculations
- **Visual Aids** (suggestions for diagrams or graphs)
- **Visual Aids Explanation**
- **Purpose and Significance**
- **Common Misconceptions**
- **Key Challenges and Controversies** in the field
- **Practical Exercises**
- **Comparative Analysis**
- **Future Implications**
- **Future Directions** or potential advancements
- **Cultural Context**
- **Fun Activities**
- **Quiz Questions** 7 quiz 
- **Step-by-Step Guide**
- **Interactive Elements**
- **Summative Table** for quick reference
- **Summative Review**
- **Final Summary**
- **Summary**
"""

    prompt = system_prompt + "\n\n" + user_prompt + "\n\n" + assistant_prompt
    return _ask_llm(prompt)

# Gradio app interface function
def gradio_interface(user_query):
    """Return the chatbot's reply for one user submission.

    Args:
        user_query: Text entered by the user; may be ``None`` or blank.

    Returns:
        A fixed welcome message when nothing (or only whitespace) was
        entered, otherwise the result of ``generate_answer(user_query)``.
    """
    # Treat a missing value like a blank one instead of failing on .strip().
    if user_query is None or not user_query.strip():
        return "Welcome to **Physical Pharmacy Book**! Ask me anything related to pharmaceutical sciences."
    return generate_answer(user_query)

# Gradio interface setup: a header, a message-style chat window, a text box
# and a submit button. Pressing Enter and clicking the button do the same thing.
with gr.Blocks(css=".footer {display: none;}") as iface:
    gr.Markdown(
        """
        <h1 style='text-align: center; color: #4CAF50;'>PharmaExpert Pro</h1>
        <p style='text-align: center; font-size: 18px; color: #333;'>
        Your advanced chatbot for pharmaceutical sciences expertise!
        </p>
        """,
        elem_id="header"
    )
    chatbot = gr.Chatbot(type="messages", elem_id="chatbot")
    msg = gr.Textbox(label="Enter your query", placeholder="Type your question here...", lines=2, max_lines=5)
    submit_btn = gr.Button("Submit", elem_id="submit-btn")

    def respond(message, chat_history):
        """Append the user's message and the bot's reply to the chat history.

        Returns:
            A pair ``("", chat_history)``: the empty string clears the text
            box and the history refreshes the chat window.
        """
        if chat_history is None:
            chat_history = []
        chat_history.append({"role": "user", "content": message})
        # Go through gradio_interface so that a blank submission gets the
        # welcome text instead of triggering a retrieval and an LLM call.
        response = gradio_interface(message)
        chat_history.append({"role": "assistant", "content": response})
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    submit_btn.click(respond, [msg, chatbot], [msg, chatbot])

# Launch the Gradio app
if __name__ == "__main__":
    iface.launch()