import os
import faiss
import numpy as np
import fitz  # PyMuPDF for PDF processing
from sentence_transformers import SentenceTransformer
from groq import Groq
import gradio as gr
import logging
import pickle

# Initialize logging to track events and errors
logging.basicConfig(filename='query_logs.log', level=logging.INFO)
# Securely load the Groq API key from environment variables
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    raise ValueError("GROQ_API_KEY environment variable not set.")
client = Groq(api_key=groq_api_key)
# Path to the PDF file containing pharmaceutical content
book_path = 'martins-physical-pharmacy-6th-ed-2011-dr-murtadha-alshareifi.pdf'

# Function to read and extract text from the PDF
def read_pdf(file_path):
    try:
        doc = fitz.open(file_path)
        text_data = []
        for page_num in range(doc.page_count):
            page = doc.load_page(page_num)
            text = page.get_text("text")
            text_data.append(text)
        return text_data
    except Exception as e:
        logging.error(f"Error reading PDF: {str(e)}")
        return []
# Function to split text into paragraph-sized chunks
# (the limit is approximate and measured in characters, not model tokens)
def split_text_into_paragraphs(text_pages, max_tokens=300):
    chunks = []
    for page in text_pages:
        paragraphs = page.split('\n\n')
        chunk = ""
        for para in paragraphs:
            if len(chunk) + len(para) <= max_tokens:
                chunk += para + "\n"
            else:
                if chunk.strip():  # avoid appending empty chunks
                    chunks.append(chunk.strip())
                chunk = para + "\n"
        if chunk.strip():
            chunks.append(chunk.strip())
    return chunks
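# Illustrative example (hypothetical input, not part of the pipeline):
#   split_text_into_paragraphs(["Alpha\n\nBeta"], max_tokens=5)
#   -> ["Alpha", "Beta"]   # paragraphs that do not fit together become separate chunks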
# Function to vectorize text chunks and create a FAISS index
def vectorize_text(chunks, batch_size=100, save_path="embeddings.pkl"):
    # Reuse a previously saved index if one exists
    if os.path.exists(save_path):
        with open(save_path, "rb") as f:
            index = pickle.load(f)
        return index, chunks
    try:
        model = SentenceTransformer('all-MiniLM-L6-v2')
        index = faiss.IndexFlatL2(384)  # all-MiniLM-L6-v2 produces 384-dimensional embeddings
        for i in range(0, len(chunks), batch_size):
            chunk_batch = chunks[i:i + batch_size]
            batch_embeddings = model.encode(chunk_batch, show_progress_bar=True)
            index.add(np.array(batch_embeddings))
        with open(save_path, "wb") as f:
            pickle.dump(index, f)
        return index, chunks
    except Exception as e:
        logging.error(f"Error during vectorization: {str(e)}")
        return None, None
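# The pickle round-trip above mirrors the original script; FAISS also ships its
# own serializer, which avoids pickling SWIG-wrapped index objects. A minimal
# sketch (hypothetical helpers, not called elsewhere in this script):
def save_faiss_index(index, path="index.faiss"):
    """Persist a FAISS index using FAISS's native serializer."""
    faiss.write_index(index, path)

def load_faiss_index(path="index.faiss"):
    """Reload an index previously saved with faiss.write_index."""
    return faiss.read_index(path)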
# Load and vectorize PDF content
text_pages = read_pdf(book_path)
if not text_pages:
    raise RuntimeError("Failed to read PDF content. Check logs for details.")

chunks = split_text_into_paragraphs(text_pages)
vector_index, chunks = vectorize_text(chunks)
if vector_index is None or chunks is None:
    raise RuntimeError("Vectorization failed. Check logs for details.")
# Function to generate query embeddings
def generate_query_embedding(query, model):
    return model.encode([query])

# Function to check relevancy based on a distance threshold
# (IndexFlatL2 reports squared L2 distances, so the threshold is in those units)
def check_relevancy(distances, threshold=1.0):
    return distances[0][0] <= threshold
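# For unit-normalized embeddings, squared L2 distance d and cosine similarity s
# satisfy d = 2 - 2*s, so a threshold of 1.0 roughly corresponds to s >= 0.5.
# SentenceTransformer.encode does not normalize by default, so treat this as a
# rough guide only; a small illustrative helper (assumes normalized vectors):
def distance_to_cosine(d):
    return 1.0 - d / 2.0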
# System prompt defining the chatbot's attributes and response structure
system_prompt = """
You are **PharmaExpert Pro**, an advanced chatbot specialized in the field of pharmaceutical sciences. Your responses should be structured, concise, and informative, making complex topics accessible.

# Response Structure:
1. **Overview**: Start with a brief context to set the user’s expectations.
2. **Definition**: Clearly define the concept being queried.
3. **In-Depth Analysis**: Provide a detailed breakdown of concepts, including:
   - Examples
   - Relevant formulas (if applicable)
   - Learning processes
   - Working mechanisms
   - Purpose
   - Advantages and disadvantages
   - Role in the broader topic
4. **Summary**: Conclude with a short summary of essential takeaways, ensuring clarity and retention.

# Communication Style:
- **Professional yet Accessible**: Keep language rigorous yet clear.
- **Concise and Informative**: Avoid excess detail while covering the core information.
- **Encouraging Exploration**: Foster an environment for follow-up questions.

# Unique Qualities:
1. **Source-Specific Expertise**: Refer only to the provided PDF.
2. **Educational Tools**: Use summaries and key points.
3. **Adaptability**: Adjust responses based on the user’s expertise level.
"""
# Function to generate a single, comprehensive answer
def generate_answer(query):
    # Note: reloading the embedding model on every query is simple but slow;
    # loading it once at module level would avoid the repeated startup cost.
    model = SentenceTransformer('all-MiniLM-L6-v2')
    query_embedding = generate_query_embedding(query, model)
    D, I = vector_index.search(np.array(query_embedding), k=5)
    if check_relevancy(D):
        relevant_chunks = [chunks[i] for i in I[0]]
        combined_text = " ".join(relevant_chunks)
        user_prompt = f"The user has inquired about a complex pharmaceutical topic. Query: {query}"
        assistant_prompt = f"""
Using the following context from the pharmacy PDF, respond with structured detail. **Avoid external citations in your answer.**

**Context:**
{combined_text}

**User's question:**
{query}

**Response Structure:**
- **Concept Overview**
- **Contextual Relevance**
- **Definition**
- **Foundations**
- **Examples** (including relevant case studies)
- **Formulas** (if available)
- **Key Terms and Definitions**
- **Historical Context**
- **Applications and Practical Uses**
- **Step-by-Step Explanation** of processes or calculations
- **Visual Aids** (suggestions for diagrams or graphs)
- **Purpose and Significance**
- **Common Misconceptions**
- **Key Challenges and Controversies** in the field
- **Practical Exercises**
- **Comparative Analysis**
- **Future Directions** or potential advancements
- **Cultural Context**
- **Fun Activities**
- **Quiz Questions** (7 questions)
- **Interactive Elements**
- **Summative Table** for quick reference
- **Final Summary**
"""
        prompt = system_prompt + "\n\n" + user_prompt + "\n\n" + assistant_prompt
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama3-8b-8192",
            temperature=0.7,
            top_p=0.9,
        )
        answer = response.choices[0].message.content.strip()
        return answer
    else:
        # No sufficiently close chunk was found: fall back to a general answer
        fallback_prompt = (
            "The user's question is outside the scope of the PDF content. "
            "Provide a brief general answer without referencing external sources.\n\n"
            f"User's question: {query}"
        )
        fallback_response = client.chat.completions.create(
            messages=[{"role": "user", "content": fallback_prompt}],
            model="llama3-8b-8192",
            temperature=0.7,
            top_p=0.9,
        )
        return fallback_response.choices[0].message.content.strip()
# Gradio app interface function (kept from the original; the Blocks UI below
# wires its own respond() handler instead)
def gradio_interface(user_query):
    if user_query.strip() == "":
        return "Welcome to **Physical Pharmacy Book**! Ask me anything related to pharmaceutical sciences."
    return generate_answer(user_query)
# Gradio interface setup
with gr.Blocks(css=".footer {display: none;}") as iface:
    gr.Markdown(
        """
        <h1 style='text-align: center; color: #4CAF50;'>PharmaExpert Pro</h1>
        <p style='text-align: center; font-size: 18px; color: #333;'>
            Your advanced chatbot for pharmaceutical sciences expertise!
        </p>
        """,
        elem_id="header"
    )
    chatbot = gr.Chatbot(type="messages", elem_id="chatbot")
    msg = gr.Textbox(label="Enter your query", placeholder="Type your question here...", lines=2, max_lines=5)
    submit_btn = gr.Button("Submit", elem_id="submit-btn")

    # Append the user message and the model's reply to the chat history
    def respond(message, chat_history):
        chat_history = chat_history or []  # guard against an empty initial history
        chat_history.append({"role": "user", "content": message})
        response = generate_answer(message)
        chat_history.append({"role": "assistant", "content": response})
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    submit_btn.click(respond, [msg, chatbot], [msg, chatbot])
# Launch the Gradio app
if __name__ == "__main__":
    iface.launch()
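# Usage note (not in the original script): the app expects GROQ_API_KEY to be
# set in the environment and the PDF named above to be present in the working
# directory before launch.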