File size: 9,010 Bytes
2da8c28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1aef6a5
2da8c28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61fb910
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12df686
61fb910
2da8c28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
import os
import faiss
import numpy as np
import fitz  # PyMuPDF for PDF processing
from sentence_transformers import SentenceTransformer
from groq import Groq
import gradio as gr
import logging
import pickle

# Initialize logging to track events and errors
logging.basicConfig(filename='query_logs.log', level=logging.INFO)

# Load the GROQ API key from the environment. Secrets must never be committed
# to source control: any key that has ever appeared in this file's history
# should be treated as leaked and revoked in the Groq console.
grog_api_key = os.environ.get("GROQ_API_KEY")
if not grog_api_key:
    # Fail fast at startup rather than on the first chat request.
    raise ValueError("GROQ_API_KEY environment variable not set.")
client = Groq(api_key=grog_api_key)

# Path to the PDF file containing pharmaceutical content
book_path = 'martins-physical-pharmacy-6th-ed-2011-dr-murtadha-alshareifi.pdf'

# Function to read and extract text from the PDF
def read_pdf(file_path):
    """Extract the plain text of every page of a PDF.

    Args:
        file_path: Path of the PDF file to open with PyMuPDF.

    Returns:
        A list with one string per page, in page order. If the file cannot
        be opened or read, the error is logged and an empty list is
        returned, so callers must treat ``[]`` as a failure signal.
    """
    try:
        # The context manager closes the document even when a page fails to
        # load; previously the handle was never closed.
        with fitz.open(file_path) as doc:
            return [
                doc.load_page(page_num).get_text("text")
                for page_num in range(doc.page_count)
            ]
    except Exception as e:
        # Deliberately broad: this is the I/O boundary and the caller relies
        # on the "log and return []" contract instead of an exception.
        logging.error(f"Error reading PDF: {str(e)}")
        return []

# Function to split text into paragraphs
def split_text_into_paragraphs(text_pages, max_tokens=300):
    """Group each page's paragraphs into chunks of bounded size.

    Every page is split on blank lines (``"\\n\\n"``) and consecutive
    paragraphs are packed into one chunk while they fit. Chunks never span
    two pages. A single paragraph longer than the limit is kept whole as its
    own chunk rather than being cut.

    Args:
        text_pages: Iterable of page texts (one string per page).
        max_tokens: Size limit of a chunk. Despite the name, the limit is
            measured in characters (``len``), not model tokens.

    Returns:
        A list of non-empty, whitespace-stripped chunk strings.
    """
    chunks = []

    def _flush(buffer):
        # Skip blank buffers: an empty chunk would add a meaningless vector
        # to the index (this happened when a page was blank or its first
        # paragraph alone exceeded the limit).
        cleaned = buffer.strip()
        if cleaned:
            chunks.append(cleaned)

    for page in text_pages:
        current = ""
        for para in page.split('\n\n'):
            if len(current) + len(para) <= max_tokens:
                current += para + "\n"
            else:
                _flush(current)
                current = para + "\n"
        _flush(current)
    return chunks

# Function to vectorize text chunks and create a FAISS index
def vectorize_text(chunks, batch_size=100, save_path="embeddings.pkl"):
    """Return a FAISS L2 index over the embeddings of ``chunks``.

    A previously built index is loaded from ``save_path`` when it exists and
    holds exactly one vector per chunk; otherwise the chunks are embedded in
    batches, indexed, and the index is pickled to ``save_path``.

    Args:
        chunks: List of text chunks; position ``i`` corresponds to vector
            ``i`` in the index.
        batch_size: Number of chunks encoded per ``model.encode`` call.
        save_path: File used to cache the pickled index.

    Returns:
        ``(index, chunks)`` on success, or ``(None, None)`` if building the
        index failed (the error is logged).
    """
    if os.path.exists(save_path):
        # SECURITY NOTE(review): pickle.load executes arbitrary code from the
        # file. Only point save_path at cache files this application wrote.
        with open(save_path, "rb") as f:
            index = pickle.load(f)
        # A cache built from different text would map search hits onto the
        # wrong chunks. The vector count is a cheap staleness check; it does
        # not detect content changes that keep the same number of chunks.
        if getattr(index, "ntotal", None) == len(chunks):
            return index, chunks
        logging.warning(
            "Cached index at %s does not match %d chunks; rebuilding.",
            save_path, len(chunks),
        )
    try:
        model = SentenceTransformer('all-MiniLM-L6-v2')
        # 384 presumably matches the embedding width of all-MiniLM-L6-v2;
        # verify this value if the model is ever changed.
        index = faiss.IndexFlatL2(384)

        for start in range(0, len(chunks), batch_size):
            batch = chunks[start:start + batch_size]
            vectors = model.encode(batch, show_progress_bar=True)
            # FAISS expects float32 input; asarray is a no-op when it already is.
            index.add(np.asarray(vectors, dtype=np.float32))

        with open(save_path, "wb") as f:
            pickle.dump(index, f)
        return index, chunks
    except Exception as e:
        # Broad on purpose: callers rely on the (None, None) failure contract.
        logging.error(f"Error during vectorization: {str(e)}")
        return None, None

# Load and vectorize PDF content. These statements run at import time, so
# importing this module reads the whole PDF and loads or builds the index.
text_pages = read_pdf(book_path)
# read_pdf logs and returns [] on failure instead of raising, so an empty
# result is turned into a fatal startup error here.
if not text_pages:
    raise RuntimeError("Failed to read PDF content. Check logs for details.")

chunks = split_text_into_paragraphs(text_pages)
# vectorize_text hands the chunk list back together with the index; both are
# None when embedding failed.
vector_index, chunks = vectorize_text(chunks)
if vector_index is None or chunks is None:
    raise RuntimeError("Vectorization failed. Check logs for details.")

# Function to generate query embeddings
def generate_query_embedding(query, model):
    """Encode a single query with ``model``.

    The query is wrapped in a one-element list before being handed to
    ``model.encode``; whatever ``encode`` returns is passed back unchanged.
    """
    single_item_batch = [query]
    encoded = model.encode(single_item_batch)
    return encoded

# Function to check relevancy based on distance threshold
def check_relevancy(distances, threshold=1):
    """Tell whether the first hit of the first query is within ``threshold``.

    Only ``distances[0][0]` is inspected; the comparison is inclusive.
    """
    first_hit_distance = distances[0][0]
    return first_hit_distance <= threshold

# System prompt defining the chatbot's persona, response structure and tone.
# generate_answer includes this text in the request it sends for questions
# that were matched to the PDF content. The text is Markdown-formatted because
# it is read by the language model, not rendered in the UI.
system_prompt = """
You are **PharmaExpert Pro**, an advanced chatbot specialized in the field of pharmaceutical sciences. Your responses should be structured, concise, and informative, making complex topics accessible.

# Response Structure:
1. **Overview**: Start with a brief context to set the user’s expectations.
2. **Definition**: Clearly define the concept being queried.
3. **In-Depth Analysis**: Provide a detailed breakdown of concepts, including:
   - Examples
   - Relevant formulas (if applicable)
   - Learning processes
   - Working mechanisms
   - Purpose
   - Advantages and disadvantages
   - Role in the broader topic
4. **Summary**: Conclude with a short summary of essential takeaways, ensuring clarity and retention.

# Communication Style:
- **Professional yet Accessible**: Keep language rigorous yet clear.
- **Concise and Informative**: Avoid excess details while covering the core information.
- **Encouraging Exploration**: Foster an environment for follow-up questions.

# Unique Qualities:
1. **Source-Specific Expertise**: Refer only to the provided PDF.
2. **Educational Tools**: Use summaries and key points.
3. **Adaptability**: Adjust responses based on the user’s expertise level.
"""

# Function to generate a single, comprehensive answer
_EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'
# NOTE(review): confirm this model id is still served by Groq before deploying.
_CHAT_MODEL_NAME = "llama3-8b-8192"
_embedding_model = None


def _get_embedding_model():
    """Return the shared query-embedding model, loading it on first use."""
    global _embedding_model
    if _embedding_model is None:
        # Loading the model is expensive; do it once instead of per query.
        _embedding_model = SentenceTransformer(_EMBEDDING_MODEL_NAME)
    return _embedding_model


def _ask_llm(user_content, system_content=None):
    """Send one chat request to Groq and return the stripped reply text."""
    messages = []
    if system_content:
        messages.append({"role": "system", "content": system_content})
    messages.append({"role": "user", "content": user_content})
    response = client.chat.completions.create(
        messages=messages,
        model=_CHAT_MODEL_NAME,
        temperature=0.7,
        top_p=0.9,
    )
    return response.choices[0].message.content.strip()


def generate_answer(query):
    """Answer ``query`` using the indexed PDF, or a general fallback.

    The query is embedded and the five nearest chunks are looked up in
    ``vector_index``. If the closest chunk passes ``check_relevancy``, the
    retrieved chunks are supplied as context together with ``system_prompt``.
    Otherwise the model is asked for a general answer to the question.

    Args:
        query: The user's question as plain text.

    Returns:
        The model's reply as a stripped string.

    Raises:
        Whatever the embedding model, FAISS or the Groq client raise; no
        errors are handled here.
    """
    query_embedding = generate_query_embedding(query, _get_embedding_model())
    D, I = vector_index.search(np.array(query_embedding), k=5)

    # FAISS pads the result with id -1 when the index holds fewer than k
    # vectors; without this filter those ids would silently select chunks[-1].
    hit_ids = [i for i in I[0] if i >= 0]

    if hit_ids and check_relevancy(D):
        combined_text = " ".join(chunks[i] for i in hit_ids)

        user_prompt = f"The user has inquired about a complex pharmaceutical topic. Query: {query}"

        assistant_prompt = f"""
Using the following context from the pharmacy PDF, respond with structured detail. **Avoid external citations in your answer.**

**Context:**
{combined_text}

**User's question:**
{query}

**Response Structure:**

- **Concept Overview**
- **Contextual Relevance**
- **Overview of the Concept**
- **Definition**
- **Foundations**
- **Examples** (including relevant case studies)
- **Formulas** (if available)
- **Key Terms and Definitions**
- **Key Vocabulary**
- **Historical Context**
- **Applications and Practical Uses**
- **Step-by-Step Explanation** of processes or calculations
- **Visual Aids** (suggestions for diagrams or graphs)
- **Visual Aids Explanation**
- **Purpose and Significance**
- **Common Misconceptions**
- **Key Challenges and Controversies** in the field
- **Practical Exercises**
- **Comparative Analysis**
- **Future Implications**
- **Future Directions** or potential advancements
- **Cultural Context**
- **Fun Activities**
- **Quiz Questions** 7 quiz
- **Step-by-Step Guide**
- **Interactive Elements**
- **Summative Table** for quick reference
- **Summative Review**
- **Final Summary**
- **Summary**
"""
        # The persona goes in the system role so the model treats it as
        # instructions rather than as part of the user's message.
        return _ask_llm(user_prompt + "\n\n" + assistant_prompt, system_content=system_prompt)

    # The question itself must be part of the prompt; previously the model
    # was asked for "a general answer" without ever seeing the question.
    fallback_prompt = (
        "The user's question is outside the scope of the PDF content. "
        "Provide a general answer without referencing external sources.\n\n"
        f"**User's question:**\n{query}"
    )
    return _ask_llm(fallback_prompt)

# Gradio app interface function
def gradio_interface(user_query):
    """Return the reply for ``user_query``.

    A query that is empty or whitespace-only gets a fixed welcome message;
    any other query is forwarded to ``generate_answer``.
    """
    if user_query.strip():
        return generate_answer(user_query)
    return "Welcome to **Physical Pharmacy Book**! Ask me anything related to pharmaceutical sciences."

# Gradio interface setup
with gr.Blocks(css=".footer {display: none;}") as iface:
    gr.Markdown(
        """
        <h1 style='text-align: center; color: #4CAF50;'>PharmaExpert Pro</h1>
        <p style='text-align: center; font-size: 18px; color: #333;'>
        Your advanced chatbot for pharmaceutical sciences expertise!
        </p>
        """,
        elem_id="header"
    )
    chatbot = gr.Chatbot(type="messages", elem_id="chatbot")
    msg = gr.Textbox(label="Enter your query", placeholder="Type your question here...", lines=2, max_lines=5)
    submit_btn = gr.Button("Submit", elem_id="submit-btn")

    def respond(message, chat_history):
        """Handle one submission from the textbox or the Submit button.

        Args:
            message: Text currently in the input box.
            chat_history: List of ``{"role", "content"}`` message dicts shown
                in the chatbot component.

        Returns:
            A tuple ``("", history)``: the empty string clears the input box
            and ``history`` is the updated conversation.
        """
        # Work on a copy so the component's previous value is not mutated.
        history = list(chat_history or [])
        text = message or ""

        if not text.strip():
            # Blank input: show the welcome text instead of spending an
            # embedding lookup and an LLM call on an empty question.
            history.append({"role": "assistant", "content": gradio_interface(text)})
            return "", history

        history.append({"role": "user", "content": text})
        try:
            reply = generate_answer(text)
        except Exception:
            # UI boundary: record the traceback in the log file and keep the
            # chat usable instead of surfacing a raw error to the user.
            logging.exception("Failed to generate an answer")
            reply = "Sorry, something went wrong while generating the answer. Please try again."
        history.append({"role": "assistant", "content": reply})
        return "", history

    # Pressing Enter in the textbox and clicking the button behave the same.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    submit_btn.click(respond, [msg, chatbot], [msg, chatbot])

# Launch the Gradio app
if __name__ == "__main__":
    iface.launch()