2001muhammadumair committed on
Commit
2da8c28
1 Parent(s): 8e2a8b6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +252 -0
app.py ADDED
@@ -0,0 +1,252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import faiss
3
+ import numpy as np
4
+ import fitz # PyMuPDF for PDF processing
5
+ from sentence_transformers import SentenceTransformer
6
+ from groq import Groq
7
+ import gradio as gr
8
+ import logging
9
+ import pickle
# Initialize logging to track events and errors.
logging.basicConfig(filename='query_logs.log', level=logging.INFO)

# Securely load the Groq API key from the environment.
# Fix: the original hard-coded a live API key in source, which is a
# credential leak (the key must be rotated) and made this check dead code.
grog_api_key = os.environ.get("GROQ_API_KEY")
if not grog_api_key:
    raise ValueError("GROQ_API_KEY environment variable not set.")
client = Groq(api_key=grog_api_key)
# Location of the source PDF (pharmaceutical reference text) to be indexed.
book_path = '/content/martins-physical-pharmacy-6th-ed-2011-dr-murtadha-alshareifi.pdf'
# Function to read and extract text from the PDF
def read_pdf(file_path):
    """Extract the plain text of every page of a PDF.

    Parameters
    ----------
    file_path : str
        Path to the PDF file.

    Returns
    -------
    list[str]
        One text string per page, in page order; an empty list if the
        file could not be read (the error is logged, not raised).
    """
    try:
        # Fix: the original never closed the document; the context manager
        # releases the file handle even if extraction fails part-way.
        with fitz.open(file_path) as doc:
            return [page.get_text("text") for page in doc]
    except Exception as e:
        logging.error(f"Error reading PDF: {str(e)}")
        return []
# Function to split text into paragraphs
def split_text_into_paragraphs(text_pages, max_tokens=300):
    """Greedily pack blank-line-separated paragraphs into small chunks.

    NOTE: despite the parameter name, ``max_tokens`` is compared against
    *character* counts, not model tokens.

    Parameters
    ----------
    text_pages : list[str]
        Page texts as returned by ``read_pdf``.
    max_tokens : int, optional
        Approximate maximum chunk length in characters (default 300).

    Returns
    -------
    list[str]
        Non-empty, stripped text chunks.
    """
    chunks = []
    for page in text_pages:
        paragraphs = page.split('\n\n')
        chunk = ""
        for para in paragraphs:
            if len(chunk) + len(para) <= max_tokens:
                chunk += para + "\n"
            else:
                # Fix: the original appended chunk.strip() unconditionally,
                # which inserted empty-string chunks whenever a single
                # paragraph exceeded max_tokens while chunk was still empty.
                if chunk.strip():
                    chunks.append(chunk.strip())
                chunk = para + "\n"
        if chunk.strip():
            chunks.append(chunk.strip())
    return chunks
# Function to vectorize text chunks and create a FAISS index
def vectorize_text(chunks, batch_size=100, save_path="embeddings.pkl"):
    """Embed text chunks and build (or load a cached) FAISS L2 index.

    Parameters
    ----------
    chunks : list[str]
        Text chunks to embed.
    batch_size : int, optional
        Number of chunks encoded per model call.
    save_path : str, optional
        Path of the on-disk index cache.

    Returns
    -------
    tuple
        ``(index, chunks)`` on success, ``(None, None)`` on failure
        (the error is logged).
    """
    if os.path.exists(save_path):
        # NOTE(review): the cache is keyed only on the file path, so the
        # stored index is assumed to correspond to `chunks` — delete the
        # cache file whenever the source PDF or chunking changes.
        with open(save_path, "rb") as f:
            index = faiss.deserialize_index(pickle.load(f))
        return index, chunks
    try:
        model = SentenceTransformer('all-MiniLM-L6-v2')
        # all-MiniLM-L6-v2 produces 384-dimensional embeddings.
        index = faiss.IndexFlatL2(384)

        for start in range(0, len(chunks), batch_size):
            batch = chunks[start:start + batch_size]
            batch_embeddings = model.encode(batch, show_progress_bar=True)
            index.add(np.array(batch_embeddings))

        # Fix: raw FAISS indexes are SWIG-wrapped C++ objects and cannot be
        # pickled directly; serialize to a numpy byte buffer first.
        with open(save_path, "wb") as f:
            pickle.dump(faiss.serialize_index(index), f)
        return index, chunks
    except Exception as e:
        logging.error(f"Error during vectorization: {str(e)}")
        return None, None
# Build the retrieval index at import time: read the book, chunk it,
# and embed the chunks. Fail fast with a clear error if either step breaks.
text_pages = read_pdf(book_path)
if not text_pages:
    raise RuntimeError("Failed to read PDF content. Check logs for details.")

chunks = split_text_into_paragraphs(text_pages)
vector_index, chunks = vectorize_text(chunks)
if vector_index is None or chunks is None:
    raise RuntimeError("Vectorization failed. Check logs for details.")
# Function to generate query embeddings
def generate_query_embedding(query, model):
    """Encode a single query string as a batch of one embedding."""
    batch = [query]
    return model.encode(batch)
90
+
91
+ # Function to check relevancy based on distance threshold
92
+ def check_relevancy(distances, threshold=1):
93
+ return distances[0][0] <= threshold
94
+
95
+ # System prompt defining the chatbot's attributes and response structure
96
+ system_prompt = """
97
+ You are **PharmaExpert Pro**, an advanced chatbot specialized in the field of pharmaceutical sciences. Your responses should be structured, concise, and informative, making complex topics accessible.
98
+
99
+ # Response Structure:
100
+ 1. **Overview**: Start with a brief context to set the user’s expectations.
101
+ 2. **Definition**: Clearly define the concept being queried.
102
+ 3. **In-Depth Analysis**: Provide a detailed breakdown of concepts, including:
103
+ - Examples
104
+ - Relevant formulas (if applicable)
105
+ - Learning processes
106
+ - Working mechanisms
107
+ - Purpose
108
+ - Advantages and disadvantages
109
+ - Role in the broader topic
110
+ 4. **Summary**: Conclude with a short summary of essential takeaways, ensuring clarity and retention.
111
+
112
+ # Communication Style:
113
+ - **Professional yet Accessible**: Keep language rigorous yet clear.
114
+ - **Concise and Informative**: Avoid excess details while covering the core information.
115
+ - **Encouraging Exploration**: Foster an environment for follow-up questions.
116
+
117
+ # Unique Qualities:
118
+ 1. **Source-Specific Expertise**: Refer only to the provided PDF.
119
+ 2. **Educational Tools**: Use summaries and key points.
120
+ 3. **Adaptability**: Adjust responses based on the user’s expertise level.
121
+ """
# Function to generate a single, comprehensive answer
def generate_answer(query):
    """Answer a query from the indexed PDF, or fall back to a general answer.

    Retrieves the 5 nearest chunks from the FAISS index; if the closest hit
    passes the relevancy threshold, builds a retrieval-augmented prompt,
    otherwise asks the LLM to answer generally.

    Parameters
    ----------
    query : str
        The user's question.

    Returns
    -------
    str
        The LLM's answer text, stripped of surrounding whitespace.
    """
    # Fix: the original constructed a fresh SentenceTransformer on every
    # call, reloading the model from disk per query. Cache it on the
    # function object so it loads once.
    model = getattr(generate_answer, "_model", None)
    if model is None:
        model = SentenceTransformer('all-MiniLM-L6-v2')
        generate_answer._model = model

    query_embedding = generate_query_embedding(query, model)
    D, I = vector_index.search(np.array(query_embedding), k=5)

    if check_relevancy(D):
        relevant_chunks = [chunks[i] for i in I[0]]
        combined_text = " ".join(relevant_chunks)

        user_prompt = f"The user has inquired about a complex pharmaceutical topic. Query: {query}"

        assistant_prompt = f"""
Using the following context from the pharmacy PDF, respond with structured detail. **Avoid external citations in your answer.**

**Context:**
{combined_text}

**User's question:**
{query}

**Response Structure:**

- **Concept Overview**
- **Contextual Relevance**
- **Overview of the Concept**
- **Definition**
- **Foundations**
- **Examples** (including relevant case studies)
- **Formulas** (if available)
- **Key Terms and Definitions**
- **Key Vocabulary**
- **Historical Context**
- **Applications and Practical Uses**
- **Step-by-Step Explanation** of processes or calculations
- **Visual Aids** (suggestions for diagrams or graphs)
- **Visual Aids Explanation**
- **Purpose and Significance**
- **Common Misconceptions**
- **Key Challenges and Controversies** in the field
- **Practical Exercises**
- **Comparative Analysis**
- **Future Implications**
- **Future Directions** or potential advancements
- **Cultural Context**
- **Fun Activities**
- **Quiz Questions** 7 quiz
- **Step-by-Step Guide**
- **Interactive Elements**
- **Summative Table** for quick reference
- **Summative Review**
- **Final Summary**
- **Summary**
"""

        prompt = system_prompt + "\n\n" + user_prompt + "\n\n" + assistant_prompt

        response = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model="llama3-8b-8192",
            temperature=0.7,
            top_p=0.9,
        )
        return response.choices[0].message.content.strip()

    # Fallback path: the nearest chunk was too far away to be relevant.
    # Fix: the original fallback prompt never included the user's question,
    # so the LLM was asked to "provide a general answer" with nothing to
    # answer. Include the query explicitly.
    fallback_prompt = (
        "The user's question is outside the scope of the PDF content. "
        "Provide a general answer without referencing external sources.\n\n"
        f"User's question: {query}"
    )
    fallback_response = client.chat.completions.create(
        messages=[{"role": "user", "content": fallback_prompt}],
        model="llama3-8b-8192",
        temperature=0.7,
        top_p=0.9
    )
    return fallback_response.choices[0].message.content.strip()
# Gradio app interface function
def gradio_interface(user_query):
    """Route a textbox query to the answer generator; greet on empty input."""
    if not user_query.strip():
        return "Welcome to **Physical Pharmacy Book**! Ask me anything related to pharmaceutical sciences."
    return generate_answer(user_query)
# Gradio interface setup: header markdown, a messages-style chat panel,
# a query textbox, and a submit button, all wired to the same handler.
with gr.Blocks(css=".footer {display: none;}") as iface:
    gr.Markdown(
        """
        <h1 style='text-align: center; color: #4CAF50;'>PharmaExpert Pro</h1>
        <p style='text-align: center; font-size: 18px; color: #333;'>
        Your advanced chatbot for pharmaceutical sciences expertise!
        </p>
        """,
        elem_id="header"
    )
    chatbot = gr.Chatbot(type="messages", elem_id="chatbot")
    msg = gr.Textbox(label="Enter your query", placeholder="Type your question here...", lines=2, max_lines=5)
    submit_btn = gr.Button("Submit", elem_id="submit-btn")

    def respond(message, chat_history):
        """Append the user's turn and the assistant's answer to the history."""
        chat_history.append({"role": "user", "content": message})
        answer = generate_answer(message)
        chat_history.append({"role": "assistant", "content": answer})
        # Clear the textbox and hand back the updated history.
        return "", chat_history

    # Enter in the textbox and the button both trigger the same handler.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    submit_btn.click(respond, [msg, chatbot], [msg, chatbot])
# Launch the Gradio app only when run as a script (not on import).
if __name__ == "__main__":
    iface.launch()