1-ARIjitS committed on
Commit
d8b73be
1 Parent(s): 772bbc6

changed to include stats

Browse files
Files changed (2) hide show
  1. app.py +2 -1
  2. llm_res.py +51 -21
app.py CHANGED
@@ -88,8 +88,9 @@ with st.container():
88
  status.json(json_of_clinical_trials, expanded=False)
89
  # 7. Use an LLM to get a summary of the clinical trials, in plain text format.
90
  status.write("Getting a summary of the clinical trials...")
91
- response = get_short_summary_out_of_json_files(json_of_clinical_trials)
92
  print(f'Response from LLM summarization: {response}')
 
93
  status.write(f'Response from LLM summarization: {response}')
94
  # 8. Use an LLM to extract numerical data from the clinical trials (e.g. number of patients, number of deaths, etc.). Get summary statistics out of that.
95
  status.write("Getting summary statistics of the clinical trials...")
 
88
  status.json(json_of_clinical_trials, expanded=False)
89
  # 7. Use an LLM to get a summary of the clinical trials, in plain text format.
90
  status.write("Getting a summary of the clinical trials...")
91
+ response, stats_dict = get_short_summary_out_of_json_files(json_of_clinical_trials)
92
  print(f'Response from LLM summarization: {response}')
93
+ print(f'basic_stats_dict:{stats_dict}')
94
  status.write(f'Response from LLM summarization: {response}')
95
  # 8. Use an LLM to extract numerical data from the clinical trials (e.g. number of patients, number of deaths, etc.). Get summary statistics out of that.
96
  status.write("Getting summary statistics of the clinical trials...")
llm_res.py CHANGED
@@ -22,6 +22,8 @@ from langchain_core.pydantic_v1 import BaseModel, Field
22
  from langchain_openai import ChatOpenAI
23
  from langchain.chains.llm import LLMChain
24
  from langchain_core.prompts import PromptTemplate
 
 
25
 
26
  load_dotenv()
27
 
@@ -267,6 +269,26 @@ def get_short_summary_out_of_json_files(data_json):
267
 
268
  return result
269
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  def tagging_insights_from_json(data_json):
271
  processed_json= process_dictionaty_with_llm_to_generate_response(data_json)
272
 
@@ -286,25 +308,25 @@ def tagging_insights_from_json(data_json):
286
  # description: str = Field(
287
  # description="text description grouping all the clinical trials using briefDescription and detailedDescription keys"
288
  # )
289
- project_title: list = Field(
290
- description="Extract the project titles of all the clinical trials"
291
- )
292
- status: list = Field(
293
- description="Extract the status of all the clinical trials"
294
- )
295
- # keywords: list = Field(
296
- # description="Extract the most relevant keywords regrouping all the clinical trials"
297
  # )
298
- interventions: list = Field(
299
- description="describe the interventions for each clinical trial using title, name and description"
 
 
 
300
  )
 
 
 
301
  primary_outcomes: list = Field(
302
- description="get the primary outcomes of each clinical trial"
 
 
 
 
303
  )
304
- # secondary_outcomes: list= Field(description= "get the secondary outcomes of each clinical trial")
305
- # eligibility: list = Field(
306
- # description="get the eligibilityCriteria grouping all the clinical trials"
307
- # )
308
  healthy_volunteers: list= Field(description= "determine whether the clinical trial requires healthy volunteers")
309
  minimum_age: list = Field(
310
  description="get the minimum age from each experiment"
@@ -316,12 +338,12 @@ def tagging_insights_from_json(data_json):
316
 
317
  def get_dict(self):
318
  return {
319
- "project_title": self.project_title,
320
- "status": self.status,
321
- # "keywords": self.keywords,
322
- "interventions": self.interventions,
323
  "primary_outcomes": self.primary_outcomes,
324
- # "secondary_outcomes": self.secondary_outcomes,
325
  # "eligibility": self.eligibility,
326
  "healthy_volunteers": self.healthy_volunteers,
327
  "minimum_age": self.minimum_age,
@@ -342,8 +364,16 @@ def tagging_insights_from_json(data_json):
342
 
343
  res= tagging_chain.invoke({"input": processed_json})
344
  result_dict= res.get_dict()
 
 
 
 
 
 
 
 
345
  print(f"Result_tagging: {result_dict}")
346
- return result_dict
347
 
348
 
349
  # clinical_record_info = get_clinical_records_by_ids(['NCT00841061', 'NCT03035123', 'NCT02272751', 'NCT03035123', 'NCT03055377'])
 
22
  from langchain_openai import ChatOpenAI
23
  from langchain.chains.llm import LLMChain
24
  from langchain_core.prompts import PromptTemplate
25
+ from collections import Counter
26
+ import statistics
27
 
28
  load_dotenv()
29
 
 
269
 
270
  return result
271
 
272
def _leading_int(value):
    """Return the leading integer of an age entry, or None if there is none."""
    # bool is a subclass of int; a True/False entry is not a usable age.
    if isinstance(value, bool):
        return None
    if isinstance(value, int):
        return value
    if isinstance(value, str):
        tokens = value.split()
        if tokens:
            try:
                return int(tokens[0])
            except ValueError:
                # e.g. "N/A" or "18.5 Years": no leading integer token.
                return None
    return None


def analyze_data(data):
    """Compute basic summary statistics from the tagged clinical-trial data.

    Args:
        data: Mapping read for the keys 'minimum_age', 'maximum_age',
            'gender' and 'keywords'. A missing key or a None value is
            treated as an empty list.
            Age entries are presumably strings such as "18 Years"
            (TODO confirm against the tagging output); only the leading
            integer token is used and the unit is not interpreted.
            Falsy age entries (None, "", 0) are skipped, and so are
            entries without a leading integer.
            Keyword entries may be lists of keywords or a single keyword
            string; None entries are skipped.

    Returns:
        A tuple ``(avg_min_age, avg_max_age, most_common_gender,
        common_keywords)`` where the averages are None when no age could
        be parsed, ``most_common_gender`` is None when no gender is given,
        and ``common_keywords`` lists the distinct keywords ordered from
        most to least frequent.
    """
    def collect_ages(key):
        # Keep the original "skip falsy entries" rule, and additionally
        # drop entries that cannot be parsed instead of raising.
        parsed = (_leading_int(age) for age in (data.get(key) or []) if age)
        return [age for age in parsed if age is not None]

    min_ages = collect_ages('minimum_age')
    max_ages = collect_ages('maximum_age')

    # statistics.mean raises on empty input, so guard explicitly.
    avg_min_age = statistics.mean(min_ages) if min_ages else None
    avg_max_age = statistics.mean(max_ages) if max_ages else None

    # most_common(1) is an empty list when there are no genders at all.
    top_gender = Counter(data.get('gender') or []).most_common(1)
    most_common_gender = top_gender[0][0] if top_gender else None

    # Flatten per-trial keyword lists. A bare string counts as one keyword
    # rather than being split into its characters.
    keywords = []
    for entry in data.get('keywords') or []:
        if entry is None:
            continue
        if isinstance(entry, str):
            keywords.append(entry)
        else:
            keywords.extend(entry)
    common_keywords = [word for word, _ in Counter(keywords).most_common()]

    return avg_min_age, avg_max_age, most_common_gender, common_keywords
291
+
292
  def tagging_insights_from_json(data_json):
293
  processed_json= process_dictionaty_with_llm_to_generate_response(data_json)
294
 
 
308
  # description: str = Field(
309
  # description="text description grouping all the clinical trials using briefDescription and detailedDescription keys"
310
  # )
311
+ # project_title: list = Field(
312
+ # description="Extract the project titles of all the clinical trials"
 
 
 
 
 
 
313
  # )
314
+ # status: list = Field(
315
+ # description="Extract the status of all the clinical trials"
316
+ # )
317
+ keywords: list = Field(
318
+ description="Extract the most relevant keywords for each clinical trials"
319
  )
320
+ # interventions: list = Field(
321
+ # description="describe the interventions for each clinical trial using title, name and description"
322
+ # )
323
  primary_outcomes: list = Field(
324
+ description="get the timeframe of each clinical trial"
325
+ )
326
+ secondary_outcomes: list= Field(description= "get the secondary outcomes of each clinical trial")
327
+ eligibility: list = Field(
328
+ description="get the timeframe of each clinical trial"
329
  )
 
 
 
 
330
  healthy_volunteers: list= Field(description= "determine whether the clinical trial requires healthy volunteers")
331
  minimum_age: list = Field(
332
  description="get the minimum age from each experiment"
 
338
 
339
  def get_dict(self):
340
  return {
341
+ # "project_title": self.project_title,
342
+ # "status": self.status,
343
+ "keywords": self.keywords,
344
+ # "interventions": self.interventions,
345
  "primary_outcomes": self.primary_outcomes,
346
+ "secondary_outcomes": self.secondary_outcomes,
347
  # "eligibility": self.eligibility,
348
  "healthy_volunteers": self.healthy_volunteers,
349
  "minimum_age": self.minimum_age,
 
364
 
365
  res= tagging_chain.invoke({"input": processed_json})
366
  result_dict= res.get_dict()
367
+
368
+ avg_min_age, avg_max_age, most_common_gender, common_keywords= analyze_data(result_dict)
369
+
370
+ stats_dict= {'Average Minimum age': avg_min_age,
371
+ 'Average Maximum age': avg_max_age,
372
+ 'Most common gender undergoing the trials': most_common_gender,
373
+ 'common keywords found in the trials': common_keywords}
374
+
375
  print(f"Result_tagging: {result_dict}")
376
+ return result_dict, stats_dict
377
 
378
 
379
  # clinical_record_info = get_clinical_records_by_ids(['NCT00841061', 'NCT03035123', 'NCT02272751', 'NCT03035123', 'NCT03055377'])