ACMCMC committed on
Commit
551646a
1 Parent(s): a6bd112

First final version

Browse files
Files changed (3) hide show
  1. app.py +19 -7
  2. llm_res.py +6 -9
  3. utils.py +1 -1
app.py CHANGED
@@ -111,20 +111,20 @@ with st.container():
111
  status.write(
112
  "Augmenting the set of diseases by finding others with related embeddings..."
113
  )
114
- augmented_set_of_diseases = augment_the_set_of_diseaces(diseases_uris)
115
  similarities_of_augmented_set_of_diseases = (
116
  get_similarities_among_diseases_uris(augmented_set_of_diseases)
117
  )
118
  df_similarities_augmented_set = get_similarities_df(
119
  similarities_of_augmented_set_of_diseases
120
  )
121
- status.table(
122
- df_similarities_augmented_set.style.background_gradient(cmap="viridis", axis=None)
123
- )
124
- status.json(similarities_of_augmented_set_of_diseases, expanded=True)
125
  status.info(
126
  f"Augmented set of diseases: {len(augmented_set_of_diseases)} diseases."
127
  )
 
 
 
128
  status.json(augmented_set_of_diseases, expanded=False)
129
  status.divider()
130
  # 6. Query the embeddings of the diseases related to each clinical trial (also in the DB), to get the most similar clinical trials to our set of diseases
@@ -193,12 +193,14 @@ We use the embeddings of the diseases to determine the similarity between them.
193
 
194
  Specifically, it optimizes the following cost function:
195
  $\\text{minimize} \\sum_{(h, r, t) \\in S} \\max(0, \\gamma + f(h, r, t) - f(h, r, t')) + \\sum_{(h, r, t) \\in S'} f(h, r, t)$
 
 
196
  """
197
  )
198
  try:
199
  edges_to_show = []
200
  labels_of_diseases = get_labels_of_diseases_from_uris(
201
- df_similarities_augmented_set.index
202
  )
203
  uris_and_labels_of_diseases = dict(
204
  zip(df_similarities_augmented_set.index, labels_of_diseases)
@@ -227,7 +229,7 @@ $\\text{minimize} \\sum_{(h, r, t) \\in S} \\max(0, \\gamma + f(h, r, t) - f(h,
227
  Node(
228
  id=disease,
229
  label=disease,#uris_and_labels_of_diseases[disease],
230
- size=25,
231
  shape="circular",
232
  )
233
  for disease in df_similarities_augmented_set.index
@@ -290,6 +292,16 @@ with st.container():
290
  with tabs[i]:
291
  render_trial_details(trials[i])
292
 
 
 
 
 
 
 
 
 
 
 
293
  show_graph_of_all_diseases = False
294
  if show_graph_of_all_diseases:
295
  # If disease_names is not defined, define it
 
111
  status.write(
112
  "Augmenting the set of diseases by finding others with related embeddings..."
113
  )
114
+ augmented_set_of_diseases = augment_the_set_of_diseaces(filtered_diseases_uris)
115
  similarities_of_augmented_set_of_diseases = (
116
  get_similarities_among_diseases_uris(augmented_set_of_diseases)
117
  )
118
  df_similarities_augmented_set = get_similarities_df(
119
  similarities_of_augmented_set_of_diseases
120
  )
121
+ #status.json(similarities_of_augmented_set_of_diseases, expanded=True)
 
 
 
122
  status.info(
123
  f"Augmented set of diseases: {len(augmented_set_of_diseases)} diseases."
124
  )
125
+ status.table(
126
+ df_similarities_augmented_set.style.background_gradient(cmap="viridis", axis=None)
127
+ )
128
  status.json(augmented_set_of_diseases, expanded=False)
129
  status.divider()
130
  # 6. Query the embeddings of the diseases related to each clinical trial (also in the DB), to get the most similar clinical trials to our set of diseases
 
193
 
194
  Specifically, it optimizes the following cost function:
195
  $\\text{minimize} \\sum_{(h, r, t) \\in S} \\max(0, \\gamma + f(h, r, t) - f(h, r, t')) + \\sum_{(h, r, t) \\in S'} f(h, r, t)$
196
+
197
+ By minimizing this cost function, the model learns the embeddings of the entities and relations that best represent the graph. The embeddings are then used to calculate the similarity between the diseases, which is shown in the graph.
198
  """
199
  )
200
  try:
201
  edges_to_show = []
202
  labels_of_diseases = get_labels_of_diseases_from_uris(
203
+ [f'http://identifiers.org/medgen/{disease}' for disease in augmented_set_of_diseases]
204
  )
205
  uris_and_labels_of_diseases = dict(
206
  zip(df_similarities_augmented_set.index, labels_of_diseases)
 
229
  Node(
230
  id=disease,
231
  label=disease,#uris_and_labels_of_diseases[disease],
232
+ size=50,
233
  shape="circular",
234
  )
235
  for disease in df_similarities_augmented_set.index
 
292
  with tabs[i]:
293
  render_trial_details(trials[i])
294
 
295
+
296
+ st.markdown(
297
+ """This app has been created in HackUPC 2024 by the team 'Klìnic'. The team members are:
298
+ - [Aldan Creo](https://acmc-website.web.app)
299
+ - [Matthias Seiler](https://www.linkedin.com/in/maseiler/)
300
+ - [Tanguyvans Vansnick](https://www.linkedin.com/in/tanguy-vansnick-44186a199/)
301
+ - [Arjit Samal](https://www.linkedin.com/in/arijit-samal1/)
302
+ """
303
+ )
304
+
305
  show_graph_of_all_diseases = False
306
  if show_graph_of_all_diseases:
307
  # If disease_names is not defined, define it
llm_res.py CHANGED
@@ -309,17 +309,14 @@ def tagging_insights_from_json(data_json):
309
  processed_json = process_dictionaty_with_llm_to_generate_response(data_json)
310
 
311
  tagging_prompt = ChatPromptTemplate.from_template(
312
- """
313
- You are an expert on clinicial trials and analysis of their reports.
314
 
315
- Extract the desired information from the following JSON data.
316
 
317
- Only extract the properties mentioned in the 'Classification' function. Output a list of the extracted properties, starting with [ and ending with ].
318
-
319
- JSON data:
320
- {input}
321
- """
322
- )
323
 
324
  class Classification(BaseModel):
325
  # description: str = Field(
 
309
  processed_json = process_dictionaty_with_llm_to_generate_response(data_json)
310
 
311
  tagging_prompt = ChatPromptTemplate.from_template(
312
+ """Extract the desired information from the following JSON data.
 
313
 
314
+ Only extract the properties mentioned in the 'Classification' function. Output a list of the extracted properties, starting with [ and ending with ], for each of the properties.
315
 
316
+ Raw data (in JSON format):
317
+ {input}
318
+ """
319
+ )
 
 
320
 
321
  class Classification(BaseModel):
322
  # description: str = Field(
utils.py CHANGED
@@ -229,7 +229,7 @@ def filter_out_less_promising_diseases(info_dicts: List[Dict[str, Any]]) -> List
229
  filtered_diseases = df_diseases_similarities.mean()[
230
  df_diseases_similarities.mean() > mean - 0.2 * std
231
  ].index.tolist()
232
- return filtered_diseases, df_diseases_similarities
233
 
234
 
235
  def get_labels_of_diseases_from_uris(uris: List[str]) -> List[str]:
 
229
  filtered_diseases = df_diseases_similarities.mean()[
230
  df_diseases_similarities.mean() > mean - 0.2 * std
231
  ].index.tolist()
232
+ return [f'http://identifiers.org/medgen/{d}' for d in filtered_diseases], df_diseases_similarities
233
 
234
 
235
  def get_labels_of_diseases_from_uris(uris: List[str]) -> List[str]: