Spaces:

klinic-hackupc
/

klinic

Sleeping

App Files Files Community

ACMCMC committed on May 5

Commit

551646a

•

1 Parent(s): a6bd112

First final version

Browse files

Files changed (3) hide show

app.py +19 -7
llm_res.py +6 -9
utils.py +1 -1

app.py CHANGED Viewed

@@ -111,20 +111,20 @@ with st.container():
             status.write(
                 "Augmenting the set of diseases by finding others with related embeddings..."
             )
-            augmented_set_of_diseases = augment_the_set_of_diseaces(diseases_uris)
             similarities_of_augmented_set_of_diseases = (
                 get_similarities_among_diseases_uris(augmented_set_of_diseases)
             )
             df_similarities_augmented_set = get_similarities_df(
                 similarities_of_augmented_set_of_diseases
             )
-            status.table(
-                df_similarities_augmented_set.style.background_gradient(cmap="viridis", axis=None)
-            )
-            status.json(similarities_of_augmented_set_of_diseases, expanded=True)
             status.info(
                 f"Augmented set of diseases: {len(augmented_set_of_diseases)} diseases."
             )
             status.json(augmented_set_of_diseases, expanded=False)
             status.divider()
             # 6. Query the embeddings of the diseases related to each clinical trial (also in the DB), to get the most similar clinical trials to our set of diseases
@@ -193,12 +193,14 @@ We use the embeddings of the diseases to determine the similarity between them.
 Specifically, it optimizes the following cost function:
 $\\text{minimize} \\sum_{(h, r, t) \\in S} \\max(0, \\gamma + f(h, r, t) - f(h, r, t')) + \\sum_{(h, r, t) \\in S'} f(h, r, t)$
 """
         )
         try:
             edges_to_show = []
             labels_of_diseases = get_labels_of_diseases_from_uris(
-                df_similarities_augmented_set.index
             )
             uris_and_labels_of_diseases = dict(
                 zip(df_similarities_augmented_set.index, labels_of_diseases)
@@ -227,7 +229,7 @@ $\\text{minimize} \\sum_{(h, r, t) \\in S} \\max(0, \\gamma + f(h, r, t) - f(h,
                     Node(
                         id=disease,
                         label=disease,#uris_and_labels_of_diseases[disease],
-                        size=25,
                         shape="circular",
                     )
                     for disease in df_similarities_augmented_set.index
@@ -290,6 +292,16 @@ with st.container():
             with tabs[i]:
                 render_trial_details(trials[i])
 show_graph_of_all_diseases = False
 if show_graph_of_all_diseases:
     # If disease_names is not defined, define it

             status.write(
                 "Augmenting the set of diseases by finding others with related embeddings..."
             )
+            augmented_set_of_diseases = augment_the_set_of_diseaces(filtered_diseases_uris)
             similarities_of_augmented_set_of_diseases = (
                 get_similarities_among_diseases_uris(augmented_set_of_diseases)
             )
             df_similarities_augmented_set = get_similarities_df(
                 similarities_of_augmented_set_of_diseases
             )
+            #status.json(similarities_of_augmented_set_of_diseases, expanded=True)
             status.info(
                 f"Augmented set of diseases: {len(augmented_set_of_diseases)} diseases."
             )
+            status.table(
+                df_similarities_augmented_set.style.background_gradient(cmap="viridis", axis=None)
+            )
             status.json(augmented_set_of_diseases, expanded=False)
             status.divider()
             # 6. Query the embeddings of the diseases related to each clinical trial (also in the DB), to get the most similar clinical trials to our set of diseases
 Specifically, it optimizes the following cost function:
 $\\text{minimize} \\sum_{(h, r, t) \\in S} \\max(0, \\gamma + f(h, r, t) - f(h, r, t')) + \\sum_{(h, r, t) \\in S'} f(h, r, t)$
+By minimizing this cost function, the model learns the embeddings of the entities and relations that best represent the graph. The embeddings are then used to calculate the similarity between the diseases, which is shown in the graph.
 """
         )
         try:
             edges_to_show = []
             labels_of_diseases = get_labels_of_diseases_from_uris(
+                [f'http://identifiers.org/medgen/{disease}' for disease in augmented_set_of_diseases]
             )
             uris_and_labels_of_diseases = dict(
                 zip(df_similarities_augmented_set.index, labels_of_diseases)
                     Node(
                         id=disease,
                         label=disease,#uris_and_labels_of_diseases[disease],
+                        size=50,
                         shape="circular",
                     )
                     for disease in df_similarities_augmented_set.index
             with tabs[i]:
                 render_trial_details(trials[i])
+st.markdown(
+    """This app has been created in HackUPC 2024 by the team 'Klìnic'. The team members are:
+- [Aldan Creo](https://acmc-website.web.app)
+- [Matthias Seiler](https://www.linkedin.com/in/maseiler/)
+- [Tanguyvans Vansnick](https://www.linkedin.com/in/tanguy-vansnick-44186a199/)
+- [Arjit Samal](https://www.linkedin.com/in/arijit-samal1/)
+"""
+)
 show_graph_of_all_diseases = False
 if show_graph_of_all_diseases:
     # If disease_names is not defined, define it

llm_res.py CHANGED Viewed

@@ -309,17 +309,14 @@ def tagging_insights_from_json(data_json):
     processed_json = process_dictionaty_with_llm_to_generate_response(data_json)
     tagging_prompt = ChatPromptTemplate.from_template(
-        """
-    You are an expert on clinicial trials and analysis of their reports.
-    Extract the desired information from the following JSON data.
-    Only extract the properties mentioned in the 'Classification' function. Output a list of the extracted properties, starting with [ and ending with ].
-    JSON data:
-    {input}
-    """
-    )
     class Classification(BaseModel):
         # description: str = Field(

     processed_json = process_dictionaty_with_llm_to_generate_response(data_json)
     tagging_prompt = ChatPromptTemplate.from_template(
+        """Extract the desired information from the following JSON data.
+Only extract the properties mentioned in the 'Classification' function. Output a list of the extracted properties, starting with [ and ending with ], for each of the properties.
+Raw data (in JSON format):
+{input}
+"""
+)
     class Classification(BaseModel):
         # description: str = Field(

utils.py CHANGED Viewed

@@ -229,7 +229,7 @@ def filter_out_less_promising_diseases(info_dicts: List[Dict[str, Any]]) -> List
     filtered_diseases = df_diseases_similarities.mean()[
         df_diseases_similarities.mean() > mean - 0.2 * std
     ].index.tolist()
-    return filtered_diseases, df_diseases_similarities
 def get_labels_of_diseases_from_uris(uris: List[str]) -> List[str]:

     filtered_diseases = df_diseases_similarities.mean()[
         df_diseases_similarities.mean() > mean - 0.2 * std
     ].index.tolist()
+    return [f'http://identifiers.org/medgen/{d}' for d in filtered_diseases], df_diseases_similarities
 def get_labels_of_diseases_from_uris(uris: List[str]) -> List[str]: