Spaces:

danielhajialigol
/

DRGCoder

Running

App Files Files Community

danielhajialigol committed on Aug 3, 2023

Commit

6a4a8e0

•

1 Parent(s): b3e501a

added drg and icd external link functionality

Browse files

Files changed (2) hide show

app.py +7 -4
utils.py +37 -5

app.py CHANGED Viewed

@@ -2,10 +2,9 @@ import numpy as np
 import gradio as gr
 import pandas as pd
 import torch
-import random
 from model import MimicTransformer
-from utils import load_rule, get_attribution, get_drg_link, visualize_attn
 from transformers import set_seed
 set_seed(42)
@@ -21,7 +20,7 @@ related_tensor = torch.load('discharge_embeddings.pt')
 # get model and results
 mimic = read_model(model=mimic, path=model_path)
-all_summaries = pd.read_csv('all_summaries.csv')['SUMMARIES'][:10000].to_list()
 tokenizer = mimic.tokenizer
 mimic.eval()
@@ -78,9 +77,12 @@ def run(text, related_discharges=False):
     model_results = get_model_results(text=text)
     drg_code = model_results['class']
     drg_link = get_drg_link(drg_code=drg_code)
     row = rule_df[rule_df['DRG_CODE'] == drg_code]
     drg_description = row['DESCRIPTION'].values[0]
     model_results['class_dsc'] = drg_description
     global related_summaries
     # related_summaries = generate_similar_summeries()
     related_summaries = find_related_summaries(model_results['logits'])
@@ -129,7 +131,8 @@ def prettify_text(nested_list):
     idx = 1
     string = ''
     for li in nested_list:
-        string += f'({idx})\n{li[0]}\n\n'
         idx += 1
     return string

 import gradio as gr
 import pandas as pd
 import torch
 from model import MimicTransformer
+from utils import load_rule, get_attribution, get_drg_link, get_icd_annotations, visualize_attn
 from transformers import set_seed
 set_seed(42)
 # get model and results
 mimic = read_model(model=mimic, path=model_path)
+all_summaries = pd.read_csv('all_summaries.csv')['SUMMARIES'].to_list()
 tokenizer = mimic.tokenizer
 mimic.eval()
     model_results = get_model_results(text=text)
     drg_code = model_results['class']
     drg_link = get_drg_link(drg_code=drg_code)
+    icd_results = get_icd_annotations(text=text)
     row = rule_df[rule_df['DRG_CODE'] == drg_code]
     drg_description = row['DESCRIPTION'].values[0]
     model_results['class_dsc'] = drg_description
+    model_results['drg_link'] = drg_link
+    model_results['icd_results'] = icd_results
     global related_summaries
     # related_summaries = generate_similar_summeries()
     related_summaries = find_related_summaries(model_results['logits'])
     idx = 1
     string = ''
     for li in nested_list:
+        delimiters = 99 * '='
+        string += f'({idx})\n{li[0]}\n{delimiters}\n'
         idx += 1
     return string

utils.py CHANGED Viewed

@@ -66,7 +66,12 @@ def clean_text(text):
     return new_text
 def get_drg_link(drg_code):
-    return f'https://www.aapc.com/codes/icd9-codes/{drg_code}'
 def prettify(dict_list, k):
     li = [di[k] for di in dict_list]
@@ -179,7 +184,7 @@ def reconstruct_text(tokenizer, tokens, attn):
     # final representation of text
     final_text = ' '.join(reconstructed_tokens).replace(' .', '.')
     final_text = final_text.replace(' ,', ',')
-    assert final_text == reconstructed_text
     return aggregated_attn, reconstructed_tokens
 def load_rule(path):
@@ -225,7 +230,7 @@ def visualize_attn(model_results):
         raw_input_ids=tokens,
         convergence_score=1
     )
-    return visualize_text(viz_record)
 def modify_attn_html(attn_html):
@@ -233,20 +238,46 @@ def modify_attn_html(attn_html):
     htmls = [attn_split[0]]
     for html in attn_split[1:]:
         # wrap around href tag
-        href_html = f'<a href="espn.com" \
             <mark{html} \
             </a>'
         htmls.append(href_html)
     return "".join(htmls)
 # copied out of captum because we need raw html instead of a jupyter widget
-def visualize_text(datarecord):
     dom = ["<table width: 100%>"]
     rows = [
         "<th style='text-align: left'>Predicted DRG</th>"
         "<th style='text-align: left'>Word Importance</th>"
     ]
     pred_class_html = visualization.format_classname(datarecord.pred_class)
     word_attn_html = visualization.format_word_importances(
         datarecord.raw_input_ids, datarecord.word_attributions
     )
@@ -257,6 +288,7 @@ def visualize_text(datarecord):
                 "<tr>",
                 pred_class_html,
                 word_attn_html,
                 "<tr>",
             ]
         )

     return new_text
 def get_drg_link(drg_code):
+    drg_code = str(drg_code)
+    if len(drg_code) == 1:
+        drg_code = '00' + drg_code
+    elif len(drg_code) == 2:
+        drg_code = '0' + drg_code
+    return f'https://www.findacode.com/code.php?set=DRG&c={drg_code}'
 def prettify(dict_list, k):
     li = [di[k] for di in dict_list]
     # final representation of text
     final_text = ' '.join(reconstructed_tokens).replace(' .', '.')
     final_text = final_text.replace(' ,', ',')
+    # final_text == reconstructed_text
     return aggregated_attn, reconstructed_tokens
 def load_rule(path):
         raw_input_ids=tokens,
         convergence_score=1
     )
+    return visualize_text(viz_record, drg_link=model_results['drg_link'], icd_annotations=model_results['icd_results'])
 def modify_attn_html(attn_html):
     htmls = [attn_split[0]]
     for html in attn_split[1:]:
         # wrap around href tag
+        href_html = f'<a href="https://espn.com" \
             <mark{html} \
             </a>'
         htmls.append(href_html)
     return "".join(htmls)
def modify_code_html(html, link, icd=False):
    """Wrap the contents of a ``<td>`` cell in a hyperlink.

    Args:
        html: Markup containing a ``<td>...</td>`` cell. Only the text
            between the first ``<td>`` and the next ``</td>`` is kept.
        link: URL placed in the anchor's ``href`` attribute.
        icd: When true, return the bare anchor without the surrounding
            ``<td>`` so that several anchors can share one cell.

    Returns:
        ``<td><a href="...">...</a></td>``, or just the ``<a>`` element
        when ``icd`` is true.

    Raises:
        IndexError: If ``html`` contains no ``<td>`` tag.
    """
    inner_html = html.split('<td>')[1].split('</td>')[0]
    # The anchor's start tag must be closed with '>' before its content;
    # without it the content is parsed as attributes of the <a> element.
    anchor_html = f'<a href="{link}">{inner_html}</a>'
    if icd:
        return anchor_html
    return f'<td>{anchor_html}</td>'
def modify_drg_html(html, drg_link):
    """Return the predicted-DRG cell with its contents linked to ``drg_link``.

    Delegates to ``modify_code_html`` with ``icd`` disabled.
    """
    return modify_code_html(html, drg_link, False)
def get_icd_html(icd_list):
    """Render ICD annotations as one table cell of linked code labels.

    Args:
        icd_list: Sequence of dicts, each providing a ``'text'`` label and
            a ``'link'`` URL. May be empty or ``None``.

    Returns:
        An HTML ``<td>`` string: a bold ``N/A`` placeholder when there are
        no annotations, otherwise one anchor per annotation, concatenated
        in input order inside a single cell.
    """
    if not icd_list:
        return '<td><text style="padding-right:2em"><b>N/A</b></text></td>'
    # Each label is formatted as a class-name cell, then reduced to a bare
    # anchor (icd=True) so all anchors can sit inside one shared <td>.
    anchors = [
        modify_code_html(
            html=visualization.format_classname(classname=icd_dict['text']),
            link=icd_dict['link'],
            icd=True,
        )
        for icd_dict in icd_list
    ]
    return '<td>' + ''.join(anchors) + '</td>'
 # copied out of captum because we need raw html instead of a jupyter widget
+def visualize_text(datarecord, drg_link, icd_annotations):
     dom = ["<table width: 100%>"]
     rows = [
         "<th style='text-align: left'>Predicted DRG</th>"
         "<th style='text-align: left'>Word Importance</th>"
+        "<th style='text-align: left'>ICD Codes</th>"
     ]
     pred_class_html = visualization.format_classname(datarecord.pred_class)
+    icd_class_html = get_icd_html(icd_annotations)
+    pred_class_html = modify_drg_html(html=pred_class_html, drg_link=drg_link)
     word_attn_html = visualization.format_word_importances(
         datarecord.raw_input_ids, datarecord.word_attributions
     )
                 "<tr>",
                 pred_class_html,
                 word_attn_html,
+                icd_class_html,
                 "<tr>",
             ]
         )