Commit 850fcc9
ljyflores committed
Parent(s): 16939ac

Add cached files and update app
Browse files
- __pycache__/utils_casemaker.cpython-310.pyc +0 -0
- __pycache__/utils_report_parser.cpython-310.pyc +0 -0
- app.py +15 -5
- utils_casemaker.py +16 -11
__pycache__/utils_casemaker.cpython-310.pyc
ADDED
Binary file (8.49 kB)

__pycache__/utils_report_parser.cpython-310.pyc
ADDED
Binary file (582 Bytes)
app.py
CHANGED
@@ -20,7 +20,7 @@ if uploaded_file is not None:
     )
 
     patient_options = {
-        f"Patient {patient_id}
+        f"Patient {patient_id} ({len(reports[patient_id])} reports)": patient_id
         for patient_id in reports.keys()
     }
     selected_patient_string = st.radio(
@@ -34,7 +34,17 @@ if uploaded_file is not None:
     summary_by_organ = casemaker.parse_records(reports[selected_patient_id])
     summary_by_organ = casemaker.format_reports(summary_by_organ)
 
-
-
-
-
+    # Display the report
+    col1, col2 = st.columns(2)
+    with col1:
+        st.subheader("Original")
+        for report in reports[selected_patient_id]:
+            st.write(f"**Report {report.date}**")
+            st.write(report.text)
+
+    with col2:
+        st.subheader("With Casemaker")
+        for chosen_organ in summary_by_organ.keys():
+            if summary_by_organ[chosen_organ]:
+                st.header(chosen_organ.capitalize())
+                st.write(summary_by_organ[chosen_organ])
utils_casemaker.py
CHANGED
@@ -16,12 +16,14 @@ from transformers import (
     pipeline,
 )
 
+
 @dataclass
 class Report:
-    patient_id: str|int
+    patient_id: str | int
     text: str
     date: str
-    summary: str|None = None
+    summary: str | None = None
+
 
 def clean(s: str) -> str:
     s = s.replace("\n", " ")  # Concatenate into one string
@@ -64,7 +66,7 @@ def format_casemaker_data(
         .groupby("patient_id")
         .apply(lambda df: df[["patient_id", "text", "date"]].to_dict("records"))
     )
-    reports_by_patient = dict[str,Sequence[Report]]()
+    reports_by_patient = dict[str, Sequence[Report]]()
     for patient_id, report_list in zip(df.index, df):
        patient_id = str(patient_id)
        report_list = [Report(**report) for report in report_list]
@@ -151,11 +153,8 @@ class CaseMaker:
         return report_string_by_organ
 
     def trim_to_relevant_portion(self, report: str):
-        # Cut the report to the findings
-        report = get_section_from_report(report, "findings")
-
         # Only keep sentences with symptoms and disease descriptions
-        relevant_sentences = []
+        relevant_sentences = list[str]()
         for sentence in sent_tokenize(report):
             if any(
                 [
@@ -163,7 +162,7 @@ class CaseMaker:
                 for ent in self.ner_pipe(sentence)
             ]
         ):
-            relevant_sentences.append(sentence)
+            relevant_sentences.append(str(sentence))
         return "\n".join(relevant_sentences)
 
     def summarize_report(self, text: str) -> str:
@@ -219,12 +218,18 @@ class CaseMaker:
             must contain "text" and "date" keys
         """
 
-        #
+        # Split the reports by organ
         reports_by_organ = dict[str, Sequence[Report]]()
         for report in reports:
-
+            # Cut the report to the findings
+            report_findings = get_section_from_report(report.text, "findings")
+
+            # For each organ, collect a list of relevant records containing the text and date
+            report_by_organ = self.parse_report_by_organ(report_findings)
             for organ, report_text in report_by_organ.items():
-                organ_level_record = Report(
+                organ_level_record = Report(
+                    text=report_text, date=report.date, patient_id=report.patient_id
+                )
                 if organ in reports_by_organ:
                     reports_by_organ[organ].append(organ_level_record)
                 else: