Solshine committed on
Commit
24cdf80
1 Parent(s): 7fcf3f4

Upload MiniMed_EHR_Analyst_Spaces.py

Browse files

Rough draft with annotations. Attempting to run using the K23_MiniMed model, which is fine-tuned on PubMed data; however, config issues have so far prevented me from using it successfully.

Files changed (1) hide show
  1. MiniMed_EHR_Analyst_Spaces.py +53 -0
MiniMed_EHR_Analyst_Spaces.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
4
+
5
+ #Note this should be used always in compliance with applicable laws and regulations if used with real patient data.
6
+
7
# Load the tokenizer and model: pseudolab/K23_MiniMed by Tonic.
# NOTE: this is a large model and will take a while to download. Config issues
# persist with this model, unfortunately; it may not be ready for use.
MODEL_NAME = "pseudolab/K23_MiniMed"


@st.cache_resource
def _load_tokenizer_and_model(model_name):
    """Load the tokenizer and causal LM for *model_name* once per process.

    Streamlit re-executes the whole script on every widget interaction.
    ``st.cache_resource`` keeps the loaded objects alive between reruns so the
    checkpoint is not reloaded each time a file is uploaded.

    Args:
        model_name: Hub identifier or local path passed to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    return (
        AutoTokenizer.from_pretrained(model_name),
        AutoModelForCausalLM.from_pretrained(model_name),
    )


tokenizer, model = _load_tokenizer_and_model(MODEL_NAME)

# Upload patient data
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
14
+
15
+ # Prepare the context
16
def prepare_context(data):
    """Turn a table of records into model-ready token ids.

    The table is rendered as plain text without the index or the header row,
    encoded with the module-level ``tokenizer``, and cut down to the
    tokenizer's ``model_max_length`` when it is longer than that.

    Args:
        data: Object exposing ``to_string(index=..., header=...)``
            (presumably a pandas DataFrame; the caller passes the result of
            ``pd.read_csv``).

    Returns:
        The encoded ids as returned by ``tokenizer.encode(...,
        return_tensors="pt")``, with the second dimension limited to
        ``tokenizer.model_max_length`` entries.
    """
    table_text = data.to_string(index=False, header=False)
    token_ids = tokenizer.encode(table_text, return_tensors="pt")

    limit = tokenizer.model_max_length
    # Only slice when the sequence really is too long; short inputs are
    # returned untouched.
    if token_ids.shape[1] <= limit:
        return token_ids
    return token_ids[:, :limit]
29
+
30
# Upper bound on tokens generated per call. Without an explicit limit the
# model's default generation length applies, which is typically far too short
# for an analysis of a whole table.
MAX_NEW_TOKENS = 256

if uploaded_file is not None:
    try:
        data = pd.read_csv(uploaded_file)
    except pd.errors.EmptyDataError:
        # A completely empty upload has no header row to parse; treat it the
        # same as a file with no records.
        data = pd.DataFrame()
    st.write(data)

    if data.empty:
        st.write("Please enter patient data")
    else:
        # Build the pipeline once and reuse it for both generations. The
        # tokenizer must be passed explicitly because ``model`` is an already
        # loaded object rather than a model name.
        generator = pipeline('text-generation', model=model, tokenizer=tokenizer)

        # prepare_context returns (possibly truncated) token ids, but the
        # text-generation pipeline takes text, so decode the ids back.
        context = prepare_context(data)
        context_text = tokenizer.decode(context[0], skip_special_tokens=True)

        # Generate text based on the context
        generated_text = generator(
            context_text, max_new_tokens=MAX_NEW_TOKENS
        )[0]['generated_text']
        st.write(generated_text)

        # Internally prompt the model to analyze the EHR patient data
        prompt = "You are an Electronic Health Records analyst with nursing school training. Please analyze patient data that you are provided here. Give an organized, step-by-step, formatted health records analysis. You will always be truthful and if you do not know the answer say you do not know."

        # Append the records to the instructions so the model actually sees
        # the data it is asked to analyze.
        # NOTE(review): the context was truncated to the tokenizer's maximum
        # length on its own; with the prompt prepended the combined input can
        # exceed that limit for large files. Confirm how the model handles it.
        analysis_input = f"{prompt}\n\n{context_text}"
        generated_text = generator(
            analysis_input, max_new_tokens=MAX_NEW_TOKENS
        )[0]['generated_text']
        st.write(generated_text)
else:
    st.write("No file uploaded")