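# OncoDigger / app.py
# Copied from the Hugging Face file viewer; viewer metadata kept for provenance:
#   author avatar: jfataphd's picture
#   commit message: Update app.py
#   commit: b2912c4
#   viewer links: raw / history / blame
#   scan status: No virus
#   size: 3.07 kB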
import streamlit as st
import time
import json
from gensim.models import Word2Vec
import pandas as pd
from datasets import load_dataset
from datasets import Dataset
# Page banner written as raw HTML so it can carry inline CSS styling.
html_temp = """
<div style="background-color:black;padding:10px">
<h1 style="color:white;text-align:center;">My Streamlit App with HTML and CSS</h1>
</div>
"""
# unsafe_allow_html=True makes Streamlit render the markup instead of escaping it.
st.markdown(html_temp, unsafe_allow_html=True)
# Introductory text shown under the banner.
st.write("This is my Streamlit app with HTML and CSS formatting.")
# Normalise the search term before it is looked up: surrounding whitespace is
# removed so that a blank or padded entry is not treated as a real word, and
# the text is lower-cased (presumably to match the model's vocabulary --
# confirm against how the model was trained).
query = st.text_input("Enter a word").strip().lower()
def _publish_top_hits(frame, noun, query, stem):
    """Offer the first 50 rows of *frame* for download and save them to disk.

    Args:
        frame: similarity table to export; only its first 50 rows are used.
        noun: word inserted into the button labels ("words" or "genes").
        query: the search term, shown in the button labels.
        stem: base file name; ".csv" and ".js" are appended to it.
    """
    top = frame.head(50)
    st.download_button(
        label=f"Download {noun} similar to {query} in .csv format",
        data=top.to_csv(index=False).encode('utf-8'),
        file_name=f"{stem}.csv",
        mime='text/csv',
    )
    st.download_button(
        label=f"Download {noun} similar to {query} in .js format",
        data=top.to_json(index=True).encode('utf-8'),
        # The payload is JSON even though the historical file suffix is ".js".
        file_name=f"{stem}.js",
        # "json" on its own is not a MIME type; the registered one is used.
        mime='application/json',
    )
    # Copies written next to the app keep the Rank index, unlike the CSV download.
    top.to_csv(f"{stem}.csv", index=True)
    top.to_json(f"{stem}.js", index=True)


if query:
    # NOTE(review): the model is read from disk on every rerun of the script;
    # consider Streamlit's caching if the deployed version supports it.
    model = Word2Vec.load("pubmed_model_clotting")

    if query not in model.wv.key_to_index:
        # Looking up an unknown key raises KeyError, which would otherwise
        # surface to the user as a stack trace.
        st.warning(
            f"'{query}' was not found in the model vocabulary. "
            "Please try another word."
        )
    else:
        # Rank vocabulary entries by multiplicative-cosine similarity to the query.
        table = pd.DataFrame(
            model.wv.most_similar_cosmul(query, topn=10000),
            columns=['Word', 'SIMILARITY'],
        )
        table.index.name = 'Rank'

        print()
        print("Similarity to " + str(query))
        # Let the console print below show every row rather than a truncated view.
        pd.set_option('display.max_rows', None)
        _publish_top_hits(table, "words", query, "clotting_sim1")
        print(table.head(10))
        st.header(f"Similar Words to {query}")
        st.write(table.head(50))

        print()
        print("Human genes similar to " + str(query))
        # Keep only the similar words that also appear in the gene symbol list.
        human_genes = pd.read_csv('Human_Genes.csv')
        is_gene = table['Word'].isin(human_genes['symbol'])
        # rename() returns a new frame, so the filtered slice is never
        # modified in place.
        gene_table = table[is_gene].rename(columns={'Word': 'Human Gene'})
        _publish_top_hits(gene_table, "genes", query, "clotting_sim2")
        print(gene_table.head(10))
        print()
        st.header(f"Similar Genes to {query}")
        st.write(gene_table.head(50))
# arrow_dataset = Dataset.from_pandas(df1.head(50))
# arrow_dataset.save_to_disk("https://huggingface.co/datasets/jfataphd/word2vec_dataset/sim2")
# arrow_dataset_reloaded = load_from_disk('sim2.js')
# arrow_dataset_reloaded