"""Streamlit app that lists words and human genes similar to a query term.

The user types a word; the script loads the Word2Vec model stored at
``pubmed_model_clotting``, ranks the model's vocabulary by cosine-multiplicative
similarity to that word, and shows the result twice: once unfiltered and once
restricted to the symbols listed in ``Human_Genes.csv``.  The top ten rows of
each table are also written to ``clotting_sim1.csv`` / ``clotting_sim2.csv``
and echoed to stdout.
"""

import json
import time

import pandas as pd
import streamlit as st
from gensim.models import Word2Vec


def _similarity_table(model, query, topn=10000):
    """Return up to ``topn`` words most similar to ``query`` as a ranked frame.

    The frame has the columns ``Word`` and ``SIMILARITY`` and an index named
    ``Rank`` (0-based, in the order returned by the model).
    """
    hits = model.wv.most_similar_cosmul(query, topn=topn)
    # Naming the columns at construction keeps the frame well-formed even when
    # the model returns no neighbours (assigning ``.columns`` afterwards would
    # raise a length-mismatch ValueError on an empty frame).
    table = pd.DataFrame(hits, columns=['Word', 'SIMILARITY'])
    table.index.name = 'Rank'
    return table


def _gene_table(table, genes):
    """Return the rows of ``table`` whose ``Word`` is in ``genes['symbol']``.

    The ``Word`` column is renamed to ``Human Gene``.  A new frame is
    returned; ``table`` itself is left untouched.
    """
    is_gene = table['Word'].isin(genes['symbol'])
    # Non-inplace rename: renaming a boolean-mask slice in place triggers
    # pandas' SettingWithCopyWarning.
    return table[is_gene].rename(columns={'Word': 'Human Gene'})


def main():
    """Read the query from the text box and render both similarity tables."""
    query = (st.text_input("Enter a word") or "").strip()
    if not query:
        return

    model = Word2Vec.load("pubmed_model_clotting")

    # Words missing from the vocabulary would otherwise surface as a raw
    # KeyError traceback in the UI.
    if query not in model.wv.key_to_index:
        st.error(f"'{query}' is not in the model vocabulary.")
        return

    table = _similarity_table(model, query)

    print()
    print("Similarity to " + str(query))
    # Lift the row limit so the 100-row console dump is not truncated.
    pd.set_option('display.max_rows', None)
    print(table.head(100))
    table.head(10).to_csv("clotting_sim1.csv", index=True)
    st.header(f"Similar Words to {query}")
    st.write(table.head(50))

    print()
    print("Human genes similar to " + str(query))
    genes = pd.read_csv('Human_Genes.csv')
    gene_table = _gene_table(table, genes)
    print(gene_table.head(10))
    print()
    gene_table.head(10).to_csv("clotting_sim2.csv", index=True, header=False)
    # NOTE(review): the purpose of this two-second pause is not evident from
    # the script; kept to preserve the existing timing -- confirm it is needed.
    time.sleep(2)
    st.header(f"Similar Genes to {query}")
    # Show the gene-filtered table here (the unfiltered word table is already
    # displayed above).
    st.write(gene_table.head(50))


# Streamlit executes the script top to bottom on every interaction, so the
# entry point is invoked unconditionally.
main()