File size: 1,596 Bytes
e41540d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import streamlit as st
import time
import json
from gensim.models import Word2Vec
import pandas as pd


# Streamlit page: look a word up in the pre-trained Word2Vec model and show
# (1) its most similar words and (2) the subset of those words that appear
# in the `symbol` column of Human_Genes.csv.
# Surrounding whitespace is stripped so " word " still matches the vocabulary.
query = st.text_input("Enter a word").strip()

if query:
    # NOTE: Streamlit re-executes the script on every interaction, so the
    # model is re-loaded from disk each time a query is submitted.
    model = Word2Vec.load("pubmed_model_clotting")

    if query not in model.wv.key_to_index:
        # most_similar_cosmul raises KeyError for out-of-vocabulary words;
        # report that to the user instead of crashing the page.
        st.error(f"'{query}' is not in the model vocabulary.")
    else:
        # (word, score) pairs, best match first.
        table = pd.DataFrame(
            model.wv.most_similar_cosmul(query, topn=10000),
            columns=['Word', 'SIMILARITY'],
        )
        table.index.name = 'Rank'

        # Console log + CSV export of the top matches.
        print()
        print("Similarity to " + str(query))
        pd.set_option('display.max_rows', None)
        print(table.head(100))
        table.head(10).to_csv("clotting_sim1.csv", index=True)

        st.header(f"Similar Words to {query}")
        st.write(table.head(50))

        # Keep only the similar words that are listed as human gene symbols.
        print()
        print("Human genes similar to " + str(query))
        gene_reference = pd.read_csv('Human_Genes.csv')
        is_gene = table['Word'].isin(gene_reference['symbol'])
        # rename() returns a new frame, so the filtered slice is never
        # mutated in place (avoids pandas' SettingWithCopyWarning).
        genes = table[is_gene].rename(columns={'Word': 'Human Gene'})
        print(genes.head(10))
        print()
        genes.head(10).to_csv("clotting_sim2.csv", index=True, header=False)

        st.header(f"Similar Genes to {query}")
        # Show the gene-filtered frame under the gene header (not the
        # unfiltered similarity table again).
        st.write(genes.head(50))


# findRelationships(query, df)







# model = gensim.models.KeyedVectors.load_word2vec_format('pubmed_model_clotting', binary=True)
# similar_words = model.most_similar(word)
# output = json.dumps({"word": word, "similar_words": similar_words})
# st.write(output)