jfataphd committed on
Commit
44363db
1 Parent(s): d6d28ab

Create word2vec_app.py

Browse files
Files changed (1) hide show
  1. word2vec_app.py +0 -71
word2vec_app.py CHANGED
@@ -1,71 +0,0 @@
"""Streamlit app listing the words and human genes most similar to a query.

The app loads a gensim Word2Vec model from ``pubmed_model_clotting``, ranks
the vocabulary by multiplicative cosine similarity to the word typed by the
user, and shows two tables: the most similar words overall, and the subset of
those words that appear in the ``symbol`` column of ``Human Genes.csv``.
The top ten rows of each table are also written to CSV files in the working
directory.
"""
import streamlit as st
import time
import json
from gensim.models import Word2Vec
import pandas as pd

# Paths are relative to the directory the app is started from.
MODEL_PATH = "pubmed_model_clotting"
GENE_LIST_PATH = 'Human Genes.csv'
WORD_CSV_PATH = "clotting_sim1.csv"
GENE_CSV_PATH = "clotting_sim2.csv"

# Number of neighbours requested from the model; the gene table is filtered
# from this pool, so it has to be much larger than what is displayed.
TOP_N = 10000
DISPLAY_ROWS = 50
EXPORT_ROWS = 10

# Define the HTML and CSS styles
html_temp = """
<div style="background-color:black;padding:10px">
<h1 style="color:white;text-align:center;">My Streamlit App with HTML and CSS</h1>
</div>
"""


def rank_similar_words(model, query, topn=TOP_N):
    """Return the ``topn`` words most similar to ``query`` as a DataFrame.

    The frame has the columns ``Word`` and ``SIMILARITY`` and an index named
    ``Rank``. Column names are passed to the constructor so that an empty
    result still yields a well-formed (empty) table.
    """
    ranked = pd.DataFrame(
        model.wv.most_similar_cosmul(query, topn=topn),
        columns=['Word', 'SIMILARITY'],
    )
    ranked.index.name = 'Rank'
    return ranked


def keep_human_genes(ranked, gene_symbols):
    """Return the rows of ``ranked`` whose ``Word`` is in ``gene_symbols``.

    The ``Word`` column is renamed to ``Human Gene`` in the returned frame;
    ``ranked`` itself is left untouched.
    """
    is_gene = ranked['Word'].isin(gene_symbols)
    return ranked[is_gene].rename(columns={'Word': 'Human Gene'})


# Display the HTML and CSS styles
st.markdown(html_temp, unsafe_allow_html=True)

# Add some text to the app
st.write("This is my Streamlit app with HTML and CSS formatting.")

# Surrounding whitespace would otherwise make a valid word look unknown.
query = st.text_input("Enter a word").strip()

if query:
    model = Word2Vec.load(MODEL_PATH)

    if query not in model.wv.key_to_index:
        # Looking up an unknown word raises KeyError; tell the user instead
        # of letting the page crash with a traceback.
        st.warning(f"'{query}' is not in the model vocabulary.")
    else:
        table = rank_similar_words(model, query)

        # Console output mirrors what is shown in the browser.
        print()
        print("Similarity to " + str(query))
        pd.set_option('display.max_rows', None)
        print(table.head(100))
        table.head(EXPORT_ROWS).to_csv(WORD_CSV_PATH, index=True)
        st.header(f"Similar Words to {query}")
        st.write(table.head(DISPLAY_ROWS))

        print()
        print("Human genes similar to " + str(query))
        gene_symbols = pd.read_csv(GENE_LIST_PATH).symbol
        genes = keep_human_genes(table, gene_symbols)
        print(genes.head(10))
        print()
        genes.head(EXPORT_ROWS).to_csv(GENE_CSV_PATH, index=True, header=False)
        st.header(f"Similar Genes to {query}")
        # Show the gene-filtered table here, not the full word table.
        st.write(genes.head(DISPLAY_ROWS))