jfataphd committed on
Commit
e5a12b8
1 Parent(s): 3f05b2c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -50
app.py CHANGED
@@ -3,8 +3,9 @@ import time
3
  import json
4
  from gensim.models import Word2Vec
5
  import pandas as pd
6
- from datasets import load_dataset
7
- from datasets import Dataset
 
8
 
9
  # Define the HTML and CSS styles
10
  html_temp = """
@@ -20,8 +21,8 @@ st.markdown(html_temp, unsafe_allow_html=True)
20
  st.write("This is my Streamlit app with HTML and CSS formatting.")
21
 
22
  query = st.text_input("Enter a word")
23
- # query = input ("Enter your keyword(s):")
24
  query = query.lower()
 
25
 
26
  if query:
27
  model = Word2Vec.load("pubmed_model_clotting") # you can continue training with the loaded model!
@@ -39,28 +40,31 @@ if query:
39
  print()
40
  print("Similarity to " + str(query))
41
  pd.set_option('display.max_rows', None)
42
-
43
- csv = table.head(50).to_csv(index=False).encode('utf-8')
44
- st.download_button(
45
- label=f"Download words similar to {query} in .csv format",
46
- data=csv,
47
- file_name='clotting_sim1.csv',
48
- mime='text/csv'
49
- )
50
-
51
- json = table.head(50).to_json(index=True).encode('utf-8')
52
- st.download_button(
53
- label=f"Download words similar to {query} in .js format",
54
- data=json,
55
- file_name='clotting_sim1.js',
56
- mime='json'
57
- )
58
-
59
- print(table.head(10))
60
- table.head(50).to_csv("clotting_sim1.csv", index=True)
61
- table.head(50).to_json("clotting_sim1.js", index=True)
62
  st.header(f"Similar Words to {query}")
63
- st.write(table.head(50))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  #
65
 
66
  print()
@@ -70,36 +74,44 @@ if query:
70
  m = df1.Word.isin(df2.symbol)
71
  df1 = df1[m]
72
  df1.rename(columns={'Word': 'Human Gene'}, inplace=True)
73
-
74
- csv2 = df1.head(50).to_csv(index=False).encode('utf-8')
75
- st.download_button(
76
- label=f"Download genes similar to {query} in .csv format",
77
- data=csv2,
78
- file_name='clotting_sim2.csv',
79
- mime='text/csv'
80
- )
81
-
82
- json2 = df1.head(50).to_json(index=True).encode('utf-8')
83
- st.download_button(
84
- label=f"Download words similar to {query} in .js format",
85
- data=json2,
86
- file_name='clotting_sim1.js',
87
- mime='json'
88
- )
89
- print(df1.head(10))
90
- df1.head(50).to_csv("clotting_sim2.csv", index=True)
91
- df1.head(50).to_json("clotting_sim2.js", index=True)
92
  print()
 
 
93
  st.header(f"Similar Genes to {query}")
94
- st.write(df1.head(50))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
 
97
- # arrow_dataset = Dataset.from_pandas(df1.head(50))
98
- # arrow_dataset.save_to_disk("https://huggingface.co/datasets/jfataphd/word2vec_dataset/sim2")
99
 
100
- # arrow_dataset_reloaded = load_from_disk('sim2.js')
101
- # arrow_dataset_reloaded
102
-
103
-
104
 
 
 
 
 
105
 
 
3
  import json
4
  from gensim.models import Word2Vec
5
  import pandas as pd
6
+ import matplotlib.pyplot as plt
7
+ import squarify
8
+ import numpy as np
9
 
10
  # Define the HTML and CSS styles
11
  html_temp = """
 
21
  st.write("This is my Streamlit app with HTML and CSS formatting.")
22
 
23
  query = st.text_input("Enter a word")
 
24
  query = query.lower()
25
+ # query = input ("Enter your keyword(s):")
26
 
27
  if query:
28
  model = Word2Vec.load("pubmed_model_clotting") # you can continue training with the loaded model!
 
40
  print()
41
  print("Similarity to " + str(query))
42
  pd.set_option('display.max_rows', None)
43
+ print(table.head(50))
44
+ table.head(10).to_csv("clotting_sim1.csv", index=True)
45
+ # short_table = table.head(50)
46
+ # print(table)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  st.header(f"Similar Words to {query}")
48
+
49
+ # calculate the sizes of the squares in the treemap
50
+ short_table = table.head(20)
51
+ short_table.index += 1
52
+ short_table.index = 1 / short_table.index
53
+ sizes = short_table.index.tolist()
54
+
55
+ cmap = plt.cm.Greens(np.linspace(0.05, .5, len(sizes)))
56
+ color = [cmap[i] for i in range(len(sizes))]
57
+
58
+ short_table.set_index('Word', inplace=True)
59
+ squarify.plot(sizes=sizes, label=short_table.index.tolist(), color=color, pad=.005, text_kwargs={'fontsize': 6})
60
+ # # plot the treemap using matplotlib
61
+ plt.axis('off')
62
+ fig = plt.gcf()
63
+ # # display the treemap in Streamlit
64
+ st.pyplot(fig)
65
+ plt.clf()
66
+
67
+ # st.write(short_table)
68
  #
69
 
70
  print()
 
74
  m = df1.Word.isin(df2.symbol)
75
  df1 = df1[m]
76
  df1.rename(columns={'Word': 'Human Gene'}, inplace=True)
77
+ df1["Human Gene"] = df1["Human Gene"].str.upper()
78
+ print(df1.head(50))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  print()
80
+ df1.head(50).to_csv("clotting_sim2.csv", index=True, header=False)
81
+ # time.sleep(2)
82
  st.header(f"Similar Genes to {query}")
83
+
84
+ df1 = df1.head(20)
85
+ df1.index = 1/df1.index
86
+ sizes = df1.index.tolist()
87
+
88
+ cmap2 = plt.cm.Blues(np.linspace(0.05, .5, len(sizes)))
89
+ color2 = [cmap2[i] for i in range(len(sizes))]
90
+
91
+ df1.set_index('Human Gene', inplace=True)
92
+ squarify.plot(sizes=sizes, label=df1.index.tolist(), color=color2, pad=.005, text_kwargs={'fontsize': 8})
93
+ #
94
+ # # plot the treemap using matplotlib
95
+
96
+ plt.axis('off')
97
+ fig2 = plt.gcf()
98
+ # plt.show()
99
+ #
100
+ # # display the treemap in Streamlit
101
+ st.pyplot(fig2)
102
+
103
+
104
+
105
+ # findRelationships(query, df)
106
+
107
+
108
+
109
 
110
 
 
 
111
 
 
 
 
 
112
 
113
+ # model = gensim.models.KeyedVectors.load_word2vec_format('pubmed_model_clotting', binary=True)
114
+ # similar_words = model.most_similar(word)
115
+ # output = json.dumps({"word": word, "similar_words": similar_words})
116
+ # st.write(output)
117