sivan22 committed on
Commit
c1fdc9d
1 Parent(s): e710286

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. app.py +27 -44
  2. requirements.txt +0 -1
  3. run.bat +1 -0
  4. test42.db +2 -2
  5. upload to hub.py +8 -0
app.py CHANGED
@@ -1,9 +1,7 @@
1
  import streamlit as st
2
  from streamlit.logger import get_logger
3
- import gematriapy
4
  from timeit import default_timer as timer
5
  import sqlite3
6
- import ast
7
  import pandas as pd
8
 
9
  LOGGER = get_logger(__name__)
@@ -13,41 +11,21 @@ def preprocess(s:str)->str:
13
 
14
  @st.cache_resource
15
  def get_dfs()->object:
16
-
17
- def to_daf_long(i:int)->str:
18
- if i>0 and i<999:
19
- i+=1
20
- if i%2 ==0:
21
- return gematriapy.to_hebrew(i//2)+' עמוד א '
22
- else:
23
- return gematriapy.to_hebrew(i//2)+' עמוד ב'
24
- return i
25
-
26
- def gematria(i)->str:
27
- if type(i) == int and i>0 and i<999:
28
- return gematriapy.to_hebrew(i) + ' '
29
- else: return i if type(i)==str else ''
30
-
31
  print('hello from get_dfs..')
32
-
33
  # //get the books table//
34
-
35
  # Connect to the database
36
  conn = sqlite3.connect('test42.db')
37
 
38
  # Query the database and retrieve the results
39
- cursor = conn.execute("SELECT * FROM books")
40
  results = cursor.fetchall()
41
 
42
  # Convert the query results into a Pandas DataFrame
43
- books = pd.DataFrame(list(results))
44
- books.columns=list(map(lambda x: x[0], cursor.description))
45
 
46
- # convert the array format string "["Section","Section"]" that came from the database into a real array [Section,Section]
47
- books['heSectionNames']=books['heSectionNames'].apply(lambda x: ast.literal_eval(x) if x is not None else [''] )
48
-
49
  # //get the texts table//
50
-
51
  # Query the database and retrieve the results
52
  cursor = conn.execute("SELECT * FROM texts")
53
  results = cursor.fetchall()
@@ -55,30 +33,30 @@ def get_dfs()->object:
55
  # Convert the query results into a Pandas DataFrame
56
  texts = pd.DataFrame(results)
57
  texts.columns=list(map(lambda x: x[0], cursor.description))
 
 
 
 
 
58
 
59
- # // get the table that includes the titles//
60
- # Query the database and retrieve the results
61
- cursor = conn.execute("SELECT * FROM titles")
 
 
 
62
  results = cursor.fetchall()
63
 
64
  # Convert the query results into a Pandas DataFrame
65
- titles = pd.DataFrame(results)
66
- titles.columns=list(map(lambda x: x[0], cursor.description))
67
- # merge the texts with the original books table (without the extra hebrew titles)
68
- merged = pd.merge(texts,books,how='inner',left_on='bid',right_on='_id')
69
-
70
- #convert the Talmud marks (1,2,3...) into dafs (א עמוד א..)
71
- has_dafs = merged.loc[merged['heSectionNames'].apply(lambda x: True if len(x)>1 and x[-2] == 'דף' else False)==True]
72
- merged.loc[has_dafs.index,'level2'] = has_dafs['level2'].map(to_daf_long)
73
 
74
- # create a reference text, for example: רש"י על בראשית פרק א פסוק א
75
- merged['ref_text_long']= merged['heTitle'] + ' ' + \
76
- merged['heSectionNames'].map(lambda x:x[-4] + ' ' if len(x)>3 else "") + merged['level4'].map(gematria) + \
77
- merged['heSectionNames'].map(lambda x:x[-3] + ' ' if len(x)>2 else "") + merged['level3'].map(gematria) + \
78
- merged['heSectionNames'].map(lambda x:x[-2] + ' ' if len(x)>1 else "") + merged['level2'].map(gematria)
79
 
80
  titles_df = titles
81
- texts_df = merged
82
  return titles_df, texts_df
83
 
84
 
@@ -137,8 +115,13 @@ def run():
137
  results = find_ref(titles_df,texts_df,user_input,top_k,num_of_results,algorithm)
138
  time = f"finished in {1e3*(timer()-time0):.1f} ms"
139
  st.write(time)
140
- for result in results:
 
141
  st.write(result)
 
 
 
 
142
 
143
  if __name__ == "__main__":
144
  run()
 
1
  import streamlit as st
2
  from streamlit.logger import get_logger
 
3
  from timeit import default_timer as timer
4
  import sqlite3
 
5
  import pandas as pd
6
 
7
  LOGGER = get_logger(__name__)
 
11
 
12
  @st.cache_resource
13
  def get_dfs()->object:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  print('hello from get_dfs..')
15
+
16
  # //get the titles table//
 
17
  # Connect to the database
18
  conn = sqlite3.connect('test42.db')
19
 
20
  # Query the database and retrieve the results
21
+ cursor = conn.execute("SELECT * FROM titles")
22
  results = cursor.fetchall()
23
 
24
  # Convert the query results into a Pandas DataFrame
25
+ titles = pd.DataFrame(results)
26
+ titles.columns=list(map(lambda x: x[0], cursor.description))
27
 
 
 
 
28
  # //get the texts table//
 
29
  # Query the database and retrieve the results
30
  cursor = conn.execute("SELECT * FROM texts")
31
  results = cursor.fetchall()
 
33
  # Convert the query results into a Pandas DataFrame
34
  texts = pd.DataFrame(results)
35
  texts.columns=list(map(lambda x: x[0], cursor.description))
36
+
37
+ # //get the references database
38
+ # Query the database and retrieve the results
39
+ cursor = conn.execute("SELECT * FROM refs")
40
+ results = cursor.fetchall()
41
 
42
+ # Convert the query results into a Pandas DataFrame
43
+ refs = pd.DataFrame(results)
44
+ refs.columns=list(map(lambda x: x[0], cursor.description))
45
+
46
+ # Query the database and retrieve the results
47
+ cursor = conn.execute("SELECT * FROM books")
48
  results = cursor.fetchall()
49
 
50
  # Convert the query results into a Pandas DataFrame
51
+ books = pd.DataFrame(list(results))
52
+ books.columns=list(map(lambda x: x[0], cursor.description))
 
 
 
 
 
 
53
 
54
+ #merge the books and refs with the texts
55
+ merged = pd.merge(texts,books,how='inner',left_on='bid',right_on='_id')
56
+ texts_df = pd.merge(merged,refs,left_on='_id_x',right_on='tid')
 
 
57
 
58
  titles_df = titles
59
+
60
  return titles_df, texts_df
61
 
62
 
 
115
  results = find_ref(titles_df,texts_df,user_input,top_k,num_of_results,algorithm)
116
  time = f"finished in {1e3*(timer()-time0):.1f} ms"
117
  st.write(time)
118
+ buttons = []
119
+ for i, result in enumerate(results):
120
  st.write(result)
121
+ buttons.append(st.button("פתח " +result['ref'],i))
122
+ if buttons[i]:
123
+ st.write(texts_df.loc[texts_df['ref_text_long']==result['ref']][['heText','ref_text_long']])
124
+
125
 
126
  if __name__ == "__main__":
127
  run()
requirements.txt CHANGED
@@ -1,3 +1,2 @@
1
  gematriapy
2
  pandas
3
- rapidfuzz
 
1
  gematriapy
2
  pandas
 
run.bat ADDED
@@ -0,0 +1 @@
 
 
1
+ streamlit run app.py
test42.db CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76e5c2fa4efd1ec73ec3babf569b831182849d1ce1e46fdadbd2a6e54aa538c4
3
- size 2063155200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:022710c8c0e53a525b01fb59f33b88605c0c6c2989b86340bf85c77cb16f8556
3
+ size 2225819648
upload to hub.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from huggingface_hub import HfApi
2
+ api = HfApi()
3
+
4
+ api.upload_folder(
5
+ folder_path="./",
6
+ repo_id="sivan22/sefaria-ref-finder",
7
+ repo_type="space",
8
+ )