patrickvonplaten committed on
Commit
b5dcb77
1 Parent(s): 72a5961
Files changed (1) hide show
  1. app.py +5 -3
app.py CHANGED
@@ -4,12 +4,13 @@ import glob
4
  import shutil
5
  import os
6
  import tqdm
 
7
  import tempfile
8
  import re
9
  import pdfminer
10
 
11
- print("pdfminer", print(pdfminer.__version__))
12
- print("pandoc", print(pypandoc.__version__))
13
 
14
  #from docx import Document
15
  #document = Document()
@@ -98,11 +99,12 @@ def main(filename, codewords_mapping):
98
  f.write(out)
99
 
100
  def convert(*keywords):
 
101
  codewords_mapping = {k: v for k,v in zip(CATEGORIES, keywords)}
102
 
103
  num_files = 0
104
 
105
- return "_".join(codewords_mapping.keys())
106
 
107
  for folder in tqdm.tqdm(glob.glob("./*")):
108
  shutil.rmtree(RESULTS_FOLDER, ignore_errors=True)
 
4
  import shutil
5
  import os
6
  import tqdm
7
+ from huggingface_hub import snapshot_download
8
  import tempfile
9
  import re
10
  import pdfminer
11
 
12
+ print("pdfminer", pdfminer.__version__)
13
+ print("pandoc", pypandoc.__version__)
14
 
15
  #from docx import Document
16
  #document = Document()
 
99
  f.write(out)
100
 
101
  def convert(*keywords):
102
+ # cached_folder = snapshot_download("claudiag/atlas", token=os.environ.get("HF_TOKEN"))
103
  codewords_mapping = {k: v for k,v in zip(CATEGORIES, keywords)}
104
 
105
  num_files = 0
106
 
107
+ return "_".join(codewords_mapping.values())
108
 
109
  for folder in tqdm.tqdm(glob.glob("./*")):
110
  shutil.rmtree(RESULTS_FOLDER, ignore_errors=True)