Grosy committed on
Commit b1ede63
Parent: 48c155e

added alternatives

Files changed (1): app.py +3 -3
app.py CHANGED
@@ -46,7 +46,7 @@ def calculateEmbeddings(sentences,tokenizer,model):
 # explicit no operation hash function, because model and tokenizer are not going to change
 @st.cache(hash_funcs={transformers.models.bert.tokenization_bert_fast.BertTokenizerFast: lambda _: None, transformers.models.bert.modeling_bert.BertModel: lambda _: None})
 def load_model_and_tokenizer():
-    multilingual_checkpoint = 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'
+    multilingual_checkpoint = 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2' #alternative: SZTAKI-HLT/hubert-base-cc
     tokenizer = AutoTokenizer.from_pretrained(multilingual_checkpoint)
     model = AutoModel.from_pretrained(multilingual_checkpoint)
     print(type(tokenizer))
@@ -58,7 +58,7 @@ model,tokenizer = load_model_and_tokenizer();
 raw_text_file = 'joint_text_filtered.md'
 all_sentences = load_raw_sentences(raw_text_file)
 
-embeddings_file = 'multibert_embedded.pt'
+embeddings_file = 'multibert_embedded.pt' #alternative: hunbert_embedded.pt
 all_embeddings = load_embeddings(embeddings_file)
 
 
@@ -68,7 +68,7 @@ st.caption('[HU] Adjon meg egy tetszőleges kifejezést és a rendszer visszaadj
 
 
 
-text_area_input_query = st.text_area('[HU] Beviteli mező - [EN] Query input',value='Mikor van a leadási hataridő?')
+text_area_input_query = st.text_area('[HU] Beviteli mező - [EN] Query input',value='Mik a részfeladatok?')
 
 if text_area_input_query:
     query_embedding = calculateEmbeddings([text_area_input_query],tokenizer,model)
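
For reference, a minimal sketch (not part of this commit) of how the alternative hunbert_embedded.pt file named in the new comments could be produced from the alternative SZTAKI-HLT/hubert-base-cc checkpoint. The masked mean pooling shown here is an assumption; the app's own calculateEmbeddings may pool differently.

import torch
from transformers import AutoTokenizer, AutoModel

# Alternative Hungarian checkpoint named in the diff comment
checkpoint = 'SZTAKI-HLT/hubert-base-cc'
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModel.from_pretrained(checkpoint)

def embed(sentences):
    # Pad/truncate so the batch forms a rectangular tensor
    encoded = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        output = model(**encoded)
    # Mean-pool token embeddings, ignoring padding positions (assumed pooling strategy)
    mask = encoded['attention_mask'].unsqueeze(-1).float()
    return (output.last_hidden_state * mask).sum(dim=1) / mask.sum(dim=1)

# In the app the sentences come from joint_text_filtered.md; one sample sentence here
torch.save(embed(['Mik a részfeladatok?']), 'hunbert_embedded.pt')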