MARI-posa committed on
Commit
c3a0db9
1 Parent(s): 00eb8d7

Delete f.py

Browse files
Files changed (1) hide show
  1. f.py +0 -18
f.py DELETED
@@ -1,18 +0,0 @@
# Dependencies (run these in a shell, not inside Python):
#   pip install transformers sentencepiece
#   pip install torch
import torch
from transformers import AutoTokenizer, AutoModel

# Load the tokenizer and the encoder from the same checkpoint name so that
# the token ids produced by one match the vocabulary expected by the other.
tokenizer = AutoTokenizer.from_pretrained("cointegrated/rubert-tiny2")
model = AutoModel.from_pretrained("cointegrated/rubert-tiny2")
# model.cuda()  # uncomment it if you have a GPU

def embed_bert_cls(text, model, tokenizer):
    """Return a unit-length embedding of ``text`` taken from token position 0.

    The text is tokenized, passed through ``model`` without gradient
    tracking, and the hidden state at sequence position 0 is L2-normalized.
    (Position 0 is presumably the ``[CLS]`` token, as the function name
    suggests -- this depends on the tokenizer that is passed in.)

    Args:
        text: Input passed straight to ``tokenizer``. If the tokenizer
            produces a batch with several rows, only the embedding of the
            first row is returned.
        model: Callable encoder exposing a ``device`` attribute and returning
            an object with a ``last_hidden_state`` tensor indexed as
            ``[batch, position, feature]``.
        tokenizer: Callable accepting ``padding``, ``truncation`` and
            ``return_tensors`` keyword arguments and returning a mapping of
            tensors.

    Returns:
        A 1-D NumPy array holding the normalized embedding of the first
        batch row.
    """
    encoded = tokenizer(text, padding=True, truncation=True, return_tensors='pt')
    # Only the forward pass needs to run without autograd bookkeeping.
    with torch.no_grad():
        # Move every input tensor to wherever the model's weights live.
        model_output = model(**{k: v.to(model.device) for k, v in encoded.items()})
    # Keep the hidden state at sequence position 0 for every batch row.
    embeddings = model_output.last_hidden_state[:, 0, :]
    # Scale each row to unit L2 norm (normalize defaults: p=2, dim=1).
    embeddings = torch.nn.functional.normalize(embeddings)
    # Row 0 only: the function returns a single vector, not a batch.
    return embeddings[0].cpu().numpy()

# Smoke test: embed a short phrase and show the shape of the resulting vector.
print(embed_bert_cls('привет мир', model, tokenizer).shape)
# Expected output: (312,)