hynky HF staff committed on
Commit
0271ea4
1 Parent(s): 2ef7153

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +37 -0
README.md ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # A model for predicting the gender of the author of a news article
2
+ ## Usage:
3
+
4
+ ```
5
+ import re
6
+ from transformers import pipeline
7
+ from html import unescape
8
+ from unicodedata import normalize
9
+
10
# Matches any run of whitespace characters (spaces, tabs, newlines, ...).
re_multispace = re.compile(r"\s+")


def normalize_text(text):
    """Return a cleaned-up, single-line version of *text*.

    Steps, in order:
      1. strip leading/trailing whitespace,
      2. collapse every whitespace run into a single space,
      3. decode HTML entities (``&amp;`` -> ``&``),
      4. apply Unicode NFKC normalization.

    Args:
        text: The raw text, or ``None``.

    Returns:
        The normalized string, or ``None`` when *text* is ``None``.
    """
    if text is None:
        return None

    # `\s+` already matches "\n", "\t" and "\r", so a single substitution
    # both replaces those characters and collapses repeated whitespace.
    text = re_multispace.sub(" ", text.strip())
    text = unescape(text)
    return normalize("NFKC", text)
24
+
25
+
26
# Text-classification pipeline built from the "hynky/Gender" checkpoint and
# the "ufal/robeczech-base" tokenizer. Truncation is enabled with
# max_length=512, and top_k=5 requests up to five results per input.
model = pipeline(
    task="text-classification",
    model="hynky/Gender",
    tokenizer="ufal/robeczech-base",
    truncation=True,
    max_length=512,
    top_k=5,
)
31
+
32
+
33
def predict(article):
    """Classify *article* with the module-level ``model`` pipeline.

    The text is first cleaned with ``normalize_text`` and then passed to
    ``model``.

    Args:
        article: Raw article text.

    Returns:
        Whatever ``model`` returns for the normalized text (presumably a
        list of label/score entries -- confirm against the pipeline docs).
    """
    article = normalize_text(article)
    # Hand the result back to the caller; previously it was computed and
    # then dropped, so the function always returned None.
    return model(article)


# Example: classify a short Czech sentence and show the result.
print(predict("Dnes v noci bude pršet."))