A model for predicting the category of a news article.

Usage:

import re
from transformers import pipeline
from html import unescape
from unicodedata import normalize

re_multispace = re.compile(r"\s+")  # one or more consecutive whitespace characters


def normalize_text(text):
    """Clean raw article text before it is handed to the classifier.

    Steps, in order:
      1. strip leading/trailing whitespace and collapse every run of
         whitespace (spaces, newlines, tabs, carriage returns, ...) into a
         single space;
      2. decode HTML character references (``&amp;`` -> ``&``);
      3. apply Unicode NFKC normalization.

    Note that character references are decoded *after* whitespace has been
    collapsed, so whitespace that only appears once a reference is decoded
    is not collapsed again. The order is kept as-is so the text fed to the
    model does not change.

    Args:
        text: The raw text, or ``None``.

    Returns:
        The normalized string, or ``None`` when ``text`` is ``None``.
    """
    if text is None:
        return None

    # `\s+` already matches "\n", "\t" and "\r", so one substitution both
    # replaces them with spaces and collapses repeated whitespace.
    text = re_multispace.sub(" ", text.strip())
    text = unescape(text)
    return normalize("NFKC", text)


model = pipeline(
    # Built once at module level and reused by every predict() call.
    task="text-classification",
    model="hynky/Category",
    tokenizer="ufal/robeczech-base",
    # Ask the tokenizer to cut long inputs down to at most 512 tokens.
    truncation=True,
    max_length=512,
    # Presumably returns the five highest-scoring labels per input --
    # confirm against the installed transformers version.
    top_k=5,
)


def predict(article):
    """Classify a news article and return the pipeline's predictions.

    The text is cleaned with ``normalize_text`` and then passed to the
    module-level ``model`` pipeline.

    Args:
        article: Raw article text.

    Returns:
        Whatever ``model`` returns for the normalized text -- presumably the
        top-scoring category labels with their scores; confirm the exact
        shape against the installed ``transformers`` version.
    """
    article = normalize_text(article)
    # Hand the result back to the caller instead of discarding it.
    return model(article)

predict("Dnes v noci bude pršet.")  # example call on a short Czech sentence ("It will rain tonight."); the result is not captured or printed here