import pandas as pd
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

model_checkpoint = 'cointegrated/rubert-tiny-toxicity'
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint)

# Move the model to GPU when one is available
if torch.cuda.is_available():
    model.cuda()


def text2toxicity(text, aggregate=True):
    """Calculate toxicity of a text (if aggregate=True) or a vector of toxicity aspects (if aggregate=False)."""
    with torch.no_grad():
        inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True).to(model.device)
        proba = torch.sigmoid(model(**inputs).logits).cpu().numpy()
    if isinstance(text, str):
        proba = proba[0]
    if aggregate:
        # P(toxic or unacceptable) = 1 - P(non-toxic style) * P(meaning is acceptable)
        return 1 - proba.T[0] * (1 - proba.T[-1])
    return proba
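
# A quick sketch of how text2toxicity is meant to be called (the 5-label order is an
# assumption matching the Series index below: non-toxic style, insult, obscenity,
# threat, unacceptable meaning):
#   text2toxicity('текст')                                  -> float, P(toxic or unacceptable)
#   text2toxicity(['текст 1', 'текст 2'], aggregate=False)  -> array of shape (2, 5)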

# Streamlit UI: read a text, score it, and show the per-label probabilities
text = st.text_area('Введите текст', value='Пороть надо таких придурков!')  # 'Enter a text'
proba = text2toxicity(text, aggregate=False)
s = pd.Series(
    # Append P(text is OK) = P(non-toxic style) * P(meaning is acceptable)
    proba.tolist() + [proba[0] * (1 - proba[-1])],
    index=[
        'Стиль НЕтоксичный',          # style is NOT toxic
        'Есть оскорбление',           # contains an insult
        'Есть непотребство',          # contains obscenity
        'Есть угроза',                # contains a threat
        'Смысл текста неприемлемый',  # the meaning of the text is unacceptable
        'Текст - ОК'                  # the text is OK
    ],
    name='Оценка вероятности'         # probability estimate
)
# Streamlit "magic": a bare expression on its own line is rendered in the app
s
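
# Optional sketch: the same scores could also be visualized in the app, e.g. as a
# bar chart (st.bar_chart accepts a pandas Series):
# st.bar_chart(s)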