# Spaces: Runtime error (page-status text captured together with the source; not part of the program)
import ast
import json
import re
from datetime import datetime
from pathlib import Path
from uuid import uuid4

import demoji
import gradio as gr
from huggingface_hub import CommitScheduler
from transformers import pipeline
# Based on https://huggingface.co/spaces/Wauplin/space_to_dataset_saver/blob/main/app_json.py
# Data is saved at https://huggingface.co/datasets/MR17u/tweeteval-irony-mcc/tree/main
# Local folder that collects the JSON-lines records written by save_json().
JSON_DATASET_DIR = Path("json_dataset")
JSON_DATASET_DIR.mkdir(exist_ok=True, parents=True)

# A fresh UUID in the file name gives every run of this module its own file.
JSON_DATASET_PATH = JSON_DATASET_DIR.joinpath(f"data-{uuid4()}.json")

# Hub identifier of the classification model loaded below.
CLS_MODEL_NAME = "PierreEpron/tweeteval-irony-mcc"

# Scheduler bound to the local folder and the "data" path of the dataset repo;
# its lock is taken by save_json() around every write.
scheduler = CommitScheduler(
    path_in_repo="data",
    folder_path=JSON_DATASET_DIR,
    repo_type="dataset",
    repo_id="tweeteval-irony-mcc",
)

# Inference pipeline: model weights from CLS_MODEL_NAME, tokenizer taken from
# a separate checkpoint.
classifier = pipeline(
    model=CLS_MODEL_NAME,
    tokenizer='cardiffnlp/twitter-roberta-large-2022-154m',
)
def clean_brackets(text):
    """Return *text* with curly braces swapped for round parentheses.

    ``{`` becomes ``(`` and ``}`` becomes ``)``; every other character is
    left untouched.
    """
    brace_to_paren = str.maketrans('{}', '()')
    return text.translate(brace_to_paren)
def clean_emojis(text, type: str = ''):
    """Handle the emojis in *text* according to the mode given by *type*.

    * ``'keep'`` -- return *text* unchanged.
    * ``'rem'``  -- return ``demoji.replace(text, '')``, i.e. emojis are
      replaced by the empty string.
    * anything else (including the default ``''``) -- return
      ``demoji.replace_with_desc(text, type)``; the mode string itself is
      forwarded as the second argument of that call.
    """
    if type == 'keep':
        return text
    if type == 'rem':
        return demoji.replace(text, '')
    return demoji.replace_with_desc(text, type)
def clean_hashtags(text, hashtags=('#irony', '#sarcasm', '#not')):
    """Strip the given hashtags from *text* and collapse runs of spaces.

    Args:
        text: The string to clean.
        hashtags: Iterable of patterns to remove. Each entry is handed to
            ``re.sub`` as a regular expression and matched case-insensitively.
            The default is an immutable tuple so the default value can never
            be shared and mutated between calls.

    Returns:
        *text* with every match removed and each run of one or more spaces
        reduced to a single space. Leading/trailing spaces are not stripped.
    """
    # NOTE(review): patterns match as plain substrings, so '#not' also removes
    # the first four characters of '#nothing'. Left as is on purpose; confirm
    # whether whole-hashtag matching is wanted before changing it.
    for hashtag in hashtags:
        text = re.sub(hashtag, '', text, flags=re.I)
    return re.sub(r' +', r' ', text)
def clean_text(text):
    """Run the full cleaning chain on *text* and return the result.

    Steps, in order: clean_brackets, clean_hashtags (default hashtags),
    clean_emojis (default mode), collapse every run of two or more spaces
    into one, then strip leading and trailing whitespace.
    """
    cleaned = clean_brackets(text)
    cleaned = clean_hashtags(cleaned)
    cleaned = clean_emojis(cleaned)
    cleaned = re.sub(' {2,}', ' ', cleaned)
    return cleaned.strip()
def save_json(entry: str, result) -> None:
    """Append one classification record to the local JSON-lines file.

    Args:
        entry: The text the user submitted.
        result: String form of the classifier output; the first element of
            the parsed sequence must be a mapping with ``'label'`` and
            ``'score'`` keys. Presumably this is the Python repr shown in the
            "Classification" textbox -- confirm against the UI wiring.

    Raises:
        ValueError: If *result* can be parsed neither as a Python literal nor
            (after the legacy quote swap) as JSON; ``json.JSONDecodeError`` is
            a ``ValueError`` subclass.
        IndexError, KeyError: If the parsed value is empty or lacks the
            expected keys.
    """
    try:
        # Parse the repr with the Python literal parser (`ast` is imported at
        # the top of the file). Swapping quote characters and feeding the text
        # to json.loads breaks as soon as a value contains an apostrophe or a
        # double quote.
        predictions = ast.literal_eval(result)
    except (ValueError, SyntaxError):
        # Legacy path: keeps accepting everything the previous quote-swapping
        # implementation accepted (e.g. genuine JSON using true/false/null).
        predictions = json.loads(result.replace("'", '"'))

    top = predictions[0]
    record = {
        "entry": entry,
        "label": top['label'],
        "score": top['score'],
        "datetime": datetime.now().isoformat(),
    }

    # The record is fully built before the lock is taken and the file opened,
    # so a parse failure neither creates an empty file nor holds the lock.
    with scheduler.lock:
        with JSON_DATASET_PATH.open("a") as f:
            json.dump(record, f)
            f.write("\n")
def classif(text: str):
    """Clean *text* with clean_text() and return the classifier's output for it."""
    prepared = clean_text(text)
    return classifier(prepared)
# UI definition: an input box and a classification box side by side, plus a
# submit button.
with gr.Blocks() as demo:
    with gr.Row():
        entry = gr.Textbox(label="Input")
        result = gr.Textbox(label="Classification")

    input_btn = gr.Button("Submit")

    # Clicking the button classifies the input; save_json is chained through
    # `.success`, so it is registered as a follow-up of the classification step
    # and receives both the raw input and the displayed classification.
    input_btn.click(
        fn=classif,
        inputs=entry,
        outputs=result,
    ).success(
        fn=save_json,
        inputs=[entry, result],
        outputs=None,
    )

demo.launch()