Spaces:
Runtime error
Runtime error
File size: 2,189 Bytes
2950aec eb06a8d 2950aec eb06a8d 2950aec 27ce743 2950aec 27ce743 2950aec |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
import gradio as gr
import json
from datetime import datetime
import demoji
from huggingface_hub import CommitScheduler
from pathlib import Path
import re
from transformers import pipeline
from uuid import uuid4
# Based on https://huggingface.co/spaces/Wauplin/space_to_dataset_saver/blob/main/app_json.py
# Data is saved at https://huggingface.co/datasets/MR17u/tweeteval-irony-mcc/tree/main
# Local folder that collects the prediction logs written by save_json.
JSON_DATASET_DIR = Path("json_dataset")
JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)
# uuid4() is evaluated once at import, so every start of the app gets its own log file.
JSON_DATASET_PATH = JSON_DATASET_DIR / f"data-{uuid4()}.json"
# Model identifier handed to the transformers pipeline below.
CLS_MODEL_NAME = "PierreEpron/tweeteval-irony-mcc"
# Presumably pushes JSON_DATASET_DIR to the "data" folder of the dataset repo in the
# background; its lock is taken by save_json around every write — confirm against the
# huggingface_hub CommitScheduler documentation.
# NOTE(review): repo_id carries no namespace; presumably it resolves to the authenticated
# user's account — confirm it matches the intended dataset repository.
scheduler = CommitScheduler(
repo_id="tweeteval-irony-mcc",
repo_type="dataset",
folder_path=JSON_DATASET_DIR,
path_in_repo="data",
)
# No task is passed explicitly; presumably the pipeline infers it from the model — TODO confirm.
# The tokenizer comes from a different checkpoint than the model itself.
classifier = pipeline(model = CLS_MODEL_NAME, tokenizer = 'cardiffnlp/twitter-roberta-large-2022-154m')
def clean_brackets(text):
    """Return *text* with every '{' turned into '(' and every '}' into ')'.

    All other characters are left untouched.
    """
    brace_to_paren = str.maketrans({'{': '(', '}': ')'})
    return text.translate(brace_to_paren)
def clean_emojis(text, type:str = ''):
    """Handle the emojis in *text* according to the mode given by *type*.

    Modes:
        'keep' -- return *text* unchanged.
        'rem'  -- return ``demoji.replace(text, '')``.
        other  -- return ``demoji.replace_with_desc(text, type)``; the value of
                  *type* (including the default ``''``) is forwarded as the
                  second argument.
    """
    # Nothing to do when emojis are to be kept as they are.
    if type == 'keep':
        return text
    if type == 'rem':
        return demoji.replace(text, '')
    return demoji.replace_with_desc(text, type)
def clean_hashtags(text, hashtags=('#irony', '#sarcasm', '#not')):
    """Remove the given hashtags from *text* and collapse runs of spaces.

    Matching is case-insensitive and literal: each hashtag is escaped before
    being used in a regular expression, and it is only removed when it is not
    followed by another word character, so ``#not`` is stripped while
    ``#nothing`` is left intact.

    Args:
        text: The string to clean.
        hashtags: Iterable of hashtag strings to remove.

    Returns:
        *text* without the listed hashtags, with every run of spaces reduced
        to a single space. Leading/trailing spaces are not stripped.
    """
    for hashtag in hashtags:
        # Escape so regex metacharacters in a tag are matched literally; the
        # negative lookahead keeps a tag from eating the prefix of a longer one.
        pattern = re.escape(hashtag) + r'(?!\w)'
        text = re.sub(pattern, '', text, flags=re.I)
    return re.sub(r' +', ' ', text)
def clean_text(text):
    """Run the full cleaning chain on *text*.

    Steps, in order: clean_brackets, clean_hashtags, clean_emojis (with its
    default mode), then any run of two or more spaces is reduced to one and
    surrounding whitespace is stripped.
    """
    without_braces = clean_brackets(text)
    without_tags = clean_hashtags(without_braces)
    emojis_handled = clean_emojis(without_tags)
    single_spaced = re.sub(' {2,}', ' ', emojis_handled)
    return single_spaced.strip()
def save_json(entry: str, result) -> None:
    """Append one prediction record to the local JSON-lines log file.

    Args:
        entry: The text the user submitted.
        result: The classifier output, normally the text shown in the
            "Classification" textbox, i.e. the string form of a list whose
            first item is a dict with 'label' and 'score' keys. An already
            parsed list is accepted as well.

    Raises:
        ValueError: If *result* is a string that is neither a valid Python
            literal nor valid JSON (``json.JSONDecodeError`` is a subclass).
        KeyError, IndexError, TypeError: If the parsed value does not have
            the expected ``[{'label': ..., 'score': ...}]`` shape.
    """
    # Local import: `ast` is only needed here and is not imported at file level.
    import ast

    if isinstance(result, str):
        try:
            # The textbox holds a Python repr (single-quoted), so parse it as a
            # Python literal instead of swapping quote characters, which would
            # break on any apostrophe inside a value.
            predictions = ast.literal_eval(result)
        except (ValueError, SyntaxError):
            # Fall back to real JSON (e.g. true/false/null literals).
            predictions = json.loads(result)
    else:
        predictions = result
    top = predictions[0]
    record = {
        "entry": entry,
        "label": top['label'],
        "score": top['score'],
        "datetime": datetime.now().isoformat(),
    }
    # Parse first, write second: a malformed result never touches the file,
    # and the scheduler lock is held only for the actual write.
    with scheduler.lock:
        with JSON_DATASET_PATH.open("a") as f:
            json.dump(record, f)
            f.write("\n")
def classif(text: str):
    """Clean *text* with clean_text and return what the classifier yields for it."""
    cleaned = clean_text(text)
    return classifier(cleaned)
# Gradio UI: an input box, an output box and a submit button.
with gr.Blocks() as demo:
    with gr.Row():
        entry = gr.Textbox(label="Input")
        result = gr.Textbox(label="Classification")
    # NOTE(review): block nesting was reconstructed; confirm whether the button
    # was meant to sit inside the row with the two textboxes.
    input_btn = gr.Button("Submit")
    # save_json is chained with .success(), so a record is only logged when
    # classif finished without raising.
    input_btn.click(fn=classif, inputs=entry, outputs=result).success(
        fn=save_json,
        inputs=[entry, result],
        outputs=None,
    )
demo.launch()