Spaces:

Luxoove
/

ArxivClassifier

Sleeping

ubuntu committed on Apr 17, 2023

Commit

3091067

•

1 Parent(s): bfa4bdd

MVP

Files changed (6) hide show

app.py CHANGED Viewed

@@ -1,7 +1,18 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-iface.launch()

 import gradio as gr
+from load_model import load_model, predict_probs
+from constants import title, description, examples
+model = load_model()
+demo = gr.Interface(
+    fn=lambda text: predict_probs(model, text),
+    inputs=gr.Textbox(label='News article title and description'),
+    outputs=gr.Label(num_top_classes=4),
+    examples=examples,
+    allow_flagging='never',
+    title=title,
+    description=description
+)
+demo.launch()

constants.py ADDED Viewed

+title = 'Demo of News Classifier'
+description='This is the demo of News Classifier. You can submit your news title and description and NN will classify it into 4 classes: World, Sports, Business and Sci/Tech'
+examples = [
+    'Yandex School of Data Analysis is cool!',
+    "Five Killed in Al Qaeda Jailbreak in Kabul KABUL (Reuters) - Three Afghan prison guards and two prisoners were killed in a jail break attempt by al Qaeda inmates Friday and a shoot-out was going on between police and another two, the chief of Kabul's Pul-i-Charki prison told Reuters.",
+    'Olympic history for India, UAE An Indian army major shot his way to his country #39;s first ever individual Olympic silver medal on Tuesday, while in the same event an member of Dubai #39;s ruling family became the first ever medallist from the United Arab Emirates.',
+    "Fears for T N pension after talks Unions representing workers at Turner Newall say they are 'disappointed' after talks with stricken parent firm Federal Mogul.'"
+    'Monster Mashes Attract Masses Kaiju Big Battel -- a multimedia event in which costumed combatants spew toxic ooze on audience members -- is growing in popularity. There are already dedicated websites and a DVD series. Coming next: a book and TV pilot. By Xeni Jardin.',
+    '<script> alert("I love ML"); </script>',
+]
+__all__ = ['title', 'description', 'examples']

load_model.py ADDED Viewed

+import transformers
+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased")
+def predict_probs(model, text,
+                  labels=['World', 'Sports', 'Business', 'Sci/Tech']):
+    with torch.no_grad():
+        tokens = tokenizer(text, padding="max_length", truncation=True, return_tensors='pt').to(device)
+        logits = model(**tokens).logits
+        probs = torch.nn.functional.softmax(logits)[0]
+    return {labels[i]: float(probs[i]) for i in range(min(len(probs), len(labels)))}
+def load_model(labels_count=4):
+    model = AutoModelForSequenceClassification.from_pretrained("pretrained_acc935/", num_labels=labels_count).to(device)
+    return model
+__all__ = ['predict_probs', 'load_model']

pretrained_acc935/config.json ADDED Viewed

+{
+  "_name_or_path": "/content/drive/MyDrive/checkpoint-2000",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2,
+    "LABEL_3": 3
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.28.1",
+  "vocab_size": 28996
+}

pretrained_acc935/pytorch_model.bin ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:176060d9274dd8947e999474862dc04e8eae26ce55c465d56c73b1b0f23d41d3
+size 263173805

pretrained_acc935/training_args.bin ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:197660446f201b2c7ee748857bb121f05f9102745742686f5f1a77d71930bb92
+size 3579