Spaces:

daniild71r
/

my_space

Runtime error

App Files Files Community

daniild71r committed on Mar 28, 2022

Commit

b9f69ed

•

1 Parent(s): f37dabc

app created

Browse files

Files changed (6) hide show

app.py +90 -2
cat_mapping.json +42 -0
cat_name_mapping.json +42 -0
final_model/config.json +109 -0
final_model/pytorch_model.bin +3 -0
requirements.txt +2 -0

app.py CHANGED Viewed

@@ -1,4 +1,92 @@
 import streamlit as st
-st.markdown('### You are gay, man.')
-st.markdown('<img src=\'https://sun9-76.userapi.com/impg/Wsd9lR42hY-8Hl_u5sAuCuxAJZ_OoXZMz8XbGA/jGaZN7X3UrU.jpg?size=1368x1080&quality=96&sign=c64d55f33cb98694f6514df99f81c732&type=album\' width=\'30%\'>', unsafe_allow_html=True)

+import json
 import streamlit as st
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
+from tokenizers import Tokenizer
+def fake_hash(x):
+    """Return the same hash value (0) for any argument.
+
+    Supplied to ``st.cache`` through ``hash_funcs`` for ``Tokenizer`` objects.
+    """
+    constant_hash = 0
+    return constant_hash
+@st.cache(hash_funcs={Tokenizer: fake_hash}, suppress_st_warning=True, allow_output_mutation=True)
+def initialize():
+    """Build the text-classification pipeline and load the category mappings.
+
+    The tokenizer is loaded by name ('distilbert-base-cased') and the model
+    from the local ``./final_model`` directory. The function is wrapped in
+    ``st.cache`` so that Streamlit can reuse the returned objects.
+
+    Returns:
+        A ``(the_pipeline, cat_mapping, cat_name_mapping)`` tuple, where
+        ``cat_mapping`` is the parsed content of ``cat_mapping.json`` and
+        ``cat_name_mapping`` is the parsed content of ``cat_name_mapping.json``.
+    """
+    model_name = 'distilbert-base-cased'
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForSequenceClassification.from_pretrained('./final_model')
+    the_pipeline = TextClassificationPipeline(
+        model=model,
+        tokenizer=tokenizer,
+        # Ask for a score per label rather than only the best one.
+        return_all_scores=True,
+        # NOTE(review): -1 is the transformers convention for CPU - confirm.
+        device=-1
+    )
+    # Context managers make sure both file handles are closed after parsing.
+    with open('cat_mapping.json', 'r', encoding='utf-8') as cat_mapping_file:
+        cat_mapping = json.load(cat_mapping_file)
+    with open('cat_name_mapping.json', 'r', encoding='utf-8') as cat_name_mapping_file:
+        cat_name_mapping = json.load(cat_name_mapping_file)
+    return the_pipeline, cat_mapping, cat_name_mapping
+def get_top(the_pipeline, cat_mapping, title, summary, thresh=0.95):
+    """Return the top-scoring categories whose cumulative score reaches ``thresh``.
+
+    Args:
+        the_pipeline: Callable that takes the query string and returns a list
+            whose first element is a list of ``{'label': ..., 'score': ...}``
+            dicts, with labels of the form ``'LABEL_<n>'``.
+        cat_mapping: Dict mapping the ``<n>`` part of a label (as a string)
+            to the category code.
+        title: Article title.
+        summary: Article summary.
+        thresh: Cumulative score at which to stop collecting categories.
+
+    Returns:
+        A list of ``{'label': <category code>, 'score': <float>}`` dicts in
+        descending score order, or a ``str`` message when the input is
+        missing, too long, or classification failed.
+    """
+    # Whitespace-only fields carry no information either.
+    if not title.strip() or not summary.strip():
+        return 'Not enough data to compute.'
+    question = title + ' || ' + summary
+    if len(question) > 4000:
+        return 'Your input is suspiciously long, try something shorter.'
+    label_prefix = 'LABEL_'
+    try:
+        ranked = sorted(
+            the_pipeline(question)[0],
+            key=lambda item: item['score'],
+            reverse=True
+        )
+        scores = []
+        current_sum = 0
+        for item in ranked:
+            category = cat_mapping[item['label'][len(label_prefix):]]
+            # Build a new dict so the pipeline's own output is left untouched.
+            scores.append({**item, 'label': category})
+            current_sum += item['score']
+            if current_sum >= thresh:
+                break
+        return scores
+    except Exception:
+        # Exception (not BaseException) so KeyboardInterrupt/SystemExit still propagate.
+        return 'Something unexpected happened, I\'m sorry. Try again.'
+# Page header: heading, sub-heading and an image.
+st.markdown('## Welcome to the CS article classification page!')
+st.markdown('### What\'s below is pretty much self-explanatory.')
+img_source = 'https://sun9-55.userapi.com/impg/azBQ_VTvbgEVonbL9hhFEpwyKAhjAtpVl4H2GQ/I4Vq0H6c3UM.jpg'
+img_params = 'size=1200x900&quality=96&sign=f42419d9cdbf6fe55016fb002e4e85ae&type=album'
+# The image is emitted as raw HTML, which is why unsafe_allow_html is set.
+st.markdown(
+    f'<img src="{img_source}?{img_params}" width="70%"><br>',
+    unsafe_allow_html=True
+)
+# User inputs: the article title and its summary.
+title = st.text_input(
+    'Please, insert the title of the CS article you are interested in.',
+    placeholder='The title (e. g. Incorporating alien technologies in CV)'
+)
+summary = st.text_area(
+    'Now, please, insert the summary of the CS article you are interested in.',
+    height=250, placeholder='The summary itself.'
+)
+the_pipeline, cat_mapping, cat_name_mapping = initialize()
+scores = get_top(the_pipeline, cat_mapping, title, summary)
+# get_top returns a str for informational/error messages and a list of
+# score dicts otherwise.
+if isinstance(scores, str):
+    st.markdown(scores)
+else:
+    for score in scores:
+        percent = round(score['score'] * 100, 2)
+        category_short = score['label']
+        category_full = cat_name_mapping[category_short]
+        # '\\%' produces the same backslash-percent text as before without
+        # relying on the invalid '\%' escape sequence.
+        st.markdown(
+            f'I\'m {percent}\\% certain that the article is from the '
+            f'{category_short} category, which is "{category_full}"'
+        )

cat_mapping.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+    "0": "cs.AI",
+    "1": "cs.AR",
+    "2": "cs.CC",
+    "3": "cs.CE",
+    "4": "cs.CG",
+    "5": "cs.CL",
+    "6": "cs.CR",
+    "7": "cs.CV",
+    "8": "cs.CY",
+    "9": "cs.DB",
+    "10": "cs.DC",
+    "11": "cs.DL",
+    "12": "cs.DM",
+    "13": "cs.DS",
+    "14": "cs.ET",
+    "15": "cs.FL",
+    "16": "cs.GL",
+    "17": "cs.GR",
+    "18": "cs.GT",
+    "19": "cs.HC",
+    "20": "cs.IR",
+    "21": "cs.IT",
+    "22": "cs.LG",
+    "23": "cs.LO",
+    "24": "cs.MA",
+    "25": "cs.MM",
+    "26": "cs.MS",
+    "27": "cs.NA",
+    "28": "cs.NE",
+    "29": "cs.NI",
+    "30": "cs.OH",
+    "31": "cs.OS",
+    "32": "cs.PF",
+    "33": "cs.PL",
+    "34": "cs.RO",
+    "35": "cs.SC",
+    "36": "cs.SD",
+    "37": "cs.SE",
+    "38": "cs.SI",
+    "39": "cs.SY"
+}

cat_name_mapping.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+    "cs.AI": "Artificial Intelligence",
+    "cs.AR": "Hardware Architecture",
+    "cs.CC": "Computational Complexity",
+    "cs.CE": "Computational Engineering, Finance, and Science",
+    "cs.CG": "Computational Geometry",
+    "cs.CL": "Computation and Language",
+    "cs.CR": "Cryptography and Security",
+    "cs.CV": "Computer Vision and Pattern Recognition",
+    "cs.CY": "Computers and Society",
+    "cs.DB": "Databases",
+    "cs.DC": "Distributed, Parallel, and Cluster Computing",
+    "cs.DL": "Digital Libraries",
+    "cs.DM": "Discrete Mathematics",
+    "cs.DS": "Data Structures and Algorithms",
+    "cs.ET": "Emerging Technologies",
+    "cs.FL": "Formal Languages and Automata Theory",
+    "cs.GL": "General Literature",
+    "cs.GR": "Graphics",
+    "cs.GT": "Computer Science and Game Theory",
+    "cs.HC": "Human-Computer Interaction",
+    "cs.IR": "Information Retrieval",
+    "cs.IT": "Information Theory",
+    "cs.LG": "Machine Learning",
+    "cs.LO": "Logic in Computer Science",
+    "cs.MA": "Multiagent Systems",
+    "cs.MM": "Multimedia",
+    "cs.MS": "Mathematical Software",
+    "cs.NA": "Numerical Analysis",
+    "cs.NE": "Neural and Evolutionary Computing",
+    "cs.NI": "Networking and Internet Architecture",
+    "cs.OH": "Other Computer Science",
+    "cs.OS": "Operating Systems",
+    "cs.PF": "Performance",
+    "cs.PL": "Programming Languages",
+    "cs.RO": "Robotics",
+    "cs.SC": "Symbolic Computation",
+    "cs.SD": "Sound",
+    "cs.SE": "Software Engineering",
+    "cs.SI": "Social and Information Networks",
+    "cs.SY": "Systems and Control"
+}

final_model/config.json ADDED Viewed

	@@ -0,0 +1,109 @@

+{
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3",
+    "4": "LABEL_4",
+    "5": "LABEL_5",
+    "6": "LABEL_6",
+    "7": "LABEL_7",
+    "8": "LABEL_8",
+    "9": "LABEL_9",
+    "10": "LABEL_10",
+    "11": "LABEL_11",
+    "12": "LABEL_12",
+    "13": "LABEL_13",
+    "14": "LABEL_14",
+    "15": "LABEL_15",
+    "16": "LABEL_16",
+    "17": "LABEL_17",
+    "18": "LABEL_18",
+    "19": "LABEL_19",
+    "20": "LABEL_20",
+    "21": "LABEL_21",
+    "22": "LABEL_22",
+    "23": "LABEL_23",
+    "24": "LABEL_24",
+    "25": "LABEL_25",
+    "26": "LABEL_26",
+    "27": "LABEL_27",
+    "28": "LABEL_28",
+    "29": "LABEL_29",
+    "30": "LABEL_30",
+    "31": "LABEL_31",
+    "32": "LABEL_32",
+    "33": "LABEL_33",
+    "34": "LABEL_34",
+    "35": "LABEL_35",
+    "36": "LABEL_36",
+    "37": "LABEL_37",
+    "38": "LABEL_38",
+    "39": "LABEL_39"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_10": 10,
+    "LABEL_11": 11,
+    "LABEL_12": 12,
+    "LABEL_13": 13,
+    "LABEL_14": 14,
+    "LABEL_15": 15,
+    "LABEL_16": 16,
+    "LABEL_17": 17,
+    "LABEL_18": 18,
+    "LABEL_19": 19,
+    "LABEL_2": 2,
+    "LABEL_20": 20,
+    "LABEL_21": 21,
+    "LABEL_22": 22,
+    "LABEL_23": 23,
+    "LABEL_24": 24,
+    "LABEL_25": 25,
+    "LABEL_26": 26,
+    "LABEL_27": 27,
+    "LABEL_28": 28,
+    "LABEL_29": 29,
+    "LABEL_3": 3,
+    "LABEL_30": 30,
+    "LABEL_31": 31,
+    "LABEL_32": 32,
+    "LABEL_33": 33,
+    "LABEL_34": 34,
+    "LABEL_35": 35,
+    "LABEL_36": 36,
+    "LABEL_37": 37,
+    "LABEL_38": 38,
+    "LABEL_39": 39,
+    "LABEL_4": 4,
+    "LABEL_5": 5,
+    "LABEL_6": 6,
+    "LABEL_7": 7,
+    "LABEL_8": 8,
+    "LABEL_9": 9
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.14.0",
+  "vocab_size": 28996
+}

final_model/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ba0c18f88b4a29acdd7ff9db7f997edd994d454382b0eda2c134b2b5a6022cff
+size 263289073

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ torch
2	+ transformers