Spaces:

Sj8287
/

Sentiment_Classification

Runtime error

App Files Files Community

Sj8287 committed on May 6, 2023

Commit

bd7db97

•

0 Parent(s):

Duplicate from Sj8287/Sentiment_Classification

Browse files

Files changed (16) hide show

.gitattributes +36 -0
Dockerfile +29 -0
README.md +13 -0
app/__init__.py +3 -0
app/__pycache__/__init__.cpython-39.pyc +0 -0
app/__pycache__/routes.cpython-39.pyc +0 -0
app/routes.py +99 -0
app/templates/index.html +122 -0
distilbert_base_uncased/special_tokens_map.json +7 -0
distilbert_base_uncased/tokenizer.json +0 -0
distilbert_base_uncased/tokenizer_config.json +14 -0
distilbert_base_uncased/vocab.txt +0 -0
distilbert_model_weights.best.hdf5 +3 -0
requirements.txt +36 -0
run.py +4 -0
train_150k.txt +3 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,36 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+train_150k.txt filter=lfs diff=lfs merge=lfs -text
+distilbert_model_weights.best.hdf5 filter=lfs diff=lfs merge=lfs -text

Dockerfile ADDED Viewed

	@@ -0,0 +1,29 @@

+FROM python:3.9.16
+WORKDIR /code
+# Install dependencies first so this layer is reused until requirements.txt changes
+COPY ./requirements.txt /code/requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+# Set up a new user named "user" with user ID 1000
+RUN useradd -m -u 1000 user
+# Switch to the "user" user
+USER user
+# Set home to the user's home directory
+ENV HOME=/home/user \
+	PATH=/home/user/.local/bin:$PATH
+# Set the working directory to the application directory under the user's home
+WORKDIR $HOME/code
+# Copy the current directory contents into the container at $HOME/code setting the owner to the user.
+# This is the only copy needed: a second un-chowned COPY into ./code would duplicate the whole
+# build context (including the large weights file) under $HOME/code/code.
+COPY --chown=user . $HOME/code
+# run.py starts the Flask server on port 5000
+EXPOSE 5000
+CMD ["python", "/home/user/code/run.py"]

README.md ADDED Viewed

	@@ -0,0 +1,13 @@

+---
+title: Sentiment Classification
+emoji: 🚀
+colorFrom: blue
+colorTo: red
+sdk: docker
+pinned: false
+license: mit
+app_port: 5000
+duplicated_from: Sj8287/Sentiment_Classification
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@

+from flask import Flask,render_template
+# Application object shared by the package; app/routes.py imports it back with `from app import app`.
+app=Flask(__name__)
+# Imported last, after `app` exists, because routes uses `app` to register its views via @app.route.
+from app import routes

app/__pycache__/__init__.cpython-39.pyc ADDED Viewed

Binary file (270 Bytes). View file

app/__pycache__/routes.cpython-39.pyc ADDED Viewed

Binary file (3.72 kB). View file

app/routes.py ADDED Viewed

	@@ -0,0 +1,99 @@

+from flask import render_template,redirect,url_for,flash,request
+from wtforms.validators import ValidationError
+from app import app
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+from keras.layers import Input, Dense, LSTM, GRU, Embedding
+from keras.layers import Activation, Bidirectional, GlobalMaxPool1D, GlobalMaxPool2D, Dropout
+from keras.models import Model
+from keras.preprocessing import text, sequence
+import transformers
+from transformers import AutoTokenizer
+from tokenizers import BertWordPieceTokenizer
+from keras.initializers import Constant
+import numpy as np
+import re
+import tensorflow as tf
+import os
+@app.route('/')
+def home_page():
+    return render_template('index.html')
+# NOTE(review): this tokenizer is not used by any other line shown in this file; inference goes
+# through fast_tokenizer instead. Loading it still runs at import time.
+tokenizer = transformers.AutoTokenizer.from_pretrained("distilbert-base-uncased")
+# WordPiece tokenizer built from the vocabulary file bundled with the repository. The relative
+# path is resolved against the process working directory, not against this module's location.
+fast_tokenizer = BertWordPieceTokenizer('distilbert_base_uncased/vocab.txt', lowercase=True)
+def fast_encode_sentence(text, tokenizer, maxlen=128):
+    tokenizer.enable_truncation(max_length=maxlen)
+    tokenizer.enable_padding(length=maxlen)
+    all_ids = []
+    text_chunk = text
+    encs = tokenizer.encode(text_chunk)
+    all_ids.extend([encs.ids])
+    return np.array(all_ids)
+# Load the pretrained DistilBERT model; only its first weight tensor is read below.
+transformer_layer = transformers.TFDistilBertModel.from_pretrained('distilbert-base-uncased')
+# NOTE(review): embedding_size is not referenced by any other line shown in this file.
+embedding_size = 128
+# Input of 128 values per example, matching the default maxlen=128 of fast_encode_sentence.
+inp = Input(shape=(128, ))
+# presumably weights[0] is DistilBERT's word-embedding matrix (vocab_size x hidden_dim) — TODO confirm
+embedding_matrix=transformer_layer.weights[0].numpy()
+# Frozen (trainable=False) embedding layer initialised from that matrix.
+x = Embedding(embedding_matrix.shape[0], embedding_matrix.shape[1],embeddings_initializer=Constant(embedding_matrix),trainable=False)(inp)
+x = Bidirectional(LSTM(25, return_sequences=True,recurrent_regularizer='L1L2'))(x)
+# Collapse the sequence axis by taking the maximum over time steps.
+x = GlobalMaxPool1D()(x)
+x = Dropout(0.9)(x)
+x = Dense(50, activation='relu',kernel_initializer='he_normal',kernel_regularizer="L1L2")(x)
+x = Dropout(0.9)(x)
+# Single sigmoid unit; predict_on_sentence rounds its output and the /predict view treats 1 as 'good'.
+x = Dense(1, activation='sigmoid')(x)
+model = Model(inputs=[inp], outputs=x)
+# Restore the weights from the checkpoint file (path is relative to the working directory).
+# NOTE(review): the layer stack above presumably has to match the architecture the checkpoint
+# was saved from — do not edit the layers without re-exporting the weights.
+model.load_weights('distilbert_model_weights.best.hdf5')
+def predict_on_sentence(model,text):
+  text=text.lower()
+  pattern = re.compile('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')
+  text = pattern.sub('', text)
+  text = re.sub(r"i'm", "i am", text)
+  text = re.sub(r"he's", "he is", text)
+  text = re.sub(r"she's", "she is", text)
+  text = re.sub(r"that's", "that is", text)
+  text = re.sub(r"what's", "what is", text)
+  text = re.sub(r"where's", "where is", text)
+  text = re.sub(r"\'ll", " will", text)
+  text = re.sub(r"\'ve", " have", text)
+  text = re.sub(r"\'re", " are", text)
+  text = re.sub(r"\'d", " would", text)
+  text = re.sub(r"\'ve", " have", text)
+  text = re.sub(r"won't", "will not", text)
+  text = re.sub(r"don't", "do not", text)
+  text = re.sub(r"did't", "did not", text)
+  text = re.sub(r"can't", "can not", text)
+  text = re.sub(r"it's", "it is", text)
+  text = re.sub(r"couldn't", "could not", text)
+  text = re.sub(r"have't", "have not", text)
+  text=re.sub(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)|^rt|http.+?", "", text)
+  text = re.sub(r"[,.\"!@#$%^&*(){}?/;`~:<>+=-]", "", text)
+  text = re.sub(r'(.)\1{3,}',r'\1', text)
+  final_text=fast_encode_sentence(text,fast_tokenizer)
+  prediction=model.predict(final_text)
+  final_text=tf.squeeze(tf.round(prediction))
+  return final_text
+@app.route('/predict',methods=['POST'])
+def predict():
+    int_features = request.form.get("sentence")
+    int_features=str(int_features)
+    final_result=predict_on_sentence(model,int_features)
+    result='bad'
+    if(final_result==1):
+        result='good'
+    return render_template('index.html', prediction_text='This is a {} comment'.format(result))

app/templates/index.html ADDED Viewed

	@@ -0,0 +1,122 @@

+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <!-- Required meta tags -->
+    <meta charset="utf-8" />
+    <meta
+      name="viewport"
+      content="width=device-width, initial-scale=1, shrink-to-fit=no"
+    />
+    <!-- Bootstrap CSS -->
+    <link
+      rel="stylesheet"
+      href="https://cdn.jsdelivr.net/npm/bootstrap@4.5.3/dist/css/bootstrap.min.css"
+      integrity="sha384-TX8t27EcRE3e/ihU7zmQxVncDAy5uIKz4rEkgIXeMed4M0jlfIDPvg6uqKI2xXr2"
+      crossorigin="anonymous"
+    />
+    <title>Sentiment Classification</title>
+    <!-- Page styles live in <head>; heading fonts are a separate top-level rule -->
+    <style>
+      body {
+        background-color: #212121;
+        color: white;
+      }
+      h1, h2, h3, h4, h5, h6 {
+        font-family: 'Montserrat', sans-serif;
+      }
+    </style>
+  </head>
+  <body>
+    <div class="login text-center">
+      <br><br><br>
+      <h1>Sentiment Classification</h1>
+      <br><br><br>
+      <!-- Main Input For Receiving Query to our ML -->
+      <form action="{{ url_for('predict')}}" method="post">
+        <div class="form-outline w-100">
+          <input
+            type="text"
+            name="sentence"
+            placeholder="Insert your text"
+            required="required"
+          />
+        </div>
+        <br>
+        <button type="submit" class="btn btn-primary btn-lg">
+          Predict
+        </button>
+      </form>
+      <br />
+      <br />
+      <!-- Filled in by the /predict view; renders empty on the landing page -->
+      <div>{{ prediction_text }}</div>
+    </div>
+    <!-- Future Content here -->
+    <!-- Optional JavaScript -->
+    <!-- jQuery first, then Popper.js, then Bootstrap JS -->
+    <script src="https://kit.fontawesome.com/a076d05399.js"></script>
+    <script
+      src="https://code.jquery.com/jquery-3.5.1.slim.min.js"
+      integrity="sha384-DfXdz2htPH0lsSSs5nCTpuj/zy4C+OGpamoFVy38MVBnE+IbbVYUew+OrCXaRkfj"
+      crossorigin="anonymous"
+    ></script>
+    <script
+      src="https://cdn.jsdelivr.net/npm/popper.js@1.16.1/dist/umd/popper.min.js"
+      integrity="sha384-9/reFTGAW83EW2RDu2S0VKaIzap3H66lZH81PoYlFhbGU+6BZp6G7niu735Sk7lN"
+      crossorigin="anonymous"
+    ></script>
+    <script
+      src="https://stackpath.bootstrapcdn.com/bootstrap/4.5.2/js/bootstrap.min.js"
+      integrity="sha384-B4gt1jrGC7Jh4AgTPSdUtOBvfO8shuf57BaghqFfPlYxofvL8/KUEfYiJOMMV+rV"
+      crossorigin="anonymous"
+    ></script>
+  </body>
+</html>

distilbert_base_uncased/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

distilbert_base_uncased/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

distilbert_base_uncased/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "name_or_path": "distilbert-base-uncased",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "special_tokens_map_file": null,
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

distilbert_base_uncased/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

distilbert_model_weights.best.hdf5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2dc6fc767307f67255cb77c5e42722c3465d189dc7ee004e0d6a67fbe1b4e1e7
+size 94436392

requirements.txt ADDED Viewed

	@@ -0,0 +1,36 @@

+charset-normalizer==3.1.0
+click==8.1.3
+Flask==2.2.3
+Flask-WTF==1.1.1
+flatbuffers==23.3.3
+fonttools==4.25.0
+gast==0.4.0
+google-auth==2.16.2
+google-auth-oauthlib==0.4.6
+grpcio==1.51.3
+h5py==3.8.0
+keras==2.10.0
+Keras-Preprocessing==1.1.2
+libclang==15.0.6.1
+Markdown==3.4.1
+matplotlib==3.7.1
+numpy==1.24.2
+oauthlib==3.2.2
+opt-einsum==3.3.0
+packaging==23.0
+pandas==1.5.3
+requests==2.28.2
+requests-oauthlib==1.3.1
+rsa==4.9
+scikit-learn==1.2.2
+scipy==1.10.1
+tensorboard-data-server==0.6.1
+tensorboard-plugin-wit==1.8.1
+tensorflow==2.10.0
+tensorflow-estimator==2.10.0
+tensorflow-io-gcs-filesystem==0.31.0
+termcolor==2.2.0
+tokenizers
+transformers
+typing_extensions==4.5.0
+WTForms==3.0.1

run.py ADDED Viewed

	@@ -0,0 +1,4 @@

+from app import app
+if __name__=='__main__':
+    app.run(debug=True,host='0.0.0.0',port=5000)

train_150k.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:75d03b31023bcccf255ae7228992a1626ac01d29cb3ebb588344b8a0d0ea7ccc
+size 11696248