Duplicate from omkarb09/gpt2_sentiment
Browse files
Co-authored-by: Omkar Bhatkande <[email protected]>
- .gitattributes +35 -0
- README.md +14 -0
- app.py +72 -0
- requirements.txt +3 -0
- saved_weights/GPT2_sentiment.data-00000-of-00001 +3 -0
- saved_weights/GPT2_sentiment.index +0 -0
- saved_weights/checkpoint +2 -0
.gitattributes
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
30 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
31 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
32 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
+
saved_weights/GPT2_sentiment.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Gpt2 Sentiment
|
3 |
+
emoji: 💻
|
4 |
+
colorFrom: indigo
|
5 |
+
colorTo: yellow
|
6 |
+
sdk: streamlit
|
7 |
+
sdk_version: 1.17.0
|
8 |
+
app_file: app.py
|
9 |
+
pinned: false
|
10 |
+
license: mit
|
11 |
+
duplicated_from: omkarb09/gpt2_sentiment
|
12 |
+
---
|
13 |
+
|
14 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Streamlit app: binary sentiment classification of tweets.

Uses a frozen pre-trained GPT-2 backbone (TFGPT2Model) with a small dense
classification head on top; fine-tuned head weights are loaded from
./saved_weights/GPT2_sentiment.
"""

import streamlit as st
import math  # NOTE(review): unused in this file — kept to avoid breaking anything outside view
import tensorflow as tf
from transformers import GPT2Tokenizer, TFGPT2Model

st.title("GPT2 Sentiment Analysis")
st.write('Model detects if a specific tweet has positive or negative sentiment')
tweet = st.text_input("Enter your tweet", '')

PAD_TOKEN = "<|pad|>"
EOS_TOKEN = "<|endoftext|>"
MAX_LENGTH = 20

# Download and initialize the pre-trained tokenizer, adding a dedicated pad
# token (GPT-2 has none by default).  `is_split_into_words` was dropped: it is
# a tokenizer __call__ argument, not a from_pretrained argument, and had no
# effect where it was originally passed.
tokenizer = GPT2Tokenizer.from_pretrained(
    "gpt2",
    pad_token=PAD_TOKEN,
    eos_token=EOS_TOKEN,
    max_length=MAX_LENGTH,
)

model = TFGPT2Model.from_pretrained(
    "gpt2",
    use_cache=False,
    pad_token_id=tokenizer.pad_token_id,
    eos_token_id=tokenizer.eos_token_id,
)
# Embedding table must grow to cover the added pad token.
model.resize_token_embeddings(len(tokenizer))

# Inference-only app: freeze the backbone and leave it in inference mode.
# (The original set `model.training = True`, which enables dropout inside
# GPT-2 and makes every prediction stochastic.)
for layer in model.layers:
    layer.trainable = False

# Classification head: mean-pool the final hidden states over the sequence,
# then a small MLP ending in a 2-way softmax (index 0 = negative, 1 = positive
# per the display code below).
token_ids = tf.keras.layers.Input(shape=(None,), dtype='int32')  # renamed: don't shadow builtin `input`
attn_mask = tf.keras.layers.Input(shape=(None,), dtype='int32')
backbone_out = model(token_ids, attention_mask=attn_mask)
pooled = tf.reduce_mean(backbone_out.last_hidden_state, axis=1)
hidden = tf.keras.layers.Dense(16, activation='relu')(pooled)
hidden = tf.keras.layers.Dropout(0.3)(hidden)
output = tf.keras.layers.Dense(2, activation='softmax')(hidden)

clf = tf.keras.Model([token_ids, attn_mask], output)
clf.load_weights('./saved_weights/GPT2_sentiment')

# Only run inference once the user has typed something (the original scored
# the empty default string on every page load).
if tweet.strip():
    # Tokenize ONCE and reuse the encoding for both ids and mask — the
    # original called the tokenizer twice per text.  `padding='max_length'`
    # replaces the deprecated `pad_to_max_length=True`.
    enc = tokenizer(
        str(tweet) + EOS_TOKEN,
        return_tensors='tf',
        max_length=MAX_LENGTH,
        truncation=True,
        padding='max_length',
        add_special_tokens=True,
    )
    pred = clf.predict([enc['input_ids'], enc['attention_mask']])

    positive = round(float(pred[0][1]), 4)
    negative = round(float(pred[0][0]), 4)

    st.write(f"Positive Sentiment Prediction: {positive}")
    st.write(f"Negative Sentiment Prediction: {negative}")

st.header('Below samples are outside of train or test data')
st.header('Sample Positive Sentiment Tweets')
st.write("watchin Espn's First Take! my favorite mornin show! lol Skip is great tv! fyi Im a Witness!")
st.write("I'm eating cheezits...with TWO flavors! sharp cheddar & parmesan. :-D")
st.write("Just drunk a coffe,but I'm still sleeping lol...now drink a fresh lemonade and eat some marshmallows mmm...then study guitar!")
st.write("On way home blasting mcfly in the back of the car in the sun good times ")
st.write("@AshenDestiny Just had a look at ur updates..quite thoughtful ones..")

st.header('Sample Negative Sentiment Tweets')
st.write("Man, I so desperately do NOT want to be doing this freelance work. Unfortunately, it looks like I'll be doing it the rest of the weekend.")
st.write("Is watching ripley's believe it or not. Totally bored.")
st.write("not been able to tweet today at my dads and my sister had taken over the laptop, i was going to use my phone but it took all my credit :O")
st.write("Ughh I hate being broke does anyone know of any jobs??")
st.write("Just realized I will miss th destroy build destroy premiere tonight. I have failed @AndrewWK.")
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
streamlit==1.19.0
|
2 |
+
tensorflow==2.10.1
|
3 |
+
transformers==4.26.1
|
saved_weights/GPT2_sentiment.data-00000-of-00001
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a00fcb28f85cf50e52d97dcf054bf6a80323e60145f11e9735859bd50a7737a6
|
3 |
+
size 806725585
|
saved_weights/GPT2_sentiment.index
ADDED
Binary file (11.4 kB). View file
|
|
saved_weights/checkpoint
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
model_checkpoint_path: "GPT2_sentiment"
|
2 |
+
all_model_checkpoint_paths: "GPT2_sentiment"
|