add senticgcn & senticgcn-bert; readme updated
Browse files- README.md +245 -0
- senticgcn/config.json +14 -0
- senticgcn/embedding/embed_config.json +9 -0
- senticgcn/embedding/embed_pytorch_model.bin +3 -0
- senticgcn/pytorch_model.bin +3 -0
- senticgcn/senticnet.pickle +3 -0
- senticgcn/tokenizer/special_tokens_map.json +1 -0
- senticgcn/tokenizer/tokenizer_config.json +1 -0
- senticgcn/tokenizer/vocab.pkl +3 -0
- senticgcn_bert/config.json +15 -0
- senticgcn_bert/pytorch_model.bin +3 -0
- senticgcn_bert/senticnet.pickle +3 -0
README.md
CHANGED
@@ -1,3 +1,248 @@
|
|
1 |
---
|
|
|
2 |
license: mit
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
language: en
|
3 |
license: mit
|
4 |
+
datasets:
|
5 |
+
- acl-14-short-data
|
6 |
+
- semeval14
|
7 |
+
- semeval15
|
8 |
+
- semeval16
|
9 |
+
tags:
|
10 |
+
- text-classification
|
11 |
+
inference: false
|
12 |
+
model-index:
|
13 |
+
- name: SenticGCN
|
14 |
+
results:
|
15 |
+
- task:
|
16 |
+
type: text-classification
|
17 |
+
name: Sentic-GCN
|
18 |
+
dataset:
|
19 |
+
name: SemEval14-Laptop (Sentic-GCN)
|
20 |
+
type: semeval14
|
21 |
+
metrics:
|
22 |
+
- name: Accuracy
|
23 |
+
type: accuracy
|
24 |
+
value: 0.9436
|
25 |
+
- name: F1
|
26 |
+
type: f1
|
27 |
+
value: 0.9443
|
28 |
+
- task:
|
29 |
+
type: text-classification
|
30 |
+
name: Sentic-GCN
|
31 |
+
dataset:
|
32 |
+
name: SemEval14-Restaurant (Sentic-GCN)
|
33 |
+
type: semeval14
|
34 |
+
metrics:
|
35 |
+
- name: Accuracy
|
36 |
+
type: accuracy
|
37 |
+
value: 0.9455
|
38 |
+
- name: F1
|
39 |
+
type: f1
|
40 |
+
value: 0.9199
|
41 |
+
- task:
|
42 |
+
type: text-classification
|
43 |
+
name: Sentic-GCN
|
44 |
+
dataset:
|
45 |
+
name: SemEval15-Restaurant (Sentic-GCN)
|
46 |
+
type: semeval15
|
47 |
+
metrics:
|
48 |
+
- name: Accuracy
|
49 |
+
type: accuracy
|
50 |
+
value: 0.9675
|
51 |
+
- name: F1
|
52 |
+
type: f1
|
53 |
+
value: 0.9355
|
54 |
+
- task:
|
55 |
+
type: text-classification
|
56 |
+
name: Sentic-GCN
|
57 |
+
dataset:
|
58 |
+
name: SemEval16-Restaurant (Sentic-GCN)
|
59 |
+
type: semeval16
|
60 |
+
metrics:
|
61 |
+
- name: Accuracy
|
62 |
+
type: accuracy
|
63 |
+
value: 0.9922
|
64 |
+
- name: F1
|
65 |
+
type: f1
|
66 |
+
value: 0.9915
|
67 |
+
|
68 |
+
- name: SenticGCNBert
|
69 |
+
results:
|
70 |
+
- task:
|
71 |
+
type: text-classification
|
72 |
+
name: Sentic-GCN Bert
|
73 |
+
dataset:
|
74 |
+
name: SemEval14-Laptop (Sentic-GCN Bert)
|
75 |
+
type: semeval14
|
76 |
+
metrics:
|
77 |
+
- name: Accuracy
|
78 |
+
type: accuracy
|
79 |
+
value: 0.9922
|
80 |
+
- name: F1
|
81 |
+
type: f1
|
82 |
+
value: 0.9915
|
83 |
+
- task:
|
84 |
+
type: text-classification
|
85 |
+
name: Sentic-GCN Bert
|
86 |
+
dataset:
|
87 |
+
name: SemEval14-Restaurant (Sentic-GCN Bert)
|
88 |
+
type: semeval14
|
89 |
+
metrics:
|
90 |
+
- name: Accuracy
|
91 |
+
type: accuracy
|
92 |
+
value: 0.9739
|
93 |
+
- name: F1
|
94 |
+
type: f1
|
95 |
+
value: 0.9653
|
96 |
+
- task:
|
97 |
+
type: text-classification
|
98 |
+
name: Sentic-GCN Bert
|
99 |
+
dataset:
|
100 |
+
name: SemEval15-Restaurant (Sentic-GCN Bert)
|
101 |
+
type: semeval15
|
102 |
+
metrics:
|
103 |
+
- name: Accuracy
|
104 |
+
type: accuracy
|
105 |
+
value: 0.9917
|
106 |
+
- name: F1
|
107 |
+
type: f1
|
108 |
+
value: 0.9878
|
109 |
+
- task:
|
110 |
+
type: text-classification
|
111 |
+
name: Sentic-GCN Bert
|
112 |
+
dataset:
|
113 |
+
name: SemEval16-Restaurant (Sentic-GCN Bert)
|
114 |
+
type: semeval16
|
115 |
+
metrics:
|
116 |
+
- name: Accuracy
|
117 |
+
type: accuracy
|
118 |
+
value: 0.9937
|
119 |
+
- name: F1
|
120 |
+
type: f1
|
121 |
+
value: 0.9879
|
122 |
---
|
123 |
+
|
124 |
+
# Aspect-Based Sentiment Analysis
|
125 |
+
You can **test the model** at [aspect-based-sentiment-analysis](https://huggingface.co/spaces/aisingapore/aspect-based-sentiment-analysis).<br />
|
126 |
+
If you want to find out more information, please contact us at [SGNLP-AISingapore]([email protected]).
|
127 |
+
|
128 |
+
|
129 |
+
## Table of Contents
|
130 |
+
- [Model Details](#model-details)
|
131 |
+
- [How to Get Started With the Model](#how-to-get-started-with-the-model)
|
132 |
+
- [Training](#training)
|
133 |
+
- [Model Parameters](#model-parameters)
|
134 |
+
|
135 |
+
## Model Details
|
136 |
+
**Model Name:** Sentic-GCN
|
137 |
+
- **Description:** This is a neural network that utilises LSTM and GCN to detect the sentiment polarities of different aspects in the same sentence. The models used correspond to the associated models described in the paper.
|
138 |
+
- **Paper:** Aspect-based sentiment analysis via affective knowledge enhanced graph convolutional networks, 2021: 107643.
|
139 |
+
- **Author(s):** Bin Liang, Hang Su, Lin Gui, Erik Cambria, Ruifeng Xu. (2021).
|
140 |
+
- **URL:** https://github.com/BinLiang-NLP/Sentic-GCN
|
141 |
+
|
142 |
+
# How to Get Started With the Model
|
143 |
+
|
144 |
+
## Install Python package
|
145 |
+
SGnlp is an initiative by AI Singapore's NLP Hub. They aim to bridge the gap between research and industry, promote translational research, and encourage adoption of NLP techniques in the industry. <br><br> Various NLP models other than aspect-based sentiment analysis are also available in the Python package. You can try them out at [NLP Hub - Demo](https://sgnlp.aisingapore.net/).
|
146 |
+
|
147 |
+
```bash
|
148 |
+
pip install sgnlp
|
149 |
+
|
150 |
+
```
|
151 |
+
|
152 |
+
## Examples
|
153 |
+
For a more complete code guide (including SenticGCN), please refer to this [documentation](https://sgnlp.aisingapore.net/docs/model/senticgcn.html). <br> Alternatively, you can also try out the [demo](https://huggingface.co/spaces/aisingapore/aspect-based-sentiment-analysis) for SenticGCN-Bert.
|
154 |
+
|
155 |
+
Example of SenticGCN-Bert model (with embedding):
|
156 |
+
```python
|
157 |
+
from sgnlp.models.sentic_gcn import(
|
158 |
+
SenticGCNBertConfig,
|
159 |
+
SenticGCNBertModel,
|
160 |
+
SenticGCNBertEmbeddingConfig,
|
161 |
+
SenticGCNBertEmbeddingModel,
|
162 |
+
SenticGCNBertTokenizer,
|
163 |
+
SenticGCNBertPreprocessor,
|
164 |
+
SenticGCNBertPostprocessor
|
165 |
+
)
|
166 |
+
|
167 |
+
tokenizer = SenticGCNBertTokenizer.from_pretrained("bert-base-uncased")
|
168 |
+
|
169 |
+
# Load Model
|
170 |
+
config = SenticGCNBertConfig.from_pretrained("./senticgcn_bert/config.json")
|
171 |
+
model = SenticGCNBertModel.from_pretrained("./senticgcn_bert/pytorch_model.bin",config=config)
|
172 |
+
|
173 |
+
# Load Embedding Model
|
174 |
+
embed_config = SenticGCNBertEmbeddingConfig.from_pretrained("bert-base-uncased")
|
175 |
+
embed_model = SenticGCNBertEmbeddingModel.from_pretrained("bert-base-uncased", config=embed_config)
|
176 |
+
|
177 |
+
preprocessor = SenticGCNBertPreprocessor(
|
178 |
+
tokenizer=tokenizer, embedding_model=embed_model,
|
179 |
+
senticnet="./senticgcn_bert/senticnet.pickle",
|
180 |
+
device="cpu")
|
181 |
+
|
182 |
+
postprocessor = SenticGCNBertPostprocessor()
|
183 |
+
|
184 |
+
inputs = [
|
185 |
+
{ # Single word aspect
|
186 |
+
"aspects": ["service"],
|
187 |
+
"sentence": "To sum it up : service varies from good to mediorce , \
|
188 |
+
depending on which waiter you get ; generally it is just average ok .",
|
189 |
+
},
|
190 |
+
{ # Single-word, multiple aspects
|
191 |
+
"aspects": ["service", "decor"],
|
192 |
+
"sentence": "Everything is always cooked to perfection , the service \
|
193 |
+
is excellent, the decor cool and understated.",
|
194 |
+
},
|
195 |
+
{ # Multi-word aspect
|
196 |
+
"aspects": ["grilled chicken", "chicken"],
|
197 |
+
"sentence": "the only chicken i moderately enjoyed was their grilled chicken \
|
198 |
+
special with edamame puree .",
|
199 |
+
},
|
200 |
+
]
|
201 |
+
|
202 |
+
processed_inputs, processed_indices = preprocessor(inputs)
|
203 |
+
raw_outputs = model(processed_indices)
|
204 |
+
|
205 |
+
post_outputs = postprocessor(processed_inputs=processed_inputs, model_outputs=raw_outputs)
|
206 |
+
|
207 |
+
print(post_outputs[0])
|
208 |
+
# {'sentence': ['To', 'sum', 'it', 'up', ':', 'service', 'varies', 'from', 'good', 'to', 'mediorce', ',', 'depending', 'on', 'which',
|
209 |
+
# 'waiter', 'you', 'get', ';', 'generally', 'it', 'is', 'just', 'average', 'ok', '.'],
|
210 |
+
# 'aspects': [[5]],
|
211 |
+
# 'labels': [0]}
|
212 |
+
|
213 |
+
print(post_outputs[1])
|
214 |
+
# {'sentence': ['Everything', 'is', 'always', 'cooked', 'to', 'perfection', ',', 'the', 'service',
|
215 |
+
# 'is', 'excellent,', 'the', 'decor', 'cool', 'and', 'understated.'],
|
216 |
+
# 'aspects': [[8], [12]],
|
217 |
+
# 'labels': [1, 1]}
|
218 |
+
|
219 |
+
print(post_outputs[2])
|
220 |
+
# {'sentence': ['the', 'only', 'chicken', 'i', 'moderately', 'enjoyed', 'was', 'their', 'grilled',
|
221 |
+
# 'chicken', 'special', 'with', 'edamame', 'puree', '.'],
|
222 |
+
# 'aspects': [[8, 9], [2], [9]],
|
223 |
+
# 'labels': [1, 1, 1]}
|
224 |
+
|
225 |
+
|
226 |
+
```
|
227 |
+
|
228 |
+
|
229 |
+
# Training
|
230 |
+
The training datasets can be retrieved from the Sentic-GCN [GitHub repository](https://github.com/BinLiang-NLP/Sentic-GCN/tree/main/datasets).
|
231 |
+
|
232 |
+
#### Training Results - For Sentic-GCN
|
233 |
+
- **Training Time:** ~10mins for ~35 epochs (early stopped)
|
234 |
+
- **Datasets:** SemEval14-Laptop/ SemEval14-Restaurant/ SemEval15-Restaurant/ SemEval16-Restaurant
|
235 |
+
|
236 |
+
#### Training Results - For Sentic-GCN Bert
|
237 |
+
- **Training Time:** ~1 hr for ~40 epochs (early stopped)
|
238 |
+
- **Datasets:** SemEval14-Laptop/ SemEval14-Restaurant/ SemEval15-Restaurant/ SemEval16-Restaurant
|
239 |
+
|
240 |
+
# Model Parameters
|
241 |
+
- **Model Weights:** [link](https://storage.googleapis.com/sgnlp/models/sentic_gcn/senticgcn_bert/pytorch_model.bin)
|
242 |
+
- **Model Config:** [link](https://storage.googleapis.com/sgnlp/models/sentic_gcn/senticgcn_bert/config.json)
|
243 |
+
- **Model Inputs:** Aspect (word), sentence containing the aspect
|
244 |
+
- **Model Outputs:** Sentiment of aspect, -1 (negative), 0 (neutral), 1 (positive)
|
245 |
+
- **Model Inference Info:** 1 sec on Intel(R) i7 Quad-Core @ 1.7GHz.
|
246 |
+
|
247 |
+
|
248 |
+
|
senticgcn/config.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "/tmp/tmpudjpaday/repeat5/pytorch_model.bin",
|
3 |
+
"architectures": [
|
4 |
+
"SenticGCNModel"
|
5 |
+
],
|
6 |
+
"device": "cuda",
|
7 |
+
"dropout": 0.3,
|
8 |
+
"embed_dim": 300,
|
9 |
+
"hidden_dim": 300,
|
10 |
+
"loss_function": "cross_entropy",
|
11 |
+
"polarities_dim": 3,
|
12 |
+
"torch_dtype": "float32",
|
13 |
+
"transformers_version": "4.15.0"
|
14 |
+
}
|
senticgcn/embedding/embed_config.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"SenticGCNEmbeddingModel"
|
4 |
+
],
|
5 |
+
"embed_dim": 300,
|
6 |
+
"torch_dtype": "float32",
|
7 |
+
"transformers_version": "4.15.0",
|
8 |
+
"vocab_size": 17662
|
9 |
+
}
|
senticgcn/embedding/embed_pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:97637af7ed74acdc5973d6461591241e585d9d39379d7f0eb9a6619437d66db0
|
3 |
+
size 21195243
|
senticgcn/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef9b11db13b01a788f20ea0f8282e1b5b3fba6757f056058ab231eac15e6d402
|
3 |
+
size 8674259
|
senticgcn/senticnet.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6b67b8db4be750c63bb431200b323029fbd58c9ac84106e15821cd856feaef52
|
3 |
+
size 777857
|
senticgcn/tokenizer/special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": "<unk>", "pad_token": "<pad>"}
|
senticgcn/tokenizer/tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"do_lower_case": true, "unk_token": "<unk>", "pad_token": "<pad>", "tokenizer_class": "SenticGCNTokenizer"}
|
senticgcn/tokenizer/vocab.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:90714969e8833474d1443f94b4ab9db22ea8c50eeea8bea591b55c87b29d37d4
|
3 |
+
size 228983
|
senticgcn_bert/config.json
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "/tmp/tmpjemmnqa9/repeat10/pytorch_model.bin",
|
3 |
+
"architectures": [
|
4 |
+
"SenticGCNBertModel"
|
5 |
+
],
|
6 |
+
"device": "cuda",
|
7 |
+
"dropout": 0.3,
|
8 |
+
"embed_dim": 300,
|
9 |
+
"hidden_dim": 768,
|
10 |
+
"loss_function": "cross_entropy",
|
11 |
+
"max_seq_len": 85,
|
12 |
+
"polarities_dim": 3,
|
13 |
+
"torch_dtype": "float32",
|
14 |
+
"transformers_version": "4.15.0"
|
15 |
+
}
|
senticgcn_bert/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95f3464e23cd8745441a0b75c3eb419b18871a3b9c851ed81b8a084772ce0ba7
|
3 |
+
size 7098703
|
senticgcn_bert/senticnet.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6b67b8db4be750c63bb431200b323029fbd58c9ac84106e15821cd856feaef52
|
3 |
+
size 777857
|