Spaces:

madhavkotecha
/

CS626-NEI

Sleeping

App Files Files Community

madhavkotecha committed 18 days ago

Commit

59d8c9f

•

1 Parent(s): ce62bb3

Update app.py

Browse files

Files changed (1) hide show

app.py +5 -5

app.py CHANGED Viewed

@@ -94,12 +94,12 @@ class NEI:
         # plt.colorbar()
         # plt.savefig('Confusion_Matrix.png')
-    def vectorize(self, w, scaled_position, prev_tag=0, next_tag=0, prev_token=None):
         is_titlecase = 1 if w[0].isupper() else 0
         is_allcaps = 1 if w.isupper() else 0
         is_sw = 1 if w.lower() in SW else 0
         is_punct = 1 if w in PUNCT else 0
-        is_surrounded_by_entities = 1 if (prev_tag > 0 and next_tag > 0) else 0
         is_connector = 1 if (w.lower() in connectors) and (prev_tag > 0 and next_tag > 0) else 0
         # is_start_of_sentence = 1 if (scaled_position == 0 or prev_token in [".", "!", "?"]) and w.lower() not in start_words else 0
         # is_start_of_sentence = 1 if scaled_position == 0 else 0
@@ -113,7 +113,7 @@ class NEI:
             for i, token in enumerate(tokens):
                 prev_tag = tags[i - 1] if i > 0 else 0
                 next_tag = tags[i + 1] if i < len(tokens) - 1 else 0
-                x = self.vectorize(token, scaled_position=(i / len(tokens)), prev_tag=prev_tag, next_tag=next_tag, prev_token=tokens[i-1] if i > 0 else None)
                 y = 1 if tags[i] > 0 else 0
                 features.append(x)
                 labels.append(y)
@@ -140,7 +140,7 @@ class NEI:
         tokens = word_tokenize(sentence)
         features = []
-        raw_features = [self.vectorize(token, i / len(tokens), prev_token=tokens[i-1] if i > 0 else None) for i, token in enumerate(tokens)]
         raw_features = np.array(raw_features, dtype=np.float32)
         scaled_features = self.scaler.transform(raw_features)
         y_pred = self.model.predict(scaled_features)
@@ -149,7 +149,7 @@ class NEI:
             prev_tag = y_pred[i - 1] if i > 0 else 0
             next_tag = y_pred[i + 1] if i < len(tokens) - 1 else 0
-            feature_with_context = self.vectorize(token, i / len(tokens), prev_tag, next_tag, prev_token=tokens[i-1] if i > 0 else None)
             features.append(feature_with_context)
         features = np.array(features, dtype=np.float32)

         # plt.colorbar()
         # plt.savefig('Confusion_Matrix.png')
+    def vectorize(self, w, scaled_position, prev_tag=0, next_tag=0):
         is_titlecase = 1 if w[0].isupper() else 0
         is_allcaps = 1 if w.isupper() else 0
         is_sw = 1 if w.lower() in SW else 0
         is_punct = 1 if w in PUNCT else 0
+        # is_surrounded_by_entities = 1 if (prev_tag > 0 and next_tag > 0) else 0
         is_connector = 1 if (w.lower() in connectors) and (prev_tag > 0 and next_tag > 0) else 0
         # is_start_of_sentence = 1 if (scaled_position == 0 or prev_token in [".", "!", "?"]) and w.lower() not in start_words else 0
         # is_start_of_sentence = 1 if scaled_position == 0 else 0
             for i, token in enumerate(tokens):
                 prev_tag = tags[i - 1] if i > 0 else 0
                 next_tag = tags[i + 1] if i < len(tokens) - 1 else 0
+                x = self.vectorize(token, scaled_position=(i / len(tokens)), prev_tag=prev_tag, next_tag=next_tag)
                 y = 1 if tags[i] > 0 else 0
                 features.append(x)
                 labels.append(y)
         tokens = word_tokenize(sentence)
         features = []
+        raw_features = [self.vectorize(token, i / len(tokens)) for i, token in enumerate(tokens)]
         raw_features = np.array(raw_features, dtype=np.float32)
         scaled_features = self.scaler.transform(raw_features)
         y_pred = self.model.predict(scaled_features)
             prev_tag = y_pred[i - 1] if i > 0 else 0
             next_tag = y_pred[i + 1] if i < len(tokens) - 1 else 0
+            feature_with_context = self.vectorize(token, i / len(tokens), prev_tag, next_tag)
             features.append(feature_with_context)
         features = np.array(features, dtype=np.float32)