KoichiYasuoka
committed on
Commit
•
860ac6f
1
Parent(s):
aff0386
algorithm improved
Browse files
ud.py
CHANGED
@@ -75,6 +75,18 @@ class UniversalDependenciesCausalPipeline(BellmanFordTokenClassificationPipeline
|
|
75 |
else:
|
76 |
t["entity_group"]=p
|
77 |
d=[model_outputs["sentence"][t["start"]:t["end"]] for t in w]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
v=self.tokenizer(d,add_special_tokens=False)
|
79 |
e=self.model.get_input_embeddings().weight
|
80 |
m=[]
|
|
|
75 |
else:
|
76 |
t["entity_group"]=p
|
77 |
d=[model_outputs["sentence"][t["start"]:t["end"]] for t in w]
|
78 |
+
for i in range(len(d)-1,-1,-1):
|
79 |
+
if d[i].startswith(" "):
|
80 |
+
j=len(d[i])-len(d[i].lstrip())
|
81 |
+
d[i]=d[i].lstrip()
|
82 |
+
w[i]["start"]+=j
|
83 |
+
if d[i].endswith(" "):
|
84 |
+
j=len(d[i])-len(d[i].rstrip())
|
85 |
+
d[i]=d[i].rstrip()
|
86 |
+
w[i]["end"]-=j
|
87 |
+
if d[i].strip()=="":
|
88 |
+
d.pop(i)
|
89 |
+
w.pop(i)
|
90 |
v=self.tokenizer(d,add_special_tokens=False)
|
91 |
e=self.model.get_input_embeddings().weight
|
92 |
m=[]
|