KoichiYasuoka committed on
Commit
860ac6f
1 Parent(s): aff0386

algorithm improved

Browse files
Files changed (1) hide show
  1. ud.py +12 -0
ud.py CHANGED
@@ -75,6 +75,18 @@ class UniversalDependenciesCausalPipeline(BellmanFordTokenClassificationPipeline
75
  else:
76
  t["entity_group"]=p
77
  d=[model_outputs["sentence"][t["start"]:t["end"]] for t in w]
 
 
 
 
 
 
 
 
 
 
 
 
78
  v=self.tokenizer(d,add_special_tokens=False)
79
  e=self.model.get_input_embeddings().weight
80
  m=[]
 
75
  else:
76
  t["entity_group"]=p
77
  d=[model_outputs["sentence"][t["start"]:t["end"]] for t in w]
78
+ for i in range(len(d)-1,-1,-1):
79
+ if d[i].startswith(" "):
80
+ j=len(d[i])-len(d[i].lstrip())
81
+ d[i]=d[i].lstrip()
82
+ w[i]["start"]+=j
83
+ if d[i].endswith(" "):
84
+ j=len(d[i])-len(d[i].rstrip())
85
+ d[i]=d[i].rstrip()
86
+ w[i]["end"]-=j
87
+ if d[i].strip()=="":
88
+ d.pop(i)
89
+ w.pop(i)
90
  v=self.tokenizer(d,add_special_tokens=False)
91
  e=self.model.get_input_embeddings().weight
92
  m=[]