Spaces:
Sleeping
Sleeping
Gabriela Nicole Gonzalez Saez
committed on
Commit
•
32fdb6f
1
Parent(s):
e4bccbf
zh-decode and best probabilities
Browse files
app.py
CHANGED
@@ -78,19 +78,18 @@ def get_k_prob_tokens(transition_scores, result, model, k_values=5):
|
|
78 |
gen_sequences = result.sequences[:, 1:]
|
79 |
|
80 |
result_output = []
|
81 |
-
# bs_alt = []
|
82 |
-
# bs_alt_scores = []
|
83 |
|
84 |
# First beam only...
|
85 |
bs = 0
|
86 |
text = ' '
|
87 |
for tok, score, i_step in zip(gen_sequences[bs], transition_scores[bs],range(len(gen_sequences[bs]))):
|
88 |
-
# bs_alt.append([tokenizer_tr.decode(tok) for tok in result.scores[i_step][bs].topk(k_values).indices ] )
|
89 |
-
# bs_alt_scores.append(np.exp(result.scores[i_step][bs].topk(k_values).values))
|
90 |
|
91 |
-
|
92 |
-
|
93 |
-
|
|
|
|
|
|
|
94 |
|
95 |
return result_output
|
96 |
|
@@ -100,15 +99,19 @@ def split_token_from_sequences(sequences, model) -> dict :
|
|
100 |
|
101 |
gen_sequences_texts = []
|
102 |
for bs in range(n_sentences):
|
|
|
103 |
#### decoder per token.
|
104 |
-
|
105 |
-
|
106 |
-
|
|
|
|
|
107 |
|
|
|
108 |
#raw dict is bos
|
109 |
text = 'bos'
|
110 |
new_id = text +'--1'
|
111 |
-
dict_parent = [{'id': new_id, 'parentId': None , 'text': text, 'name': 'bos', 'prob':score }]
|
112 |
id_dict_pos = {}
|
113 |
step_i = 0
|
114 |
cont = True
|
@@ -151,8 +154,10 @@ def split_token_from_sequences(sequences, model) -> dict :
|
|
151 |
dict_parent.append({'id': new_id, 'parentId': parent_id , 'text': step_w, 'name': step_w, 'prob' : score })
|
152 |
id_dict_pos[new_id] = len(dict_parent) - 1
|
153 |
else:
|
154 |
-
|
155 |
-
|
|
|
|
|
156 |
|
157 |
step_i += 1
|
158 |
return dict_parent
|
|
|
78 |
gen_sequences = result.sequences[:, 1:]
|
79 |
|
80 |
result_output = []
|
|
|
|
|
81 |
|
82 |
# First beam only...
|
83 |
bs = 0
|
84 |
text = ' '
|
85 |
for tok, score, i_step in zip(gen_sequences[bs], transition_scores[bs],range(len(gen_sequences[bs]))):
|
|
|
|
|
86 |
|
87 |
+
beam_i = result.beam_indices[0][i_step]
|
88 |
+
if beam_i < 0:
|
89 |
+
beam_i = bs
|
90 |
+
bs_alt = [tokenizer_tr.decode(tok) for tok in result.scores[i_step][beam_i].topk(k_values).indices ]
|
91 |
+
bs_alt_scores = np.exp(result.scores[i_step][beam_i].topk(k_values).values)
|
92 |
+
result_output.append([np.array(result.scores[i_step][beam_i].topk(k_values).indices), np.array(bs_alt_scores),bs_alt])
|
93 |
|
94 |
return result_output
|
95 |
|
|
|
99 |
|
100 |
gen_sequences_texts = []
|
101 |
for bs in range(n_sentences):
|
102 |
+
# gen_sequences_texts.append(dict_tokenizer_tr[model].decode(sequences[:, 1:][bs], skip_special_tokens=True).split(' '))
|
103 |
#### decoder per token.
|
104 |
+
seq_bs = []
|
105 |
+
|
106 |
+
for token in sequences[:, 1:][bs]:
|
107 |
+
seq_bs.append(dict_tokenizer_tr[model].decode(token, skip_special_tokens=True))
|
108 |
+
gen_sequences_texts.append(seq_bs)
|
109 |
|
110 |
+
score = 0
|
111 |
#raw dict is bos
|
112 |
text = 'bos'
|
113 |
new_id = text +'--1'
|
114 |
+
dict_parent = [{'id': new_id, 'parentId': None , 'text': text, 'name': 'bos', 'prob': score }]
|
115 |
id_dict_pos = {}
|
116 |
step_i = 0
|
117 |
cont = True
|
|
|
154 |
dict_parent.append({'id': new_id, 'parentId': parent_id , 'text': step_w, 'name': step_w, 'prob' : score })
|
155 |
id_dict_pos[new_id] = len(dict_parent) - 1
|
156 |
else:
|
157 |
+
if not (new_id in id_dict_pos):
|
158 |
+
dict_parent.append({'id': new_id, 'parentId': parent_id , 'text': step_w, 'name': step_w, 'prob' : score })
|
159 |
+
id_dict_pos[new_id] = len(dict_parent) - 1
|
160 |
+
|
161 |
|
162 |
step_i += 1
|
163 |
return dict_parent
|