Spaces:
Running
Running
Update UI
Browse files- .streamlit/config.toml +8 -0
- Code_Browser.py +13 -7
- utils.py +31 -30
.streamlit/config.toml
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[global]
|
2 |
+
disableWidgetStateDuplicationWarning = true
|
3 |
+
|
4 |
+
[theme]
|
5 |
+
base="dark"
|
6 |
+
primaryColor="DeepSkyBlue"
|
7 |
+
backgroundColor="#121821"
|
8 |
+
secondaryBackgroundColor="#303540"
|
Code_Browser.py
CHANGED
@@ -25,10 +25,12 @@ dirs = glob.glob(base_cache_dir + "models/*/")
|
|
25 |
model_name_options = [d.split("/")[-2].split("_")[:-2] for d in dirs]
|
26 |
model_name_options = ["_".join(m) for m in model_name_options]
|
27 |
model_name_options = sorted(set(model_name_options))
|
|
|
28 |
|
29 |
model_name = st.selectbox(
|
30 |
"Model",
|
31 |
model_name_options,
|
|
|
32 |
key=webapp_utils.persist("model_name"),
|
33 |
)
|
34 |
|
@@ -87,10 +89,11 @@ st.session_state["ccb"] = ccb
|
|
87 |
st.session_state["cb_at"] = cb_at
|
88 |
st.session_state["is_attn"] = is_attn
|
89 |
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
st.
|
|
|
94 |
|
95 |
st.markdown("## Demo Codes")
|
96 |
demo_file_path = codes_cache_path + "demo_codes.txt"
|
@@ -122,7 +125,7 @@ if st.checkbox("Show Demo Codes"):
|
|
122 |
if len(demo_codes) == 0:
|
123 |
st.markdown(
|
124 |
f"""
|
125 |
-
<div style="font-size: 1.
|
126 |
No demo codes found in file {demo_file_path}
|
127 |
</div>
|
128 |
""",
|
@@ -214,7 +217,10 @@ if regex_pattern:
|
|
214 |
ccb,
|
215 |
model_name,
|
216 |
)
|
217 |
-
st.markdown(
|
|
|
|
|
|
|
218 |
num_search_cols = 7 if is_attn else 6
|
219 |
non_deploy_offset = 0
|
220 |
if not DEPLOY_MODE:
|
@@ -296,7 +302,7 @@ if regex_pattern:
|
|
296 |
|
297 |
st.markdown("## Code Token Activations")
|
298 |
|
299 |
-
filter_codes = st.checkbox("
|
300 |
act_range, layer_code_acts = None, None
|
301 |
if filter_codes:
|
302 |
act_range = st.slider(
|
|
|
25 |
# Derive the selectable model names from the cached model directories: take
# each directory name, split it on "_" and drop its last two fields.
model_name_options = [d.split("/")[-2].split("_")[:-2] for d in dirs]
model_name_options = ["_".join(m) for m in model_name_options]
model_name_options = sorted(set(model_name_options))
# Preselect the first model whose name contains "attn". Fall back to the first
# option instead of raising ValueError when no such model is cached (or when
# the option list is empty).
def_model_idx = next(
    (i for i, m in enumerate(model_name_options) if "attn" in m),
    0,
)

model_name = st.selectbox(
    "Model",
    model_name_options,
    index=def_model_idx,
    key=webapp_utils.persist("model_name"),
)
|
36 |
|
|
|
89 |
st.session_state["cb_at"] = cb_at
|
90 |
st.session_state["is_attn"] = is_attn
|
91 |
|
92 |
+
if not DEPLOY_MODE:
|
93 |
+
st.markdown("## Metrics")
|
94 |
+
# hide metrics by default
|
95 |
+
if st.checkbox("Show Model Metrics"):
|
96 |
+
st.write(metrics)
|
97 |
|
98 |
st.markdown("## Demo Codes")
|
99 |
demo_file_path = codes_cache_path + "demo_codes.txt"
|
|
|
125 |
if len(demo_codes) == 0:
|
126 |
st.markdown(
|
127 |
f"""
|
128 |
+
<div style="font-size: 1.0rem; color: red;">
|
129 |
No demo codes found in file {demo_file_path}
|
130 |
</div>
|
131 |
""",
|
|
|
217 |
ccb,
|
218 |
model_name,
|
219 |
)
|
220 |
+
st.markdown(
|
221 |
+
f"Found <span style='color:green;'>{re_token_matches}</span> matches",
|
222 |
+
unsafe_allow_html=True,
|
223 |
+
)
|
224 |
num_search_cols = 7 if is_attn else 6
|
225 |
non_deploy_offset = 0
|
226 |
if not DEPLOY_MODE:
|
|
|
302 |
|
303 |
st.markdown("## Code Token Activations")
|
304 |
|
305 |
+
filter_codes = st.checkbox("Show filters", key="filter_codes")
|
306 |
act_range, layer_code_acts = None, None
|
307 |
if filter_codes:
|
308 |
act_range = st.slider(
|
utils.py
CHANGED
@@ -152,39 +152,42 @@ def features_to_tokens(cb_key, cb_acts, num_codes, code=None):
|
|
152 |
return features_tokens
|
153 |
|
154 |
|
155 |
-
def color_str(s: str,
|
156 |
"""Color the string for html or terminal."""
|
|
|
157 |
if html:
|
|
|
158 |
return f"<span style='color:{color}'>{s}</span>"
|
159 |
else:
|
|
|
160 |
return colored(s, color)
|
161 |
|
162 |
|
163 |
-
def
|
164 |
-
"""Separate states with a dash and color red the tokens in
|
165 |
ret_string = ""
|
166 |
-
|
167 |
tokens_enumerate = enumerate(tokens)
|
168 |
if tokens[0] == "<|endoftext|>":
|
169 |
next(tokens_enumerate)
|
170 |
-
if
|
171 |
-
|
172 |
for i, c in tokens_enumerate:
|
173 |
if i % 2 == 1:
|
174 |
ret_string += "-"
|
175 |
-
if
|
176 |
-
ret_string += color_str(c,
|
177 |
-
|
178 |
else:
|
179 |
ret_string += c
|
180 |
return ret_string
|
181 |
|
182 |
|
183 |
-
def
|
184 |
-
"""Color
|
185 |
ret_string = ""
|
186 |
last_colored_token_idx = -1
|
187 |
-
for i in
|
188 |
c_str = tokens[i]
|
189 |
if i <= last_colored_token_idx + 2 * n + 1:
|
190 |
ret_string += "".join(tokens[last_colored_token_idx + 1 : i])
|
@@ -194,7 +197,7 @@ def color_tokens_red(tokens, red_idx, n=3, html=False):
|
|
194 |
)
|
195 |
ret_string += " ... "
|
196 |
ret_string += "".join(tokens[i - n : i])
|
197 |
-
ret_string += color_str(c_str,
|
198 |
last_colored_token_idx = i
|
199 |
ret_string += "".join(
|
200 |
tokens[
|
@@ -207,15 +210,15 @@ def color_tokens_red(tokens, red_idx, n=3, html=False):
|
|
207 |
def prepare_example_print(
|
208 |
example_id,
|
209 |
example_tokens,
|
210 |
-
|
211 |
html,
|
212 |
-
|
213 |
):
|
214 |
"""Format example to print."""
|
215 |
-
example_output = color_str(example_id, "green"
|
216 |
example_output += (
|
217 |
": "
|
218 |
-
+
|
219 |
+ ("<br>" if html else "\n")
|
220 |
)
|
221 |
return example_output
|
@@ -238,10 +241,8 @@ def tkn_print(
|
|
238 |
print_output = [] if return_example_list else ""
|
239 |
curr_ex = ll[0][0]
|
240 |
total_examples = 0
|
241 |
-
|
242 |
-
|
243 |
-
color_tokens_red_automata if separate_states else partial(color_tokens_red, n=n)
|
244 |
-
)
|
245 |
for idx in indices:
|
246 |
if total_examples > max_examples:
|
247 |
break
|
@@ -251,31 +252,31 @@ def tkn_print(
|
|
251 |
curr_ex_output = prepare_example_print(
|
252 |
curr_ex,
|
253 |
tokens[curr_ex],
|
254 |
-
|
255 |
html,
|
256 |
-
|
257 |
)
|
258 |
total_examples += 1
|
259 |
if return_example_list:
|
260 |
-
print_output.append((curr_ex_output, len(
|
261 |
else:
|
262 |
print_output += curr_ex_output
|
263 |
curr_ex = i
|
264 |
-
|
265 |
-
|
266 |
curr_ex_output = prepare_example_print(
|
267 |
curr_ex,
|
268 |
tokens[curr_ex],
|
269 |
-
|
270 |
html,
|
271 |
-
|
272 |
)
|
273 |
if return_example_list:
|
274 |
-
print_output.append((curr_ex_output, len(
|
275 |
else:
|
276 |
print_output += curr_ex_output
|
277 |
asterisk_str = "********************************************"
|
278 |
-
print_output += color_str(asterisk_str, "green"
|
279 |
total_examples += 1
|
280 |
|
281 |
return print_output
|
|
|
152 |
return features_tokens
|
153 |
|
154 |
|
155 |
+
def color_str(s: str, html: bool, color: Optional[str] = None):
    """Wrap ``s`` in color markup for HTML output or for the terminal.

    When ``html`` is true the string is placed inside a ``<span>`` carrying an
    inline CSS color (default "DeepSkyBlue"). Otherwise it is handed to
    ``colored`` (default "light_cyan").
    """
    if not html:
        terminal_color = color if color is not None else "light_cyan"
        return colored(s, terminal_color)
    css_color = color if color is not None else "DeepSkyBlue"
    return f"<span style='color:{css_color}'>{s}</span>"
|
164 |
|
165 |
|
166 |
+
def color_tokens_automata(tokens, color_idx, html=False):
    """Join tokens into one string, dash-separating states and coloring matches.

    A "-" is inserted before every token at an odd index, and each token whose
    index is matched in ``color_idx`` is wrapped by ``color_str`` using its
    default color. A leading "<|endoftext|>" token is left out of the output.

    Args:
        tokens: sequence of token strings.
        color_idx: indices of the tokens to color. They are matched in order
            during a single forward pass over ``tokens``, so an index that is
            smaller than an earlier entry is never colored.
        html: forwarded to ``color_str`` to choose HTML or terminal coloring.

    Returns:
        The formatted string, or "" when ``tokens`` is empty.
    """
    # Nothing to format; also avoids indexing tokens[0] on an empty sequence.
    if len(tokens) == 0:
        return ""

    pieces = []
    itr_over_color_idx = 0
    tokens_enumerate = enumerate(tokens)
    if tokens[0] == "<|endoftext|>":
        # Drop the leading end-of-text marker from the output.
        next(tokens_enumerate)
        # If that skipped token was due to be colored, step past its entry.
        # The length check keeps an empty color_idx from raising IndexError.
        if len(color_idx) > 0 and color_idx[0] == 0:
            itr_over_color_idx += 1
    for i, c in tokens_enumerate:
        if i % 2 == 1:
            pieces.append("-")
        if itr_over_color_idx < len(color_idx) and i == color_idx[itr_over_color_idx]:
            pieces.append(color_str(c, html))
            itr_over_color_idx += 1
        else:
            pieces.append(c)
    return "".join(pieces)
|
184 |
|
185 |
|
186 |
+
def color_tokens(tokens, color_idx, n=3, html=False):
|
187 |
+
"""Color the tokens in color_idx."""
|
188 |
ret_string = ""
|
189 |
last_colored_token_idx = -1
|
190 |
+
for i in color_idx:
|
191 |
c_str = tokens[i]
|
192 |
if i <= last_colored_token_idx + 2 * n + 1:
|
193 |
ret_string += "".join(tokens[last_colored_token_idx + 1 : i])
|
|
|
197 |
)
|
198 |
ret_string += " ... "
|
199 |
ret_string += "".join(tokens[i - n : i])
|
200 |
+
ret_string += color_str(c_str, html)
|
201 |
last_colored_token_idx = i
|
202 |
ret_string += "".join(
|
203 |
tokens[
|
|
|
210 |
def prepare_example_print(
    example_id,
    example_tokens,
    tokens_to_color,
    html,
    color_fn=color_tokens,
):
    """Build the printable line for a single example.

    The line is the example id colored green, followed by ": ", then the
    tokens rendered by ``color_fn``, and finally "<br>" for HTML output or a
    newline otherwise.
    """
    label = color_str(example_id, html, "green")
    rendered_tokens = color_fn(example_tokens, tokens_to_color, html=html)
    line_end = "<br>" if html else "\n"
    return label + (": " + rendered_tokens + line_end)
|
|
|
241 |
print_output = [] if return_example_list else ""
|
242 |
curr_ex = ll[0][0]
|
243 |
total_examples = 0
|
244 |
+
tokens_to_color = []
|
245 |
+
color_fn = color_tokens_automata if separate_states else partial(color_tokens, n=n)
|
|
|
|
|
246 |
for idx in indices:
|
247 |
if total_examples > max_examples:
|
248 |
break
|
|
|
252 |
curr_ex_output = prepare_example_print(
|
253 |
curr_ex,
|
254 |
tokens[curr_ex],
|
255 |
+
tokens_to_color,
|
256 |
html,
|
257 |
+
color_fn,
|
258 |
)
|
259 |
total_examples += 1
|
260 |
if return_example_list:
|
261 |
+
print_output.append((curr_ex_output, len(tokens_to_color)))
|
262 |
else:
|
263 |
print_output += curr_ex_output
|
264 |
curr_ex = i
|
265 |
+
tokens_to_color = []
|
266 |
+
tokens_to_color.append(j)
|
267 |
curr_ex_output = prepare_example_print(
|
268 |
curr_ex,
|
269 |
tokens[curr_ex],
|
270 |
+
tokens_to_color,
|
271 |
html,
|
272 |
+
color_fn,
|
273 |
)
|
274 |
if return_example_list:
|
275 |
+
print_output.append((curr_ex_output, len(tokens_to_color)))
|
276 |
else:
|
277 |
print_output += curr_ex_output
|
278 |
asterisk_str = "********************************************"
|
279 |
+
print_output += color_str(asterisk_str, html, "green")
|
280 |
total_examples += 1
|
281 |
|
282 |
return print_output
|