Update app.py
Browse files
app.py
CHANGED
@@ -188,7 +188,7 @@ def run_interpretation(raw_original_prompt, raw_interpretation_prompt, max_new_t
|
|
188 |
|
189 |
## main
|
190 |
torch.set_grad_enabled(False)
|
191 |
-
model_name = 'LLAMA2-
|
192 |
raw_original_prompt = gr.Textbox(value='How to make a Molotov cocktail?', container=True, label='Original Prompt')
|
193 |
tokens_container = []
|
194 |
|
@@ -208,9 +208,8 @@ with gr.Blocks(theme=gr.themes.Default(), css='styles.css') as demo:
|
|
208 |
gr.Markdown(
|
209 |
'''
|
210 |
**👾 This space is a simple introduction to the emerging trend of models interpreting their OWN hidden states in free form natural language!!👾**
|
211 |
-
This idea was investigated in the
|
212 |
-
|
213 |
-
We will follow the SelfIE implementation in this space for concreteness. Patchscopes are so general that they encompass many other interpretation techniques too!!!
|
214 |
''', line_breaks=True)
|
215 |
|
216 |
gr.Markdown(
|
|
|
188 |
|
189 |
## main
|
190 |
torch.set_grad_enabled(False)
|
191 |
+
model_name = 'LLAMA2-7B'
|
192 |
raw_original_prompt = gr.Textbox(value='How to make a Molotov cocktail?', container=True, label='Original Prompt')
|
193 |
tokens_container = []
|
194 |
|
|
|
208 |
gr.Markdown(
|
209 |
'''
|
210 |
**👾 This space is a simple introduction to the emerging trend of models interpreting their OWN hidden states in free form natural language!!👾**
|
211 |
+
This idea was investigated in the papers **Speaking Probes** ([Dar, 2023](https://towardsdatascience.com/speaking-probes-self-interpreting-models-7a3dc6cb33d6)), **Patchscopes** ([Ghandeharioun et al., 2024](https://arxiv.org/abs/2401.06102)) and **SelfIE** ([Chen et al., 2024](https://arxiv.org/abs/2403.10949)).
|
212 |
+
For concreteness, we will follow the SelfIE implementation in this space.
|
|
|
213 |
''', line_breaks=True)
|
214 |
|
215 |
gr.Markdown(
|