kcarnold committed
Commit 1b6a5d7
1 Parent(s): d3555f7

Consolidate to a single file with shared code

Files changed (5):
  1. Home.py +0 -10
  2. README.md +2 -2
  3. app.py +118 -0
  4. pages/1_Rewrite.py +1 -46
  5. pages/2_Highlights.py +0 -53
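The single-file layout builds on Streamlit's st.Page / st.navigation multipage API, which is why the README change below also bumps sdk_version to 1.36.0, the release that introduced that API. A minimal sketch of the pattern, with placeholder page functions for illustration only (app.py registers its real ones):

import streamlit as st

# Each page is a plain function; st.Page wraps it with a title and icon.
# Placeholder pages for illustration; app.py uses its own page functions.
def home():
    st.title("Home")

def rewrite():
    st.title("Rewrite")

# st.navigation renders the sidebar and returns whichever page is selected.
page = st.navigation([
    st.Page(home, title="Home", icon="🏠"),
    st.Page(rewrite, title="Rewrite", icon="📝"),
])
page.run()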
Home.py DELETED
@@ -1,10 +0,0 @@
- import streamlit as st
-
- st.title("Writing Tools Prototypes")
-
- st.markdown("Click one of the links below to see a prototype in action.")
-
- st.page_link("pages/1_Rewrite.py", label="Rewrite with predictions", icon="📝")
- st.page_link("pages/2_Highlights.py", label="Highlight locations for possible edits", icon="🖍️")
-
- st.markdown("*Note*: These services send data to a remote server for processing. The server logs requests. Don't use sensitive or identifiable information on this page.")
README.md CHANGED
@@ -4,8 +4,8 @@ emoji: 🏢
  colorFrom: yellow
  colorTo: yellow
  sdk: streamlit
- sdk_version: 1.33.0
- app_file: Home.py
+ sdk_version: 1.36.0
+ app_file: app.py
  pinned: false
  license: mit
  ---
app.py ADDED
@@ -0,0 +1,118 @@
+ import streamlit as st
+ import requests
+
+ def landing():
+     st.title("Writing Tools Prototypes")
+     st.markdown("Click one of the links below to see a prototype in action.")
+
+     st.page_link(st.Page(rewrite_with_predictions), label="Rewrite with predictions", icon="📝")
+     st.page_link(highlight_page, label="Highlight locations for possible edits", icon="🖍️")
+
+     st.markdown("*Note*: These services send data to a remote server for processing. The server logs requests. Don't use sensitive or identifiable information on this page.")
+
+
+ def show_token(token):
+     token_display = token.replace('\n', '↵').replace('\t', '⇥')
+     if token_display.startswith("#"):
+         token_display = "\\" + token_display
+     return token_display
+
+
+ def get_prompt(default="Rewrite this document to be more clear and concise."):
+     # pick a preset prompt or "other"
+     with st.popover("Prompt options"):
+         prompt_options = [
+             "Rewrite this document to be ...",
+             "Summarize this document in one sentence.",
+             "Translate this document into Spanish.",
+             "Other"
+         ]
+         prompt = st.radio("Prompt", prompt_options, help="Instructions for what the bot should do.")
+         if prompt.startswith("Rewrite this document to be"):
+             rewrite_adjs = ["clear and concise", "more detailed and engaging", "more formal and professional", "more casual and conversational", "more technical and precise", "more creative and imaginative", "more persuasive and compelling"]
+             prompt = "Rewrite this document to be " + st.radio("to be ...", rewrite_adjs) + "."
+         elif prompt == "Other":
+             prompt = st.text_area("Prompt", "Rewrite this document to be more clear and concise.")
+     return prompt
+
+
+ def rewrite_with_predictions():
+     st.title("Rewrite with Predictive Text")
+
+     prompt = get_prompt()
+     st.write("Prompt:", prompt)
+
+     doc = st.text_area("Document", "", placeholder="Paste your document here.", height=300)
+     st.button("Update document")
+     rewrite_in_progress = st.text_area("Rewrite in progress", key='rewrite_in_progress', value="", placeholder="Clicking the buttons below will update this field. You can also edit it directly; press Ctrl+Enter to apply changes.", height=300)
+
+     if doc.strip() == "" and rewrite_in_progress.strip() == "":
+         # Allow partial rewrites as a hack to enable autocomplete from the prompt
+         st.stop()
+
+     def get_preds_api(prompt, original_doc, rewrite_in_progress, k=5):
+         response = requests.get("https://tools.kenarnold.org/api/next_token", params=dict(prompt=prompt, original_doc=original_doc, doc_in_progress=rewrite_in_progress, k=k))
+         response.raise_for_status()
+         return response.json()['next_tokens']
+
+     tokens = get_preds_api(prompt, doc, rewrite_in_progress)
+
+     def append_token(word):
+         st.session_state['rewrite_in_progress'] = (
+             st.session_state['rewrite_in_progress'] + word
+         )
+
+     allow_multi_word = st.checkbox("Allow multi-word predictions", value=False)
+
+     for i, (col, token) in enumerate(zip(st.columns(len(tokens)), tokens)):
+         with col:
+             if not allow_multi_word and ' ' in token[1:]:
+                 token = token[0] + token[1:].split(' ', 1)[0]
+             token_display = show_token(token)
+             st.button(token_display, on_click=append_token, args=(token,), key=i, use_container_width=True)
+
+
+ def highlight_edits():
+     import html
+     prompt = get_prompt()
+     st.write("Prompt:", prompt)
+     doc = st.text_area("Document", placeholder="Paste your document here.")
+     updated_doc = st.text_area("Updated Doc", placeholder="Your edited document. Leave this blank to use your original document.")
+
+
+     response = requests.get("https://tools.kenarnold.org/api/highlights", params=dict(prompt=prompt, doc=doc, updated_doc=updated_doc))
+     spans = response.json()['highlights']
+
+     if len(spans) < 2:
+         st.write("No spans found.")
+         st.stop()
+
+     highest_loss = max(span['token_loss'] for span in spans[1:])
+     for span in spans:
+         span['loss_ratio'] = span['token_loss'] / highest_loss
+
+     html_out = ''
+     for span in spans:
+         is_different = span['token'] != span['most_likely_token']
+         html_out += '<span style="color: {color}" title="{title}">{orig_token}</span>'.format(
+             color="blue" if is_different else "black",
+             title=html.escape(span["most_likely_token"]).replace('\n', ' '),
+             orig_token=html.escape(span["token"]).replace('\n', '<br>')
+         )
+     html_out = f"<p style=\"background: white;\">{html_out}</p>"
+
+     st.write(html_out, unsafe_allow_html=True)
+     import pandas as pd
+     st.write(pd.DataFrame(spans)[['token', 'token_loss', 'most_likely_token', 'loss_ratio']])
+
+
+ rewrite_page = st.Page(rewrite_with_predictions, title="Rewrite with predictions", icon="📝")
+ highlight_page = st.Page(highlight_edits, title="Highlight locations for possible edits", icon="🖍️")
+
+ # Manually specify the sidebar
+ page = st.navigation([
+     st.Page(landing, title="Home", icon="🏠"),
+     rewrite_page,
+     highlight_page
+ ])
+ page.run()
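The predictions page above relies on a hosted endpoint rather than a local model. As a rough standalone check of that endpoint outside Streamlit, parameter names are taken from get_preds_api() above; the response shape is assumed from how the code reads it, and the example document text is made up:

import requests

# Same endpoint and query parameters that get_preds_api() in app.py uses.
resp = requests.get(
    "https://tools.kenarnold.org/api/next_token",
    params=dict(
        prompt="Rewrite this document to be more clear and concise.",
        original_doc="The meeting, which was long, happened yesterday.",
        doc_in_progress="The meeting",
        k=5,
    ),
)
resp.raise_for_status()
print(resp.json()["next_tokens"])  # assumed: a list of k candidate next tokens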
pages/1_Rewrite.py CHANGED
@@ -1,48 +1,3 @@
  import streamlit as st
- import requests
+ from util import get_prompt

-
- st.title("Rewrite with Predictive Text")
-
- # pick a preset prompt or "other"
- prompt_options = [
-     "Rewrite this document to be ...",
-     "Summarize this document in one sentence.",
-     "Translate this document into Spanish.",
-     "Other"
- ]
- prompt = st.radio("Prompt", prompt_options, help="Instructions for what the bot should do.")
- if prompt.startswith("Rewrite this document to be"):
-     rewrite_adjs = ["clear and concise", "more detailed and engaging", "more formal and professional", "more casual and conversational", "more technical and precise", "more creative and imaginative", "more persuasive and compelling"]
-     prompt = "Rewrite this document to be " + st.radio("to be ...", rewrite_adjs) + "."
- elif prompt == "Other":
-     prompt = st.text_area("Prompt", "Rewrite this document to be more clear and concise.")
- st.write("Prompt:", prompt)
- doc = st.text_area("Document", "", placeholder="Paste your document here.", height=300)
- st.button("Update document")
- rewrite_in_progress = st.text_area("Rewrite in progress", key='rewrite_in_progress', value="", placeholder="Clicking the buttons below will update this field. You can also edit it directly; press Ctrl+Enter to apply changes.", height=300)
-
- if doc.strip() == "" and rewrite_in_progress.strip() == "":
-     # Allow partial rewrites as a hack to enable autocomplete from the prompt
-     st.stop()
-
- def get_preds_api(prompt, original_doc, rewrite_in_progress, k=5):
-     response = requests.get("https://tools.kenarnold.org/api/next_token", params=dict(prompt=prompt, original_doc=original_doc, doc_in_progress=rewrite_in_progress, k=k))
-     response.raise_for_status()
-     return response.json()['next_tokens']
-
- tokens = get_preds_api(prompt, doc, rewrite_in_progress)
-
- def append_token(word):
-     st.session_state['rewrite_in_progress'] = (
-         st.session_state['rewrite_in_progress'] + word
-     )
-
- allow_multi_word = st.checkbox("Allow multi-word predictions", value=False)
-
- for i, (col, token) in enumerate(zip(st.columns(len(tokens)), tokens)):
-     with col:
-         if not allow_multi_word and ' ' in token[1:]:
-             token = token[0] + token[1:].split(' ', 1)[0]
-         st.button(token, on_click=append_token, args=(token,), key=i)
-
pages/2_Highlights.py CHANGED
@@ -2,20 +2,6 @@ import streamlit as st
  import pandas as pd
  import html

- model_options = [
-     'API',
-     'google/gemma-1.1-2b-it',
-     'google/gemma-1.1-7b-it'
- ]
-
- if False:
-     model_name = st.selectbox("Select a model", model_options + ['other'])
-
-     if model_name == 'other':
-         model_name = st.text_input("Enter model name", model_options[0])
- else:
-     model_name = model_options[0]
-
  @st.cache_resource
  def get_tokenizer(model_name):
      from transformers import AutoTokenizer
@@ -29,11 +15,6 @@ def get_model(model_name):
      print(f"Loaded model, {model.num_parameters():,d} parameters.")
      return model

- prompt = st.text_area("Prompt", "Rewrite this document to be more clear and concise.")
- doc = st.text_area("Document", placeholder="Paste your document here.")
- updated_doc = st.text_area("Updated Doc", placeholder="Your edited document. Leave this blank to use your original document.")
-
-
  def get_spans_local(prompt, doc, updated_doc):
      import torch

@@ -76,37 +57,3 @@ def get_spans_local(prompt, doc, updated_doc):
          ))
          length_so_far += len(token)
      return spans
-
- def get_highlights_api(prompt, doc, updated_doc):
-     # Make a request to the API. prompt and doc are query parameters:
-     # https://tools.kenarnold.org/api/highlights?prompt=Rewrite%20this%20document&doc=This%20is%20a%20document
-     # The response is a JSON array
-     import requests
-     response = requests.get("https://tools.kenarnold.org/api/highlights", params=dict(prompt=prompt, doc=doc, updated_doc=updated_doc))
-     return response.json()['highlights']
-
- if model_name == 'API':
-     spans = get_highlights_api(prompt, doc, updated_doc)
- else:
-     spans = get_spans_local(prompt, doc, updated_doc)
-
- if len(spans) < 2:
-     st.write("No spans found.")
-     st.stop()
-
- highest_loss = max(span['token_loss'] for span in spans[1:])
- for span in spans:
-     span['loss_ratio'] = span['token_loss'] / highest_loss
-
- html_out = ''
- for span in spans:
-     is_different = span['token'] != span['most_likely_token']
-     html_out += '<span style="color: {color}" title="{title}">{orig_token}</span>'.format(
-         color="blue" if is_different else "black",
-         title=html.escape(span["most_likely_token"]).replace('\n', ' '),
-         orig_token=html.escape(span["token"]).replace('\n', '<br>')
-     )
- html_out = f"<p style=\"background: white;\">{html_out}</p>"
-
- st.write(html_out, unsafe_allow_html=True)
- st.write(pd.DataFrame(spans)[['token', 'token_loss', 'most_likely_token', 'loss_ratio']])
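Both backends — the local get_spans_local kept in this page and the API call that moved into app.py — feed the same rendering code, which expects a list of span dicts. A hedged illustration of that shape, with made-up values, using only the fields the removed code reads:

# Illustrative only: 'token', 'token_loss', and 'most_likely_token' come from the
# backend; 'loss_ratio' is derived exactly as in the removed rendering code.
spans = [
    {"token": "The", "token_loss": 0.10, "most_likely_token": "The"},
    {"token": " meeting", "token_loss": 2.40, "most_likely_token": " quick"},
    {"token": " happened", "token_loss": 1.10, "most_likely_token": " was"},
]
highest_loss = max(span["token_loss"] for span in spans[1:])  # first span is skipped
for span in spans:
    span["loss_ratio"] = span["token_loss"] / highest_loss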