Uwais committed on
Commit
60d2d8a
•
1 Parent(s): 50b327c

updating organise demo to take pdfs and some design changes

Browse files
pages/1_🏷_Label_Clause_Demo.py CHANGED
@@ -43,8 +43,6 @@ st.write("""
43
  This demo shows how AI can be used to label text.
44
  We've trained an AI model to label a clause by its clause type.
45
  """)
46
- st.write("**👈 Enter a clause on the left** and hit the button **Label Clause** to see the demo in action")
47
-
48
 
49
  @st.cache(allow_output_mutation=True)
50
  def load_model():
@@ -61,8 +59,9 @@ def get_prediction_prob(text):
61
  return y_pred, y_probs
62
 
63
 
64
- text = st.sidebar.text_area(label='Enter Clause Text', value=EXAMPLE_TEXT, height=250)
65
- button = st.sidebar.button('**Label Clause**', type='primary', use_container_width=True)
 
66
 
67
  with st.spinner('⚙️ Loading model...'):
68
  model = load_model()
 
43
  This demo shows how AI can be used to label text.
44
  We've trained an AI model to label a clause by its clause type.
45
  """)
 
 
46
 
47
  @st.cache(allow_output_mutation=True)
48
  def load_model():
 
59
  return y_pred, y_probs
60
 
61
 
62
+ st.markdown('### 🖊 Enter clause text')
63
+ text = st.text_area(label='**Enter Clause Text**', label_visibility='collapsed', value=EXAMPLE_TEXT, height=100)
64
+ button = st.button('**Label Clause**', type='primary', use_container_width=True)
65
 
66
  with st.spinner('⚙️ Loading model...'):
67
  model = load_model()
pages/2_🏷_Label_Contract_Demo.py CHANGED
@@ -67,8 +67,6 @@ st.write("""
67
  This demo shows how AI can be used to label text.
68
  We've trained an AI model to label a contract by its contract type.
69
  """)
70
- st.write("**👈 Enter a contract on the left** and hit the button **Label Contract** to see the demo in action")
71
-
72
 
73
  @st.cache(allow_output_mutation=True)
74
  def load_model():
@@ -90,8 +88,9 @@ with st.spinner('⚙️ Loading model...'):
90
 
91
  classes = [s.title() for s in model.classes_]
92
 
93
- text = st.sidebar.text_area('Enter Contract Text', value=EXAMPLE_TEXT, height=250)
94
- button = st.sidebar.button('Label Contract', type='primary', use_container_width=True)
 
95
 
96
  if button:
97
  text = text[:250]
 
67
  This demo shows how AI can be used to label text.
68
  We've trained an AI model to label a contract by its contract type.
69
  """)
 
 
70
 
71
  @st.cache(allow_output_mutation=True)
72
  def load_model():
 
88
 
89
  classes = [s.title() for s in model.classes_]
90
 
91
+ st.markdown("### 🖊 Enter contract text")
92
+ text = st.text_area('Enter Contract Text', label_visibility='collapsed', value=EXAMPLE_TEXT, height=250)
93
+ button = st.button('Label Contract', type='primary', use_container_width=True)
94
 
95
  if button:
96
  text = text[:250]
pages/3_⛏_Extract_Demo.py CHANGED
@@ -48,8 +48,6 @@ st.write("""
48
  This demo shows how AI can be used to extract information from text.
49
  We've trained an AI model to extract key pieces of information from a contract recital.
50
  """)
51
- st.write("**👈 Enter a contract recital on the left** and hit the button **Extract Data** to see the demo in action")
52
-
53
 
54
  @st.cache(allow_output_mutation=True)
55
  def load_model():
@@ -57,9 +55,9 @@ def load_model():
57
  nlp = spacy.load('model-best')
58
  return nlp
59
 
60
-
61
- text = st.sidebar.text_area('Enter Clause Text', value=EXAMPLE_TEXT, height=250)
62
- button = st.sidebar.button('Extract Data', type='primary', use_container_width=True)
63
 
64
  with st.spinner('⚙️ Loading model...'):
65
  nlp = load_model()
 
48
  This demo shows how AI can be used to extract information from text.
49
  We've trained an AI model to extract key pieces of information from a contract recital.
50
  """)
 
 
51
 
52
  @st.cache(allow_output_mutation=True)
53
  def load_model():
 
55
  nlp = spacy.load('model-best')
56
  return nlp
57
 
58
+ st.markdown('### 🖊 Enter a contract recital')
59
+ text = st.text_area('Enter Clause Text', label_visibility='collapsed', value=EXAMPLE_TEXT, height=100)
60
+ button = st.button('Extract Data', type='primary', use_container_width=True)
61
 
62
  with st.spinner('⚙️ Loading model...'):
63
  nlp = load_model()
pages/4_🔗_Compare_Demo.py CHANGED
@@ -31,7 +31,6 @@ add_logo_to_sidebar()
31
 
32
  st.title('🔗 Compare Demo')
33
  st.write("""This demo shows how AI can be used to compare passages of text.""")
34
- st.write("**👈 Enter two passages of text on the left** and hit the button **Compare** to see the demo in action")
35
 
36
  with st.spinner('⚙️ Loading model...'):
37
  nlp = load_model()
@@ -41,10 +40,15 @@ State of Delaware without regard to its conflicts of law provisions."""
41
 
42
  EXAMPLE_TEXT_2 = """This agreement will be governed by and must be construed in accordance with the laws of the State of Israel."""
43
 
44
- text_1 = st.sidebar.text_area('Enter a passage of text', value=EXAMPLE_TEXT_1, height=150, key='input1')
45
- text_2 = st.sidebar.text_area('Enter a second passage of text', value=EXAMPLE_TEXT_2, height=150, key='input2')
 
 
 
 
 
46
 
47
- button = st.sidebar.button('Compare', type='primary', use_container_width=True)
48
 
49
 
50
  def get_tokens(doc):
 
31
 
32
  st.title('🔗 Compare Demo')
33
  st.write("""This demo shows how AI can be used to compare passages of text.""")
 
34
 
35
  with st.spinner('⚙️ Loading model...'):
36
  nlp = load_model()
 
40
 
41
  EXAMPLE_TEXT_2 = """This agreement will be governed by and must be construed in accordance with the laws of the State of Israel."""
42
 
43
+ col1, col2 = st.columns(2)
44
+ with col1:
45
+ st.markdown('### 🖊 Enter a passage of text')
46
+ text_1 = st.text_area('Enter a passage of text', label_visibility='collapsed', value=EXAMPLE_TEXT_1, height=100, key='input1')
47
+ with col2:
48
+ st.markdown('### 🖊 Enter a second passage of text')
49
+ text_2 = st.text_area('Enter a second passage of text', label_visibility='collapsed', value=EXAMPLE_TEXT_2, height=100, key='input2')
50
 
51
+ button = st.button('Compare', type='primary', use_container_width=True)
52
 
53
 
54
  def get_tokens(doc):
pages/5_🗂_Organise_Demo.py CHANGED
@@ -1,8 +1,10 @@
1
  import os
 
 
2
  import joblib
3
 
4
  from copy import deepcopy
5
-
6
  import pandas as pd
7
  import plotly.express as px
8
 
@@ -33,18 +35,19 @@ st.set_page_config(
33
  )
34
 
35
  add_logo_to_sidebar()
36
- st.sidebar.success("👆 Select a demo above.")
37
 
38
  st.title('🗂 Organise Demo')
39
  st.write("""
40
- This demo shows how AI can be used to organise contracts.
41
- We've trained a model to group contracts into similar types.
42
  The plot below shows a sample set of contracts that have been automatically grouped together.
43
  Each point in the plot represents how the model interprets a contract, the closer together a pair of points are, the more similar they appear to the model.
44
  Similar documents are grouped by color.
45
  \n**TIP:** Hover over each point to see the filename of the contract. Groups can be added or removed by clicking on the symbol in the plot legend.
46
  """)
47
- st.write("**👈 Upload your own contracts on the left (as .txt files)** and hit the button **Organise Data** to see how your own contracts can be grouped together")
 
 
48
 
49
  @st.cache(allow_output_mutation=True)
50
  def load_model():
@@ -53,24 +56,27 @@ def load_model():
53
  )
54
  return model
55
 
 
56
  @st.cache(allow_output_mutation=True)
57
  def load_dataset():
58
  snapshot_download(repo_id=DATA_REPO_ID, token=HF_TOKEN, local_dir='./', repo_type='dataset')
59
  df = pd.read_json(DATA_FILENAME)
60
  return df
61
 
 
62
  def get_transform_and_predictions(model, X):
63
  y = model.predict(X)
64
  X_transform = model[:2].transform(X)
65
  return X_transform, y
66
 
 
67
  def generate_plot(X, y, filenames):
68
  fig = px.scatter_3d(
69
- x=X[:,0],
70
- y=X[:,1],
71
- z=X[:,2],
72
  color=[str(y_i) for y_i in y], hover_name=filenames)
73
-
74
  fig.update_traces(
75
  marker_size=8,
76
  marker_line=dict(width=2),
@@ -91,6 +97,7 @@ def generate_plot(X, y, filenames):
91
 
92
  return fig
93
 
 
94
  @st.cache(allow_output_mutation=True)
95
  def prepare_figure(model, df):
96
  X = [text[:500] for text in df['text'].to_list()]
@@ -102,6 +109,7 @@ def prepare_figure(model, df):
102
 
103
  return fig
104
 
 
105
  @st.cache()
106
  def prepare_page():
107
  model = load_model()
@@ -117,31 +125,43 @@ def prepare_page():
117
  return fig, model
118
 
119
 
120
- uploaded_files = st.sidebar.file_uploader("Select contracts to organise ", accept_multiple_files=True)
 
 
121
 
122
- button = st.sidebar.button('Organise Contracts', type='primary', use_container_width=True)
123
 
124
  with st.spinner('⚙️ Loading model...'):
125
  fig, cuad_tfidf_umap_kmeans = prepare_page()
126
  figure = st.plotly_chart(fig, use_container_width=True)
127
 
128
- if button:
129
- figure.empty()
130
 
131
- with st.spinner('⚙️ Training model...'):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
133
- if not uploaded_files or not len(uploaded_files) > 2:
134
- st.write(
135
- "**Please add at least three contracts**"
136
- )
137
- else:
138
  if len(uploaded_files) < 10:
139
  n_clusters = 3
140
  else:
141
  n_clusters = 8
142
-
143
- X_train = [uploaded_file.read()[:500] for uploaded_file in uploaded_files]
144
- filenames = [uploaded_file.name for uploaded_file in uploaded_files]
145
 
146
  tfidf_umap_kmeans = deepcopy(cuad_tfidf_umap_kmeans)
147
  tfidf_umap_kmeans.set_params(kmeans__n_clusters=n_clusters)
@@ -149,14 +169,15 @@ if button:
149
 
150
  X_transform, y = get_transform_and_predictions(cuad_tfidf_umap_kmeans, X_train)
151
 
152
- fig = generate_plot(X_transform, y, filenames)
 
 
 
 
153
 
154
- st.write("**Your organised contracts:**")
155
 
156
- st.plotly_chart(fig, use_container_width=True)
157
-
158
  add_email_signup_form()
159
 
160
  add_footer()
161
 
162
- streamlit_analytics.stop_tracking(unsafe_password=os.environ["ANALYTICS_PASSWORD"])
 
1
  import os
2
+ from io import StringIO
3
+
4
  import joblib
5
 
6
  from copy import deepcopy
7
+ from pypdf import PdfReader
8
  import pandas as pd
9
  import plotly.express as px
10
 
 
35
  )
36
 
37
  add_logo_to_sidebar()
 
38
 
39
  st.title('🗂 Organise Demo')
40
  st.write("""
41
+ This demo shows how AI can be used to organise a collection of texts.
42
+ We've trained a model to group documents into similar types.
43
  The plot below shows a sample set of contracts that have been automatically grouped together.
44
  Each point in the plot represents how the model interprets a contract, the closer together a pair of points are, the more similar they appear to the model.
45
  Similar documents are grouped by color.
46
  \n**TIP:** Hover over each point to see the filename of the contract. Groups can be added or removed by clicking on the symbol in the plot legend.
47
  """)
48
+
49
+ st.info("👈 Upload your own documents on the left (as .txt or .pdf files) to see how your own documents can be organised using AI.")
50
+
51
 
52
  @st.cache(allow_output_mutation=True)
53
  def load_model():
 
56
  )
57
  return model
58
 
59
+
60
  @st.cache(allow_output_mutation=True)
61
  def load_dataset():
62
  snapshot_download(repo_id=DATA_REPO_ID, token=HF_TOKEN, local_dir='./', repo_type='dataset')
63
  df = pd.read_json(DATA_FILENAME)
64
  return df
65
 
66
+
67
  def get_transform_and_predictions(model, X):
68
  y = model.predict(X)
69
  X_transform = model[:2].transform(X)
70
  return X_transform, y
71
 
72
+
73
  def generate_plot(X, y, filenames):
74
  fig = px.scatter_3d(
75
+ x=X[:, 0],
76
+ y=X[:, 1],
77
+ z=X[:, 2],
78
  color=[str(y_i) for y_i in y], hover_name=filenames)
79
+
80
  fig.update_traces(
81
  marker_size=8,
82
  marker_line=dict(width=2),
 
97
 
98
  return fig
99
 
100
+
101
  @st.cache(allow_output_mutation=True)
102
  def prepare_figure(model, df):
103
  X = [text[:500] for text in df['text'].to_list()]
 
109
 
110
  return fig
111
 
112
+
113
  @st.cache()
114
  def prepare_page():
115
  model = load_model()
 
125
  return fig, model
126
 
127
 
128
+ uploaded_files = st.sidebar.file_uploader("Upload your documents", accept_multiple_files=True,
129
+ type=['pdf', 'txt'],
130
+ help="Upload your own documents. Don't worry we don't store any data.")
131
 
132
+ # button = st.sidebar.button('Organise Contracts', type='primary', use_container_width=True)
133
 
134
  with st.spinner('⚙️ Loading model...'):
135
  fig, cuad_tfidf_umap_kmeans = prepare_page()
136
  figure = st.plotly_chart(fig, use_container_width=True)
137
 
 
 
138
 
139
+ if uploaded_files:
140
+ figure.empty()
141
+ filenames = []
142
+ X_train = []
143
+ if len(uploaded_files) < 5:
144
+ st.error('### 💔 Please upload more than 4 files.')
145
+ else:
146
+ with st.spinner('⚙️ Training model...'):
147
+ for uploaded_file in uploaded_files:
148
+ print(uploaded_file.name)
149
+ if '.pdf' in uploaded_file.name.lower():
150
+ reader = PdfReader(uploaded_file)
151
+ page_texts = [page.extract_text() for page in reader.pages]
152
+ text = "\n".join(page_texts)
153
+
154
+ if '.txt' in uploaded_file.name.lower():
155
+ stringio = StringIO(uploaded_file.getvalue().decode("utf-8"))
156
+ text = stringio.read()
157
+
158
+ X_train.append(text[:500])
159
+ filenames.append(uploaded_file.name)
160
 
 
 
 
 
 
161
  if len(uploaded_files) < 10:
162
  n_clusters = 3
163
  else:
164
  n_clusters = 8
 
 
 
165
 
166
  tfidf_umap_kmeans = deepcopy(cuad_tfidf_umap_kmeans)
167
  tfidf_umap_kmeans.set_params(kmeans__n_clusters=n_clusters)
 
169
 
170
  X_transform, y = get_transform_and_predictions(cuad_tfidf_umap_kmeans, X_train)
171
 
172
+ fig = generate_plot(X_transform, y, filenames)
173
+
174
+ st.markdown("## 🗂 Your Organised Documents")
175
+
176
+ st.plotly_chart(fig, use_container_width=True)
177
 
 
178
 
 
 
179
  add_email_signup_form()
180
 
181
  add_footer()
182
 
183
+ streamlit_analytics.stop_tracking(unsafe_password=os.environ["ANALYTICS_PASSWORD"])
requirements.txt CHANGED
@@ -35,6 +35,7 @@ pyarrow==11.0.0
35
  pydeck==0.8.0
36
  Pygments==2.14.0
37
  Pympler==1.0.1
 
38
  pyrsistent==0.19.3
39
  python-dateutil==2.8.2
40
  pytz==2022.7.1
 
35
  pydeck==0.8.0
36
  Pygments==2.14.0
37
  Pympler==1.0.1
38
+ pypdf==3.7.1
39
  pyrsistent==0.19.3
40
  python-dateutil==2.8.2
41
  pytz==2022.7.1
utils.py CHANGED
@@ -52,12 +52,14 @@ def add_share_to_twitter_button():
52
 
53
 
54
  def add_footer():
55
- st.markdown("""
56
  ### 🙋‍♂️ Interested in building out your own tailored Legal AI solutions?
57
  - 🌐 Check out our [website](https://simplexico.ai)
58
  - 📞 Book a call with [us](https://calendly.com/uwais-iqbal/discovery-call)
59
  - ✉️ Send us an [email](mailto:[email protected])
60
-
 
 
61
  #### 🙌 Follow Us on Social Media - [🐥 Twitter](https://twitter.com/_simplexico) | [💼 LinkedIn](https://www.linkedin.com/company/simplexico/?viewAsMember=true)
62
  """)
63
 
 
52
 
53
 
54
  def add_footer():
55
+ st.info("""
56
  ### 🙋‍♂️ Interested in building out your own tailored Legal AI solutions?
57
  - 🌐 Check out our [website](https://simplexico.ai)
58
  - 📞 Book a call with [us](https://calendly.com/uwais-iqbal/discovery-call)
59
  - ✉️ Send us an [email](mailto:[email protected])
60
+ """)
61
+
62
+ st.success("""
63
  #### 🙌 Follow Us on Social Media - [🐥 Twitter](https://twitter.com/_simplexico) | [💼 LinkedIn](https://www.linkedin.com/company/simplexico/?viewAsMember=true)
64
  """)
65
 
🏡_Home.py CHANGED
@@ -23,7 +23,7 @@ add_logo_to_sidebar()
23
 
24
  st.title("👋 Welcome - Legal AI Demos from simplexico!")
25
 
26
- st.sidebar.success("👆 Select a demo above.")
27
 
28
  st.markdown(
29
  """
@@ -40,13 +40,13 @@ st.markdown(
40
  - 🔎 **Find** - Using AI to **find** relevant information from a collection of texts
41
  - ✍️ **Draft** - Using AI to **draft** text
42
  - 📝 **Summarise** - Using AI to **summarise** text
43
-
44
- 📢 FYI - These demos are to help you understand AI better. The AI models have not been optimised for prediction performance.
45
-
46
- ### 👈 Select a demo from the sidebar to see some examples of what Legal AI can do!
47
-
48
  """)
49
 
 
 
 
 
 
50
  add_email_signup_form()
51
 
52
  st.markdown(
 
23
 
24
  st.title("👋 Welcome - Legal AI Demos from simplexico!")
25
 
26
+ # st.sidebar.success("👆 Select a demo above.")
27
 
28
  st.markdown(
29
  """
 
40
  - 🔎 **Find** - Using AI to **find** relevant information from a collection of texts
41
  - ✍️ **Draft** - Using AI to **draft** text
42
  - 📝 **Summarise** - Using AI to **summarise** text
 
 
 
 
 
43
  """)
44
 
45
+ st.warning(" 📢 These demos are to help you understand AI better. The AI models have not been optimised for prediction performance. "
46
+ "Read more about what went into making these demos in our [blog post](https://www.simplexico.ai/blog/legal-ai-demos-intro).")
47
+
48
+ st.info("#### 👈 Select a demo from the sidebar to see some examples of what Legal AI can do!")
49
+
50
  add_email_signup_form()
51
 
52
  st.markdown(