Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
updating organise demo to take pdfs and some design changes
Browse files- pages/1_π·_Label_Clause_Demo.py +3 -4
- pages/2_π·_Label_Contract_Demo.py +3 -4
- pages/3_β_Extract_Demo.py +3 -5
- pages/4_π_Compare_Demo.py +8 -4
- pages/5_π_Organise_Demo.py +48 -27
- requirements.txt +1 -0
- utils.py +4 -2
- π‘_Home.py +6 -6
pages/1_π·_Label_Clause_Demo.py
CHANGED
@@ -43,8 +43,6 @@ st.write("""
|
|
43 |
This demo shows how AI can be used to label text.
|
44 |
We've trained an AI model to label a clause by its clause type.
|
45 |
""")
|
46 |
-
st.write("**π Enter a clause on the left** and hit the button **Label Clause** to see the demo in action")
|
47 |
-
|
48 |
|
49 |
@st.cache(allow_output_mutation=True)
|
50 |
def load_model():
|
@@ -61,8 +59,9 @@ def get_prediction_prob(text):
|
|
61 |
return y_pred, y_probs
|
62 |
|
63 |
|
64 |
-
|
65 |
-
|
|
|
66 |
|
67 |
with st.spinner('βοΈ Loading model...'):
|
68 |
model = load_model()
|
|
|
43 |
This demo shows how AI can be used to label text.
|
44 |
We've trained an AI model to label a clause by its clause type.
|
45 |
""")
|
|
|
|
|
46 |
|
47 |
@st.cache(allow_output_mutation=True)
|
48 |
def load_model():
|
|
|
59 |
return y_pred, y_probs
|
60 |
|
61 |
|
62 |
+
st.markdown('### π Enter clause text')
|
63 |
+
text = st.text_area(label='**Enter Clause Text**', label_visibility='collapsed', value=EXAMPLE_TEXT, height=100)
|
64 |
+
button = st.button('**Label Clause**', type='primary', use_container_width=True)
|
65 |
|
66 |
with st.spinner('βοΈ Loading model...'):
|
67 |
model = load_model()
|
pages/2_π·_Label_Contract_Demo.py
CHANGED
@@ -67,8 +67,6 @@ st.write("""
|
|
67 |
This demo shows how AI can be used to label text.
|
68 |
We've trained an AI model to label a contract by its contract type.
|
69 |
""")
|
70 |
-
st.write("**π Enter a contract on the left** and hit the button **Label Contract** to see the demo in action")
|
71 |
-
|
72 |
|
73 |
@st.cache(allow_output_mutation=True)
|
74 |
def load_model():
|
@@ -90,8 +88,9 @@ with st.spinner('βοΈ Loading model...'):
|
|
90 |
|
91 |
classes = [s.title() for s in model.classes_]
|
92 |
|
93 |
-
|
94 |
-
|
|
|
95 |
|
96 |
if button:
|
97 |
text = text[:250]
|
|
|
67 |
This demo shows how AI can be used to label text.
|
68 |
We've trained an AI model to label a contract by its contract type.
|
69 |
""")
|
|
|
|
|
70 |
|
71 |
@st.cache(allow_output_mutation=True)
|
72 |
def load_model():
|
|
|
88 |
|
89 |
classes = [s.title() for s in model.classes_]
|
90 |
|
91 |
+
st.markdown("### π Enter contract text")
|
92 |
+
text = st.text_area('Enter Contract Text', label_visibility='collapsed', value=EXAMPLE_TEXT, height=250)
|
93 |
+
button = st.button('Label Contract', type='primary', use_container_width=True)
|
94 |
|
95 |
if button:
|
96 |
text = text[:250]
|
pages/3_β_Extract_Demo.py
CHANGED
@@ -48,8 +48,6 @@ st.write("""
|
|
48 |
This demo shows how AI can be used to extract information from text.
|
49 |
We've trained an AI model to extract key pieces of information from a contract recital.
|
50 |
""")
|
51 |
-
st.write("**π Enter a contract recital on the left** and hit the button **Extract Data** to see the demo in action")
|
52 |
-
|
53 |
|
54 |
@st.cache(allow_output_mutation=True)
|
55 |
def load_model():
|
@@ -57,9 +55,9 @@ def load_model():
|
|
57 |
nlp = spacy.load('model-best')
|
58 |
return nlp
|
59 |
|
60 |
-
|
61 |
-
text = st.
|
62 |
-
button = st.
|
63 |
|
64 |
with st.spinner('βοΈ Loading model...'):
|
65 |
nlp = load_model()
|
|
|
48 |
This demo shows how AI can be used to extract information from text.
|
49 |
We've trained an AI model to extract key pieces of information from a contract recital.
|
50 |
""")
|
|
|
|
|
51 |
|
52 |
@st.cache(allow_output_mutation=True)
|
53 |
def load_model():
|
|
|
55 |
nlp = spacy.load('model-best')
|
56 |
return nlp
|
57 |
|
58 |
+
st.markdown('### π Enter a contract recital')
|
59 |
+
text = st.text_area('Enter Clause Text', label_visibility='collapsed', value=EXAMPLE_TEXT, height=100)
|
60 |
+
button = st.button('Extract Data', type='primary', use_container_width=True)
|
61 |
|
62 |
with st.spinner('βοΈ Loading model...'):
|
63 |
nlp = load_model()
|
pages/4_π_Compare_Demo.py
CHANGED
@@ -31,7 +31,6 @@ add_logo_to_sidebar()
|
|
31 |
|
32 |
st.title('π Compare Demo')
|
33 |
st.write("""This demo shows how AI can be used to compare passages of text.""")
|
34 |
-
st.write("**π Enter two passages of text on the left** and hit the button **Compare** to see the demo in action")
|
35 |
|
36 |
with st.spinner('βοΈ Loading model...'):
|
37 |
nlp = load_model()
|
@@ -41,10 +40,15 @@ State of Delaware without regard to its conflicts of law provisions."""
|
|
41 |
|
42 |
EXAMPLE_TEXT_2 = """This agreement will be governed by and must be construed in accordance with the laws of the State of Israel."""
|
43 |
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
-
button = st.
|
48 |
|
49 |
|
50 |
def get_tokens(doc):
|
|
|
31 |
|
32 |
st.title('π Compare Demo')
|
33 |
st.write("""This demo shows how AI can be used to compare passages of text.""")
|
|
|
34 |
|
35 |
with st.spinner('βοΈ Loading model...'):
|
36 |
nlp = load_model()
|
|
|
40 |
|
41 |
EXAMPLE_TEXT_2 = """This agreement will be governed by and must be construed in accordance with the laws of the State of Israel."""
|
42 |
|
43 |
+
col1, col2 = st.columns(2)
|
44 |
+
with col1:
|
45 |
+
st.markdown('### π Enter a passage of text')
|
46 |
+
text_1 = st.text_area('Enter a passage of text', label_visibility='collapsed', value=EXAMPLE_TEXT_1, height=100, key='input1')
|
47 |
+
with col2:
|
48 |
+
st.markdown('### π Enter a second passage of text')
|
49 |
+
text_2 = st.text_area('Enter a second passage of text', label_visibility='collapsed', value=EXAMPLE_TEXT_2, height=100, key='input2')
|
50 |
|
51 |
+
button = st.button('Compare', type='primary', use_container_width=True)
|
52 |
|
53 |
|
54 |
def get_tokens(doc):
|
pages/5_π_Organise_Demo.py
CHANGED
@@ -1,8 +1,10 @@
|
|
1 |
import os
|
|
|
|
|
2 |
import joblib
|
3 |
|
4 |
from copy import deepcopy
|
5 |
-
|
6 |
import pandas as pd
|
7 |
import plotly.express as px
|
8 |
|
@@ -33,18 +35,19 @@ st.set_page_config(
|
|
33 |
)
|
34 |
|
35 |
add_logo_to_sidebar()
|
36 |
-
st.sidebar.success("π Select a demo above.")
|
37 |
|
38 |
st.title('π Organise Demo')
|
39 |
st.write("""
|
40 |
-
This demo shows how AI can be used to organise
|
41 |
-
We've trained a model to group
|
42 |
The plot below shows a sample set of contracts that have been automatically grouped together.
|
43 |
Each point in the plot represents how the model interprets a contract, the closer together a pair of points are, the more similar they appear to the model.
|
44 |
Similar documents are grouped by color.
|
45 |
\n**TIP:** Hover over each point to see the filename of the contract. Groups can be added or removed by clicking on the symbol in the plot legend.
|
46 |
""")
|
47 |
-
|
|
|
|
|
48 |
|
49 |
@st.cache(allow_output_mutation=True)
|
50 |
def load_model():
|
@@ -53,24 +56,27 @@ def load_model():
|
|
53 |
)
|
54 |
return model
|
55 |
|
|
|
56 |
@st.cache(allow_output_mutation=True)
|
57 |
def load_dataset():
|
58 |
snapshot_download(repo_id=DATA_REPO_ID, token=HF_TOKEN, local_dir='./', repo_type='dataset')
|
59 |
df = pd.read_json(DATA_FILENAME)
|
60 |
return df
|
61 |
|
|
|
62 |
def get_transform_and_predictions(model, X):
|
63 |
y = model.predict(X)
|
64 |
X_transform = model[:2].transform(X)
|
65 |
return X_transform, y
|
66 |
|
|
|
67 |
def generate_plot(X, y, filenames):
|
68 |
fig = px.scatter_3d(
|
69 |
-
x=X[:,0],
|
70 |
-
y=X[:,1],
|
71 |
-
z=X[:,2],
|
72 |
color=[str(y_i) for y_i in y], hover_name=filenames)
|
73 |
-
|
74 |
fig.update_traces(
|
75 |
marker_size=8,
|
76 |
marker_line=dict(width=2),
|
@@ -91,6 +97,7 @@ def generate_plot(X, y, filenames):
|
|
91 |
|
92 |
return fig
|
93 |
|
|
|
94 |
@st.cache(allow_output_mutation=True)
|
95 |
def prepare_figure(model, df):
|
96 |
X = [text[:500] for text in df['text'].to_list()]
|
@@ -102,6 +109,7 @@ def prepare_figure(model, df):
|
|
102 |
|
103 |
return fig
|
104 |
|
|
|
105 |
@st.cache()
|
106 |
def prepare_page():
|
107 |
model = load_model()
|
@@ -117,31 +125,43 @@ def prepare_page():
|
|
117 |
return fig, model
|
118 |
|
119 |
|
120 |
-
uploaded_files = st.sidebar.file_uploader("
|
|
|
|
|
121 |
|
122 |
-
button = st.sidebar.button('Organise Contracts', type='primary', use_container_width=True)
|
123 |
|
124 |
with st.spinner('βοΈ Loading model...'):
|
125 |
fig, cuad_tfidf_umap_kmeans = prepare_page()
|
126 |
figure = st.plotly_chart(fig, use_container_width=True)
|
127 |
|
128 |
-
if button:
|
129 |
-
figure.empty()
|
130 |
|
131 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
|
133 |
-
if not uploaded_files or not len(uploaded_files) > 2:
|
134 |
-
st.write(
|
135 |
-
"**Please add at least three contracts**"
|
136 |
-
)
|
137 |
-
else:
|
138 |
if len(uploaded_files) < 10:
|
139 |
n_clusters = 3
|
140 |
else:
|
141 |
n_clusters = 8
|
142 |
-
|
143 |
-
X_train = [uploaded_file.read()[:500] for uploaded_file in uploaded_files]
|
144 |
-
filenames = [uploaded_file.name for uploaded_file in uploaded_files]
|
145 |
|
146 |
tfidf_umap_kmeans = deepcopy(cuad_tfidf_umap_kmeans)
|
147 |
tfidf_umap_kmeans.set_params(kmeans__n_clusters=n_clusters)
|
@@ -149,14 +169,15 @@ if button:
|
|
149 |
|
150 |
X_transform, y = get_transform_and_predictions(cuad_tfidf_umap_kmeans, X_train)
|
151 |
|
152 |
-
|
|
|
|
|
|
|
|
|
153 |
|
154 |
-
st.write("**Your organised contracts:**")
|
155 |
|
156 |
-
st.plotly_chart(fig, use_container_width=True)
|
157 |
-
|
158 |
add_email_signup_form()
|
159 |
|
160 |
add_footer()
|
161 |
|
162 |
-
streamlit_analytics.stop_tracking(unsafe_password=os.environ["ANALYTICS_PASSWORD"])
|
|
|
1 |
import os
|
2 |
+
from io import StringIO
|
3 |
+
|
4 |
import joblib
|
5 |
|
6 |
from copy import deepcopy
|
7 |
+
from pypdf import PdfReader
|
8 |
import pandas as pd
|
9 |
import plotly.express as px
|
10 |
|
|
|
35 |
)
|
36 |
|
37 |
add_logo_to_sidebar()
|
|
|
38 |
|
39 |
st.title('π Organise Demo')
|
40 |
st.write("""
|
41 |
+
This demo shows how AI can be used to organise a collection of texts.
|
42 |
+
We've trained a model to group documents into similar types.
|
43 |
The plot below shows a sample set of contracts that have been automatically grouped together.
|
44 |
Each point in the plot represents how the model interprets a contract, the closer together a pair of points are, the more similar they appear to the model.
|
45 |
Similar documents are grouped by color.
|
46 |
\n**TIP:** Hover over each point to see the filename of the contract. Groups can be added or removed by clicking on the symbol in the plot legend.
|
47 |
""")
|
48 |
+
|
49 |
+
st.info("π Upload your own documents on the left (as .txt or .pdf files) to see how your own documents can be organised using AI.")
|
50 |
+
|
51 |
|
52 |
@st.cache(allow_output_mutation=True)
|
53 |
def load_model():
|
|
|
56 |
)
|
57 |
return model
|
58 |
|
59 |
+
|
60 |
@st.cache(allow_output_mutation=True)
|
61 |
def load_dataset():
|
62 |
snapshot_download(repo_id=DATA_REPO_ID, token=HF_TOKEN, local_dir='./', repo_type='dataset')
|
63 |
df = pd.read_json(DATA_FILENAME)
|
64 |
return df
|
65 |
|
66 |
+
|
67 |
def get_transform_and_predictions(model, X):
|
68 |
y = model.predict(X)
|
69 |
X_transform = model[:2].transform(X)
|
70 |
return X_transform, y
|
71 |
|
72 |
+
|
73 |
def generate_plot(X, y, filenames):
|
74 |
fig = px.scatter_3d(
|
75 |
+
x=X[:, 0],
|
76 |
+
y=X[:, 1],
|
77 |
+
z=X[:, 2],
|
78 |
color=[str(y_i) for y_i in y], hover_name=filenames)
|
79 |
+
|
80 |
fig.update_traces(
|
81 |
marker_size=8,
|
82 |
marker_line=dict(width=2),
|
|
|
97 |
|
98 |
return fig
|
99 |
|
100 |
+
|
101 |
@st.cache(allow_output_mutation=True)
|
102 |
def prepare_figure(model, df):
|
103 |
X = [text[:500] for text in df['text'].to_list()]
|
|
|
109 |
|
110 |
return fig
|
111 |
|
112 |
+
|
113 |
@st.cache()
|
114 |
def prepare_page():
|
115 |
model = load_model()
|
|
|
125 |
return fig, model
|
126 |
|
127 |
|
128 |
+
uploaded_files = st.sidebar.file_uploader("Upload your documents", accept_multiple_files=True,
|
129 |
+
type=['pdf', 'txt'],
|
130 |
+
help="Upload your own documents. Don't worry we don't store any data.")
|
131 |
|
132 |
+
# button = st.sidebar.button('Organise Contracts', type='primary', use_container_width=True)
|
133 |
|
134 |
with st.spinner('βοΈ Loading model...'):
|
135 |
fig, cuad_tfidf_umap_kmeans = prepare_page()
|
136 |
figure = st.plotly_chart(fig, use_container_width=True)
|
137 |
|
|
|
|
|
138 |
|
139 |
+
if uploaded_files:
|
140 |
+
figure.empty()
|
141 |
+
filenames = []
|
142 |
+
X_train = []
|
143 |
+
if len(uploaded_files) < 5:
|
144 |
+
st.error('### π Please upload more than 4 files.')
|
145 |
+
else:
|
146 |
+
with st.spinner('βοΈ Training model...'):
|
147 |
+
for uploaded_file in uploaded_files:
|
148 |
+
print(uploaded_file.name)
|
149 |
+
if '.pdf' in uploaded_file.name.lower():
|
150 |
+
reader = PdfReader(uploaded_file)
|
151 |
+
page_texts = [page.extract_text() for page in reader.pages]
|
152 |
+
text = "\n".join(page_texts)
|
153 |
+
|
154 |
+
if '.txt' in uploaded_file.name.lower():
|
155 |
+
stringio = StringIO(uploaded_file.getvalue().decode("utf-8"))
|
156 |
+
text = stringio.read()
|
157 |
+
|
158 |
+
X_train.append(text[:500])
|
159 |
+
filenames.append(uploaded_file.name)
|
160 |
|
|
|
|
|
|
|
|
|
|
|
161 |
if len(uploaded_files) < 10:
|
162 |
n_clusters = 3
|
163 |
else:
|
164 |
n_clusters = 8
|
|
|
|
|
|
|
165 |
|
166 |
tfidf_umap_kmeans = deepcopy(cuad_tfidf_umap_kmeans)
|
167 |
tfidf_umap_kmeans.set_params(kmeans__n_clusters=n_clusters)
|
|
|
169 |
|
170 |
X_transform, y = get_transform_and_predictions(cuad_tfidf_umap_kmeans, X_train)
|
171 |
|
172 |
+
fig = generate_plot(X_transform, y, filenames)
|
173 |
+
|
174 |
+
st.markdown("## π Your Organised Documents")
|
175 |
+
|
176 |
+
st.plotly_chart(fig, use_container_width=True)
|
177 |
|
|
|
178 |
|
|
|
|
|
179 |
add_email_signup_form()
|
180 |
|
181 |
add_footer()
|
182 |
|
183 |
+
streamlit_analytics.stop_tracking(unsafe_password=os.environ["ANALYTICS_PASSWORD"])
|
requirements.txt
CHANGED
@@ -35,6 +35,7 @@ pyarrow==11.0.0
|
|
35 |
pydeck==0.8.0
|
36 |
Pygments==2.14.0
|
37 |
Pympler==1.0.1
|
|
|
38 |
pyrsistent==0.19.3
|
39 |
python-dateutil==2.8.2
|
40 |
pytz==2022.7.1
|
|
|
35 |
pydeck==0.8.0
|
36 |
Pygments==2.14.0
|
37 |
Pympler==1.0.1
|
38 |
+
pypdf==3.7.1
|
39 |
pyrsistent==0.19.3
|
40 |
python-dateutil==2.8.2
|
41 |
pytz==2022.7.1
|
utils.py
CHANGED
@@ -52,12 +52,14 @@ def add_share_to_twitter_button():
|
|
52 |
|
53 |
|
54 |
def add_footer():
|
55 |
-
st.
|
56 |
### πββοΈ Interested in building out your own tailored Legal AI solutions?
|
57 |
- π Check out our [website](https://simplexico.ai)
|
58 |
- π Book a call with [us](https://calendly.com/uwais-iqbal/discovery-call)
|
59 |
- βοΈ Send us an [email](mailto:[email protected])
|
60 |
-
|
|
|
|
|
61 |
#### π Follow Us on Social Media - [π₯ Twitter](https://twitter.com/_simplexico) | [πΌ LinkedIn](https://www.linkedin.com/company/simplexico/?viewAsMember=true)
|
62 |
""")
|
63 |
|
|
|
52 |
|
53 |
|
54 |
def add_footer():
|
55 |
+
st.info("""
|
56 |
### πββοΈ Interested in building out your own tailored Legal AI solutions?
|
57 |
- π Check out our [website](https://simplexico.ai)
|
58 |
- π Book a call with [us](https://calendly.com/uwais-iqbal/discovery-call)
|
59 |
- βοΈ Send us an [email](mailto:[email protected])
|
60 |
+
""")
|
61 |
+
|
62 |
+
st.success("""
|
63 |
#### π Follow Us on Social Media - [π₯ Twitter](https://twitter.com/_simplexico) | [πΌ LinkedIn](https://www.linkedin.com/company/simplexico/?viewAsMember=true)
|
64 |
""")
|
65 |
|
π‘_Home.py
CHANGED
@@ -23,7 +23,7 @@ add_logo_to_sidebar()
|
|
23 |
|
24 |
st.title("π Welcome - Legal AI Demos from simplexico!")
|
25 |
|
26 |
-
st.sidebar.success("π Select a demo above.")
|
27 |
|
28 |
st.markdown(
|
29 |
"""
|
@@ -40,13 +40,13 @@ st.markdown(
|
|
40 |
- π **Find** - Using AI to **find** relevant information from a collection of texts
|
41 |
- βοΈ **Draft** - Using AI to **draft** text
|
42 |
- π **Summarise** - Using AI to **summarise** text
|
43 |
-
|
44 |
-
π’ FYI - These demos are to help you understand AI better. The AI models have not been optimised for prediction performance.
|
45 |
-
|
46 |
-
### π Select a demo from the sidebar to see some examples of what Legal AI can do!
|
47 |
-
|
48 |
""")
|
49 |
|
|
|
|
|
|
|
|
|
|
|
50 |
add_email_signup_form()
|
51 |
|
52 |
st.markdown(
|
|
|
23 |
|
24 |
st.title("π Welcome - Legal AI Demos from simplexico!")
|
25 |
|
26 |
+
# st.sidebar.success("π Select a demo above.")
|
27 |
|
28 |
st.markdown(
|
29 |
"""
|
|
|
40 |
- π **Find** - Using AI to **find** relevant information from a collection of texts
|
41 |
- βοΈ **Draft** - Using AI to **draft** text
|
42 |
- π **Summarise** - Using AI to **summarise** text
|
|
|
|
|
|
|
|
|
|
|
43 |
""")
|
44 |
|
45 |
+
st.warning(" π’ These demos are to help you understand AI better. The AI models have not been optimised for prediction performance. "
|
46 |
+
"Read more about what went into making these demos in our [blog post](https://www.simplexico.ai/blog/legal-ai-demos-intro).")
|
47 |
+
|
48 |
+
st.info("#### π Select a demo from the sidebar to see some examples of what Legal AI can do!")
|
49 |
+
|
50 |
add_email_signup_form()
|
51 |
|
52 |
st.markdown(
|