Spaces:

simplexico
/

legal-ai-actions

Running on CPU Upgrade

App Files Files Community

JMuscatello committed on Apr 3, 2023

Commit

6d70e63

•

1 Parent(s): 916cbfe

Add custom model+display

Browse files

Files changed (1) hide show

pages/5_🗂_Organise_Demo.py +65 -13

pages/5_🗂_Organise_Demo.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import os
 import joblib
 import pandas as pd
 import plotly.express as px
@@ -39,6 +41,7 @@ This demo shows how AI can be used to organise contracts.
 We've trained a model to group contracts into similar types.
 The plot below shows a sample set of contracts that have been automatically grouped together.
 Each point in the plot represents how the model interprets a contract, the closer together a pair of points are, the more similar they appear to the model.
 \n**TIP:** Hover over each point to see the filename of the contract. Groups can be added or removed by clicking on the symbol in the plot legend.
 """)
 st.write("**👈 Upload your own contracts on the left (as .txt files)** and hit the button **Organise Data** to see how your own contracts can be grouped together")
@@ -56,23 +59,23 @@ def load_dataset():
     df = pd.read_json(DATA_FILENAME)
     return df
-def get_transform_and_predictions(model, df):
-    X = [text[:500] for text in df['text'].to_list()]
     y = model.predict(X)
     X_transform = model[:2].transform(X)
     return X_transform, y
-with st.spinner('⚙️ Loading model...'):
-    cuad_tfidf_umap_kmeans = load_model()
-    cuad_df = load_dataset()
-    X_transform, y = get_transform_and_predictions(cuad_tfidf_umap_kmeans, cuad_df)
     fig = px.scatter_3d(
-        x=X_transform[:,0],
-        y=X_transform[:,1],
-        z=X_transform[:,2],
-        color=[str(y_i) for y_i in y], hover_name=cuad_df['filename'].to_list())
     fig.update_layout(
         legend=dict(
@@ -85,8 +88,57 @@ with st.spinner('⚙️ Loading model...'):
         width=1100,
         height=900
     )
-    st.plotly_chart(fig, use_container_width=True, height=1600)
 add_email_signup_form()
 add_footer()

 import os
 import joblib
+from copy import deepcopy
 import pandas as pd
 import plotly.express as px
 We've trained a model to group contracts into similar types.
 The plot below shows a sample set of contracts that have been automatically grouped together.
 Each point in the plot represents how the model interprets a contract, the closer together a pair of points are, the more similar they appear to the model.
+Similar documents are grouped by color.
 \n**TIP:** Hover over each point to see the filename of the contract. Groups can be added or removed by clicking on the symbol in the plot legend.
 """)
 st.write("**👈 Upload your own contracts on the left (as .txt files)** and hit the button **Organise Data** to see how your own contracts can be grouped together")
     df = pd.read_json(DATA_FILENAME)
     return df
+def get_transform_and_predictions(model, X):
+    """Return the intermediate representation of X and the model's predictions.
+
+    Args:
+        model: Fitted estimator that supports ``predict`` and slicing
+            (presumably a scikit-learn ``Pipeline`` -- TODO confirm against
+            ``load_model``).
+        X: Inputs passed unchanged to ``model.predict`` and to the sliced
+            model's ``transform``.
+
+    Returns:
+        Tuple ``(X_transform, y)``: the output of ``model[:2].transform(X)``
+        and the output of ``model.predict(X)``.
+    """
     y = model.predict(X)
+    # model[:2] keeps only the first two steps, so this yields the data as seen
+    # just before the final step (presumably the coordinates that get plotted
+    # -- verify against the pipeline definition).
     X_transform = model[:2].transform(X)
     return X_transform, y
+def generate_plot(X, y, filenames):
     fig = px.scatter_3d(
+        x=X[:,0],
+        y=X[:,1],
+        z=X[:,2],
+        color=[str(y_i) for y_i in y], hover_name=filenames)
+    fig.update_traces(
+        marker_size=8,
+        marker_line=dict(width=2),
+        selector=dict(mode='markers')
+    )
     fig.update_layout(
         legend=dict(
         width=1100,
         height=900
     )
+    return fig
+uploaded_files = st.sidebar.file_uploader("Select contracts to organise ", accept_multiple_files=True)
+button = st.sidebar.button('Organise Contracts', type='primary', use_container_width=True)
+with st.container():
+    with st.spinner('⚙️ Loading model...'):
+        cuad_tfidf_umap_kmeans = load_model()
+        cuad_df = load_dataset()
+        X = [text[:500] for text in cuad_df['text'].to_list()]
+        filenames = cuad_df['filename'].to_list()
+        X_transform, y = get_transform_and_predictions(cuad_tfidf_umap_kmeans, X)
+        fig = generate_plot(X_transform, y, filenames)
+        figure = st.plotly_chart(fig, use_container_width=True)
+    if button:
+        figure.empty()
+        with st.spinner('⚙️ Training model...'):
+            if not uploaded_files or not len(uploaded_files) > 1:
+                st.write(
+                    "Please add at least two contracts"
+                )
+            else:
+                if len(uploaded_files) < 10:
+                    n_clusters = 3
+                else:
+                    n_clusters = 8
+                X_train = [uploaded_file.read()[:500] for uploaded_file in uploaded_files]
+                filenames = [uploaded_file.name for uploaded_file in uploaded_files]
+                tfidf_umap_kmeans = deepcopy(cuad_tfidf_umap_kmeans)
+                tfidf_umap_kmeans.set_params(kmeans__n_clusters=4)
+                tfidf_umap_kmeans.fit(X_train)
+                X_transform, y = get_transform_and_predictions(cuad_tfidf_umap_kmeans, X_train)
+                fig = generate_plot(X_transform, y, filenames)
+                st.write("**Your organised contracts:**")
+                st.plotly_chart(fig, use_container_width=True)
 add_email_signup_form()
 add_footer()