"""Streamlit page for the Embeddr demo.

Renders an intro header, accepts two uploaded markdown/text documents, runs
topic modelling and clustering on their cleaned text, and displays the
insights returned by ``utils.generate_insights`` in expanders.
"""

import os

import streamlit as st

from topics import TopicModelling
import mdforest
import utils


def _decode_upload(uploaded_file):
    """Return the uploaded file's full contents as text, or None if not UTF-8."""
    try:
        # getvalue() returns the whole buffer regardless of the current read
        # position, unlike read(), which only yields what is left after it.
        return uploaded_file.getvalue().decode("utf-8")
    except UnicodeDecodeError:
        return None


# --- Header: introduction on the left, sign-up link and QR code on the right.
# The narrow middle column is only a spacer.
col1, mid, col2 = st.columns([30, 5, 20])

with col1:
    st.title("Welcome to Embeddr")
    st.markdown(
        "This is a demo of _one of the many_ use cases for an embedding of "
        "all your notes. This application lets you find **common ideas** "
        "between any two notes."
    )
    st.markdown(
        "You can upload two markdown files and the application will find "
        "the common ideas between them. It will generate insights based on "
        "the common ideas."
    )
    st.markdown(
        "**I will be building a better embedding model soon.** Stay tuned "
        "for updates. This is just a demo of what is possible with a good "
        "embedding model."
    )

with col2:
    st.markdown("### [Sign up for updates](https://embeddr.my.canva.site/)")
    st.image("media/qrcode.png")

st.markdown("---")

# --- Upload area: one uploader per document, side by side.
st.markdown("## Drop in two documents and get insights between them.")

col3, mid2, col4 = st.columns([40, 5, 40])

with col3:
    st.markdown("### Drop the first document")
    # Both uploaders share a label, so distinct keys are required.
    file1 = st.file_uploader("Upload a file", type=["md", "txt"], key="first")

with col4:
    st.markdown("### Drop the second document")
    file2 = st.file_uploader("Upload a file", type=["md", "txt"], key="second")

topics = {}
results = {}

embedder = utils.load_model()
nlp = utils.load_nlp()

# exist_ok avoids the check-then-create race between concurrent sessions
# (an exists() test followed by mkdir() can raise FileExistsError).
os.makedirs("./prompter/", exist_ok=True)

# --- Analysis: runs only once both documents have been uploaded.
if file1 is not None and file2 is not None:
    input_text1 = _decode_upload(file1)
    input_text2 = _decode_upload(file2)
    if input_text1 is None or input_text2 is None:
        # Report undecodable uploads to the user instead of surfacing a raw
        # UnicodeDecodeError traceback, then halt this script run.
        st.error("Both documents must be UTF-8 encoded text files.")
        st.stop()

    cleaned_text1 = mdforest.clean_markdown(input_text1)
    cleaned_text2 = mdforest.clean_markdown(input_text2)

    st.title("Generating insights")

    with st.spinner('Generating insights...'):
        insight1 = TopicModelling(cleaned_text1)
        insight2 = TopicModelling(cleaned_text2)

        keywords1, concepts1 = insight1.generate_topics()
        topics['insight1'] = [keywords1, concepts1]
        keywords2, concepts2 = insight2.generate_topics()
        topics['insight2'] = [keywords2, concepts2]

    with st.spinner("Flux capacitor is fluxing..."):
        clutered = utils.cluster_based_on_topics(
            nlp, embedder, cleaned_text1, cleaned_text2, num_clusters=3
        )

    with st.spinner("Polishing up"):
        results = utils.generate_insights(
            topics,
            file1.name,
            file2.name,
            cleaned_text1,
            cleaned_text2,
            clutered,
        )

    st.success("Done!")

    st.title("Insights generated")
    st.markdown("### The following insights are common to both documents.")

    # Each result is indexed by "name", "description" and "concepts".
    for result in results:
        with st.expander(result["name"]):
            st.write(result["description"])
            st.markdown("Related Concepts:")
            for insight in result["concepts"]:
                st.markdown(f" - {insight}")