import os
import uuid
from pathlib import Path

import streamlit as st

st.set_page_config(layout="wide")

from annotated_text import annotated_text, annotation
import fitz
import chromadb
import pandas as pd

# The hosting environment exposes the OpenAI key as OPEN_API_KEY;
# LangChain/OpenAI expect OPENAI_API_KEY, so copy it over.
os.environ['OPENAI_API_KEY'] = os.environ['OPEN_API_KEY']

st.title("Contracts Summary")

from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.schema import Document
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
import spacy

# Load the English model from spaCy
nlp = spacy.load("en_core_web_md")


def util_upload_file_and_return_list_docs(uploaded_files):
    """Save each uploaded PDF to the working directory and open it with PyMuPDF."""
    # util_del_cwd()
    list_docs = []
    list_save_path = []
    for uploaded_file in uploaded_files:
        save_path = Path(os.getcwd(), uploaded_file.name)
        with open(save_path, mode='wb') as w:
            w.write(uploaded_file.getvalue())
        # print('save_path:', save_path)
        docs = fitz.open(save_path)
        list_docs.append(docs)
        list_save_path.append(save_path)
    return list_docs, list_save_path


def util_get_list_page_and_passage(list_docs, list_save_path):
    """Concatenate the text of every page of each document into one string per document."""
    documents = []
    for ind_doc, docs in enumerate(list_docs):
        text = ''
        for txt_index, txt_page in enumerate(docs):
            text = text + txt_page.get_text()
        documents.append(text)
    return documents


documents = []


def get_summary_single_doc(text):
    """Summarize a single document's text with LangChain's refine summarization chain."""
    from langchain.llms import OpenAI
    from langchain.chains.summarize import load_summarize_chain
    from langchain.text_splitter import CharacterTextSplitter
    from langchain.prompts import PromptTemplate
    from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
    from langchain.chat_models import ChatOpenAI

    LLM_KEY = os.environ.get("OPEN_API_KEY")

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=3000,
        chunk_overlap=20
    )
    # Create the documents from the list of texts
    texts = text_splitter.create_documents([text])

    prompt_template = """Write a concise summary of the following:
{text}
CONCISE SUMMARY:"""
    prompt = PromptTemplate.from_template(prompt_template)

    refine_template = (
        "Your job is to produce a final summary with key learnings.\n"
        "We have provided an existing summary up to a certain point: {existing_answer}\n"
        "We have the opportunity to refine the existing summary "
        "(only if needed) with detailed context below.\n"
        "------------\n"
        "{text}\n"
        "------------\n"
        "Given the new context, refine the original summary. "
        "If the context isn't useful, return the original summary."
    )
    refine_prompt = PromptTemplate.from_template(refine_template)

    # Define the LLM; here we are using OpenAI's gpt-3.5-turbo
    model_name = "gpt-3.5-turbo"
    llm = ChatOpenAI(temperature=0, openai_api_key=LLM_KEY, model_name=model_name)

    refine_chain = load_summarize_chain(
        llm,
        chain_type="refine",
        question_prompt=prompt,
        refine_prompt=refine_prompt,
        return_intermediate_steps=True,
    )
    refine_outputs = refine_chain({'input_documents': texts})
    return refine_outputs['output_text']


with st.form("my_form"):
    multi = '''1. Download and upload a contract (PDF),
    e.g. https://www.barc.gov.in/tenders/GCC-LPS.pdf
    e.g. https://www.montrosecounty.net/DocumentCenter/View/823/Sample-Construction-Contract
    '''
    st.markdown(multi)
    multi = '''2. Press Summary.'''
    st.markdown(multi)
    multi = '''**An attempt is made to summarize each uploaded contract.**\n'''
    st.markdown(multi)

    # uploaded_file = st.file_uploader("Choose a file")
    list_docs = []
    list_save_path = []
    uploaded_files = st.file_uploader("Choose file(s)", accept_multiple_files=True)
    submitted = st.form_submit_button("Summary")

    if submitted and uploaded_files:
        list_docs, list_save_path = util_upload_file_and_return_list_docs(uploaded_files)
        documents = util_get_list_page_and_passage(list_docs, list_save_path)
        for index, item in enumerate(documents):
            st.write('Summary ' + str(index + 1) + ' :: ')
            st.write(get_summary_single_doc(item))
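
# ---------------------------------------------------------------------------
# Usage sketch (not part of the original script; the file name and the package
# list below are assumptions). The langchain.* import paths used above assume
# an older LangChain release that still ships langchain.chat_models and
# langchain.vectorstores. With the OpenAI key exposed as OPEN_API_KEY, the app
# can be launched locally via Streamlit's CLI, for example:
#
#   pip install streamlit st-annotated-text pymupdf chromadb pandas langchain openai spacy
#   python -m spacy download en_core_web_md
#   export OPEN_API_KEY=sk-...
#   streamlit run contracts_summary_app.py
# ---------------------------------------------------------------------------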