import os
import re

import streamlit as st
import google.generativeai as genai
from dotenv import load_dotenv
from langchain_community.document_loaders import TextLoader
from langchain_community.document_loaders import PyPDFLoader
from langchain.docstore.document import Document
from langchain import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI

# Loading Google Gemini API Key from Environment Variables
load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))


# Display user Error, Warning or Success Message
def fn_display_user_messages(lv_text, lv_type, mv_processing_message):
    """Display user Info, Error, Warning or Success Message.

    Args:
        lv_text: Message text to render.
        lv_type: "Success", "Error" or "Warning"; any other value
            (e.g. "Info") is rendered as an info message.
        mv_processing_message: Object whose ``container()`` result is used
            as a context manager for the rendered message (presumably an
            ``st.empty()`` placeholder -- confirm against the caller).
    """
    lv_renderers = {
        "Success": st.success,
        "Error": st.error,
        "Warning": st.warning,
    }
    # Unknown types fall back to an informational message.
    lv_render = lv_renderers.get(lv_type, st.info)

    with mv_processing_message.container():
        lv_render(lv_text)


# Upload pdf file into 'pdf-data' folder if it does not exist
def fn_upload_pdf(mv_pdf_input_file, mv_processing_message):
    """Upload pdf file into 'pdf-data' folder if it does not exist.

    Args:
        mv_pdf_input_file: Uploaded file object; its ``name`` attribute and
            ``getbuffer()`` method are used.
        mv_processing_message: Placeholder forwarded to
            ``fn_display_user_messages`` for status output.
    """
    # The upload name is client-supplied: keep only its final component so
    # it cannot point outside of 'pdf-data'.
    lv_file_name = os.path.basename(mv_pdf_input_file.name)

    # exist_ok avoids failing when the folder appears between check and create.
    os.makedirs("pdf-data", exist_ok=True)
    lv_temp_file_path = os.path.join("pdf-data", lv_file_name)

    if os.path.exists(lv_temp_file_path):
        lv_message = "Step1: File already available"
        print(lv_message)
        fn_display_user_messages(lv_message, "Warning", mv_processing_message)
        return

    with open(lv_temp_file_path, "wb") as lv_file:
        lv_file.write(mv_pdf_input_file.getbuffer())

    lv_message = "Step1: PDF uploaded successfully at -> " + lv_temp_file_path
    print(lv_message)
    fn_display_user_messages(lv_message, "Info", mv_processing_message)


# Extract uploaded pdf data
def fn_extract_pdf_data(mv_pdf_input_file, mv_processing_message):
    """Extract uploaded pdf data"""
    lv_temp_pdf_file_path = os.path.join("pdf-data",mv_pdf_input_file.name)
    # --
Loading PDF Data lv_pdf_loader = PyPDFLoader(lv_temp_pdf_file_path) lv_pdf_content = lv_pdf_loader.load() # -- Define patterns with flexibility pattern1 = r"(\w+)-\n(\w+)" # Match hyphenated words separated by a line break pattern2 = r"(?