import base64
import functools

import pdfplumber
from transformers import pipeline


def get_pdf_text(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: The PDF to read; passed straight to ``pdfplumber.open``.

    Returns:
        A single string with the extracted text of all pages joined
        together without any separator.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # extract_text() may give None for a page with no text layer
        # (e.g. a scanned image); treat that as empty instead of failing
        # on str + None.
        return "".join(page.extract_text() or "" for page in pdf.pages)


def display_pdf(file_path):
    """Build an HTML iframe snippet that embeds a PDF as a base64 data URI.

    Args:
        file_path: Path of the PDF file to embed.

    Returns:
        An HTML string containing one ``<iframe>`` whose ``src`` is the
        base64-encoded content of the file.
    """
    with open(file_path, "rb") as f:
        base64_pdf = base64.b64encode(f.read()).decode("utf-8")

    # NOTE(review): the iframe markup had been reduced to an empty string,
    # leaving base64_pdf unused. The width/height below are defaults chosen
    # here -- confirm against the page layout that consumes this snippet.
    return (
        f'<iframe src="data:application/pdf;base64,{base64_pdf}" '
        'width="100%" height="600" type="application/pdf"></iframe>'
    )


def split_text(text, max_length):
    """Split text into chunks of at most ``max_length`` whitespace-separated words.

    Args:
        text: The text to split. Runs of whitespace are collapsed because
            words are re-joined with single spaces.
        max_length: Maximum number of words per chunk; must be >= 1.

    Returns:
        A list of chunk strings (empty when ``text`` contains no words).

    Raises:
        ValueError: If ``max_length`` is smaller than 1.
    """
    if max_length < 1:
        # A negative step would silently yield no chunks (dropping all the
        # text) and zero would raise an opaque range() error.
        raise ValueError("max_length must be a positive integer")

    words = text.split()
    return [
        " ".join(words[start:start + max_length])
        for start in range(0, len(words), max_length)
    ]


@functools.lru_cache(maxsize=1)
def _get_summarizer():
    """Create the summarization pipeline once and reuse it on later calls."""
    return pipeline(task="summarization", model="facebook/bart-large-cnn")


def summarize(text, max_length):
    """Summarize text by summarizing fixed-size word chunks and joining them.

    Args:
        text: The text to summarize.
        max_length: Maximum number of words per chunk handed to the model.

    Returns:
        The per-chunk summaries joined with single spaces, or an empty
        string when ``text`` contains no words.

    Raises:
        ValueError: If ``max_length`` is smaller than 1.
    """
    text_chunks = split_text(text, max_length=max_length)
    if not text_chunks:
        # Nothing to summarize; skip loading the model altogether.
        return ""

    summarizer = _get_summarizer()
    summaries = [summarizer(chunk)[0]["summary_text"] for chunk in text_chunks]
    return " ".join(summaries)