"""Helpers that pull plain text out of PDF and DOCX attachments held in memory."""

import io

import PyPDF2
from docx import Document


async def extract_text_from_pdf(pdf_data: bytes) -> str:
    """Return the concatenated text of every page in a PDF.

    The coroutine contains no ``await``; parsing runs synchronously when
    the coroutine is awaited.

    Args:
        pdf_data: Raw contents of the PDF file (wrapped in ``io.BytesIO``).

    Returns:
        The text of all pages joined together with no separator.
    """
    with io.BytesIO(pdf_data) as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Join once instead of repeated ``+=``; a falsy/None extraction
        # result for a page is treated as empty text so it cannot break
        # the concatenation.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)


async def extract_text_from_docx(docx_data: bytes) -> str:
    """Return the text of every paragraph in a DOCX document.

    The coroutine contains no ``await``; parsing runs synchronously when
    the coroutine is awaited.

    Args:
        docx_data: Raw contents of the DOCX file (wrapped in ``io.BytesIO``).

    Returns:
        Each paragraph's text followed by a newline, concatenated in
        document order.
    """
    doc = Document(io.BytesIO(docx_data))
    return "".join(para.text + "\n" for para in doc.paragraphs)


async def extract_text_from_attachment(filename: str, data: bytes) -> str:
    """Extract text from an attachment, dispatching on its file extension.

    The extension check is case-insensitive, so ``REPORT.PDF`` is handled
    the same way as ``report.pdf``.

    Args:
        filename: Name of the attachment; only its suffix is inspected.
        data: Raw contents of the attachment.

    Returns:
        The extracted text for ``.pdf`` and ``.docx`` files, otherwise the
        literal string ``"Unsupported document type"``.
    """
    # Normalise case once so upper- or mixed-case extensions still match.
    lowered = filename.lower()
    if lowered.endswith('.pdf'):
        return await extract_text_from_pdf(data)
    if lowered.endswith('.docx'):
        return await extract_text_from_docx(data)
    # Add handling for other document types if needed
    return "Unsupported document type"