Spaces:

Hushh
/

hushh-valet-chat

Sleeping

File size: 889 Bytes

d1a66a2

import PyPDF2
from docx import Document
import io

async def extract_text_from_pdf(pdf_data: bytes) -> str:
    """Return the text of every page of an in-memory PDF, in page order.

    Args:
        pdf_data: Raw bytes of the PDF document.

    Returns:
        The extracted page texts concatenated with no separator between
        pages; an empty string when the reader reports no pages.

    Note:
        The function is declared ``async`` but contains no ``await``: the
        parsing runs synchronously inside the coroutine. Any error raised
        by PyPDF2 while reading the document propagates to the caller.
    """
    with io.BytesIO(pdf_data) as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Iterate pages directly and join once instead of indexing and
        # growing a string with ``+=``. ``or ""`` is a defensive guard so a
        # page yielding a falsy/None result cannot break the join.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)

async def extract_text_from_docx(docx_data):
    """Return the paragraph text of an in-memory .docx file.

    The raw bytes in ``docx_data`` are wrapped in a ``BytesIO`` and opened
    with ``Document``. Each entry of ``doc.paragraphs`` contributes its
    ``text`` followed by a newline, so a non-empty result always ends with
    ``"\\n"``; a document without paragraphs yields an empty string.

    Declared ``async`` but contains no ``await``; the work is synchronous.
    """
    document = Document(io.BytesIO(docx_data))
    # One newline-terminated chunk per paragraph, in document order.
    chunks = [paragraph.text + "\n" for paragraph in document.paragraphs]
    return "".join(chunks)

async def extract_text_from_attachment(filename: str, data: bytes) -> str:
    """Extract text from an attachment, choosing the parser by file extension.

    Args:
        filename: Name of the attachment; only its ending is inspected.
        data: Raw bytes of the attachment, passed to the selected extractor.

    Returns:
        The text produced by ``extract_text_from_pdf`` for names ending in
        ``.pdf``, by ``extract_text_from_docx`` for names ending in
        ``.docx``, and the literal string ``"Unsupported document type"``
        for anything else.
    """
    # Match on a lowercased copy so names such as "REPORT.PDF" or
    # "Resume.Docx" are dispatched instead of being reported as unsupported.
    normalized = filename.lower()
    if normalized.endswith('.pdf'):
        return await extract_text_from_pdf(data)
    if normalized.endswith('.docx'):
        return await extract_text_from_docx(data)
    # Add handling for other document types if needed
    return "Unsupported document type"