Spaces:

Hushh
/

hushh-valet-chat

Sleeping

Upload 17 files

d1a66a2 verified 7 months ago

No virus

889 Bytes

	import PyPDF2
	from docx import Document
	import io

	async def extract_text_from_pdf(pdf_data):
	    """Return the concatenated text of every page in a PDF.

	    The body contains no ``await``, so the parsing runs synchronously
	    once the coroutine is awaited.

	    Args:
	        pdf_data: Bytes of the PDF document.

	    Returns:
	        The text extracted from each page, joined in page order with no
	        separator between pages.
	    """
	    # Wrap the bytes in an in-memory stream that PdfReader can read from.
	    with io.BytesIO(pdf_data) as pdf_file:
	        pdf_reader = PyPDF2.PdfReader(pdf_file)
	        # Iterate the pages directly and join once, rather than indexing
	        # by position and growing a string with "+=" on every page.
	        return "".join(page.extract_text() for page in pdf_reader.pages)

	async def extract_text_from_docx(docx_data):
	    """Return the text of every paragraph in a .docx document.

	    The body contains no ``await``, so the parsing runs synchronously
	    once the coroutine is awaited.

	    Args:
	        docx_data: Bytes of the .docx file.

	    Returns:
	        Each paragraph's text followed by a newline, in the order given
	        by ``doc.paragraphs``. Only ``doc.paragraphs`` is read.
	    """
	    # Document is handed an in-memory stream built from the bytes.
	    doc = Document(io.BytesIO(docx_data))
	    # Join once instead of growing a string with "+=" per paragraph.
	    return "".join(para.text + "\n" for para in doc.paragraphs)

	async def extract_text_from_attachment(filename, data):
	    """Extract text from an attachment, choosing the parser by extension.

	    Args:
	        filename: Attachment name; only its trailing extension is
	            inspected, and the comparison ignores case.
	        data: Bytes of the attachment, passed unchanged to the parser.

	    Returns:
	        The extracted text for ``.pdf`` and ``.docx`` files, or the
	        string ``"Unsupported document type"`` for any other name.
	    """
	    # Compare case-insensitively so names such as "REPORT.PDF" or
	    # "Notes.Docx" reach the matching parser instead of being rejected.
	    lowered = filename.lower()
	    if lowered.endswith('.pdf'):
	        return await extract_text_from_pdf(data)
	    if lowered.endswith('.docx'):
	        return await extract_text_from_docx(data)
	    # Add handling for other document types if needed
	    return "Unsupported document type"