"""Gradio demo: render PDF pages as images and extract text from PDF/DOCX files."""

import os

import gradio as gr
import pdfplumber
from docx import Document
from pdf2image import convert_from_path


def _upload_path(file):
    """Return the filesystem path of an uploaded file.

    The handlers below may receive either a plain path (``str`` or
    ``os.PathLike``) or an object exposing the path through a ``.name``
    attribute (as the original DOCX handler assumed), so both forms are
    accepted here.

    Raises:
        gr.Error: if no file was supplied (``file`` is ``None``).
    """
    if file is None:
        raise gr.Error("Please upload a file first.")
    if isinstance(file, (str, os.PathLike)):
        return file
    return file.name


def convert_pdf_to_image(file):
    """Convert every page of the uploaded PDF and return the resulting images.

    Returns whatever ``pdf2image.convert_from_path`` produces for the file.
    """
    return convert_from_path(_upload_path(file))


def extract_text_from_pdf(file):
    """Return the text of the uploaded PDF, one newline-terminated chunk per page."""
    with pdfplumber.open(_upload_path(file)) as pdf:
        # extract_text() may yield None for a page with no text layer;
        # treat such pages as empty instead of failing on None + str.
        return "".join(f"{page.extract_text() or ''}\n" for page in pdf.pages)


def extract_text_from_docx(file):
    """Return the text of the uploaded DOCX, one newline-terminated line per paragraph."""
    doc = Document(_upload_path(file))
    return "".join(f"{paragraph.text}\n" for paragraph in doc.paragraphs)


pdf_to_img = gr.Interface(
    convert_pdf_to_image,
    gr.File(),
    gr.Gallery(),
    api_name="pdf_to_img",
)
pdf_to_text = gr.Interface(
    extract_text_from_pdf,
    gr.File(),
    gr.Textbox(placeholder="Extracted text will appear here"),
    api_name="pdf_to_text",
)
docx_to_text = gr.Interface(
    extract_text_from_docx,
    gr.File(),
    gr.Textbox(placeholder="Extracted text from DOCX will appear here"),
    api_name="docx_to_text",
)

demo = gr.TabbedInterface(
    [pdf_to_img, pdf_to_text, docx_to_text],
    ["PDF to Image", "Extract PDF Text", "Extract DOCX Text"],
)

if __name__ == "__main__":
    # Only start the (blocking) server when run as a script, so importing
    # this module to reuse `demo` or the handlers has no side effects.
    demo.launch(debug=True)