"""HTTP API exposing a table-extraction endpoint and a PDF-to-Word endpoint."""

import contextlib
import io
import logging
import os

from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.responses import JSONResponse
from starlette.responses import FileResponse
from starlette.middleware.cors import CORSMiddleware
from PIL import Image

from pdftoword import convertPDFtoWORD
from model import inference

logger = logging.getLogger(__name__)

# Directory where uploaded PDFs are stored while they are being converted.
UPLOAD_DIR = "uploads"
# Name used on disk when the client-supplied filename is missing or unusable.
FALLBACK_PDF_NAME = "upload.pdf"
DOCX_MEDIA_TYPE = (
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
)
# Body returned by /upload when the model finds no table in the image.
NO_TABLES_MESSAGE = "\n\n💡 the image has no tables 💡\n\n"

app = FastAPI()

origins = ["http://localhost:3000"]  # Replace with your frontend origin URL

# NOTE(review): `origins` above is never passed to the middleware. FastAPI's
# CORS documentation advises against combining wildcard origins with
# allow_credentials=True; consider allow_origins=origins once the real
# frontend origin is known. Left unchanged here to avoid breaking clients.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allows all origins
    allow_credentials=True,
    allow_methods=["*"],  # Allows all methods
    allow_headers=["*"],  # Allows all headers
)


@app.post("/upload")
async def extract_table_data(image: UploadFile = File(...)):
    """Run table inference on an uploaded image and return the table as HTML.

    Args:
        image: The uploaded image file.

    Returns:
        A single-line HTML table string, or a short notice string when the
        inference result is empty.

    Raises:
        HTTPException: 500 when the image cannot be read, decoded or processed.
    """
    try:
        image_data = await image.read()
        # Decode in memory and normalise to RGB before handing to the model.
        picture = Image.open(io.BytesIO(image_data)).convert("RGB")

        # Presumably a pandas DataFrame (uses `.empty` / `.to_html`) -- confirm
        # against `model.inference`.
        table_frame = inference(picture)
        if table_frame.empty:
            return NO_TABLES_MESSAGE

        html = table_frame.to_html(escape=True, border=1, index=False)
        return html.replace("\n", "")
    except HTTPException:
        # Never mask a deliberate HTTP error as a generic 500.
        raise
    except Exception as exc:
        logger.exception("Error processing image")
        raise HTTPException(
            status_code=500, detail="Internal server error"
        ) from exc


def _safe_upload_name(filename):
    """Return *filename* reduced to a bare file name safe to join to a directory."""
    # Treat both separator styles as directories so "..\\x" cannot slip through.
    name = os.path.basename((filename or "").replace("\\", "/"))
    if name in ("", ".", ".."):
        return FALLBACK_PDF_NAME
    return name


@app.post("/convert")
async def convert_pdf(docxFile: UploadFile = File(...)):
    """Convert an uploaded PDF to a Word document and return it as a download.

    Args:
        docxFile: The uploaded PDF (the parameter name is the multipart field
            name clients send, so it is kept as-is).

    Returns:
        A ``FileResponse`` streaming the converted ``.docx``, or a
        ``JSONResponse`` with status 500 and an ``error`` key on failure.

    Raises:
        HTTPException: 415 when the upload is not declared as a PDF.
    """
    # Validate before the try block so the 415 reaches the client instead of
    # being swallowed by the generic handler below and reported as a 500.
    content_type = docxFile.content_type or ""
    if not content_type.startswith("application/pdf"):
        raise HTTPException(
            415, detail="Unsupported file format. Please upload a PDF file."
        )

    try:
        os.makedirs(UPLOAD_DIR, exist_ok=True)

        # The filename is client-controlled: keep only its base name so the
        # file can never be written outside UPLOAD_DIR.
        pdf_file_path = os.path.join(
            UPLOAD_DIR, _safe_upload_name(docxFile.filename)
        )

        try:
            with open(pdf_file_path, "wb") as file_object:
                file_object.write(await docxFile.read())
            docx_path = convertPDFtoWORD(pdf_file_path)
        finally:
            # Remove the uploaded PDF whether or not the conversion succeeded.
            with contextlib.suppress(FileNotFoundError):
                os.unlink(pdf_file_path)

        return FileResponse(
            docx_path,
            media_type=DOCX_MEDIA_TYPE,
            filename="converted_document.docx",
        )
    except FileNotFoundError:
        # Handle case where conversion fails (e.g., missing converter)
        logger.exception("PDF conversion failed: file not found")
        return JSONResponse(
            {"error": "Conversion failed. Please check the converter or file."},
            status_code=500,
        )
    except Exception as e:
        # Catch any unexpected errors
        logger.exception("Unexpected error while converting PDF")
        return JSONResponse(
            {"error": f"An unexpected error occurred: {str(e)}"},
            status_code=500,
        )