import io
import os
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.responses import JSONResponse
from starlette.responses import FileResponse
from starlette.middleware.cors import CORSMiddleware
from PIL import Image
from pdftoword import convertPDFtoWORD
from model import inference
# ASGI application object; the route decorators in this module register on it.
app = FastAPI()

# Intended frontend origin. It is defined here but is not what the CORS
# middleware below is configured with.
origins = ["http://localhost:3000"]  # Replace with your frontend origin URL

# CORS policy: every origin, method and header is accepted, with credentials
# enabled.
# NOTE(review): FastAPI's CORS documentation advises against combining
# wildcard values with allow_credentials=True -- confirm whether `origins`
# should be passed as allow_origins instead.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
    allow_credentials=True,
)
@app.post("/upload")
async def extract_table_data(image: UploadFile = File(...)):
    """Run table extraction on an uploaded image and return the table as HTML.

    The upload is read fully into memory, decoded with Pillow, converted to
    RGB and passed to ``inference``. The result is expected to expose
    ``.empty`` and ``.to_html`` (presumably a pandas DataFrame -- confirm
    against ``model.inference``).

    Args:
        image: The uploaded image file (multipart form field ``image``).

    Returns:
        A single-line HTML table string, or a short notice string when the
        inference result is empty.

    Raises:
        HTTPException: 400 when the upload cannot be decoded as an image;
            500 for any other failure during processing.
    """
    try:
        image_data = await image.read()

        # Decode in memory. Pillow raises OSError (including
        # UnidentifiedImageError) for unreadable or truncated image data,
        # which is a client error rather than a server fault.
        try:
            pil_image = Image.open(io.BytesIO(image_data)).convert("RGB")
        except OSError as exc:
            raise HTTPException(
                status_code=400,
                detail="Uploaded file is not a valid image",
            ) from exc

        table_frame = inference(pil_image)
        if table_frame.empty:
            return "\n💡 the image has no tables 💡\n"

        # Newlines are stripped so the table is returned as one line.
        html = table_frame.to_html(escape=True, border=1, index=False)
        return html.replace("\n", "")
    except HTTPException:
        # Keep deliberate HTTP errors (e.g. the 400 above) from being
        # rewrapped as a generic 500 by the handler below.
        raise
    except Exception as e:
        # Log the cause server-side; the client only gets a generic message.
        print(f"Error processing image: {e}")
        raise HTTPException(status_code=500, detail="Internal server error") from e
def _safe_upload_name(filename, default="upload.pdf"):
    """Reduce a client-supplied file name to a bare name safe to join to a directory."""
    # Treat backslashes as separators too, so Windows-style paths are
    # stripped even when the server runs on POSIX.
    candidate = os.path.basename((filename or "").replace("\\", "/"))
    if candidate in ("", ".", ".."):
        return default
    return candidate


@app.post("/convert")
async def convert_pdf(docxFile: UploadFile = File(...)):
    """Convert an uploaded PDF to a Word document and return it as a download.

    The upload is written to the ``uploads`` directory, passed to
    ``convertPDFtoWORD``, and the temporary PDF is removed afterwards whether
    or not the conversion succeeded.

    Args:
        docxFile: The uploaded PDF (multipart form field ``docxFile``).

    Returns:
        A ``FileResponse`` streaming the converted ``.docx`` on success, or a
        ``JSONResponse`` with status 500 and an ``error`` message on failure.

    Raises:
        HTTPException: 415 when the upload's content type is not
            ``application/pdf``.
    """
    uploaded_file = docxFile

    # Validate before the try block: an HTTPException raised inside it would
    # be caught by the generic handler and reported as a 500. A missing
    # content type is treated as "not a PDF".
    content_type = uploaded_file.content_type or ""
    if not content_type.startswith("application/pdf"):
        raise HTTPException(415, detail="Unsupported file format. Please upload a PDF file.")

    try:
        # Create uploads directory if it doesn't exist
        os.makedirs("uploads", exist_ok=True)

        # The file name comes from the client; keep only its final component
        # so it cannot escape the uploads directory.
        pdf_file_path = os.path.join("uploads", _safe_upload_name(uploaded_file.filename))

        try:
            # Save the uploaded file
            with open(pdf_file_path, "wb") as file_object:
                file_object.write(await uploaded_file.read())

            # Process the PDF
            docx_path = convertPDFtoWORD(pdf_file_path)
        finally:
            # Remove the uploaded PDF even when the conversion raised.
            if os.path.exists(pdf_file_path):
                os.unlink(pdf_file_path)

        return FileResponse(
            docx_path,
            media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            filename="converted_document.docx",
        )
    except FileNotFoundError:
        # Handle case where conversion fails (e.g., missing converter)
        return JSONResponse({"error": "Conversion failed. Please check the converter or file."}, status_code=500)
    except Exception as e:
        # Catch any unexpected errors
        return JSONResponse({"error": f"An unexpected error occurred: {str(e)}"}, status_code=500)