import os

import nest_asyncio
from dotenv import load_dotenv
from fastapi import UploadFile
from fastapi.responses import JSONResponse
from llama_parse import LlamaParse

from script.get_metadata import Metadata

# Load environment variables and allow nested event loops
load_dotenv()
nest_asyncio.apply()

def parse_journal(content: bytes, file_name: str):
    """Parse the journal using LlamaParse."""
    try:
        # Initialize the parser (vendor multimodal options left disabled)
        parser = LlamaParse(
            api_key=os.getenv("LLAMA_PARSE_API_KEY"),
            result_type="markdown",
            # use_vendor_multimodal_model=True,
            # vendor_multimodal_model_name="openai-gpt-4o-mini",
        )

        # Load and process the document from the in-memory bytes
        llama_parse_documents = parser.load_data(
            content, extra_info={"file_name": file_name}
        )
        return llama_parse_documents
    except Exception as e:
        return JSONResponse(status_code=400, content=f"Error processing file: {e}")

async def upload_file(reference, file: UploadFile):
    try:
        # Read the binary content of the uploaded file once
        content = await file.read()

        # Parse the journal
        parsed_documents = parse_journal(content, file.filename)
        # Propagate parser errors instead of wrapping them in a 500 below
        if isinstance(parsed_documents, JSONResponse):
            return parsed_documents

        # Attach reference metadata to the parsed documents
        # metadata_dict = await extract_metadata(content)
        # print("Metadata Dictionary : \n\n", metadata_dict)
        metadata_gen = Metadata(reference)
        documents_with_metadata = metadata_gen.apply_metadata(parsed_documents)

        print("Documents with metadata:\n\n", documents_with_metadata)
        print("Number of documents:\n", len(documents_with_metadata))

        # Return the parsed documents with metadata attached
        return documents_with_metadata
    except Exception as e:
        return JSONResponse(status_code=500, content=f"Error processing file: {e}")
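
# Usage sketch (an assumption, not part of the original module): one way
# upload_file might be exposed as a FastAPI route. The "/upload" path, the
# FastAPI app instance, and the empty `reference` placeholder are hypothetical;
# the shape Metadata(reference) expects is not shown in this file.
from fastapi import FastAPI, File

app = FastAPI()


@app.post("/upload")
async def upload_endpoint(file: UploadFile = File(...)):
    # Placeholder reference; replace with whatever Metadata expects.
    reference = {}
    docs = await upload_file(reference, file)
    # upload_file may return a JSONResponse on error; pass it through as-is.
    if isinstance(docs, JSONResponse):
        return docs
    # Parsed Document objects are not JSON-serializable; return a summary.
    return {"num_documents": len(docs)}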