import os

import nest_asyncio
from dotenv import load_dotenv
from fastapi import HTTPException, UploadFile
from fastapi.responses import JSONResponse
from llama_parse import LlamaParse

from script.get_metadata import Metadata

load_dotenv()
nest_asyncio.apply()

def parse_journal(content: bytes, file_name: str):
    """Parse the journal using LlamaParse."""
    try:
        # Initialize the parser; the API key is read from the environment
        parser = LlamaParse(
            api_key=os.getenv("LLAMA_PARSE_API_KEY"),
            result_type="markdown",
            # use_vendor_multimodal_model=True,
            # vendor_multimodal_model_name="openai-gpt-4o-mini",
        )

        # Parse the raw bytes; LlamaParse needs a file_name in extra_info
        # when it is given file content instead of a path
        llama_parse_documents = parser.load_data(
            content, extra_info={"file_name": file_name}
        )
        return llama_parse_documents
    except Exception as e:
        # Raise instead of returning a JSONResponse: the caller treats the
        # return value as a list of documents, so returning a response
        # object here would crash the metadata step downstream
        raise HTTPException(status_code=400, detail=f"Error processing file: {e}")
async def upload_file(reference, file: UploadFile):
    try:
        # Read the binary content of the uploaded file once
        content = await file.read()

        # Parse the journal
        parsed_documents = parse_journal(content, file.filename)

        # Extract metadata
        # metadata_dict = await extract_metadata(content)
        # print("Metadata Dictionary : \n\n", metadata_dict)

        # Attach the reference metadata to every parsed document
        metadata_gen = Metadata(reference)
        documents_with_metadata = metadata_gen.apply_metadata(parsed_documents)

        print("Documents with metadata:\n\n", documents_with_metadata)
        print("Number of documents:\n", len(documents_with_metadata))

        # Return the parsed documents with metadata attached
        return documents_with_metadata
    except HTTPException:
        # Let parsing errors keep their original status code
        raise
    except Exception as e:
        return JSONResponse(status_code=500, content=f"Error processing file: {e}")
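

# A minimal usage sketch, assuming this module is mounted in a FastAPI app.
# The route path "/upload" and the string-typed "reference" form field are
# illustrative assumptions, not part of the original module; Form/File
# parameters also require the python-multipart package to be installed.
from fastapi import FastAPI, File, Form

app = FastAPI()


@app.post("/upload")
async def upload_endpoint(
    reference: str = Form(...), file: UploadFile = File(...)
):
    documents = await upload_file(reference, file)
    if isinstance(documents, JSONResponse):
        # upload_file already built an error response; pass it through
        return documents
    # Parsed Document objects are not directly JSON-serializable, so
    # return a lightweight summary instead of the raw objects
    return {"num_documents": len(documents)}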