Spaces:
Running
Running
dsmultimedika
committed on
Commit
•
19556b1
1
Parent(s):
d0ede85
fix: improve llamaparse
Browse files- service/llamaparse.py +0 -57
service/llamaparse.py
CHANGED
@@ -153,60 +153,3 @@ class LlamaParseWithS3(LlamaParse):
|
|
153 |
raise RuntimeError(nest_asyncio_msg)
|
154 |
else:
|
155 |
raise e
|
156 |
-
|
157 |
-
async def _aget_json(
|
158 |
-
self, job_id, file_path: FileInput, extra_info: Optional[dict] = None
|
159 |
-
) -> List[dict]:
|
160 |
-
"""Load data from the input path."""
|
161 |
-
try:
|
162 |
-
if self.verbose:
|
163 |
-
print("Started parsing the file under job_id %s" % job_id)
|
164 |
-
result = await self._get_job_result(job_id, "json")
|
165 |
-
result["job_id"] = job_id
|
166 |
-
|
167 |
-
if not isinstance(file_path, (bytes, BufferedIOBase)):
|
168 |
-
result["file_path"] = str(file_path)
|
169 |
-
|
170 |
-
return [result]
|
171 |
-
except Exception as e:
|
172 |
-
file_repr = file_path if isinstance(file_path, str) else "<bytes/buffer>"
|
173 |
-
print(f"Error while parsing the file '{file_repr}':", e)
|
174 |
-
if self.ignore_errors:
|
175 |
-
return []
|
176 |
-
else:
|
177 |
-
raise e
|
178 |
-
|
179 |
-
async def aget_json(
|
180 |
-
self,
|
181 |
-
file_path: Union[List[FileInput], FileInput],
|
182 |
-
extra_info: Optional[dict] = None,
|
183 |
-
) -> List[dict]:
|
184 |
-
"""Load data from the input path."""
|
185 |
-
if isinstance(file_path, (str, Path, bytes, BufferedIOBase)):
|
186 |
-
# return await self._aget_json(file_path, extra_info=extra_info)
|
187 |
-
return await self._aget_json(
|
188 |
-
job_id="cda0870a-b896-4140-84ea-1565e1aa1565",
|
189 |
-
file_path=file_path,
|
190 |
-
extra_info=extra_info,
|
191 |
-
)
|
192 |
-
elif isinstance(file_path, list):
|
193 |
-
jobs = [self._aget_json(f, extra_info=extra_info) for f in file_path]
|
194 |
-
try:
|
195 |
-
results = await run_jobs(
|
196 |
-
jobs,
|
197 |
-
workers=self.num_workers,
|
198 |
-
desc="Parsing files",
|
199 |
-
show_progress=self.show_progress,
|
200 |
-
)
|
201 |
-
|
202 |
-
# return flattened results
|
203 |
-
return [item for sublist in results for item in sublist]
|
204 |
-
except RuntimeError as e:
|
205 |
-
if nest_asyncio_err in str(e):
|
206 |
-
raise RuntimeError(nest_asyncio_msg)
|
207 |
-
else:
|
208 |
-
raise e
|
209 |
-
else:
|
210 |
-
raise ValueError(
|
211 |
-
"The input file_path must be a string or a list of strings."
|
212 |
-
)
|
|
|
153 |
raise RuntimeError(nest_asyncio_msg)
|
154 |
else:
|
155 |
raise e
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|