Spaces:
Runtime error
Runtime error
File size: 2,852 Bytes
e84d35a 7b6ee4d e84d35a 7b6ee4d e84d35a 7b6ee4d e84d35a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
"""Process uploads."""
# pylint: disable=invalid-name, unused-import
from typing import Union
from pathlib import Path
import tempfile
import cchardet
from logzero import logger
def process_upload(upload: Union[tempfile._TemporaryFileWrapper, bytes]) -> str:
    """Return the text content of an uploaded file.

    The two accepted input kinds correspond to the two gradio file modes:

    * ``gr.inputs.File("file")``: a temp-file wrapper for a normal file;
      its ``name`` attribute is the path that gets read.
    * ``gr.inputs.File("bytes")``: raw bytes (intended for zip files);
      not implemented yet.

    Args:
        upload: file object exposing ``.name``, or raw bytes.

    Returns:
        The decoded text when an encoding can be detected; ``""`` for an
        empty file; otherwise a short human-readable message (not
        implemented / error text / ``"binary file: ..."``). Errors are
        logged and returned as text rather than raised.
    """
    if isinstance(upload, bytes):
        logger.warning("Not implemented, yet, for zip file")
        return "Not implemented, yet, for zip file"

    # Best-effort: anything without a usable ``.name`` is reported, not raised.
    try:
        fpath = Path(upload.name)
    except Exception as e:
        logger.error("Path(upload.name) error: %s", e)
        return str(e)

    # Path.suffix always carries the leading dot (or is "" when absent),
    # so every non-empty entry here must start with ".".
    suffixes = [
        "",
        ".txt",
        ".text",
        ".md",
        ".tsv",
    ]
    # Unknown suffixes are only warned about; processing continues so that
    # text files with unusual extensions still work.
    if fpath.suffix.lower() not in suffixes:
        logger.warning('suffix: [%s] not in %s', fpath.suffix, suffixes)

    try:
        data = fpath.read_bytes()
    except Exception as e:
        logger.error("Unable to read data from %s, errors: %s", fpath, e)
        # Carry the error text forward as the payload for the steps below.
        data = str(e).encode()

    # no data, empty file, return ""
    if not data:
        logger.info("empty file: %s", upload.name)
        return ""

    encoding = cchardet.detect(data).get("encoding")
    if encoding is not None:
        try:
            text = fpath.read_text(encoding=encoding)
        except Exception as e:
            logger.error("Unable to retrieve text, error: %s", e)
            text = str(e)
        return text

    # No encoding detected: treated as binary (docx, pdf, epub, zip etc.).
    # Extraction for .docx/.epub/.mobi/.pdf is not implemented yet.
    logger.info("Trying docx...to be implemented")

    # The last 8 characters of the stem are dropped; presumably this strips
    # the random tag the temp-file name carries (e.g. READMEzm8hc5ze.md)
    # -- verify against the uploader's naming scheme.
    msg = f"binary file: {fpath.stem[:-8]}{fpath.suffix}"
    logger.warning("%s", msg)
    return msg
# Scratch code from a Colab notebook, parked inside a throwaway string
# literal (bound to ``_``) so that it is never executed.
_ = ''' # colab gradio-file-inputs-upload.ipynb
# file_to_text/process_file
def zip_to_text(file_obj):
"""
# zf = zipfile.ZipFile('german-recipes-dataset.zip')
zf = file_obj
namelist = zipfile.ZipFile.namelist(zf);
# filename = zf.open(namelist[0]);
file_contents = []
for filename in namelist:
with zf.open(filename) as fhandle:
file_contents.append(fhandle.read().decode())
"""
# fileobj is <class 'tempfile._TemporaryFileWrapper'>
# gr.inputs.File("bytes")
if isinstance(file_obj, bytes):
data = file_obj.decode()
return f"{type(file_obj)}\n{dir(file_obj)}\n{data}"
# "file"/gr.inputs.File("file") file_obj.name: /tmp/READMEzm8hc5ze.md
data = Path(file_obj.name).read_bytes()
return f"{file_obj.name} {type(file_obj)}\n{dir(file_obj)} \n{data}"
# '''
|