Handwritten Text Character Recognition Task " \ + "Text Detection + Text Rearrangement + Text Recognition" \ + "For More Information" \ + "GitBlog |" \ + "Source code
" + +#Source code +#examples = [['zidane.jpg'], ['bus.jpg']] +#examples=examples, +gr.Interface(fn=HCR, inputs="image", outputs = "text", title=title, description=description, article=article, analytics_enabled=False).launch( + debug=True) \ No newline at end of file diff --git a/directories.py b/directories.py new file mode 100644 index 0000000000000000000000000000000000000000..8f4bdf94940a981a6f02a8324b96b0dd7abd13a9 --- /dev/null +++ b/directories.py @@ -0,0 +1,11 @@ +home_dir = "/HCR" +yolo_dir = "/HCR/TextDetection/" +input_img = "/HCR/TextDetection/cookie/user_input.jpg" +detect_model_dir = "/runs/wordDetection/weights/best.pt" +cropped_img_folder_name = "user_output" +cropped_img_path = "/runs/detect/" + cropped_img_folder_name +folder_path = cropped_img_path + "/crops/word" +DBSCAN_dir = "/HCR/TextRearrange" +recog_dir = "/HCR/TextRecognition" +recog_model_dir='/HCR/TextRecognition/best_accuracy_s/best_accuracy_s.pth' +txt_file_path = "/HCR/TextRecognition/log_demo_result.txt" \ No newline at end of file diff --git a/process.py b/process.py new file mode 100644 index 0000000000000000000000000000000000000000..736e209185a4caf722e101968f476f65e586f614 --- /dev/null +++ b/process.py @@ -0,0 +1,76 @@ +import os +import shutil +import subprocess +from PIL import Image +import directories as Dir + +def clearDir(): + + #/text_detection/cookie/user_input + #shutil.rmtree('/cookie') + #os.remove("/cookie/user_input.jpg") + + #cropped_img_path = "/runs/detect/" + cropped_img_folder_name + shutil.rmtree(Dir.cropped_img_path) #'/runs/detect/user_output' + + #txt_file_path = "/HCR/TextRecognition/log_demo_result.txt" + os.remove(Dir.txt_file_path) + +def textDetection(im): + + #change dir to yolo folder + #yolo_dir = "/HCR/TextDetection/" + subprocess.call('cd'+ Dir.yolo_dir, shell=True) + + #transfrom ndarray type to PIL type + im = Image.fromarray(im) + + # save input image to cookie folder + subprocess.call('cd cookie', shell=True) + im.save("user_input.jpg", 'JPEG') + + #yolo_dir = "/HCR/TextDetection/" + subprocess.call('cd'+ Dir.yolo_dir, shell=True) + + # (Shell) run detect.py to get cropped word images + subprocess.call(['python','detect.py', + #User Input Data : /text_detection/cookie + '--source','/cookie', + #Text Detection Model : /runs/wordDetection/weights/best.pt + '--weights', Dir.detect_model_dir, + '--conf','0.25', + #Output Images Save Directory /runs/detect/user_output + '--name', Dir.cropped_img_folder_name, + '--save-crop', + '--save-conf']) + + #g = (size / max(im.size)) # gain + #im = im.resize((int(x * g) for x in im.size), Image.ANTIALIAS) # resize + + #results = model(im) # inference + #results.render() # updates results.imgs with boxes and labels + #return Image.fromarray(results.imgs[0]) + +def textRearrange(): + subprocess.call('cd' + Dir.DBSCAN_dir, shell=True) + subprocess.call(['python','DBSCAN.py']) + +def textRecognition(): + #%cd /content/drive/MyDrive/KITA/Text/lmdb/deep-text-recognition-benchmark + subprocess.call('cd '+Dir.recog_dir, shell=True) + #!CUDA_VISIBLE_DEVICES=0 python3 demo.py --Transformation TPS --FeatureExtraction ResNet --SequenceModeling BiLSTM --Prediction Attn --image_folder /content/drive/MyDrive/KITA/Text/YOLO/runs/detect/youtube_data2/crops/word --saved_model /content/drive/MyDrive/KITA/Text/best_accuracy_s/best_accuracy_s.pth + subprocess.call('CUDA_VISIBLE_DEVICES="" python3 demo.py --Transformation TPS --FeatureExtraction ResNet --SequenceModeling BiLSTM --Prediction Attn --image_folder ' + Dir.home_dir + Dir.cropped_img_path + '/crops/word --saved_model '+ Dir.recog_model_dir, shell=True) + +def getHcrResult(file_path):#*# + texts = "" + with open(file_path, 'r') as file: + lines = file.readlines() + for line in lines[3:]: + line = line.replace("\t","*",1) + line = line.replace(" ","*",1) + parts = line.replace(" ","") + parts2 = parts.split("*",2) + #print(len(parts2)) + texts = texts +" "+ str(parts2[1:2])[2:-2] + + return texts \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..98025810654130199001b2fffa89bc276a1108c0 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,502 @@ +absl-py==1.4.0 +aiofiles==23.2.1 +aiohttp==3.9.1 +aiosignal==1.3.1 +alabaster==0.7.13 +albumentations==1.3.1 +altair==4.2.2 +annotated-types==0.6.0 +anyio==3.7.1 +appdirs==1.4.4 +argon2-cffi==23.1.0 +argon2-cffi-bindings==21.2.0 +array-record==0.5.0 +arviz==0.15.1 +astropy==5.3.4 +astunparse==1.6.3 +async-timeout==4.0.3 +atpublic==4.0 +attrs==23.1.0 +audioread==3.0.1 +autograd==1.6.2 +Babel==2.14.0 +backcall==0.2.0 +beautifulsoup4==4.11.2 +bidict==0.22.1 +bigframes==0.17.0 +bleach==6.1.0 +blinker==1.4 +blis==0.7.11 +blosc2==2.0.0 +bokeh==3.3.2 +bqplot==0.12.42 +branca==0.7.0 +build==1.0.3 +CacheControl==0.13.1 +cachetools==5.3.2 +catalogue==2.0.10 +certifi==2023.11.17 +cffi==1.16.0 +chardet==5.2.0 +charset-normalizer==3.3.2 +chex==0.1.7 +click==8.1.7 +click-plugins==1.1.1 +cligj==0.7.2 +cloudpickle==2.2.1 +cmake==3.27.9 +cmdstanpy==1.2.0 +colorama==0.4.6 +colorcet==3.0.1 +colorlover==0.3.0 +colour==0.1.5 +community==1.0.0b1 +confection==0.1.4 +cons==0.4.6 +contextlib2==21.6.0 +contourpy==1.2.0 +cryptography==41.0.7 +cufflinks==0.17.3 +cupy-cuda12x==12.2.0 +cvxopt==1.3.2 +cvxpy==1.3.2 +cycler==0.12.1 +cymem==2.0.8 +Cython==3.0.7 +dask==2023.8.1 +datascience==0.17.6 +db-dtypes==1.2.0 +dbus-python==1.2.18 +debugpy==1.6.6 +decorator==4.4.2 +defusedxml==0.7.1 +diskcache==5.6.3 +distributed==2023.8.1 +distro==1.7.0 +dlib==19.24.2 +dm-tree==0.1.8 +docutils==0.18.1 +dopamine-rl==4.0.6 +duckdb==0.9.2 +earthengine-api==0.1.384 +easydict==1.11 +ecos==2.0.12 +editdistance==0.6.2 +eerepr==0.0.4 +en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0-py3-none-any.whl#sha256=83276fc78a70045627144786b52e1f2728ad5e29e5e43916ec37ea9c26a11212 +entrypoints==0.4 +et-xmlfile==1.1.0 +etils==1.6.0 +etuples==0.3.9 +exceptiongroup==1.2.0 +fastai==2.7.13 +fastapi==0.108.0 +fastcore==1.5.29 +fastdownload==0.0.7 +fastjsonschema==2.19.0 +fastprogress==1.0.3 +fastrlock==0.8.2 +ffmpy==0.3.1 +filelock==3.13.1 +fiona==1.9.5 +firebase-admin==5.3.0 +Flask==2.2.5 +flatbuffers==23.5.26 +flax==0.7.5 +folium==0.14.0 +fonttools==4.47.0 +frozendict==2.3.10 +frozenlist==1.4.1 +fsspec==2023.6.0 +future==0.18.3 +gast==0.5.4 +gcsfs==2023.6.0 +GDAL==3.4.3 +gdown==4.6.6 +geemap==0.29.6 +gensim==4.3.2 +geocoder==1.38.1 +geographiclib==2.0 +geopandas==0.13.2 +geopy==2.3.0 +gin-config==0.5.0 +gitdb==4.0.11 +GitPython==3.1.40 +glob2==0.7 +google==2.0.3 +google-ai-generativelanguage==0.4.0 +google-api-core==2.11.1 +google-api-python-client==2.84.0 +google-auth==2.17.3 +google-auth-httplib2==0.1.1 +google-auth-oauthlib==1.2.0 +google-cloud-aiplatform==1.38.1 +google-cloud-bigquery==3.12.0 +google-cloud-bigquery-connection==1.12.1 +google-cloud-bigquery-storage==2.24.0 +google-cloud-core==2.3.3 +google-cloud-datastore==2.15.2 +google-cloud-firestore==2.11.1 +google-cloud-functions==1.13.3 +google-cloud-iam==2.13.0 +google-cloud-language==2.9.1 +google-cloud-resource-manager==1.11.0 +google-cloud-storage==2.8.0 +google-cloud-translate==3.11.3 +google-colab @ file:///colabtools/dist/google-colab-1.0.0.tar.gz#sha256=eb6190db7e94f83570d0663e5324b48dbc8d7ffbf066a99973922bc15318ecda +google-crc32c==1.5.0 +google-generativeai==0.3.2 +google-pasta==0.2.0 +google-resumable-media==2.7.0 +googleapis-common-protos==1.62.0 +googledrivedownloader==0.4 +gradio==4.13.0 +gradio_client==0.8.0 +graphviz==0.20.1 +greenlet==3.0.2 +grpc-google-iam-v1==0.13.0 +grpcio==1.60.0 +grpcio-status==1.48.2 +gspread==3.4.2 +gspread-dataframe==3.3.1 +gym==0.25.2 +gym-notices==0.0.8 +h11==0.14.0 +h5netcdf==1.3.0 +h5py==3.9.0 +holidays==0.39 +holoviews==1.17.1 +html5lib==1.1 +httpcore==1.0.2 +httpimport==1.3.1 +httplib2==0.22.0 +httpx==0.26.0 +huggingface-hub==0.20.1 +humanize==4.7.0 +hyperopt==0.2.7 +ibis-framework==6.2.0 +idna==3.6 +imageio==2.31.6 +imageio-ffmpeg==0.4.9 +imagesize==1.4.1 +imbalanced-learn==0.10.1 +imgaug==0.4.0 +importlib-metadata==7.0.0 +importlib-resources==6.1.1 +imutils==0.5.4 +inflect==7.0.0 +iniconfig==2.0.0 +install==1.3.5 +intel-openmp==2023.2.3 +ipyevents==2.0.2 +ipyfilechooser==0.6.0 +ipykernel==5.5.6 +ipyleaflet==0.18.1 +ipython==7.34.0 +ipython-genutils==0.2.0 +ipython-sql==0.5.0 +ipytree==0.2.2 +ipywidgets==7.7.1 +itsdangerous==2.1.2 +jax==0.4.23 +jaxlib @ https://storage.googleapis.com/jax-releases/cuda12/jaxlib-0.4.23+cuda12.cudnn89-cp310-cp310-manylinux2014_x86_64.whl#sha256=8e42000672599e7ec0ea7f551acfcc95dcdd0e22b05a1d1f12f97b56a9fce4a8 +jeepney==0.7.1 +jieba==0.42.1 +Jinja2==3.1.2 +joblib==1.3.2 +jsonpickle==3.0.2 +jsonschema==4.19.2 +jsonschema-specifications==2023.11.2 +jupyter-client==6.1.12 +jupyter-console==6.1.0 +jupyter-server==1.24.0 +jupyter_core==5.5.1 +jupyterlab-widgets==3.0.9 +jupyterlab_pygments==0.3.0 +kaggle==1.5.16 +kagglehub==0.1.4 +kaleido==0.2.1 +keras==2.15.0 +keyring==23.5.0 +kiwisolver==1.4.5 +langcodes==3.3.0 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +lazy_loader==0.3 +libclang==16.0.6 +librosa==0.10.1 +lida==0.0.10 +lightgbm==4.1.0 +linkify-it-py==2.0.2 +llmx==0.0.15a0 +llvmlite==0.41.1 +lmdb==1.4.1 +locket==1.0.0 +logical-unification==0.4.6 +lxml==4.9.4 +malloy==2023.1067 +Markdown==3.5.1 +markdown-it-py==3.0.0 +MarkupSafe==2.1.3 +matplotlib==3.7.1 +matplotlib-inline==0.1.6 +matplotlib-venn==0.11.9 +mdit-py-plugins==0.4.0 +mdurl==0.1.2 +miniKanren==1.0.3 +missingno==0.5.2 +mistune==0.8.4 +mizani==0.9.3 +mkl==2023.2.0 +ml-dtypes==0.2.0 +mlxtend==0.22.0 +more-itertools==10.1.0 +moviepy==1.0.3 +mpmath==1.3.0 +msgpack==1.0.7 +multidict==6.0.4 +multipledispatch==1.0.0 +multitasking==0.0.11 +murmurhash==1.0.10 +music21==9.1.0 +natsort==8.4.0 +nbclassic==1.0.0 +nbclient==0.9.0 +nbconvert==6.5.4 +nbformat==5.9.2 +nest-asyncio==1.5.8 +networkx==3.2.1 +nibabel==4.0.2 +nltk==3.8.1 +notebook==6.5.5 +notebook_shim==0.2.3 +numba==0.58.1 +numexpr==2.8.8 +numpy==1.23.5 +oauth2client==4.1.3 +oauthlib==3.2.2 +opencv-contrib-python==4.8.0.76 +opencv-python==4.8.0.76 +opencv-python-headless==4.8.1.78 +openpyxl==3.1.2 +opt-einsum==3.3.0 +optax==0.1.7 +orbax-checkpoint==0.4.4 +orjson==3.9.10 +osqp==0.6.2.post8 +packaging==23.2 +pandas==1.5.3 +pandas-datareader==0.10.0 +pandas-gbq==0.19.2 +pandas-stubs==1.5.3.230304 +pandocfilters==1.5.0 +panel==1.3.6 +param==2.0.1 +parso==0.8.3 +parsy==2.1 +partd==1.4.1 +pathlib==1.0.1 +pathy==0.10.3 +patsy==0.5.4 +peewee==3.17.0 +pexpect==4.9.0 +pickleshare==0.7.5 +Pillow==9.4.0 +pip-tools==6.13.0 +platformdirs==4.1.0 +plotly==5.15.0 +plotnine==0.12.4 +pluggy==1.3.0 +polars==0.17.3 +pooch==1.8.0 +portpicker==1.5.2 +prefetch-generator==1.0.3 +preshed==3.0.9 +prettytable==3.9.0 +proglog==0.1.10 +progressbar2==4.2.0 +prometheus-client==0.19.0 +promise==2.3 +prompt-toolkit==3.0.43 +prophet==1.1.5 +proto-plus==1.23.0 +protobuf==3.20.3 +psutil==5.9.5 +psycopg2==2.9.9 +ptyprocess==0.7.0 +py-cpuinfo==9.0.0 +py4j==0.10.9.7 +pyarrow==10.0.1 +pyasn1==0.5.1 +pyasn1-modules==0.3.0 +pycocotools==2.0.7 +pycparser==2.21 +pyct==0.5.0 +pydantic==2.5.3 +pydantic_core==2.14.6 +pydata-google-auth==1.8.2 +pydot==1.4.2 +pydot-ng==2.0.0 +pydotplus==2.0.2 +PyDrive==1.3.1 +PyDrive2==1.6.3 +pydub==0.25.1 +pyerfa==2.0.1.1 +pygame==2.5.2 +Pygments==2.16.1 +PyGObject==3.42.1 +PyJWT==2.3.0 +pymc==5.7.2 +pymystem3==0.2.0 +PyOpenGL==3.1.7 +pyOpenSSL==23.3.0 +pyparsing==3.1.1 +pyperclip==1.8.2 +pyproj==3.6.1 +pyproject_hooks==1.0.0 +pyshp==2.3.1 +PySocks==1.7.1 +pytensor==2.14.2 +pytest==7.4.3 +python-apt==0.0.0 +python-box==7.1.1 +python-dateutil==2.8.2 +python-louvain==0.16 +python-multipart==0.0.6 +python-slugify==8.0.1 +python-utils==3.8.1 +pytz==2023.3.post1 +pyviz_comms==3.0.0 +PyWavelets==1.5.0 +PyYAML==6.0.1 +pyzmq==23.2.1 +qdldl==0.1.7.post0 +qudida==0.0.4 +ratelim==0.1.6 +referencing==0.32.0 +regex==2023.6.3 +requests==2.31.0 +requests-oauthlib==1.3.1 +requirements-parser==0.5.0 +rich==13.7.0 +rpds-py==0.15.2 +rpy2==3.4.2 +rsa==4.9 +safetensors==0.4.1 +scikit-image==0.19.3 +scikit-learn==1.2.2 +scipy==1.11.4 +scooby==0.9.2 +scs==3.2.4.post1 +seaborn==0.12.2 +SecretStorage==3.3.1 +semantic-version==2.10.0 +Send2Trash==1.8.2 +shapely==2.0.2 +shellingham==1.5.4 +six==1.16.0 +sklearn-pandas==2.2.0 +smart-open==6.4.0 +smmap==5.0.1 +sniffio==1.3.0 +snowballstemmer==2.2.0 +sortedcontainers==2.4.0 +soundfile==0.12.1 +soupsieve==2.5 +soxr==0.3.7 +spacy==3.6.1 +spacy-legacy==3.0.12 +spacy-loggers==1.0.5 +Sphinx==5.0.2 +sphinxcontrib-applehelp==1.0.7 +sphinxcontrib-devhelp==1.0.5 +sphinxcontrib-htmlhelp==2.0.4 +sphinxcontrib-jsmath==1.0.1 +sphinxcontrib-qthelp==1.0.6 +sphinxcontrib-serializinghtml==1.1.9 +SQLAlchemy==2.0.23 +sqlglot==17.16.2 +sqlparse==0.4.4 +srsly==2.4.8 +stanio==0.3.0 +starlette==0.32.0.post1 +statsmodels==0.14.1 +sympy==1.12 +tables==3.8.0 +tabulate==0.9.0 +tbb==2021.11.0 +tblib==3.0.0 +tenacity==8.2.3 +tensorboard==2.15.1 +tensorboard-data-server==0.7.2 +tensorflow==2.15.0 +tensorflow-datasets==4.9.4 +tensorflow-estimator==2.15.0 +tensorflow-gcs-config==2.15.0 +tensorflow-hub==0.15.0 +tensorflow-io-gcs-filesystem==0.35.0 +tensorflow-metadata==1.14.0 +tensorflow-probability==0.22.0 +tensorstore==0.1.45 +termcolor==2.4.0 +terminado==0.18.0 +text-unidecode==1.3 +textblob==0.17.1 +tf-slim==1.1.0 +thinc==8.1.12 +thop==0.1.1.post2209072238 +threadpoolctl==3.2.0 +tifffile==2023.12.9 +tinycss2==1.2.1 +tokenizers==0.15.0 +toml==0.10.2 +tomli==2.0.1 +tomlkit==0.12.0 +toolz==0.12.0 +torch @ https://download.pytorch.org/whl/cu121/torch-2.1.0%2Bcu121-cp310-cp310-linux_x86_64.whl#sha256=0d4e8c52a1fcf5ed6cfc256d9a370fcf4360958fc79d0b08a51d55e70914df46 +torchaudio @ https://download.pytorch.org/whl/cu121/torchaudio-2.1.0%2Bcu121-cp310-cp310-linux_x86_64.whl#sha256=676bda4042734eda99bc59b2d7f761f345d3cde0cad492ad34e3aefde688c6d8 +torchdata==0.7.0 +torchsummary==1.5.1 +torchtext==0.16.0 +torchvision @ https://download.pytorch.org/whl/cu121/torchvision-0.16.0%2Bcu121-cp310-cp310-linux_x86_64.whl#sha256=e76e78d0ad43636c9884b3084ffaea8a8b61f21129fbfa456a5fe734f0affea9 +tornado==6.3.2 +tqdm==4.66.1 +traitlets==5.7.1 +traittypes==0.2.1 +transformers==4.35.2 +triton==2.1.0 +tweepy==4.14.0 +typer==0.9.0 +types-pytz==2023.3.1.1 +types-setuptools==69.0.0.0 +typing_extensions==4.9.0 +tzlocal==5.2 +uc-micro-py==1.0.2 +ultralytics==8.0.236 +uritemplate==4.1.1 +urllib3==2.0.7 +uvicorn==0.25.0 +vega-datasets==0.9.0 +wadllib==1.3.6 +wasabi==1.1.2 +wcwidth==0.2.12 +webcolors==1.13 +webencodings==0.5.1 +websocket-client==1.7.0 +websockets==11.0.3 +Werkzeug==3.0.1 +widgetsnbextension==3.6.6 +wordcloud==1.9.3 +wrapt==1.14.1 +xarray==2023.7.0 +xarray-einstats==0.6.0 +xgboost==2.0.3 +xlrd==2.0.1 +xxhash==3.4.1 +xyzservices==2023.10.1 +yarl==1.9.4 +yellowbrick==1.5 +yfinance==0.2.33 +zict==3.0.0 +zipp==3.17.0