def convert_to_list(text, text_list):
    """Segment unspaced ``text`` into known words and return them space-joined.

    The text is scanned left to right. At each position the longest entry of
    ``text_list`` that the remaining text starts with is taken as a token.
    Characters that do not begin any known word are collected into a run, and
    that run is emitted as one token when the next known word (or the end of
    the text) is reached.

    Args:
        text: String to segment. A falsy value (``''`` or ``None``) yields ``''``.
        text_list: Iterable of known words. Empty strings are ignored.

    Returns:
        str: The tokens joined by single spaces. Note that, despite the
        function's name, the result is a string and not a list.
    """
    if not text:
        return ''

    # Longest first, so a word always wins over any of its own prefixes.
    # Empty entries are dropped: '' is a prefix of everything yet consumes no
    # input, so keeping it would make the scan below spin forever.
    vocabulary = sorted((word for word in text_list if word), key=len, reverse=True)

    tokens = []
    end = len(text)
    pos = 0              # current scan position
    unmatched_start = 0  # start of the pending run of unmatched characters
    while pos < end:
        # Scan by index rather than re-slicing ``text`` on every step, which
        # would copy the remaining tail each time.
        hit = next((word for word in vocabulary if text.startswith(word, pos)), None)
        if hit is None:
            pos += 1
            continue
        if unmatched_start < pos:
            # Flush the unmatched run that precedes this word as one token.
            tokens.append(text[unmatched_start:pos])
        tokens.append(hit)
        pos += len(hit)
        unmatched_start = pos

    # Whatever is left after the last known word is emitted as a final token.
    if unmatched_start < end:
        tokens.append(text[unmatched_start:])

    return ' '.join(tokens)


# Sample input: Hindi number words written without separators.
text = "जीरोएकदोतीनचारपांचछहसातआठनौदसजीरोएकदोतीनचारपांच"

if __name__ == "__main__":
    # Only this demo needs the vocabulary loader, so the notebook-specific
    # imports live here and importing convert_to_list stays dependency-free.
    # nbimporter must be imported before Text2List - presumably Text2List is a
    # notebook that nbimporter makes importable; confirm against the project.
    import nbimporter  # noqa: F401
    from Text2List import text_to_list

    converted = convert_to_list(text, text_to_list())
    print(converted)
text_to_list\n", "# def convert_to_list(text, text_list):\n", "# matched_words = []\n", "# unmatched_text = '' # To accumulate unmatched characters\n", "\n", "# # Sort text_list by length in descending order to prioritize longest matches first\n", "# text_list_sorted = sorted(text_list, key=len, reverse=True)\n", "\n", "# while text:\n", "# matched = False\n", "# for word in text_list_sorted:\n", "# if word in text:\n", "# # Add any accumulated unmatched text before appending the matched word\n", "# if unmatched_text:\n", "# matched_words.append(unmatched_text)\n", "# unmatched_text = '' # Reset unmatched text accumulator\n", "\n", "# matched_words.append(word)\n", "# text = text[len(word):] # Remove the matched part from text\n", "# matched = True\n", "# break\n", "\n", "# if not matched:\n", "# # Accumulate unmatched characters\n", "# unmatched_text += text[0]\n", "# text = text[1:]\n", "\n", "# # If there's any remaining unmatched text, add it to the result\n", "# if unmatched_text:\n", "# matched_words.append(unmatched_text)\n", "\n", "# # Join matched words and unmatched text with a space\n", "# result = ' '.join(matched_words)\n", "# return result\n", " \n", "# text = \"जीरोएकदोतीनचार\"\n", "\n", "# if __name__==\"__main__\":\n", "# converted=convert_to_list(text, text_to_list())\n", "# print(converted)" ] }, { "cell_type": "code", "execution_count": null, "id": "26b725cd-d14f-4d8a-9829-99a7b9a5eeb3", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.7" } }, "nbformat": 4, "nbformat_minor": 5 }