Spaces:

cdactvm
/

Hindi_ASR

Running

File size: 3,341 Bytes

f63b5a2

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "bb78ac37-de4f-407a-8fd5-1a269fd937c9",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
     ]
    }
   ],
   "source": [
    "# Import necessary libraries and filter warnings\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")\n",
    "import nbimporter\n",
    "import os\n",
    "import re\n",
    "import numpy as np\n",
    "import torchaudio\n",
    "from transformers import pipeline\n",
    "from text2int import text_to_int\n",
    "from isNumber import is_number\n",
    "from Text2List import text_to_list\n",
    "from convert2list import convert_to_list\n",
    "from processDoubles import process_doubles\n",
    "from replaceWords import replace_words\n",
    "transcriber = pipeline(task=\"automatic-speech-recognition\", model=\"cdactvm/w2v-bert-2.0-hindi_v1\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "02b787e8-6d08-4351-a830-7f7cae7f8243",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Running on local URL:  http://127.0.0.1:7860\n",
      "\n",
      "To create a public link, set `share=True` in `launch()`.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import gradio as gr\n",
    "\n",
    "def transcribe(audio):\n",
    "    # # Process the audio file\n",
    "    transcript = transcriber(audio)\n",
    "    text_value = transcript['text']\n",
    "    print(text_value)\n",
    "    processd_doubles=process_doubles(text_value)\n",
    "    converted_to_list=convert_to_list(processd_doubles,text_to_list())\n",
    "    replaced_words = replace_words(converted_to_list)\n",
    "    converted_text=text_to_int(replaced_words)\n",
    "    return converted_text\n",
    "\n",
    "\n",
    "demo = gr.Interface(\n",
    "    transcribe,\n",
    "    gr.Audio(sources=\"microphone\", type=\"filepath\"),\n",
    "    \"text\",\n",
    ")\n",
    "\n",
    "demo.launch()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "756c0b55-17b4-4aa0-baac-d8f1c4b003df",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}