{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Quran recitation matcher\n",
    "\n",
    "Records a short clip from the microphone, transcribes it with a Wav2Vec2 CTC model, and looks up the closest passage in suras 78-114 using character-level edit distance.\n",
    "\n",
    "Run the cells top to bottom; the last cell starts the recording."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install sounddevice scipy torch transformers lang_trans nltk tqdm pyquran"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from os import path\n",
    "import sounddevice as sd\n",
    "import scipy.io.wavfile as wav\n",
    "import torch\n",
    "from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor\n",
    "from lang_trans.arabic import buckwalter\n",
    "from nltk import edit_distance\n",
    "from tqdm import tqdm\n",
    "import pyquran as q"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration: every value a reader might want to tune lives here.\n",
    "SAMPLE_RATE = 16000    # Hz; sampling rate used for recording and passed to the processor\n",
    "RECORD_SECONDS = 5     # length of each microphone recording, in seconds\n",
    "FIRST_SURA = 78        # first sura of the searched range\n",
    "LAST_SURA = 114        # last sura of the searched range (inclusive)\n",
    "PREVIEW_CHARS = 200    # characters of Quran text printed for each match"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def record(fs=SAMPLE_RATE, seconds=RECORD_SECONDS):\n",
    "    \"\"\"Record a mono clip with sounddevice and return it with its sample rate.\n",
    "\n",
    "    Args:\n",
    "        fs: Sample rate in Hz.\n",
    "        seconds: Duration of the recording in seconds.\n",
    "\n",
    "    Returns:\n",
    "        Tuple ``(fs, samples)`` where ``samples`` is the first (only) channel\n",
    "        of the recording as a 1-D array.\n",
    "    \"\"\"\n",
    "    print(\"Recording...\")\n",
    "    myrecording = sd.rec(int(seconds * fs), samplerate=fs, channels=1)\n",
    "    sd.wait()  # Wait until recording is finished\n",
    "    print(\"Finished recording.\")\n",
    "    return fs, myrecording[:, 0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _load_model_and_processor(model_name):\n",
    "    \"\"\"Load a Wav2Vec2 CTC model (in eval mode) and its processor into globals.\n",
    "\n",
    "    Sets the module-level names ``loaded_model`` and ``loaded_processor``,\n",
    "    which ``predict`` reads.\n",
    "    \"\"\"\n",
    "    global loaded_model, loaded_processor\n",
    "    loaded_model = Wav2Vec2ForCTC.from_pretrained(model_name).eval()\n",
    "    loaded_processor = Wav2Vec2Processor.from_pretrained(model_name)\n",
    "\n",
    "\n",
    "def load_Quran_fine_tuned_elgeish_xlsr_53_model_and_processor():\n",
    "    \"\"\"Load the ``Nuwaisir/Quran_speech_recognizer`` model and processor.\"\"\"\n",
    "    _load_model_and_processor(\"Nuwaisir/Quran_speech_recognizer\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_elgeish_xlsr_53_model_and_processor():\n",
    "    \"\"\"Load the ``elgeish/wav2vec2-large-xlsr-53-arabic`` model and processor.\"\"\"\n",
    "    _load_model_and_processor(\"elgeish/wav2vec2-large-xlsr-53-arabic\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def predict(single):\n",
    "    \"\"\"Transcribe one recording and store the result under ``single[\"predicted\"]``.\n",
    "\n",
    "    Requires one of the ``load_*_model_and_processor`` functions to have been\n",
    "    called first, because it reads the ``loaded_model`` / ``loaded_processor`` globals.\n",
    "\n",
    "    Args:\n",
    "        single: Dict with the key ``\"speech\"`` (audio samples) and, optionally,\n",
    "            ``\"sampling_rate\"`` in Hz (defaults to ``SAMPLE_RATE``).\n",
    "\n",
    "    Returns:\n",
    "        The same dict, with ``\"predicted\"`` set to the decoded text after\n",
    "        ``buckwalter.untrans`` has been applied to it.\n",
    "    \"\"\"\n",
    "    # Pass the recording's real sampling rate instead of a hard-coded value so the\n",
    "    # processor can validate it against the rate the model expects.\n",
    "    inputs = loaded_processor(\n",
    "        single[\"speech\"],\n",
    "        sampling_rate=single.get(\"sampling_rate\", SAMPLE_RATE),\n",
    "        return_tensors=\"pt\",\n",
    "        padding=True,\n",
    "    )\n",
    "    with torch.no_grad():\n",
    "        logits = loaded_model(inputs.input_values).logits\n",
    "    predicted = torch.argmax(logits, dim=-1)\n",
    "    predicted[predicted == -100] = loaded_processor.tokenizer.pad_token_id  # see fine-tuning script\n",
    "    pred_1 = loaded_processor.tokenizer.batch_decode(predicted)[0]\n",
    "    single[\"predicted\"] = buckwalter.untrans(pred_1)\n",
    "    return single"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def last_para_str(taskeel=False, first_sura=FIRST_SURA, last_sura=LAST_SURA):\n",
    "    \"\"\"Concatenate the text of a range of suras into one string.\n",
    "\n",
    "    Args:\n",
    "        taskeel: Forwarded to pyquran as ``with_tashkeel``.\n",
    "        first_sura: Number of the first sura to include.\n",
    "        last_sura: Number of the last sura to include (inclusive).\n",
    "\n",
    "    Returns:\n",
    "        The items returned by ``get_sura`` joined by single spaces, with one\n",
    "        extra space after every sura (so the string ends with a space).\n",
    "    \"\"\"\n",
    "    suras = (\n",
    "        ' '.join(q.quran.get_sura(i, with_tashkeel=taskeel, basmalah=False))\n",
    "        for i in range(first_sura, last_sura + 1)\n",
    "    )\n",
    "    return ''.join(sura + ' ' for sura in suras)\n",
    "\n",
    "\n",
    "def find_match_2(q_str, s, spaces, threshhold=10):\n",
    "    \"\"\"Find where ``s`` best matches ``q_str`` by edit distance.\n",
    "\n",
    "    For every index ``i`` in ``spaces`` the window starting at ``i + 1`` and\n",
    "    spanning ``len(s) + len(s) // 3`` characters is compared with ``s``.\n",
    "\n",
    "    Args:\n",
    "        q_str: Text to search in.\n",
    "        s: Text to look for.\n",
    "        spaces: Indices in ``q_str``; a candidate window starts right after each.\n",
    "        threshhold: Unused; kept so existing callers keep working.\n",
    "\n",
    "    Returns:\n",
    "        Tuple ``(min_dist, positions)``: the smallest edit distance found and\n",
    "        every window start that achieves it. If no window was evaluated the\n",
    "        result is ``(1000000000, [])``.\n",
    "    \"\"\"\n",
    "    len_q = len(q_str)\n",
    "    window = len(s) + len(s) // 3\n",
    "    min_dist = 1000000000\n",
    "    min_dist_pos = []\n",
    "    for i in tqdm(spaces):\n",
    "        j = i + 1\n",
    "        if j >= len_q:\n",
    "            continue  # no text left after this space\n",
    "        # Slicing clamps at the end of q_str, so candidates near the end are\n",
    "        # still compared (with a shorter window) instead of being dropped.\n",
    "        dist = edit_distance(q_str[j:j + window], s)\n",
    "        if dist < min_dist:\n",
    "            min_dist = dist\n",
    "            min_dist_pos = [j]\n",
    "        elif dist == min_dist:\n",
    "            min_dist_pos.append(j)\n",
    "    return min_dist, min_dist_pos\n",
    "\n",
    "\n",
    "def find_all_index(s, ch):\n",
    "    \"\"\"Return the indices of every element of ``s`` that equals ``ch``.\"\"\"\n",
    "    return [i for i, ltr in enumerate(s) if ltr == ch]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "last_para = last_para_str(taskeel=True)\n",
    "# Candidate windows start right after each space; the leading -1 makes index 0 a candidate too.\n",
    "last_para_spaces = [-1] + find_all_index(last_para, ' ')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "def pipeline(preview_chars=PREVIEW_CHARS):\n",
    "    \"\"\"Record a clip, transcribe it and print the best-matching passages.\n",
    "\n",
    "    Uses the notebook globals ``last_para`` and ``last_para_spaces`` as the\n",
    "    text to search and its candidate start positions.\n",
    "\n",
    "    Args:\n",
    "        preview_chars: Number of characters of ``last_para`` printed for each match.\n",
    "    \"\"\"\n",
    "    fs, myrecording = record()\n",
    "    single_example = {\n",
    "        \"speech\": myrecording,\n",
    "        \"sampling_rate\": fs,\n",
    "    }\n",
    "    predicted = predict(single_example)\n",
    "    print(predicted[\"predicted\"])\n",
    "    if not predicted[\"predicted\"].strip():\n",
    "        # An empty transcription would match every position with distance 0.\n",
    "        print(\"Nothing was recognised; try reciting again.\")\n",
    "        return\n",
    "    dist, poses = find_match_2(last_para, predicted['predicted'], spaces=last_para_spaces)\n",
    "    print(\"distance:\", dist)\n",
    "    print(\"number of matches:\", len(poses))\n",
    "    for i in poses:\n",
    "        print(last_para[i:i + preview_chars], '\\n')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load the elgeish_xlsr_53 model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load_elgeish_xlsr_53_model_and_processor()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load the Quran fine-tuned elgeish_xlsr_53 model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "load_Quran_fine_tuned_elgeish_xlsr_53_model_and_processor()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Recording...\n",
      "Finished recording.\n",
      "لِإِلَا فِ قْرَايشِ إِلَا فِيهِ\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|█████████▉| 2304/2309 [00:03<00:00, 587.76it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "distance: 23\n",
      "number of matches: 1\n",
      "لِإِيلَفِ قُرَيْشٍ إِلَفِهِمْ رِحْلَةَ الشِّتَاءِ وَالصَّيْفِ فَلْيَعْبُدُوا رَبَّ هَذَا الْبَيْتِ الَّذِى أَطْعَمَهُم مِّن جُوعٍ وَءَامَنَهُم مِّنْ خَوْفٍ أَرَءَيْتَ الَّذِى يُكَذِّبُ بِالدِّينِ فَذَ \n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# Recite after running this cell. The first 5 seconds will capture your audio\n",
    "pipeline()"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "35541def04ad193058c9b5b3afd24560c7277f209ee76d36789dee7d6c5bcde6"
  },
  "kernelspec": {
   "display_name": "Python 3.10.2 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}