{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\spn\\anaconda3\\envs\\capstone\\Lib\\site-packages\\torchvision\\io\\image.py:13: UserWarning: Failed to load image Python extension: '[WinError 127] The specified procedure could not be found'If you don't plan on using image functionality from `torchvision.io`, you can ignore this warning. Otherwise, there might be something wrong with your environment. Did you have `libjpeg` or `libpng` installed before building `torchvision` from source?\n", " warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[2024-06-10 23:30:49,190] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "[2024-06-10 23:30:49,544] torch.distributed.elastic.multiprocessing.redirects: [WARNING] NOTE: Redirects are currently not supported in Windows or MacOs.\n", "[NeMo W 2024-06-10 23:30:52 nemo_logging:393] Could not import NeMo NLP collection which is required for speech translation model.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[NeMo I 2024-06-10 23:31:08 nemo_logging:381] Tokenizer SentencePieceTokenizer initialized with 1024 tokens\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "[NeMo W 2024-06-10 23:31:08 nemo_logging:393] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.\n", " Train config : \n", " manifest_filepath: /disk1/NVIDIA/datasets/LibriSpeech_NeMo/librivox-train-all.json\n", " sample_rate: 16000\n", " batch_size: 16\n", " shuffle: true\n", " num_workers: 8\n", " pin_memory: true\n", " use_start_end_token: false\n", " trim_silence: false\n", " max_duration: 16.7\n", " min_duration: 0.1\n", " is_tarred: false\n", " tarred_audio_filepaths: null\n", " shuffle_n: 2048\n", " bucketing_strategy: fully_randomized\n", " bucketing_batch_size: null\n", " \n", "[NeMo W 2024-06-10 23:31:08 nemo_logging:393] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s). \n", " Validation config : \n", " manifest_filepath: /disk1/NVIDIA/datasets/LibriSpeech_NeMo/librivox-dev-clean.json\n", " sample_rate: 16000\n", " batch_size: 16\n", " shuffle: false\n", " use_start_end_token: false\n", " num_workers: 8\n", " pin_memory: true\n", " \n", "[NeMo W 2024-06-10 23:31:08 nemo_logging:393] Please call the ModelPT.setup_test_data() or ModelPT.setup_multiple_test_data() method and provide a valid configuration file to setup the test data loader(s).\n", " Test config : \n", " manifest_filepath: null\n", " sample_rate: 16000\n", " batch_size: 16\n", " shuffle: false\n", " use_start_end_token: false\n", " num_workers: 8\n", " pin_memory: true\n", " \n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[NeMo I 2024-06-10 23:31:08 nemo_logging:381] PADDING: 0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "[NeMo W 2024-06-10 23:31:11 nemo_logging:393] `method_cfg` is deprecated and will be removed in the future. Please use `measure_cfg` instead.\n", "[NeMo W 2024-06-10 23:31:11 nemo_logging:393] Re-writing `measure_cfg` with the value of `method_cfg`.\n", "[NeMo W 2024-06-10 23:31:11 nemo_logging:393] `temperature` is deprecated and will be removed in the future. Please use `alpha` instead.\n", "[NeMo W 2024-06-10 23:31:11 nemo_logging:393] Re-writing `alpha` with the value of `temperature`.\n", "[NeMo W 2024-06-10 23:31:11 nemo_logging:393] `method_cfg` is deprecated and will be removed in the future. Please use `measure_cfg` instead.\n", "[NeMo W 2024-06-10 23:31:11 nemo_logging:393] Re-writing `measure_cfg` with the value of `method_cfg`.\n", "[NeMo W 2024-06-10 23:31:11 nemo_logging:393] `temperature` is deprecated and will be removed in the future. Please use `alpha` instead.\n", "[NeMo W 2024-06-10 23:31:11 nemo_logging:393] Re-writing `alpha` with the value of `temperature`.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[NeMo I 2024-06-10 23:31:16 nemo_logging:381] Model EncDecCTCModelBPE was successfully restored from C:\\Users\\spn\\.cache\\huggingface\\hub\\models--nvidia--parakeet-ctc-0.6b\\snapshots\\097ffc5b027beabc73acb627def2d1d278e774e9\\parakeet-ctc-0.6b.nemo.\n" ] } ], "source": [ "from models.nllb import nllb\n", "#from models.TTS_utils import xtts_v2\n", "from models.parakeet import parakeet_ctc_model\n", "from models.es_fastconformer import stt_es_model\n", "model_nllb, tokinizer_nllb = nllb()\n", "#xtts_v2_model = xtts_v2()\n", "parakeet = parakeet_ctc_model()\n", "#sst = stt_es_model()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Writing audio_segments\\segment_0.wav...\n", "Processing segment...\n", "0.021454915\n", "Noise reduction done!\n", "Noise removed. Time: 0.06042814254760742\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "6909654da05f4b0a88458139a9b37d6d", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Transcribing: 0%| | 0/1 [00:00