yalsaffar committed
Commit
aa7cb02
1 Parent(s): 668cc09
This view is limited to 50 files because it contains too many changes. See the raw diff for the full commit.
Files changed (50)
  1. .gitignore +1 -0
  2. Dockerfile +100 -0
  3. __pycache__/app.cpython-311.pyc +0 -0
  4. __pycache__/inference_functions.cpython-311.pyc +0 -0
  5. __pycache__/load_models.cpython-311.pyc +0 -0
  6. __pycache__/server.cpython-311.pyc +0 -0
  7. __pycache__/silence_removal.cpython-311.pyc +0 -0
  8. __pycache__/stream_VAD.cpython-311.pyc +0 -0
  9. __pycache__/stream_VAD2.cpython-311.pyc +0 -0
  10. __pycache__/stream_prod_main2.cpython-311.pyc +0 -0
  11. app.py +79 -0
  12. app/package-lock.json +985 -0
  13. app/package.json +19 -0
  14. app/public/app.js +99 -0
  15. app/public/index.html +35 -0
  16. app/public/styles.css +96 -0
  17. app/server.js +102 -0
  18. app/temp_wav_files/audio-1718725396714.wav +0 -0
  19. app/uploads/1/audio_2.wav +0 -0
  20. app/uploads/1/transcription_2.txt +1 -0
  21. audio_segments/readme +0 -0
  22. inference_functions.py +80 -0
  23. load_models.py +18 -0
  24. main.ipynb +395 -0
  25. main.py +79 -0
  26. main_stream.ipynb +87 -0
  27. models/TTS_utils.py +365 -0
  28. models/__init__.py +0 -0
  29. models/__pycache__/TTS_utils.cpython-311.pyc +0 -0
  30. models/__pycache__/__init__.cpython-311.pyc +0 -0
  31. models/__pycache__/__init__.cpython-38.pyc +0 -0
  32. models/__pycache__/es_fastconformer.cpython-311.pyc +0 -0
  33. models/__pycache__/nllb.cpython-311.pyc +0 -0
  34. models/__pycache__/nllb.cpython-38.pyc +0 -0
  35. models/__pycache__/noise_red.cpython-311.pyc +0 -0
  36. models/__pycache__/parakeet.cpython-311.pyc +0 -0
  37. models/__pycache__/parakeet.cpython-38.pyc +0 -0
  38. models/es_fastconformer.py +37 -0
  39. models/nllb.py +72 -0
  40. models/noise_red.py +28 -0
  41. models/parakeet.py +43 -0
  42. models/status.txt +1 -0
  43. record_per.json +1 -0
  44. record_temp.json +1 -0
  45. requirements.txt +25 -0
  46. results/readme +0 -0
  47. run.py +73 -0
  48. setup.sh +18 -0
  49. status.txt +1 -0
  50. stream_VAD.py +249 -0
.gitignore ADDED
@@ -0,0 +1 @@
+ app/node_modules
Dockerfile ADDED
@@ -0,0 +1,100 @@
+ # Use an official CUDA-enabled image from NVIDIA with CUDA 12.1
+ FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu20.04
+
+ # Set the working directory in the container
+ WORKDIR /app
+
+ # Set the environment variable to suppress interactive prompts
+ ENV DEBIAN_FRONTEND=noninteractive
+
+ # Install necessary OS packages and Python 3.9
+ RUN apt-get update && apt-get install -y \
+     software-properties-common \
+     && add-apt-repository ppa:deadsnakes/ppa \
+     && apt-get update && apt-get install -y \
+     python3.9 \
+     python3.9-distutils \
+     python3.9-venv \
+     python3.9-dev \
+     build-essential \
+     cmake \
+     libsndfile1 \
+     ffmpeg \
+     portaudio19-dev \
+     alsa-utils \
+     curl \
+     git \
+     nodejs \
+     npm \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Install pip for Python 3.9
+ RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.9
+
+ # Create symlinks for python3.9 and pip3.9
+ RUN ln -s /usr/bin/python3.9 /usr/bin/python
+ RUN ln -s /usr/local/bin/pip /usr/bin/pip
+
+ # Set CUDA_HOME environment variable
+ ENV CUDA_HOME=/usr/local/cuda
+
+ # Add CUDA to PATH
+ ENV PATH=${CUDA_HOME}/bin:${PATH}
+
+ # Optionally set LD_LIBRARY_PATH for CUDA libraries
+ ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
+
+ # Set environment variable for NeMo cache directory
+ ENV NEMO_NLP_TMP=/app/.cache
+
+ # Create cache directory
+ RUN mkdir -p /app/.cache
+
+ # Copy the setup script and requirements file into the container
+ COPY setup.sh requirements.txt /app/
+
+ # Make the setup script executable
+ RUN chmod +x setup.sh
+
+ # Copy the application code into the container
+ COPY . /app
+
+ # Copy wait-for-it script
+ COPY wait-for-it.sh /app/wait-for-it.sh
+
+ # Make wait-for-it script executable
+ RUN chmod +x /app/wait-for-it.sh
+
+ # Install dependencies
+ RUN pip install --upgrade pip setuptools wheel
+ RUN pip install pybind11
+ RUN pip install fasttext
+ RUN pip install Cython
+ RUN pip install pyaudio
+ RUN pip install fastapi uvicorn
+ RUN pip install uvloop
+
+ # Install PyTorch and torchaudio
+ RUN pip install torch==2.2.2+cu121 -f https://download.pytorch.org/whl/cu121/torch_stable.html
+ RUN pip install torchaudio==2.2.2+cu121 -f https://download.pytorch.org/whl/cu121/torch_stable.html
+
+ # Install the requirements
+ RUN pip install -r requirements.txt
+
+ # Clone and install TTS
+ RUN git clone https://github.com/coqui-ai/TTS/ && \
+     cd TTS && \
+     make install
+
+ # Install Node.js dependencies
+ RUN cd /app/app && npm install
+
+ # Expose the ports
+ EXPOSE 8000
+ EXPOSE 3000
+
+ # Set the environment variable to indicate running in Docker
+ ENV IN_DOCKER=True
+
+ # Run the FastAPI app and Node.js server
+ CMD ["sh", "-c", "uvicorn app:app --host 0.0.0.0 --port 8000 & /app/wait-for-it.sh --url http://0.0.0.0:8000/health --strict -- node /app/app/server.js"]
__pycache__/app.cpython-311.pyc ADDED
Binary file (5.74 kB)
__pycache__/inference_functions.cpython-311.pyc ADDED
Binary file (7.07 kB)
__pycache__/load_models.cpython-311.pyc ADDED
Binary file (817 Bytes)
__pycache__/server.cpython-311.pyc ADDED
Binary file (2.21 kB)
__pycache__/silence_removal.cpython-311.pyc ADDED
Binary file (1.6 kB)
__pycache__/stream_VAD.cpython-311.pyc ADDED
Binary file (13 kB)
__pycache__/stream_VAD2.cpython-311.pyc ADDED
Binary file (13.5 kB)
__pycache__/stream_prod_main2.cpython-311.pyc ADDED
Binary file (4.17 kB)
app.py ADDED
@@ -0,0 +1,79 @@
+ import fastapi
+ import uvicorn
+ from fastapi import File, UploadFile, Form, HTTPException
+ from fastapi.responses import JSONResponse, FileResponse
+ from load_models import get_nllb_model_and_tokenizer, get_xtts_model
+ from inference_functions import translate, just_inference
+ import os
+ import torch
+
+ # Set GPU memory fraction
+ torch.cuda.set_per_process_memory_fraction(0.75, 0)
+
+ # Load models
+ model_nllb, tokenizer_nllb = get_nllb_model_and_tokenizer()
+ model_xtts = get_xtts_model()
+
+ app = fastapi.FastAPI()
+
+ @app.get("/health")
+ def health_check():
+     return {"status": "ok"}
+
+ @app.post("/translate/")
+ def translate_text(text: str = Form(...), target_lang: str = Form(...)):
+     translation = translate(model_nllb, tokenizer_nllb, text, target_lang)
+     return {"translation": translation}
+
+ @app.post("/inference/")
+ def inference_audio(original_path: UploadFile = File(...), text: str = Form(...), lang: str = Form(...)):
+     # Save the uploaded file
+     file_location = f"/tmp/{original_path.filename}"
+     with open(file_location, "wb") as file:
+         file.write(original_path.file.read())
+
+     output_dir = f"/tmp/generated_audio_{os.path.basename(file_location)}.wav"
+     torch.cuda.empty_cache()
+     generated_audio = just_inference(model_xtts, file_location, output_dir, text, lang)
+     return {"path_to_save": output_dir}
+
+ @app.post("/process-audio/")
+ async def process_audio(original_path: UploadFile = File(...), text: str = Form(...), lang: str = Form(...), target_lang: str = Form(...)):
+     print(f"original_path: {original_path.filename}")
+     print(f"text: {text}")
+     print(f"lang: {lang}")
+     print(f"target_lang: {target_lang}")
+
+     # Validate target language
+     if target_lang not in ["es", "en"]:  # Use 'es' and 'en' to match the example values
+         print("Unsupported language")
+         raise HTTPException(status_code=400, detail="Unsupported language. Use 'es' or 'en'.")
+
+     try:
+         # Translate the text first
+         translated_text = translate(model_nllb, tokenizer_nllb, text, target_lang)
+         print(f"translated_text: {translated_text}")
+
+         # Save the uploaded file
+         file_location = f"/tmp/{original_path.filename}"
+         with open(file_location, "wb") as file:
+             file.write(original_path.file.read())
+
+         output_dir = f"/tmp/generated_audio_{os.path.basename(file_location)}.wav"
+         torch.cuda.empty_cache()
+         generated_audio = just_inference(model_xtts, file_location, output_dir, translated_text, target_lang)
+
+         return JSONResponse(content={"audio_path": output_dir, "translation": translated_text})
+
+     except Exception as e:
+         print(f"Error during processing: {e}")
+         raise HTTPException(status_code=500, detail="Error during processing")
+
+ @app.get("/download-audio/")
+ def download_audio(file_path: str):
+     if not os.path.exists(file_path):
+         raise HTTPException(status_code=404, detail="File not found")
+     return FileResponse(file_path, media_type='audio/wav', filename=os.path.basename(file_path))
+
+ if __name__ == "__main__":
+     uvicorn.run(app, host="0.0.0.0", port=8000)
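For reviewers, a quick way to exercise the endpoints above — a minimal client sketch assuming the container is reachable on localhost:8000; the sample.wav path is a placeholder and the requests package is not part of this commit:

import requests

# POST a reference WAV plus text; the server translates the text and clones the voice.
with open("sample.wav", "rb") as f:
    resp = requests.post(
        "http://localhost:8000/process-audio/",
        files={"original_path": ("sample.wav", f, "audio/wav")},
        data={"text": "Hello there", "lang": "en", "target_lang": "es"},
    )
resp.raise_for_status()
payload = resp.json()  # {"audio_path": "...", "translation": "..."}
print(payload["translation"])

# Fetch the synthesized audio via the download endpoint.
audio = requests.get(
    "http://localhost:8000/download-audio/",
    params={"file_path": payload["audio_path"]},
)
with open("translated.wav", "wb") as out:
    out.write(audio.content)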
app/package-lock.json ADDED
@@ -0,0 +1,985 @@
+ {
+   "name": "audio-transcription",
+   "version": "1.0.0",
+   "lockfileVersion": 3,
+   "requires": true,
+   "packages": {
+     "": {
+       "name": "audio-transcription",
+       "version": "1.0.0",
+       "license": "ISC",
+       "dependencies": {
+         "express": "^4.19.2",
+         "form-data": "^4.0.0",
+         "multer": "^1.4.5-lts.1",
+         "node-fetch": "^2.7.0",
+         "wav": "^1.0.2"
+       }
+     },
+     "node_modules/accepts": {
+       "version": "1.3.8",
+       "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz",
+       "integrity": "sha512-PYAthTa2m2VKxuvSD3DPC/Gy+U+sOA1LAuT8mkmRuvw+NACSaeXEQ+NHcVF7rONl6qcaxV3Uuemwawk+7+SJLw==",
+       "dependencies": {
+         "mime-types": "~2.1.34",
+         "negotiator": "0.6.3"
+       },
+       "engines": {
+         "node": ">= 0.6"
+       }
+     },
+     "node_modules/append-field": {
+       "version": "1.0.0",
+       "resolved": "https://registry.npmjs.org/append-field/-/append-field-1.0.0.tgz",
+       "integrity": "sha512-klpgFSWLW1ZEs8svjfb7g4qWY0YS5imI82dTg+QahUvJ8YqAY0P10Uk8tTyh9ZGuYEZEMaeJYCF5BFuX552hsw=="
+     },
+     "node_modules/array-flatten": {
+       "version": "1.1.1",
+       "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz",
+       "integrity": "sha512-PCVAQswWemu6UdxsDFFX/+gVeYqKAod3D3UVm91jHwynguOwAvYPhx8nNlM++NqRcK6CxxpUafjmhIdKiHibqg=="
+     },
+     "node_modules/asynckit": {
+       "version": "0.4.0",
+       "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
+       "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
+     },
+     "node_modules/body-parser": {
+       "version": "1.20.2",
+       "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.2.tgz",
+       "integrity": "sha512-ml9pReCu3M61kGlqoTm2umSXTlRTuGTx0bfYj+uIUKKYycG5NtSbeetV3faSU6R7ajOPw0g/J1PvK4qNy7s5bA==",
+       "dependencies": {
+         "bytes": "3.1.2",
+         "content-type": "~1.0.5",
+         "debug": "2.6.9",
+         "depd": "2.0.0",
+         "destroy": "1.2.0",
+         "http-errors": "2.0.0",
+         "iconv-lite": "0.4.24",
+         "on-finished": "2.4.1",
+         "qs": "6.11.0",
+         "raw-body": "2.5.2",
+         "type-is": "~1.6.18",
+         "unpipe": "1.0.0"
+       },
+       "engines": {
+         "node": ">= 0.8",
+         "npm": "1.2.8000 || >= 1.4.16"
+       }
+     },
+     "node_modules/buffer-alloc": {
+       "version": "1.2.0",
+       "resolved": "https://registry.npmjs.org/buffer-alloc/-/buffer-alloc-1.2.0.tgz",
+       "integrity": "sha512-CFsHQgjtW1UChdXgbyJGtnm+O/uLQeZdtbDo8mfUgYXCHSM1wgrVxXm6bSyrUuErEb+4sYVGCzASBRot7zyrow==",
+       "dependencies": {
+         "buffer-alloc-unsafe": "^1.1.0",
+         "buffer-fill": "^1.0.0"
+       }
+     },
+     "node_modules/buffer-alloc-unsafe": {
+       "version": "1.1.0",
+       "resolved": "https://registry.npmjs.org/buffer-alloc-unsafe/-/buffer-alloc-unsafe-1.1.0.tgz",
+       "integrity": "sha512-TEM2iMIEQdJ2yjPJoSIsldnleVaAk1oW3DBVUykyOLsEsFmEc9kn+SFFPz+gl54KQNxlDnAwCXosOS9Okx2xAg=="
+     },
+     "node_modules/buffer-fill": {
+       "version": "1.0.0",
+       "resolved": "https://registry.npmjs.org/buffer-fill/-/buffer-fill-1.0.0.tgz",
+       "integrity": "sha512-T7zexNBwiiaCOGDg9xNX9PBmjrubblRkENuptryuI64URkXDFum9il/JGL8Lm8wYfAXpredVXXZz7eMHilimiQ=="
+     },
+     "node_modules/buffer-from": {
+       "version": "1.1.2",
+       "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz",
+       "integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ=="
+     },
+     "node_modules/busboy": {
+       "version": "1.6.0",
+       "resolved": "https://registry.npmjs.org/busboy/-/busboy-1.6.0.tgz",
+       "integrity": "sha512-8SFQbg/0hQ9xy3UNTB0YEnsNBbWfhf7RtnzpL7TkBiTBRfrQ9Fxcnz7VJsleJpyp6rVLvXiuORqjlHi5q+PYuA==",
+       "dependencies": {
+         "streamsearch": "^1.1.0"
+       },
+       "engines": {
+         "node": ">=10.16.0"
+       }
+     },
+     "node_modules/bytes": {
+       "version": "3.1.2",
+       "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz",
+       "integrity": "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==",
+       "engines": {
+         "node": ">= 0.8"
+       }
+     },
+     "node_modules/call-bind": {
+       "version": "1.0.7",
+       "resolved": "https://registry.npmjs.org/call-bind/-/call-bind-1.0.7.tgz",
+       "integrity": "sha512-GHTSNSYICQ7scH7sZ+M2rFopRoLh8t2bLSW6BbgrtLsahOIB5iyAVJf9GjWK3cYTDaMj4XdBpM1cA6pIS0Kv2w==",
+       "dependencies": {
+         "es-define-property": "^1.0.0",
+         "es-errors": "^1.3.0",
+         "function-bind": "^1.1.2",
+         "get-intrinsic": "^1.2.4",
+         "set-function-length": "^1.2.1"
+       },
+       "engines": {
+         "node": ">= 0.4"
+       },
+       "funding": {
+         "url": "https://github.com/sponsors/ljharb"
+       }
+     },
+     "node_modules/combined-stream": {
+       "version": "1.0.8",
+       "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
+       "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
+       "dependencies": {
+         "delayed-stream": "~1.0.0"
+       },
+       "engines": {
+         "node": ">= 0.8"
+       }
+     },
+     "node_modules/concat-stream": {
+       "version": "1.6.2",
+       "resolved": "https://registry.npmjs.org/concat-stream/-/concat-stream-1.6.2.tgz",
+       "integrity": "sha512-27HBghJxjiZtIk3Ycvn/4kbJk/1uZuJFfuPEns6LaEvpvG1f0hTea8lilrouyo9mVc2GWdcEZ8OLoGmSADlrCw==",
+       "engines": [
+         "node >= 0.8"
+       ],
+       "dependencies": {
+         "buffer-from": "^1.0.0",
+         "inherits": "^2.0.3",
+         "readable-stream": "^2.2.2",
+         "typedarray": "^0.0.6"
+       }
+     },
+     "node_modules/content-disposition": {
+       "version": "0.5.4",
+       "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.4.tgz",
+       "integrity": "sha512-FveZTNuGw04cxlAiWbzi6zTAL/lhehaWbTtgluJh4/E95DqMwTmha3KZN1aAWA8cFIhHzMZUvLevkw5Rqk+tSQ==",
+       "dependencies": {
+         "safe-buffer": "5.2.1"
+       },
+       "engines": {
+         "node": ">= 0.6"
+       }
+     },
+     "node_modules/content-type": {
+       "version": "1.0.5",
+       "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.5.tgz",
+       "integrity": "sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==",
+       "engines": {
+         "node": ">= 0.6"
+       }
+     },
+     "node_modules/cookie": {
+       "version": "0.6.0",
+       "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.6.0.tgz",
+       "integrity": "sha512-U71cyTamuh1CRNCfpGY6to28lxvNwPG4Guz/EVjgf3Jmzv0vlDp1atT9eS5dDjMYHucpHbWns6Lwf3BKz6svdw==",
+       "engines": {
+         "node": ">= 0.6"
+       }
+     },
+     "node_modules/cookie-signature": {
+       "version": "1.0.6",
+       "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.6.tgz",
+       "integrity": "sha512-QADzlaHc8icV8I7vbaJXJwod9HWYp8uCqf1xa4OfNu1T7JVxQIrUgOWtHdNDtPiywmFbiS12VjotIXLrKM3orQ=="
+     },
+     "node_modules/core-util-is": {
+       "version": "1.0.3",
+       "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.3.tgz",
+       "integrity": "sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ=="
+     },
+     "node_modules/debug": {
+       "version": "2.6.9",
+       "resolved": "https://registry.npmjs.org/debug/-/debug-2.6.9.tgz",
+       "integrity": "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==",
+       "dependencies": {
+         "ms": "2.0.0"
+       }
+     },
+     "node_modules/define-data-property": {
+       "version": "1.1.4",
+       "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz",
+       "integrity": "sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==",
+       "dependencies": {
+         "es-define-property": "^1.0.0",
+         "es-errors": "^1.3.0",
+         "gopd": "^1.0.1"
+       },
+       "engines": {
+         "node": ">= 0.4"
+       },
+       "funding": {
+         "url": "https://github.com/sponsors/ljharb"
+       }
+     },
+     "node_modules/delayed-stream": {
+       "version": "1.0.0",
+       "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
+       "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
+       "engines": {
+         "node": ">=0.4.0"
+       }
+     },
+     "node_modules/depd": {
+       "version": "2.0.0",
+       "resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz",
+       "integrity": "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==",
+       "engines": {
+         "node": ">= 0.8"
+       }
+     },
+     "node_modules/destroy": {
+       "version": "1.2.0",
+       "resolved": "https://registry.npmjs.org/destroy/-/destroy-1.2.0.tgz",
+       "integrity": "sha512-2sJGJTaXIIaR1w4iJSNoN0hnMY7Gpc/n8D4qSCJw8QqFWXf7cuAgnEHxBpweaVcPevC2l3KpjYCx3NypQQgaJg==",
+       "engines": {
+         "node": ">= 0.8",
+         "npm": "1.2.8000 || >= 1.4.16"
+       }
+     },
+     "node_modules/ee-first": {
+       "version": "1.1.1",
+       "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz",
+       "integrity": "sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow=="
+     },
+     "node_modules/encodeurl": {
+       "version": "1.0.2",
+       "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz",
+       "integrity": "sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w==",
+       "engines": {
+         "node": ">= 0.8"
+       }
+     },
+     "node_modules/es-define-property": {
+       "version": "1.0.0",
+       "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.0.tgz",
+       "integrity": "sha512-jxayLKShrEqqzJ0eumQbVhTYQM27CfT1T35+gCgDFoL82JLsXqTJ76zv6A0YLOgEnLUMvLzsDsGIrl8NFpT2gQ==",
+       "dependencies": {
+         "get-intrinsic": "^1.2.4"
+       },
+       "engines": {
+         "node": ">= 0.4"
+       }
+     },
+     "node_modules/es-errors": {
+       "version": "1.3.0",
+       "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz",
+       "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==",
+       "engines": {
+         "node": ">= 0.4"
+       }
+     },
+     "node_modules/escape-html": {
+       "version": "1.0.3",
+       "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz",
+       "integrity": "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow=="
+     },
+     "node_modules/etag": {
+       "version": "1.8.1",
+       "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz",
+       "integrity": "sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==",
+       "engines": {
+         "node": ">= 0.6"
+       }
+     },
+     "node_modules/express": {
+       "version": "4.19.2",
+       "resolved": "https://registry.npmjs.org/express/-/express-4.19.2.tgz",
+       "integrity": "sha512-5T6nhjsT+EOMzuck8JjBHARTHfMht0POzlA60WV2pMD3gyXw2LZnZ+ueGdNxG+0calOJcWKbpFcuzLZ91YWq9Q==",
+       "dependencies": {
+         "accepts": "~1.3.8",
+         "array-flatten": "1.1.1",
+         "body-parser": "1.20.2",
+         "content-disposition": "0.5.4",
+         "content-type": "~1.0.4",
+         "cookie": "0.6.0",
+         "cookie-signature": "1.0.6",
+         "debug": "2.6.9",
+         "depd": "2.0.0",
+         "encodeurl": "~1.0.2",
+         "escape-html": "~1.0.3",
+         "etag": "~1.8.1",
+         "finalhandler": "1.2.0",
+         "fresh": "0.5.2",
+         "http-errors": "2.0.0",
+         "merge-descriptors": "1.0.1",
+         "methods": "~1.1.2",
+         "on-finished": "2.4.1",
+         "parseurl": "~1.3.3",
+         "path-to-regexp": "0.1.7",
+         "proxy-addr": "~2.0.7",
+         "qs": "6.11.0",
+         "range-parser": "~1.2.1",
+         "safe-buffer": "5.2.1",
+         "send": "0.18.0",
+         "serve-static": "1.15.0",
+         "setprototypeof": "1.2.0",
+         "statuses": "2.0.1",
+         "type-is": "~1.6.18",
+         "utils-merge": "1.0.1",
+         "vary": "~1.1.2"
+       },
+       "engines": {
+         "node": ">= 0.10.0"
+       }
+     },
+     "node_modules/finalhandler": {
+       "version": "1.2.0",
+       "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.2.0.tgz",
+       "integrity": "sha512-5uXcUVftlQMFnWC9qu/svkWv3GTd2PfUhK/3PLkYNAe7FbqJMt3515HaxE6eRL74GdsriiwujiawdaB1BpEISg==",
+       "dependencies": {
+         "debug": "2.6.9",
+         "encodeurl": "~1.0.2",
+         "escape-html": "~1.0.3",
+         "on-finished": "2.4.1",
+         "parseurl": "~1.3.3",
+         "statuses": "2.0.1",
+         "unpipe": "~1.0.0"
+       },
+       "engines": {
+         "node": ">= 0.8"
+       }
+     },
+     "node_modules/form-data": {
+       "version": "4.0.0",
+       "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
+       "integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
+       "dependencies": {
+         "asynckit": "^0.4.0",
+         "combined-stream": "^1.0.8",
+         "mime-types": "^2.1.12"
+       },
+       "engines": {
+         "node": ">= 6"
+       }
+     },
+     "node_modules/forwarded": {
+       "version": "0.2.0",
+       "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz",
+       "integrity": "sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==",
+       "engines": {
+         "node": ">= 0.6"
+       }
+     },
+     "node_modules/fresh": {
+       "version": "0.5.2",
+       "resolved": "https://registry.npmjs.org/fresh/-/fresh-0.5.2.tgz",
+       "integrity": "sha512-zJ2mQYM18rEFOudeV4GShTGIQ7RbzA7ozbU9I/XBpm7kqgMywgmylMwXHxZJmkVoYkna9d2pVXVXPdYTP9ej8Q==",
+       "engines": {
+         "node": ">= 0.6"
+       }
+     },
+     "node_modules/function-bind": {
+       "version": "1.1.2",
+       "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz",
+       "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==",
+       "funding": {
+         "url": "https://github.com/sponsors/ljharb"
+       }
+     },
+     "node_modules/get-intrinsic": {
+       "version": "1.2.4",
+       "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.4.tgz",
+       "integrity": "sha512-5uYhsJH8VJBTv7oslg4BznJYhDoRI6waYCxMmCdnTrcCrHA/fCFKoTFz2JKKE0HdDFUF7/oQuhzumXJK7paBRQ==",
+       "dependencies": {
+         "es-errors": "^1.3.0",
+         "function-bind": "^1.1.2",
+         "has-proto": "^1.0.1",
+         "has-symbols": "^1.0.3",
+         "hasown": "^2.0.0"
+       },
+       "engines": {
+         "node": ">= 0.4"
+       },
+       "funding": {
+         "url": "https://github.com/sponsors/ljharb"
+       }
+     },
+     "node_modules/gopd": {
+       "version": "1.0.1",
+       "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.0.1.tgz",
+       "integrity": "sha512-d65bNlIadxvpb/A2abVdlqKqV563juRnZ1Wtk6s1sIR8uNsXR70xqIzVqxVf1eTqDunwT2MkczEeaezCKTZhwA==",
+       "dependencies": {
+         "get-intrinsic": "^1.1.3"
+       },
+       "funding": {
+         "url": "https://github.com/sponsors/ljharb"
+       }
+     },
+     "node_modules/has-property-descriptors": {
+       "version": "1.0.2",
+       "resolved": "https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.2.tgz",
+       "integrity": "sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg==",
+       "dependencies": {
+         "es-define-property": "^1.0.0"
+       },
+       "funding": {
+         "url": "https://github.com/sponsors/ljharb"
+       }
+     },
+     "node_modules/has-proto": {
+       "version": "1.0.3",
+       "resolved": "https://registry.npmjs.org/has-proto/-/has-proto-1.0.3.tgz",
+       "integrity": "sha512-SJ1amZAJUiZS+PhsVLf5tGydlaVB8EdFpaSO4gmiUKUOxk8qzn5AIy4ZeJUmh22znIdk/uMAUT2pl3FxzVUH+Q==",
+       "engines": {
+         "node": ">= 0.4"
+       },
+       "funding": {
+         "url": "https://github.com/sponsors/ljharb"
+       }
+     },
+     "node_modules/has-symbols": {
+       "version": "1.0.3",
+       "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.0.3.tgz",
+       "integrity": "sha512-l3LCuF6MgDNwTDKkdYGEihYjt5pRPbEg46rtlmnSPlUbgmB8LOIrKJbYYFBSbnPaJexMKtiPO8hmeRjRz2Td+A==",
+       "engines": {
+         "node": ">= 0.4"
+       },
+       "funding": {
+         "url": "https://github.com/sponsors/ljharb"
+       }
+     },
+     "node_modules/hasown": {
+       "version": "2.0.2",
+       "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz",
+       "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==",
+       "dependencies": {
+         "function-bind": "^1.1.2"
+       },
+       "engines": {
+         "node": ">= 0.4"
+       }
+     },
+     "node_modules/http-errors": {
+       "version": "2.0.0",
+       "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.0.tgz",
+       "integrity": "sha512-FtwrG/euBzaEjYeRqOgly7G0qviiXoJWnvEH2Z1plBdXgbyjv34pHTSb9zoeHMyDy33+DWy5Wt9Wo+TURtOYSQ==",
+       "dependencies": {
+         "depd": "2.0.0",
+         "inherits": "2.0.4",
+         "setprototypeof": "1.2.0",
+         "statuses": "2.0.1",
+         "toidentifier": "1.0.1"
+       },
+       "engines": {
+         "node": ">= 0.8"
+       }
+     },
+     "node_modules/iconv-lite": {
+       "version": "0.4.24",
+       "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz",
+       "integrity": "sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA==",
+       "dependencies": {
+         "safer-buffer": ">= 2.1.2 < 3"
+       },
+       "engines": {
+         "node": ">=0.10.0"
+       }
+     },
+     "node_modules/inherits": {
+       "version": "2.0.4",
+       "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
+       "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="
+     },
+     "node_modules/ipaddr.js": {
+       "version": "1.9.1",
+       "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz",
+       "integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==",
+       "engines": {
+         "node": ">= 0.10"
+       }
+     },
+     "node_modules/isarray": {
+       "version": "1.0.0",
+       "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz",
+       "integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ=="
+     },
+     "node_modules/media-typer": {
+       "version": "0.3.0",
+       "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz",
+       "integrity": "sha512-dq+qelQ9akHpcOl/gUVRTxVIOkAJ1wR3QAvb4RsVjS8oVoFjDGTc679wJYmUmknUF5HwMLOgb5O+a3KxfWapPQ==",
+       "engines": {
+         "node": ">= 0.6"
+       }
+     },
+     "node_modules/merge-descriptors": {
+       "version": "1.0.1",
+       "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz",
+       "integrity": "sha512-cCi6g3/Zr1iqQi6ySbseM1Xvooa98N0w31jzUYrXPX2xqObmFGHJ0tQ5u74H3mVh7wLouTseZyYIq39g8cNp1w=="
+     },
+     "node_modules/methods": {
+       "version": "1.1.2",
+       "resolved": "https://registry.npmjs.org/methods/-/methods-1.1.2.tgz",
+       "integrity": "sha512-iclAHeNqNm68zFtnZ0e+1L2yUIdvzNoauKU4WBA3VvH/vPFieF7qfRlwUZU+DA9P9bPXIS90ulxoUoCH23sV2w==",
+       "engines": {
+         "node": ">= 0.6"
+       }
+     },
+     "node_modules/mime": {
+       "version": "1.6.0",
+       "resolved": "https://registry.npmjs.org/mime/-/mime-1.6.0.tgz",
+       "integrity": "sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==",
+       "bin": {
+         "mime": "cli.js"
+       },
+       "engines": {
+         "node": ">=4"
+       }
+     },
+     "node_modules/mime-db": {
+       "version": "1.52.0",
+       "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
+       "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
+       "engines": {
+         "node": ">= 0.6"
+       }
+     },
+     "node_modules/mime-types": {
+       "version": "2.1.35",
+       "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
+       "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
+       "dependencies": {
+         "mime-db": "1.52.0"
+       },
+       "engines": {
+         "node": ">= 0.6"
+       }
+     },
+     "node_modules/minimist": {
+       "version": "1.2.8",
+       "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz",
+       "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==",
+       "funding": {
+         "url": "https://github.com/sponsors/ljharb"
+       }
+     },
+     "node_modules/mkdirp": {
+       "version": "0.5.6",
+       "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-0.5.6.tgz",
+       "integrity": "sha512-FP+p8RB8OWpF3YZBCrP5gtADmtXApB5AMLn+vdyA+PyxCjrCs00mjyUozssO33cwDeT3wNGdLxJ5M//YqtHAJw==",
+       "dependencies": {
+         "minimist": "^1.2.6"
+       },
+       "bin": {
+         "mkdirp": "bin/cmd.js"
+       }
+     },
+     "node_modules/ms": {
+       "version": "2.0.0",
+       "resolved": "https://registry.npmjs.org/ms/-/ms-2.0.0.tgz",
+       "integrity": "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A=="
+     },
+     "node_modules/multer": {
+       "version": "1.4.5-lts.1",
+       "resolved": "https://registry.npmjs.org/multer/-/multer-1.4.5-lts.1.tgz",
+       "integrity": "sha512-ywPWvcDMeH+z9gQq5qYHCCy+ethsk4goepZ45GLD63fOu0YcNecQxi64nDs3qluZB+murG3/D4dJ7+dGctcCQQ==",
+       "dependencies": {
+         "append-field": "^1.0.0",
+         "busboy": "^1.0.0",
+         "concat-stream": "^1.5.2",
+         "mkdirp": "^0.5.4",
+         "object-assign": "^4.1.1",
+         "type-is": "^1.6.4",
+         "xtend": "^4.0.0"
+       },
+       "engines": {
+         "node": ">= 6.0.0"
+       }
+     },
+     "node_modules/negotiator": {
+       "version": "0.6.3",
+       "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.3.tgz",
+       "integrity": "sha512-+EUsqGPLsM+j/zdChZjsnX51g4XrHFOIXwfnCVPGlQk/k5giakcKsuxCObBRu6DSm9opw/O6slWbJdghQM4bBg==",
+       "engines": {
+         "node": ">= 0.6"
+       }
+     },
+     "node_modules/node-fetch": {
+       "version": "2.7.0",
+       "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz",
+       "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==",
+       "dependencies": {
+         "whatwg-url": "^5.0.0"
+       },
+       "engines": {
+         "node": "4.x || >=6.0.0"
+       },
+       "peerDependencies": {
+         "encoding": "^0.1.0"
+       },
+       "peerDependenciesMeta": {
+         "encoding": {
+           "optional": true
+         }
+       }
+     },
+     "node_modules/object-assign": {
+       "version": "4.1.1",
+       "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz",
+       "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==",
+       "engines": {
+         "node": ">=0.10.0"
+       }
+     },
+     "node_modules/object-inspect": {
+       "version": "1.13.1",
+       "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.1.tgz",
+       "integrity": "sha512-5qoj1RUiKOMsCCNLV1CBiPYE10sziTsnmNxkAI/rZhiD63CF7IqdFGC/XzjWjpSgLf0LxXX3bDFIh0E18f6UhQ==",
+       "funding": {
+         "url": "https://github.com/sponsors/ljharb"
+       }
+     },
+     "node_modules/on-finished": {
+       "version": "2.4.1",
+       "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.4.1.tgz",
+       "integrity": "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==",
+       "dependencies": {
+         "ee-first": "1.1.1"
+       },
+       "engines": {
+         "node": ">= 0.8"
+       }
+     },
+     "node_modules/parseurl": {
+       "version": "1.3.3",
+       "resolved": "https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz",
+       "integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==",
+       "engines": {
+         "node": ">= 0.8"
+       }
+     },
+     "node_modules/path-to-regexp": {
+       "version": "0.1.7",
+       "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.7.tgz",
+       "integrity": "sha512-5DFkuoqlv1uYQKxy8omFBeJPQcdoE07Kv2sferDCrAq1ohOU+MSDswDIbnx3YAM60qIOnYa53wBhXW0EbMonrQ=="
+     },
+     "node_modules/process-nextick-args": {
+       "version": "2.0.1",
+       "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz",
+       "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag=="
+     },
+     "node_modules/proxy-addr": {
+       "version": "2.0.7",
+       "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz",
+       "integrity": "sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==",
+       "dependencies": {
+         "forwarded": "0.2.0",
+         "ipaddr.js": "1.9.1"
+       },
+       "engines": {
+         "node": ">= 0.10"
+       }
+     },
+     "node_modules/qs": {
+       "version": "6.11.0",
+       "resolved": "https://registry.npmjs.org/qs/-/qs-6.11.0.tgz",
+       "integrity": "sha512-MvjoMCJwEarSbUYk5O+nmoSzSutSsTwF85zcHPQ9OrlFoZOYIjaqBAJIqIXjptyD5vThxGq52Xu/MaJzRkIk4Q==",
+       "dependencies": {
+         "side-channel": "^1.0.4"
+       },
+       "engines": {
+         "node": ">=0.6"
+       },
+       "funding": {
+         "url": "https://github.com/sponsors/ljharb"
+       }
+     },
+     "node_modules/range-parser": {
+       "version": "1.2.1",
+       "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz",
+       "integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==",
+       "engines": {
+         "node": ">= 0.6"
+       }
+     },
+     "node_modules/raw-body": {
+       "version": "2.5.2",
+       "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-2.5.2.tgz",
+       "integrity": "sha512-8zGqypfENjCIqGhgXToC8aB2r7YrBX+AQAfIPs/Mlk+BtPTztOvTS01NRW/3Eh60J+a48lt8qsCzirQ6loCVfA==",
+       "dependencies": {
+         "bytes": "3.1.2",
+         "http-errors": "2.0.0",
+         "iconv-lite": "0.4.24",
+         "unpipe": "1.0.0"
+       },
+       "engines": {
+         "node": ">= 0.8"
+       }
+     },
+     "node_modules/readable-stream": {
+       "version": "2.3.8",
+       "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz",
+       "integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==",
+       "dependencies": {
+         "core-util-is": "~1.0.0",
+         "inherits": "~2.0.3",
+         "isarray": "~1.0.0",
+         "process-nextick-args": "~2.0.0",
+         "safe-buffer": "~5.1.1",
+         "string_decoder": "~1.1.1",
+         "util-deprecate": "~1.0.1"
+       }
+     },
+     "node_modules/readable-stream/node_modules/safe-buffer": {
+       "version": "5.1.2",
+       "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz",
+       "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g=="
+     },
+     "node_modules/safe-buffer": {
+       "version": "5.2.1",
+       "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
+       "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==",
+       "funding": [
+         {
+           "type": "github",
+           "url": "https://github.com/sponsors/feross"
+         },
+         {
+           "type": "patreon",
+           "url": "https://www.patreon.com/feross"
+         },
+         {
+           "type": "consulting",
+           "url": "https://feross.org/support"
+         }
+       ]
+     },
+     "node_modules/safer-buffer": {
+       "version": "2.1.2",
+       "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz",
+       "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg=="
+     },
+     "node_modules/send": {
+       "version": "0.18.0",
+       "resolved": "https://registry.npmjs.org/send/-/send-0.18.0.tgz",
+       "integrity": "sha512-qqWzuOjSFOuqPjFe4NOsMLafToQQwBSOEpS+FwEt3A2V3vKubTquT3vmLTQpFgMXp8AlFWFuP1qKaJZOtPpVXg==",
+       "dependencies": {
+         "debug": "2.6.9",
+         "depd": "2.0.0",
+         "destroy": "1.2.0",
+         "encodeurl": "~1.0.2",
+         "escape-html": "~1.0.3",
+         "etag": "~1.8.1",
+         "fresh": "0.5.2",
+         "http-errors": "2.0.0",
+         "mime": "1.6.0",
+         "ms": "2.1.3",
+         "on-finished": "2.4.1",
+         "range-parser": "~1.2.1",
+         "statuses": "2.0.1"
+       },
+       "engines": {
+         "node": ">= 0.8.0"
+       }
+     },
+     "node_modules/send/node_modules/ms": {
+       "version": "2.1.3",
+       "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
+       "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="
+     },
+     "node_modules/serve-static": {
+       "version": "1.15.0",
+       "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.15.0.tgz",
+       "integrity": "sha512-XGuRDNjXUijsUL0vl6nSD7cwURuzEgglbOaFuZM9g3kwDXOWVTck0jLzjPzGD+TazWbboZYu52/9/XPdUgne9g==",
+       "dependencies": {
+         "encodeurl": "~1.0.2",
+         "escape-html": "~1.0.3",
+         "parseurl": "~1.3.3",
+         "send": "0.18.0"
+       },
+       "engines": {
+         "node": ">= 0.8.0"
+       }
+     },
+     "node_modules/set-function-length": {
+       "version": "1.2.2",
+       "resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.2.2.tgz",
+       "integrity": "sha512-pgRc4hJ4/sNjWCSS9AmnS40x3bNMDTknHgL5UaMBTMyJnU90EgWh1Rz+MC9eFu4BuN/UwZjKQuY/1v3rM7HMfg==",
+       "dependencies": {
+         "define-data-property": "^1.1.4",
+         "es-errors": "^1.3.0",
+         "function-bind": "^1.1.2",
+         "get-intrinsic": "^1.2.4",
+         "gopd": "^1.0.1",
+         "has-property-descriptors": "^1.0.2"
+       },
+       "engines": {
+         "node": ">= 0.4"
+       }
+     },
+     "node_modules/setprototypeof": {
+       "version": "1.2.0",
+       "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz",
+       "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw=="
+     },
+     "node_modules/side-channel": {
+       "version": "1.0.6",
+       "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.0.6.tgz",
+       "integrity": "sha512-fDW/EZ6Q9RiO8eFG8Hj+7u/oW+XrPTIChwCOM2+th2A6OblDtYYIpve9m+KvI9Z4C9qSEXlaGR6bTEYHReuglA==",
+       "dependencies": {
+         "call-bind": "^1.0.7",
+         "es-errors": "^1.3.0",
+         "get-intrinsic": "^1.2.4",
+         "object-inspect": "^1.13.1"
+       },
+       "engines": {
+         "node": ">= 0.4"
+       },
+       "funding": {
+         "url": "https://github.com/sponsors/ljharb"
+       }
+     },
+     "node_modules/statuses": {
+       "version": "2.0.1",
+       "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.1.tgz",
+       "integrity": "sha512-RwNA9Z/7PrK06rYLIzFMlaF+l73iwpzsqRIFgbMLbTcLD6cOao82TaWefPXQvB2fOC4AjuYSEndS7N/mTCbkdQ==",
+       "engines": {
+         "node": ">= 0.8"
+       }
+     },
+     "node_modules/stream-parser": {
+       "version": "0.3.1",
+       "resolved": "https://registry.npmjs.org/stream-parser/-/stream-parser-0.3.1.tgz",
+       "integrity": "sha512-bJ/HgKq41nlKvlhccD5kaCr/P+Hu0wPNKPJOH7en+YrJu/9EgqUF+88w5Jb6KNcjOFMhfX4B2asfeAtIGuHObQ==",
+       "dependencies": {
+         "debug": "2"
+       }
+     },
+     "node_modules/streamsearch": {
+       "version": "1.1.0",
+       "resolved": "https://registry.npmjs.org/streamsearch/-/streamsearch-1.1.0.tgz",
+       "integrity": "sha512-Mcc5wHehp9aXz1ax6bZUyY5afg9u2rv5cqQI3mRrYkGC8rW2hM02jWuwjtL++LS5qinSyhj2QfLyNsuc+VsExg==",
+       "engines": {
+         "node": ">=10.0.0"
+       }
+     },
+     "node_modules/string_decoder": {
+       "version": "1.1.1",
+       "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz",
+       "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==",
+       "dependencies": {
+         "safe-buffer": "~5.1.0"
+       }
+     },
+     "node_modules/string_decoder/node_modules/safe-buffer": {
+       "version": "5.1.2",
+       "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz",
+       "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g=="
+     },
+     "node_modules/toidentifier": {
+       "version": "1.0.1",
+       "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz",
+       "integrity": "sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==",
+       "engines": {
+         "node": ">=0.6"
+       }
+     },
+     "node_modules/tr46": {
+       "version": "0.0.3",
+       "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
+       "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw=="
+     },
+     "node_modules/type-is": {
+       "version": "1.6.18",
+       "resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz",
+       "integrity": "sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==",
+       "dependencies": {
+         "media-typer": "0.3.0",
+         "mime-types": "~2.1.24"
+       },
+       "engines": {
+         "node": ">= 0.6"
+       }
+     },
+     "node_modules/typedarray": {
+       "version": "0.0.6",
+       "resolved": "https://registry.npmjs.org/typedarray/-/typedarray-0.0.6.tgz",
+       "integrity": "sha512-/aCDEGatGvZ2BIk+HmLf4ifCJFwvKFNb9/JeZPMulfgFracn9QFcAf5GO8B/mweUjSoblS5In0cWhqpfs/5PQA=="
+     },
+     "node_modules/unpipe": {
+       "version": "1.0.0",
+       "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz",
+       "integrity": "sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==",
+       "engines": {
+         "node": ">= 0.8"
+       }
+     },
+     "node_modules/util-deprecate": {
+       "version": "1.0.2",
+       "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
+       "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw=="
+     },
+     "node_modules/utils-merge": {
+       "version": "1.0.1",
+       "resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz",
+       "integrity": "sha512-pMZTvIkT1d+TFGvDOqodOclx0QWkkgi6Tdoa8gC8ffGAAqz9pzPTZWAybbsHHoED/ztMtkv/VoYTYyShUn81hA==",
+       "engines": {
+         "node": ">= 0.4.0"
+       }
+     },
+     "node_modules/vary": {
+       "version": "1.1.2",
+       "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz",
+       "integrity": "sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==",
+       "engines": {
+         "node": ">= 0.8"
+       }
+     },
+     "node_modules/wav": {
+       "version": "1.0.2",
+       "resolved": "https://registry.npmjs.org/wav/-/wav-1.0.2.tgz",
+       "integrity": "sha512-viHtz3cDd/Tcr/HbNqzQCofKdF6kWUymH9LGDdskfWFoIy/HJ+RTihgjEcHfnsy1PO4e9B+y4HwgTwMrByquhg==",
+       "dependencies": {
+         "buffer-alloc": "^1.1.0",
+         "buffer-from": "^1.0.0",
+         "debug": "^2.2.0",
+         "readable-stream": "^1.1.14",
+         "stream-parser": "^0.3.1"
+       }
+     },
+     "node_modules/wav/node_modules/isarray": {
+       "version": "0.0.1",
+       "resolved": "https://registry.npmjs.org/isarray/-/isarray-0.0.1.tgz",
+       "integrity": "sha512-D2S+3GLxWH+uhrNEcoh/fnmYeP8E8/zHl644d/jdA0g2uyXvy3sb0qxotE+ne0LtccHknQzWwZEzhak7oJ0COQ=="
+     },
+     "node_modules/wav/node_modules/readable-stream": {
+       "version": "1.1.14",
+       "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-1.1.14.tgz",
+       "integrity": "sha512-+MeVjFf4L44XUkhM1eYbD8fyEsxcV81pqMSR5gblfcLCHfZvbrqy4/qYHE+/R5HoBUT11WV5O08Cr1n3YXkWVQ==",
+       "dependencies": {
+         "core-util-is": "~1.0.0",
+         "inherits": "~2.0.1",
+         "isarray": "0.0.1",
+         "string_decoder": "~0.10.x"
+       }
+     },
+     "node_modules/wav/node_modules/string_decoder": {
+       "version": "0.10.31",
+       "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-0.10.31.tgz",
+       "integrity": "sha512-ev2QzSzWPYmy9GuqfIVildA4OdcGLeFZQrq5ys6RtiuF+RQQiZWr8TZNyAcuVXyQRYfEO+MsoB/1BuQVhOJuoQ=="
+     },
+     "node_modules/webidl-conversions": {
+       "version": "3.0.1",
+       "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
+       "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ=="
+     },
+     "node_modules/whatwg-url": {
+       "version": "5.0.0",
+       "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",
+       "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==",
+       "dependencies": {
+         "tr46": "~0.0.3",
+         "webidl-conversions": "^3.0.0"
+       }
+     },
+     "node_modules/xtend": {
+       "version": "4.0.2",
+       "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.2.tgz",
+       "integrity": "sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==",
+       "engines": {
+         "node": ">=0.4"
+       }
+     }
+   }
+ }
app/package.json ADDED
@@ -0,0 +1,19 @@
+ {
+   "name": "audio-transcription",
+   "version": "1.0.0",
+   "description": "",
+   "main": "app.js",
+   "scripts": {
+     "test": "echo \"Error: no test specified\" && exit 1"
+   },
+   "keywords": [],
+   "author": "",
+   "license": "ISC",
+   "dependencies": {
+     "express": "^4.19.2",
+     "form-data": "^4.0.0",
+     "multer": "^1.4.5-lts.1",
+     "node-fetch": "^2.7.0",
+     "wav": "^1.0.2"
+   }
+ }
app/public/app.js ADDED
@@ -0,0 +1,99 @@
+ const recordButton = document.getElementById('record');
+ const status = document.getElementById('status');
+ const transcriptionElement = document.getElementById('transcription');
+ const audioElement = document.getElementById('audio');
+ const translationElement = document.getElementById('translation');
+
+ let mediaRecorder;
+ let audioChunks = [];
+ let transcript = '';
+ let sentenceIndex = 0;
+
+ const recognition = new (window.SpeechRecognition || window.webkitSpeechRecognition)();
+ recognition.continuous = true;
+ recognition.interimResults = true;
+
+ recognition.onresult = (event) => {
+     let interimTranscript = '';
+     for (let i = event.resultIndex; i < event.results.length; ++i) {
+         if (event.results[i].isFinal) {
+             transcript += event.results[i][0].transcript + ' ';
+             saveAudioAndTranscription(event.results[i][0].transcript, sentenceIndex++);
+         } else {
+             interimTranscript += event.results[i][0].transcript;
+         }
+     }
+     transcriptionElement.innerHTML = transcript + '<i style="color:red;">' + interimTranscript + '</i>';
+ };
+
+ recognition.onerror = (event) => {
+     console.error(event.error);
+ };
+
+ recordButton.onmousedown = async () => {
+     status.textContent = "Recording...";
+     transcript = '';
+     sentenceIndex = 0;
+
+     // Start speech recognition
+     recognition.start();
+
+     // Start audio recording
+     const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+     mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm' });
+     mediaRecorder.start();
+
+     mediaRecorder.ondataavailable = (event) => {
+         audioChunks.push(event.data);
+     };
+ };
+
+ recordButton.onmouseup = () => {
+     status.textContent = "Recording stopped";
+
+     // Stop speech recognition and audio recording
+     recognition.stop();
+     mediaRecorder.stop();
+
+     // Process the recorded audio
+     saveAudioAndTranscription(transcript, sentenceIndex);
+ };
+
+ async function saveAudioAndTranscription(sentence, index) {
+     mediaRecorder.stop();
+     mediaRecorder.onstop = async () => {
+         const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
+         const arrayBuffer = await audioBlob.arrayBuffer();
+         const audioBuffer = new Uint8Array(arrayBuffer);
+
+         const formData = new FormData();
+         formData.append('audio', new Blob([audioBuffer], { type: 'application/octet-stream' }));
+         formData.append('transcript', sentence);
+         formData.append('sampleRate', mediaRecorder.stream.getAudioTracks()[0].getSettings().sampleRate);
+         formData.append('numberOfChannels', 1); // Assuming mono audio
+
+         try {
+             const response = await fetch('/save-audio', {
+                 method: 'POST',
+                 body: formData
+             });
+
+             if (response.ok) {
+                 const result = await response.json();
+                 console.log(`Saved sentence ${index}`);
+
+                 // Show translation and play audio
+                 translationElement.textContent = result.translation;
+                 audioElement.src = `http://localhost:8000/download-audio?file_path=${result.audio_path}`;
+                 audioElement.play();
+             } else {
+                 console.error('Failed to save the file.');
+             }
+         } catch (error) {
+             console.error('Error saving audio and transcription:', error);
+         }
+
+         audioChunks = [];
+         mediaRecorder.start();
+     };
+ }
app/public/index.html ADDED
@@ -0,0 +1,35 @@
+ <!DOCTYPE html>
+ <html lang="en">
+ <head>
+     <meta charset="UTF-8">
+     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+     <title>Audio Recording and Translation</title>
+     <link rel="stylesheet" href="styles.css">
+     <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.4/css/all.min.css">
+     <link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Roboto:wght@400;500;700&display=swap">
+ </head>
+ <body>
+     <div class="container">
+         <header>
+             <h1>Seamless Speech-to-Speech Translation with Voice Replication (S3TVR)</h1>
+             <p class="description">S3TVR is an advanced cascaded AI framework designed for real-time speech-to-speech translation while maintaining the speaker's voice characteristics in a zero-shot fashion. The project balances latency and output quality, focuses on English and Spanish, and combines multiple open-source models and algorithms. The system is optimized for local execution, allowing dynamic and efficient voice translation with an average latency of ~3 seconds per sentence. For the optimized model, check the GitHub repo below.</p>
+             <p class="description">NOTE: The local execution is streamed and fully optimized (unlike this demo).</p>
+             <div class="links">
+                 <a href="https://github.com/yalsaffar/S3TVR" target="_blank"><i class="fab fa-github"></i></a>
+                 <a href="https://yousifalsaffar.com/" target="_blank"><i class="fas fa-globe"></i></a>
+                 <a href="https://www.linkedin.com/in/yousif-alsaffar-7621b5142/" target="_blank"><i class="fab fa-linkedin"></i></a>
+                 <a href="https://huggingface.co/yalsaffar" target="_blank"><i class="fas fa-robot"></i></a>
+             </div>
+         </header>
+         <div class="circle-button" id="record">
+             <i class="fas fa-microphone"></i>
+         </div>
+         <p id="label">Press and hold until the sentence is no longer red</p>
+         <p id="status"> </p>
+         <div id="transcription" class="text-output"></div>
+         <div id="translation" class="text-output"></div>
+         <audio id="audio" controls></audio>
+     </div>
+     <script src="app.js"></script>
+ </body>
+ </html>
app/public/styles.css ADDED
@@ -0,0 +1,96 @@
+ body {
+     font-family: 'Roboto', sans-serif;
+     display: flex;
+     justify-content: center;
+     align-items: center;
+     height: 100vh;
+     background-color: #f5f5f5;
+     margin: 0;
+     padding: 20px;
+     box-sizing: border-box;
+ }
+
+ .container {
+     text-align: center;
+     max-width: 800px;
+     width: 100%;
+ }
+
+ header {
+     margin-bottom: 20px;
+ }
+
+ header h1 {
+     font-size: 2em;
+     font-weight: 700;
+     margin-bottom: 10px;
+ }
+
+ header .description {
+     font-size: 1.1em;
+     font-weight: 400;
+     color: #555;
+     margin-bottom: 20px;
+     line-height: 1.6;
+ }
+
+ .links {
+     display: flex;
+     justify-content: center;
+     gap: 20px;
+     margin-bottom: 20px;
+ }
+
+ .links a {
+     color: #333;
+     font-size: 1.5em;
+     transition: color 0.3s;
+ }
+
+ .links a:hover {
+     color: #ff4757;
+ }
+
+ .circle-button {
+     width: 100px;
+     height: 100px;
+     background-color: #ff4757;
+     border-radius: 50%;
+     display: flex;
+     justify-content: center;
+     align-items: center;
+     cursor: pointer;
+     margin: 20px auto;
+     transition: background-color 0.3s ease;
+ }
+
+ .circle-button:hover {
+     background-color: #ff6b81;
+ }
+
+ .circle-button:active {
+     background-color: #34c759;
+ }
+
+ .circle-button i {
+     color: white;
+     font-size: 2em;
+ }
+
+ .text-output {
+     background-color: white;
+     border-radius: 5px;
+     box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
+     padding: 15px;
+     margin: 10px auto;
+     width: 80%;
+     max-width: 500px;
+     text-align: left;
+     font-size: 1em;
+     line-height: 1.5;
+ }
+
+ #status {
+     font-weight: bold;
+     margin-top: 10px;
+ }
app/server.js ADDED
@@ -0,0 +1,102 @@
+ const express = require('express');
+ const multer = require('multer');
+ const path = require('path');
+ const fs = require('fs');
+ const { exec } = require('child_process');
+ const fetch = require('node-fetch');
+ const FormData = require('form-data');
+
+ const app = express();
+ const port = 3000;
+
+ const uploadsDir = path.join(__dirname, 'uploads');
+ if (!fs.existsSync(uploadsDir)) {
+     fs.mkdirSync(uploadsDir);
+ }
+
+ const storage = multer.memoryStorage();
+ const upload = multer({ storage: storage });
+
+ app.use(express.static(path.join(__dirname, 'public')));
+ app.use(express.json());
+
+ const getNextFolderNumber = () => {
+     const folders = fs.readdirSync(uploadsDir).filter(file => fs.statSync(path.join(uploadsDir, file)).isDirectory());
+     const folderNumbers = folders.map(folder => parseInt(folder)).filter(num => !isNaN(num));
+     return folderNumbers.length > 0 ? Math.max(...folderNumbers) + 1 : 1;
+ };
+
+ let sentenceIndex = 0;
+ let audioPaths = [];
+
+ app.post('/save-audio', upload.single('audio'), async (req, res) => {
+     const nextFolderNumber = getNextFolderNumber();
+     const folderPath = path.join(uploadsDir, nextFolderNumber.toString());
+     if (!fs.existsSync(folderPath)) {
+         fs.mkdirSync(folderPath, { recursive: true });
+     }
+
+     const rawAudioPath = path.join(folderPath, `audio_${sentenceIndex}.webm`);
+     const wavAudioPath = path.join(folderPath, `audio_${sentenceIndex}.wav`);
+     const transcriptionPath = path.join(folderPath, `transcription_${sentenceIndex}.txt`);
+
+     fs.writeFileSync(rawAudioPath, req.file.buffer);
+
+     fs.writeFileSync(transcriptionPath, req.body.transcript);
+
+     const ffmpegCommand = `ffmpeg -i ${rawAudioPath} -ar 44100 -ac 1 ${wavAudioPath}`;
+     exec(ffmpegCommand, async (error, stdout, stderr) => {
+         if (error) {
+             console.error(`Error converting audio to WAV: ${stderr}`);
+             return res.status(500).send('Error converting audio to WAV');
+         }
+
+         fs.unlinkSync(rawAudioPath);
+
+         const formData = new FormData();
+         formData.append('original_path', fs.createReadStream(wavAudioPath));
+         formData.append('text', req.body.transcript);
+         formData.append('lang', 'en');
+         formData.append('target_lang', 'es');
+
+         try {
+             const response = await fetch('http://localhost:8000/process-audio/', {
+                 method: 'POST',
+                 body: formData,
+                 headers: formData.getHeaders()
+             });
+
+             if (response.ok) {
+                 const result = await response.json();
+                 console.log(result);
+                 audioPaths.push(result.audio_path);
+                 sentenceIndex++;
+                 res.status(200).json({ audio_path: result.audio_path, translation: result.translation });
+             } else {
+                 console.error('Failed to process the file via FastAPI');
+                 res.status(500).send('Failed to process the file via FastAPI');
+             }
+         } catch (error) {
+             console.error('Error calling FastAPI:', error);
+             res.status(500).send('Error calling FastAPI');
+         }
+     });
+ });
+
+ app.get('/concatenate-audio', (req, res) => {
+     const folderPath = path.join(uploadsDir, getNextFolderNumber().toString());
+     const finalAudioPath = path.join(folderPath, 'final_audio.wav');
+     const concatCommand = `ffmpeg -y -i "concat:${audioPaths.join('|')}" -acodec copy ${finalAudioPath}`;
+     exec(concatCommand, (concatError, concatStdout, concatStderr) => {
+         if (concatError) {
+             console.error(`Error concatenating audio files: ${concatStderr}`);
+             return res.status(500).send('Error concatenating audio files');
+         }
+
+         res.status(200).json({ audio_path: finalAudioPath });
+     });
+ });
+
+ app.listen(port, () => {
+     console.log(`Server running at http://localhost:${port}`);
+ });
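
For reference, a minimal sketch of exercising this endpoint outside the browser UI. The field names ('audio' for the multer upload, 'transcript' for the text) come from the handler above; the file path and transcript are placeholders:

    import requests  # assumes the requests package is available; not part of this repo

    # Hypothetical local test: POST a recording plus its transcript to the Node server.
    with open("sample.wav", "rb") as f:  # placeholder path
        resp = requests.post(
            "http://localhost:3000/save-audio",
            files={"audio": ("audio.webm", f, "audio/webm")},  # field name from upload.single('audio')
            data={"transcript": "hello world"},                # read by the handler as req.body.transcript
        )
    # On success the server relays FastAPI's result: {"audio_path": ..., "translation": ...}
    print(resp.status_code, resp.json() if resp.ok else resp.text)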
app/temp_wav_files/audio-1718725396714.wav ADDED
Binary file (278 kB).
 
app/uploads/1/audio_2.wav ADDED
Binary file (307 kB).
 
app/uploads/1/transcription_2.txt ADDED
@@ -0,0 +1 @@
+ okay now we're still actually works
audio_segments/readme ADDED
File without changes
inference_functions.py ADDED
@@ -0,0 +1,80 @@
+ import time
+ import torch
+ import torchaudio
+ import noisereduce as nr
+ import numpy as np
+ from models.nllb import nllb_translate
+
+ def translate(model_nllb, tokenizer_nllb, text, target_lang):
+     print("Processing translation...")
+     start_time = time.time()
+     translation = nllb_translate(model_nllb, tokenizer_nllb, text, target_lang)
+     print("Translation:", translation)
+     print("Translation time:", time.time() - start_time)
+     return translation
+
+ def just_inference(model, original_path, output_dir, text, lang):
+     print("Inference...")
+     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+     model.to(device)
+     path_to_save = output_dir
+     t0 = time.time()
+
+     try:
+         # Load the audio
+         print("Loading audio...")
+         wav, sr = torchaudio.load(original_path)
+         print(f"Loaded audio with sample rate: {sr}")
+
+         wav = wav.squeeze().numpy()
+         print(f"Audio shape after squeezing: {wav.shape}")
+
+         # Apply noise reduction
+         print("Applying noise reduction...")
+         reduced_noise_audio = nr.reduce_noise(y=wav, sr=sr)
+         reduced_noise_audio = torch.tensor(reduced_noise_audio).unsqueeze(0)
+         print(f"Reduced noise audio shape: {reduced_noise_audio.shape}")
+
+         # Move the reduced noise audio to the correct device
+         reduced_noise_audio = reduced_noise_audio.to(device)
+
+         print("Getting conditioning latents...")
+         gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=[original_path])
+         print("Got conditioning latents.")
+
+         print("Starting inference stream...")
+         chunks = model.inference_stream(
+             text,
+             lang,
+             gpt_cond_latent,
+             speaker_embedding,
+             stream_chunk_size=15,
+             speed=0.95
+         )
+         print("Inference stream started.")
+
+         full_audio = torch.Tensor().to(device)
+         for i, chunk in enumerate(chunks):
+             try:
+                 if i == 0:
+                     # Measure latency at the first chunk delivered by the stream.
+                     time_to_first_chunk = time.time() - t0
+                     print(f"Time to first chunk: {time_to_first_chunk}")
+                 full_audio = torch.cat((full_audio, chunk.squeeze().to(device)), dim=-1)
+                 print(f"Processed chunk {i}, chunk shape: {chunk.shape}")
+             except Exception as e:
+                 print(f"Error processing chunk {i}: {e}")
+                 raise
+
+         # Move full_audio to CPU before saving
+         full_audio = full_audio.cpu()
+
+         print(f"Saving full audio to {path_to_save}...")
+         torchaudio.save(path_to_save, full_audio.unsqueeze(0), 24000)
+         print("Audio saved.")
+
+         print("Inference finished")
+         return full_audio
+
+     except Exception as e:
+         print(f"Error during processing: {e}")
+         raise
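
A minimal sketch of how these two helpers chain together, assuming the models are already loaded via load_models.py and that the example file paths exist:

    from load_models import get_nllb_model_and_tokenizer, get_xtts_model
    from inference_functions import translate, just_inference

    model_nllb, tokenizer_nllb = get_nllb_model_and_tokenizer()
    xtts = get_xtts_model()

    # Translate an English transcription to Spanish, then re-voice it with XTTS,
    # cloning the voice from the original recording.
    spanish = translate(model_nllb, tokenizer_nllb, "hello, can you hear me?", "es")
    just_inference(xtts, "uploads/1/audio_2.wav", "results/audio_2_es.wav", spanish, "es")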
load_models.py ADDED
@@ -0,0 +1,18 @@
+ from models.nllb import nllb
+ #from models.parakeet import parakeet_ctc_model
+ model_nllb, tokenizer_nllb = nllb()
+
+ from models.TTS_utils import load_manual_xtts_v2
+
+
+ config_path = "test/config.json"
+ model_path = "test"
+
+ xtts_v2_model = load_manual_xtts_v2(config_path, model_path)
+
+
+ def get_nllb_model_and_tokenizer():
+     return model_nllb, tokenizer_nllb
+
+ def get_xtts_model():
+     return xtts_v2_model
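
Because the module body runs at import time, both models are loaded exactly once per process; downstream code just pulls the shared instances:

    from load_models import get_nllb_model_and_tokenizer, get_xtts_model

    model_nllb, tokenizer_nllb = get_nllb_model_and_tokenizer()  # shared NLLB instance
    xtts_v2_model = get_xtts_model()                             # shared XTTS v2 instance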
main.ipynb ADDED
@@ -0,0 +1,395 @@
+ {
+  "cells": [
+   {
+    "cell_type": "code",
+    "execution_count": 1,
+    "metadata": {},
+    "outputs": [
+     {
+      "name": "stderr",
+      "output_type": "stream",
+      "text": [
+       "c:\\Users\\spn\\anaconda3\\envs\\capstone\\Lib\\site-packages\\torchvision\\io\\image.py:13: UserWarning: Failed to load image Python extension: '[WinError 127] The specified procedure could not be found'If you don't plan on using image functionality from `torchvision.io`, you can ignore this warning. Otherwise, there might be something wrong with your environment. Did you have `libjpeg` or `libpng` installed before building `torchvision` from source?\n",
+       "  warn(\n"
+      ]
+     },
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       "[2024-06-10 23:30:49,190] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n"
+      ]
+     },
+     {
+      "name": "stderr",
+      "output_type": "stream",
+      "text": [
+       "[2024-06-10 23:30:49,544] torch.distributed.elastic.multiprocessing.redirects: [WARNING] NOTE: Redirects are currently not supported in Windows or MacOs.\n",
+       "[NeMo W 2024-06-10 23:30:52 nemo_logging:393] Could not import NeMo NLP collection which is required for speech translation model.\n"
+      ]
+     },
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       "[NeMo I 2024-06-10 23:31:08 nemo_logging:381] Tokenizer SentencePieceTokenizer initialized with 1024 tokens\n"
+      ]
+     },
+     {
+      "name": "stderr",
+      "output_type": "stream",
+      "text": [
+       "[NeMo W 2024-06-10 23:31:08 nemo_logging:393] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.\n",
+       "    Train config : \n",
+       "    manifest_filepath: /disk1/NVIDIA/datasets/LibriSpeech_NeMo/librivox-train-all.json\n",
+       "    sample_rate: 16000\n",
+       "    batch_size: 16\n",
+       "    shuffle: true\n",
+       "    num_workers: 8\n",
+       "    pin_memory: true\n",
+       "    use_start_end_token: false\n",
+       "    trim_silence: false\n",
+       "    max_duration: 16.7\n",
+       "    min_duration: 0.1\n",
+       "    is_tarred: false\n",
+       "    tarred_audio_filepaths: null\n",
+       "    shuffle_n: 2048\n",
+       "    bucketing_strategy: fully_randomized\n",
+       "    bucketing_batch_size: null\n",
+       "    \n",
+       "[NeMo W 2024-06-10 23:31:08 nemo_logging:393] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s). \n",
+       "    Validation config : \n",
+       "    manifest_filepath: /disk1/NVIDIA/datasets/LibriSpeech_NeMo/librivox-dev-clean.json\n",
+       "    sample_rate: 16000\n",
+       "    batch_size: 16\n",
+       "    shuffle: false\n",
+       "    use_start_end_token: false\n",
+       "    num_workers: 8\n",
+       "    pin_memory: true\n",
+       "    \n",
+       "[NeMo W 2024-06-10 23:31:08 nemo_logging:393] Please call the ModelPT.setup_test_data() or ModelPT.setup_multiple_test_data() method and provide a valid configuration file to setup the test data loader(s).\n",
+       "    Test config : \n",
+       "    manifest_filepath: null\n",
+       "    sample_rate: 16000\n",
+       "    batch_size: 16\n",
+       "    shuffle: false\n",
+       "    use_start_end_token: false\n",
+       "    num_workers: 8\n",
+       "    pin_memory: true\n",
+       "    \n"
+      ]
+     },
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       "[NeMo I 2024-06-10 23:31:08 nemo_logging:381] PADDING: 0\n"
+      ]
+     },
+     {
+      "name": "stderr",
+      "output_type": "stream",
+      "text": [
+       "[NeMo W 2024-06-10 23:31:11 nemo_logging:393] `method_cfg` is deprecated and will be removed in the future. Please use `measure_cfg` instead.\n",
+       "[NeMo W 2024-06-10 23:31:11 nemo_logging:393] Re-writing `measure_cfg` with the value of `method_cfg`.\n",
+       "[NeMo W 2024-06-10 23:31:11 nemo_logging:393] `temperature` is deprecated and will be removed in the future. Please use `alpha` instead.\n",
+       "[NeMo W 2024-06-10 23:31:11 nemo_logging:393] Re-writing `alpha` with the value of `temperature`.\n",
+       "[NeMo W 2024-06-10 23:31:11 nemo_logging:393] `method_cfg` is deprecated and will be removed in the future. Please use `measure_cfg` instead.\n",
+       "[NeMo W 2024-06-10 23:31:11 nemo_logging:393] Re-writing `measure_cfg` with the value of `method_cfg`.\n",
+       "[NeMo W 2024-06-10 23:31:11 nemo_logging:393] `temperature` is deprecated and will be removed in the future. Please use `alpha` instead.\n",
+       "[NeMo W 2024-06-10 23:31:11 nemo_logging:393] Re-writing `alpha` with the value of `temperature`.\n"
+      ]
+     },
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       "[NeMo I 2024-06-10 23:31:16 nemo_logging:381] Model EncDecCTCModelBPE was successfully restored from C:\\Users\\spn\\.cache\\huggingface\\hub\\models--nvidia--parakeet-ctc-0.6b\\snapshots\\097ffc5b027beabc73acb627def2d1d278e774e9\\parakeet-ctc-0.6b.nemo.\n"
+      ]
+     }
+    ],
+    "source": [
+     "from models.nllb import nllb\n",
+     "#from models.TTS_utils import xtts_v2\n",
+     "from models.parakeet import parakeet_ctc_model\n",
+     "from models.es_fastconformer import stt_es_model\n",
+     "model_nllb, tokenizer_nllb = nllb()\n",
+     "#xtts_v2_model = xtts_v2()\n",
+     "parakeet = parakeet_ctc_model()\n",
+     "#sst = stt_es_model()"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 2,
+    "metadata": {},
+    "outputs": [
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       "Writing audio_segments\\segment_0.wav...\n",
+       "Processing segment...\n",
+       "0.021454915\n",
+       "Noise reduction done!\n",
+       "Noise removed. Time: 0.06042814254760742\n"
+      ]
+     },
+     {
+      "data": {
+       "application/vnd.jupyter.widget-view+json": {
+        "model_id": "6909654da05f4b0a88458139a9b37d6d",
+        "version_major": 2,
+        "version_minor": 0
+       },
+       "text/plain": [
+        "Transcribing:   0%|          | 0/1 [00:00<?, ?it/s]"
+       ]
+      },
+      "metadata": {},
+      "output_type": "display_data"
+     },
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       "Transcription: hello can you hear me\n",
+       "Transcription time: 1.3255603313446045\n",
+       "Translating...\n",
+       "Processing translation...\n",
+       "Translation: Hola, ¿ me escuchas?\n",
+       "Translation time: 0.932790994644165\n",
+       "Writing audio_segments\\segment_1.wav...\n",
+       "Processing segment...\n",
+       "0.010297036\n",
+       "No speech detected.\n",
+       "Writing audio_segments\\segment_2.wav...\n",
+       "Processing segment...\n",
+       "0.006772096\n",
+       "No speech detected.\n",
+       "Writing audio_segments\\segment_3.wav...\n",
+       "Processing segment...\n",
+       "0.0034770737\n",
+       "No speech detected.\n",
+       "Writing audio_segments\\segment_4.wav...\n",
+       "Processing segment...\n",
+       "0.0039069764\n",
+       "No speech detected.\n",
+       "Writing audio_segments\\segment_5.wav...\n",
+       "Processing segment...\n",
+       "0.0046523036\n",
+       "No speech detected.\n",
+       "Writing audio_segments\\segment_6.wav...\n",
+       "Processing segment...\n",
+       "0.0040206155\n",
+       "No speech detected.\n",
+       "Writing audio_segments\\segment_7.wav...\n",
+       "Processing segment...\n",
+       "0.0043495107\n",
+       "No speech detected.\n",
+       "Writing audio_segments\\segment_8.wav...\n",
+       "Processing segment...\n",
+       "0.00421352\n",
+       "No speech detected.\n",
+       "Writing audio_segments\\segment_9.wav...\n",
+       "Processing segment...\n",
+       "0.0040656724\n",
+       "No speech detected.\n",
+       "Writing audio_segments\\segment_10.wav...\n",
+       "Processing segment...\n",
+       "0.0042125704\n",
+       "No speech detected.\n",
+       "Writing audio_segments\\segment_11.wav...\n",
+       "Processing segment...\n",
+       "0.015398192\n",
+       "Noise reduction done!\n",
+       "Noise removed. Time: 0.020929336547851562\n"
+      ]
+     },
+     {
+      "data": {
+       "application/vnd.jupyter.widget-view+json": {
+        "model_id": "de3d4b3a7bc14de2afbb01ff82252dc2",
+        "version_major": 2,
+        "version_minor": 0
+       },
+       "text/plain": [
+        "Transcribing:   0%|          | 0/1 [00:00<?, ?it/s]"
+       ]
+      },
+      "metadata": {},
+      "output_type": "display_data"
+     }
+    ],
+    "source": [
+     "from stream_VAD import stream\n",
+     "stream(parakeet, model_nllb, tokenizer_nllb, \"english\", \"spanish\", 'record_temp.json', 'record_per.json')"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 4,
+    "metadata": {},
+    "outputs": [
+     {
+      "data": {
+       "application/vnd.jupyter.widget-view+json": {
+        "model_id": "fdc0440dfcaf4c9f814689fc47c10e3e",
+        "version_major": 2,
+        "version_minor": 0
+       },
+       "text/plain": [
+        "(…)tt_es_fastconformer_hybrid_large_pc.nemo:   0%|          | 0.00/459M [00:00<?, ?B/s]"
+       ]
+      },
+      "metadata": {},
+      "output_type": "display_data"
+     },
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       "[NeMo I 2024-04-12 16:10:09 nemo_logging:381] Tokenizer SentencePieceTokenizer initialized with 1024 tokens\n"
+      ]
+     },
+     {
+      "name": "stderr",
+      "output_type": "stream",
+      "text": [
+       "[NeMo W 2024-04-12 16:10:10 nemo_logging:393] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.\n",
+       "    Train config : \n",
+       "    manifest_filepath: null\n",
+       "    sample_rate: 16000\n",
+       "    batch_size: 16\n",
+       "    shuffle: true\n",
+       "    num_workers: 8\n",
+       "    pin_memory: true\n",
+       "    use_start_end_token: false\n",
+       "    trim_silence: false\n",
+       "    max_duration: 20\n",
+       "    min_duration: 0.1\n",
+       "    is_tarred: false\n",
+       "    tarred_audio_filepaths: null\n",
+       "    shuffle_n: 2048\n",
+       "    bucketing_strategy: fully_randomized\n",
+       "    bucketing_batch_size: null\n",
+       "    is_concat: false\n",
+       "    concat_sampling_technique: random\n",
+       "    concat_sampling_probabilities: ''\n",
+       "    \n",
+       "[NeMo W 2024-04-12 16:10:10 nemo_logging:393] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s). \n",
+       "    Validation config : \n",
+       "    manifest_filepath: null\n",
+       "    sample_rate: 16000\n",
+       "    batch_size: 32\n",
+       "    shuffle: false\n",
+       "    num_workers: 8\n",
+       "    pin_memory: true\n",
+       "    use_start_end_token: false\n",
+       "    is_concat: true\n",
+       "    concat_sampling_technique: random\n",
+       "    concat_sampling_probabilities:\n",
+       "    - 0.099\n",
+       "    - 0.2771\n",
+       "    - 0.5482\n",
+       "    - 0.0757\n",
+       "    concat_shuffle: false\n",
+       "    concat_sampling_seed: 1234\n",
+       "    max_duration: 20\n",
+       "    \n",
+       "[NeMo W 2024-04-12 16:10:10 nemo_logging:393] Please call the ModelPT.setup_test_data() or ModelPT.setup_multiple_test_data() method and provide a valid configuration file to setup the test data loader(s).\n",
+       "    Test config : \n",
+       "    manifest_filepath: null\n",
+       "    sample_rate: 16000\n",
+       "    batch_size: 16\n",
+       "    shuffle: false\n",
+       "    num_workers: 8\n",
+       "    pin_memory: true\n",
+       "    use_start_end_token: false\n",
+       "    \n"
+      ]
+     },
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       "[NeMo I 2024-04-12 16:10:10 nemo_logging:381] PADDING: 0\n"
+      ]
+     },
+     {
+      "name": "stderr",
+      "output_type": "stream",
+      "text": [
+       "[NeMo W 2024-04-12 16:10:11 nemo_logging:393] c:\\Users\\spn\\anaconda3\\envs\\capstone\\Lib\\site-packages\\torch\\nn\\modules\\rnn.py:83: UserWarning: dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1, but got dropout=0.2 and num_layers=1\n",
+       "      warnings.warn(\"dropout option adds dropout after all but last \"\n",
+       "    \n"
+      ]
+     },
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       "[NeMo I 2024-04-12 16:10:11 nemo_logging:381] Using RNNT Loss : warprnnt_numba\n",
+       "    Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}\n",
+       "[NeMo I 2024-04-12 16:10:12 nemo_logging:381] Model EncDecHybridRNNTCTCBPEModel was successfully restored from C:\\Users\\spn\\.cache\\huggingface\\hub\\models--nvidia--stt_es_fastconformer_hybrid_large_pc\\snapshots\\65f775445d5947d6784c3e80d9a14d859571947f\\stt_es_fastconformer_hybrid_large_pc.nemo.\n"
+      ]
+     }
+    ],
+    "source": [
+     "from models.es_fastconformer import stt_es_model\n",
+     "model = stt_es_model()\n",
+     "# check how much memory is used by the model\n",
+     "import torch\n",
+     "import psutil\n",
+     "import os\n",
+     "import time\n",
+     "\n"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 5,
+    "metadata": {},
+    "outputs": [
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       "Model size: 458.86 MB\n"
+      ]
+     }
+    ],
+    "source": [
+     "# get the size of the model in terms of memory in MB\n",
+     "def get_size(model):\n",
+     "    torch.save(model.state_dict(), 'temp.p')\n",
+     "    size = os.path.getsize('temp.p') / 1e6\n",
+     "    os.remove('temp.p')\n",
+     "    return size\n",
+     "size = get_size(model)\n",
+     "print(f\"Model size: {size:.2f} MB\")"
+    ]
+   }
+  ],
+  "metadata": {
+   "kernelspec": {
+    "display_name": "capstone",
+    "language": "python",
+    "name": "python3"
+   },
+   "language_info": {
+    "codemirror_mode": {
+     "name": "ipython",
+     "version": 3
+    },
+    "file_extension": ".py",
+    "mimetype": "text/x-python",
+    "name": "python",
+    "nbconvert_exporter": "python",
+    "pygments_lexer": "ipython3",
+    "version": "3.11.7"
+   }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 2
+ }
main.py ADDED
@@ -0,0 +1,79 @@
+ import streamlit as st
+ from streamlit_webrtc import webrtc_streamer, WebRtcMode, RTCConfiguration
+ import av
+ import numpy as np
+ import pydub
+ from io import BytesIO
+ from models.nllb import nllb
+ from models.parakeet import parakeet_ctc_model
+ from stream_VAD import stream
+ from models.es_fastconformer import stt_es_model
+
+ RTC_CONFIGURATION = RTCConfiguration({"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]})
+
+ # Load models once
+ model_nllb, tokenizer_nllb = nllb()
+ parakeet = parakeet_ctc_model()
+ stt_model = stt_es_model()
+
+ def process_audio(audio_chunk, language):
+     # Convert audio chunk to pydub.AudioSegment
+     audio_segment = pydub.AudioSegment(
+         data=audio_chunk.tobytes(),
+         sample_width=audio_chunk.format.sample_width,
+         frame_rate=audio_chunk.sample_rate,
+         channels=len(audio_chunk.layout.channels)
+     )
+
+     # Process audio based on selected language
+     if language == "en":
+         processed_audio = stream(parakeet, model_nllb, tokenizer_nllb, "english", "spanish", audio_segment)
+     elif language == "es":
+         processed_audio = stream(stt_model, model_nllb, tokenizer_nllb, "spanish", "english", audio_segment)
+     else:
+         return audio_chunk
+
+     # Convert processed audio back to numpy array
+     processed_audio_np = np.array(processed_audio.get_array_of_samples())
+
+     return processed_audio.frame_rate, processed_audio_np
+
+ def audio_callback(frame: av.AudioFrame, language):
+     audio_data = frame.to_ndarray()
+     audio_chunk = av.AudioFrame.from_ndarray(audio_data, format="s16", layout="mono")
+     return process_audio(audio_chunk, language)
+
+ st.title("Real-Time Audio Processing")
+
+ language = st.radio("Select Language", ["en", "es"], index=0)
+
+ webrtc_ctx = webrtc_streamer(
+     key="audio",
+     mode=WebRtcMode.SENDRECV,
+     rtc_configuration=RTC_CONFIGURATION,
+     media_stream_constraints={"audio": True, "video": False},
+     audio_receiver_size=256,
+     async_processing=True,
+ )
+
+ if webrtc_ctx.audio_receiver:
+     webrtc_ctx.audio_receiver.on("data", lambda frame: audio_callback(frame, language))
+
+ if "audio_buffer" not in st.session_state:
+     st.session_state["audio_buffer"] = BytesIO()
+
+ if webrtc_ctx.audio_receiver:
+     audio_frames = webrtc_ctx.audio_receiver.get_frames()
+
+     for frame in audio_frames:
+         processed_audio_rate, processed_audio_np = audio_callback(frame, language)
+
+         audio_segment = pydub.AudioSegment(
+             data=processed_audio_np.tobytes(),
+             sample_width=processed_audio_np.dtype.itemsize,
+             frame_rate=processed_audio_rate,
+             channels=1
+         )
+         st.session_state["audio_buffer"].write(audio_segment.export(format="wav").read())
+
+ st.audio(st.session_state["audio_buffer"].getvalue(), format="audio/wav")
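
Being a Streamlit app, this prototype is launched with "streamlit run main.py"; the WebRTC widget then negotiates microphone access in the browser and the processed audio accumulates in the session buffer shown above.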
main_stream.ipynb ADDED
@@ -0,0 +1,87 @@
+ {
+  "cells": [
+   {
+    "cell_type": "code",
+    "execution_count": 1,
+    "metadata": {},
+    "outputs": [
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       "[2024-06-25 20:01:43,998] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n"
+      ]
+     },
+     {
+      "name": "stderr",
+      "output_type": "stream",
+      "text": [
+       "[2024-06-25 20:01:44,318] torch.distributed.elastic.multiprocessing.redirects: [WARNING] NOTE: Redirects are currently not supported in Windows or MacOs.\n"
+      ]
+     },
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       "Loading model...\n",
+       "[2024-06-25 20:02:01,663] [INFO] [logging.py:96:log_dist] [Rank -1] DeepSpeed info: version=0.14.0+ce78a63, git-hash=ce78a63, git-branch=master\n",
+       "[2024-06-25 20:02:01,664] [WARNING] [config_utils.py:69:_process_deprecated_field] Config parameter replace_method is deprecated. This parameter is no longer needed, please remove from your call to DeepSpeed-inference\n",
+       "[2024-06-25 20:02:01,665] [WARNING] [config_utils.py:69:_process_deprecated_field] Config parameter mp_size is deprecated use tensor_parallel.tp_size instead\n",
+       "[2024-06-25 20:02:01,666] [INFO] [logging.py:96:log_dist] [Rank -1] quantize_bits = 8 mlp_extra_grouping = False, quantize_groups = 1\n",
+       "[2024-06-25 20:02:01,900] [INFO] [logging.py:96:log_dist] [Rank -1] DeepSpeed-Inference config: {'layer_id': 0, 'hidden_size': 1024, 'intermediate_size': 4096, 'heads': 16, 'num_hidden_layers': -1, 'dtype': torch.float32, 'pre_layer_norm': True, 'norm_type': <NormType.LayerNorm: 1>, 'local_rank': -1, 'stochastic_mode': False, 'epsilon': 1e-05, 'mp_size': 1, 'scale_attention': True, 'triangular_masking': True, 'local_attention': False, 'window_size': 1, 'rotary_dim': -1, 'rotate_half': False, 'rotate_every_two': True, 'return_tuple': True, 'mlp_after_attn': True, 'mlp_act_func_type': <ActivationFuncType.GELU: 1>, 'specialized_mode': False, 'training_mp_size': 1, 'bigscience_bloom': False, 'max_out_tokens': 1024, 'min_out_tokens': 1, 'scale_attn_by_inverse_layer_idx': False, 'enable_qkv_quantization': False, 'use_mup': False, 'return_single_tuple': False, 'set_empty_params': False, 'transposed_mode': False, 'use_triton': False, 'triton_autotune': False, 'num_kv': -1, 'rope_theta': 10000}\n"
+      ]
+     }
+    ],
+    "source": [
+     "from models.TTS_utils import load_manual_xtts_v2\n",
+     "config_path = \"test/config.json\"\n",
+     "model_path = \"test\"\n",
+     "\n",
+     "xtts_v2_model = load_manual_xtts_v2(config_path, model_path)\n"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 4,
+    "metadata": {},
+    "outputs": [
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       "Inference...\n",
+       "No more text to process\n",
+       "Inference...\n",
+       "No more text to process\n"
+      ]
+     }
+    ],
+    "source": [
+     "from models.TTS_utils import stream_prod\n",
+     "stream_prod(xtts_v2_model, \"record_temp.json\", \"audio_segments/\")\n",
+     "\n"
+    ]
+   }
+  ],
+  "metadata": {
+   "kernelspec": {
+    "display_name": "capstone",
+    "language": "python",
+    "name": "python3"
+   },
+   "language_info": {
+    "codemirror_mode": {
+     "name": "ipython",
+     "version": 3
+    },
+    "file_extension": ".py",
+    "mimetype": "text/x-python",
+    "name": "python",
+    "nbconvert_exporter": "python",
+    "pygments_lexer": "ipython3",
+    "version": "3.11.7"
+   }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 2
+ }
models/TTS_utils.py ADDED
@@ -0,0 +1,365 @@
+ import torch
+ from TTS.api import TTS
+ import time
+ import torchaudio
+ from TTS.tts.configs.xtts_config import XttsConfig
+ from TTS.tts.models.xtts import Xtts
+ import sounddevice as sd
+
+
+ def xtts_v2():
+     """
+     Load and return the XTTS v2 model.
+
+     This function initializes the XTTS v2 model from the 🐸TTS library.
+     The model is configured to use a GPU if available, otherwise it defaults to CPU.
+
+     Returns:
+         TTS: The initialized XTTS v2 model.
+
+     Example usage:
+         tts = xtts_v2()
+     """
+     # Get device
+     device = "cuda" if torch.cuda.is_available() else "cpu"
+
+     # List available 🐸TTS models
+     # print(TTS().list_models())
+
+     # Init TTS
+     tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
+
+
+     return tts
+
+ def load_manual_xtts_v2(config_path, checkpoint_path):
+     """
+     Load the XTTS v2 model manually with configuration and checkpoint files.
+
+     Args:
+         config_path (str): Path to the configuration file.
+             Example: "path/to/config.json"
+         checkpoint_path (str): Path to the checkpoint directory.
+             Example: "path/to/checkpoint/"
+
+     Returns:
+         Xtts: The loaded XTTS v2 model.
+
+     Example usage:
+         model = load_manual_xtts_v2("config.json", "checkpoint/")
+     """
+     print("Loading model...")
+     config = XttsConfig()
+     config.load_json(config_path)
+     model = Xtts.init_from_config(config)
+     model.load_checkpoint(config, checkpoint_dir=checkpoint_path, use_deepspeed=True)
+     model.cuda()
+
+     return model
+
+ import json
+ import concurrent.futures
+
+ # ----------------- StreamXTTSV2 -----------------
+ def get_text_order(json_path, num_elements):
+     """
+     Retrieve a specified number of text elements from a JSON file and update the file.
+
+     Args:
+         json_path (str): Path to the JSON file.
+             Example: "path/to/data.json"
+         num_elements (int): Number of elements to retrieve.
+             Example: 3
+
+     Returns:
+         list: A list of tuples containing text, order, original_path, path_to_save, and language.
+
+     Example usage:
+         text_order = get_text_order("data.json", 3)
+     """
+     with open(json_path) as f:
+         data = json.load(f)
+     # check if the data is empty
+     if not data['text']:
+         return "No more text to process"
+     if len(data['text']) < num_elements:
+         num_elements = len(data['text'])
+     text = data['text'][:num_elements]
+     order = data['order'][:num_elements]
+     original_path = data['original_path'][:num_elements]
+     path_to_save = data['path_to_save'][:num_elements]
+     language = data['language'][:num_elements]
+     # remove the first elements
+     data['text'] = data['text'][num_elements:]
+     data['order'] = data['order'][num_elements:]
+     data['original_path'] = data['original_path'][num_elements:]
+     data['path_to_save'] = data['path_to_save'][num_elements:]
+     data['language'] = data['language'][num_elements:]
+     data['original_text'] = data['original_text'][num_elements:]
+     # write the data back to the file
+     with open(json_path, 'w') as f:
+         json.dump(data, f)
+     # return an array of tuples of text and order metadata
+     result = [i for i in zip(text, order, original_path, path_to_save, language)]
+     return result
+
+ def append_text_order(json_path, text, order, original_path, path_to_save, language, original_text=None):
+     """
+     Append a text order to a JSON file.
+
+     Args:
+         json_path (str): Path to the JSON file.
+             Example: "path/to/data.json"
+         text (str): The text to append.
+             Example: "Hello, world!"
+         order (int): The order index.
+             Example: 1
+         original_path (str): Path to the original file.
+             Example: "path/to/original.wav"
+         path_to_save (str): Path to save the processed file.
+             Example: "path/to/save.wav"
+         language (str): Language of the text.
+             Example: "en"
+         original_text (str, optional): The original text if available.
+             Example: "Hola, mundo!"
+
+     Example usage:
+         append_text_order("data.json", "Hello, world!", 1, "original.wav", "save.wav", "en", "Hola, mundo!")
+     """
+     with open(json_path) as f:
+         data = json.load(f)
+     data['text'].append(text)
+     data['order'].append(order)
+     data['original_path'].append(original_path)
+     data['path_to_save'].append(path_to_save)
+     data['language'].append(language)
+     data['original_text'].append(original_text)
+     with open(json_path, 'w') as f:
+         json.dump(data, f)
+ # ----------------- StreamXTTSV2 -----------------
+ class StreamXTTSV2:
+     """
+     A class to handle streaming TTS using the XTTS v2 model.
+
+     Args:
+         model (Xtts): The XTTS v2 model.
+         sample_rate (int, optional): The sample rate for audio playback. Default is 24000.
+         buffer_size (int, optional): The buffer size for audio playback. Default is 2.
+     """
+     def __init__(self, model, sample_rate=24000, buffer_size=2):
+         self.model = model
+         #self.gpt_cond_latent = gpt_cond_latent
+         #self.speaker_embedding = speaker_embedding
+         self.sample_rate = sample_rate
+         self.buffer_size = buffer_size
+         self.speed = 0.95
+         self.stream_chunk_size = 40
+         self.buffer = torch.Tensor().to('cpu')
+         self.chunk_save = torch.Tensor().to('cpu')
+         self.is_playing = False
+         self.tasks_order = []
+         self.order = 0
+         self.initial = True
+
+     def chunk_callback(self, chunk, i, output_dir, order):
+         """
+         Callback function to handle each chunk of audio during streaming.
+
+         Args:
+             chunk (torch.Tensor): The audio chunk.
+                 Example: tensor([0.1, 0.2, 0.3])
+             i (int): The chunk index.
+                 Example: 1
+             output_dir (str): Directory to save the chunk.
+                 Example: "output/"
+             order (int): The order index.
+                 Example: 1
+         """
+         # Accumulate chunk into buffer
+         self.buffer = torch.cat((self.buffer, chunk.squeeze().to('cpu')), dim=-1)
+         self.chunk_save = torch.cat((self.chunk_save, chunk.squeeze().to('cpu')), dim=-1)
+         chunk_filename = output_dir + f"chunk_{i}_{order}.wav"
+         print(self.sample_rate)
+         torchaudio.save(chunk_filename, self.chunk_save.unsqueeze(0), self.sample_rate)
+         print(f"Chunk saved as {chunk_filename}")
+         self.chunk_save = torch.Tensor().to('cpu')
+
+         # Check if buffer has enough chunks to start playing
+         if not self.is_playing and len(self.buffer) >= self.buffer_size:
+             self.start_playback()
+
+     def start_playback(self):
+         """Start audio playback."""
+         self.is_playing = True
+         sd.play(self.buffer.numpy(), self.sample_rate, blocking=False)
+         self.buffer = torch.Tensor().to('cpu')  # Reset buffer after starting playback
+
+     def play(self, chunks, output_dir, path_to_save, order):
+         """
+         Play the audio chunks and save the complete audio.
+
+         Args:
+             chunks (list): List of audio chunks.
+                 Example: [tensor([0.1, 0.2, 0.3]), tensor([0.4, 0.5, 0.6])]
+             output_dir (str): Directory to save the chunks.
+                 Example: "output/"
+             path_to_save (str): Path to save the complete audio file.
+                 Example: "output/complete.wav"
+             order (int): The order index.
+                 Example: 1
+         """
+         t0 = time.time()
+
+
+         for i, chunk in enumerate(chunks):
+             #print(chunk)
+             if i == 0:
+                 print(f"Time to first chunk: {time.time() - t0}")
+             print(f"Received chunk {i} of audio length {chunk.shape[-1]}")
+             self.chunk_callback(chunk, i, output_dir, order)
+
+         # Ensure all remaining audio is played
+         while sd.get_stream().active:
+             time.sleep(0.1)
+         if len(self.buffer) > 0:
+             sd.play(self.buffer.numpy(), self.sample_rate, blocking=True)
+
+         # Save the complete audio to a file
+         torchaudio.save(path_to_save, self.buffer.unsqueeze(0), self.sample_rate)
+         print(f"Total audio length: {self.buffer.shape[-1]}")
+         print("Audio playback finished.")
+         #self.order += 1
+
+
+     def inference_and_play(self, json_path, output_dir):
+         """
+         Perform inference and play the generated audio.
+
+         Args:
+             json_path (str): Path to the JSON file containing text orders.
+                 Example: "path/to/data.json"
+             output_dir (str): Directory to save the chunks.
+                 Example: "output/"
+         """
+         print("Inference...")
+
+
+
+         self.texts = get_text_order(json_path, 3)
+
+         if self.texts == "No more text to process":
+             print("No more text to process")
+             return
+         if self.texts == "Not enough text to process":
+             print("Not enough text to process")
+             return
+         # get_text_order returns a list of (text, order, paths, language) tuples
+         if self.texts is not None:
+             #print(self.texts)
+             self.gpt_cond_latent, self.speaker_embedding = self.model.get_conditioning_latents(audio_path=[self.texts[0][2]])
+             path_to_save = self.texts[0][3]
+             #print(self.gpt_cond_latent, self.speaker_embedding)
+             #print(self.texts)
+
+             with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
+                 #text, order = get_text_order(texts)
+                 #print(text, order)
+                 futures = []
+                 print(self.texts)
+
+                 for text, i, path_a, path_s, lang in self.texts:
+                     #print(text, i, path)
+                     print(f"Processing text {i}: {text}")
+                     print(f"Processing text {i}: {lang}")
+                     future = executor.submit(self.model.inference_stream, text, lang, self.gpt_cond_latent, self.speaker_embedding, stream_chunk_size=self.stream_chunk_size, speed=self.speed)
+                     #print(future.result())
+                     futures.append(future)
+
+
+                 for future, text in zip(futures, self.texts):
+                     #print(text)
+                     chunks = future.result()
+                     print(text[1])
+                     self.play(chunks, output_dir, path_to_save, text[1])
+                     self.buffer = torch.Tensor().to('cpu')
+
+         self.inference_and_play(json_path, output_dir)
+
+
+ def stream_prod(model, json_path, directory_path):
+     """
+     Stream production function for XTTS v2.
+
+     Args:
+         model (Xtts): The XTTS v2 model.
+             Example: model = load_manual_xtts_v2("config.json", "checkpoint/")
+         json_path (str): Path to the JSON file containing text orders.
+             Example: "path/to/data.json"
+         directory_path (str): Directory to save the chunks.
+             Example: "output/"
+     """
+     streamer = StreamXTTSV2(model, buffer_size=2)
+     results = streamer.inference_and_play(json_path, directory_path)
+     if results is None:
+         time.sleep(3)
+         stream_prod(model, json_path, directory_path)
+     return "Streaming finished"
+
+
+ def just_inference(model, original_path, output_dir, text, lang, order):
+     """
+     Perform inference and save the generated audio.
+
+     Args:
+         model (Xtts): The XTTS v2 model.
+             Example: model = load_manual_xtts_v2("config.json", "checkpoint/")
+         original_path (str): Path to the original audio file.
+             Example: "path/to/original.wav"
+         output_dir (str): Directory to save the generated audio file.
+             Example: "output/"
+         text (str): The text to be synthesized.
+             Example: "Hello, world!"
+         lang (str): The language of the text.
+             Example: "en"
+         order (int): The order index.
+             Example: 1
+
+     Returns:
+         tuple: A tuple containing the path to the saved audio file and the time to first chunk.
+             Example: ("output/complete.wav", 1.23)
+     """
+     print("Inference...")
+     path_to_save = output_dir
+     t0 = time.time()
+     gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=[original_path])
+
+     chunks = model.inference_stream(
+         text,
+         lang,
+         gpt_cond_latent,
+         speaker_embedding,
+         stream_chunk_size=15,
+         speed=0.95
+         #temperature=0.1,
+         #enable_text_splitting=True,
+     )
+     full_audio = torch.Tensor().to('cpu')
+     wav_chunks = []
+     for i, chunk in enumerate(chunks):
+         if i == 0:
+             # Measure latency at the first chunk delivered by the stream.
+             time_to_first_chunk = time.time() - t0
+             print(f"Time to first chunk: {time_to_first_chunk}")
+         print(f"Received chunk {i} of audio length {chunk.shape[-1]}")
+         wav_chunks.append(chunk)
+         full_audio = torch.cat((full_audio, chunk.squeeze().to('cpu')), dim=-1)
+
+
+
+     # Save the complete audio to a file
+     torchaudio.save(path_to_save, full_audio.unsqueeze(0), 24000)
+
+     print("Inference finished")
+     return path_to_save, time_to_first_chunk
+
+
+
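
get_text_order and append_text_order treat the JSON file as a simple FIFO work queue. A minimal sketch of initializing one and pushing a job, matching the schema used by record_temp.json below (the segment paths are placeholders):

    import json
    from models.TTS_utils import append_text_order, get_text_order

    # Create an empty queue with every key the helpers expect.
    queue = {"text": [], "order": [], "original_path": [], "path_to_save": [],
             "language": [], "original_text": []}
    with open("record_temp.json", "w") as f:
        json.dump(queue, f)

    # Enqueue one translated sentence for synthesis, then pop up to 3 jobs.
    append_text_order("record_temp.json", "Hola, ¿cómo estás?", 0,
                      "audio_segments/segment_0.wav", "results/segment_0.wav",
                      "es", "hello how are you")
    jobs = get_text_order("record_temp.json", 3)  # (text, order, original_path, path_to_save, language) tuples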
models/__init__.py ADDED
File without changes
models/__pycache__/TTS_utils.cpython-311.pyc ADDED
Binary file (18.2 kB).
 
models/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (173 Bytes).
 
models/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (155 Bytes).
 
models/__pycache__/es_fastconformer.cpython-311.pyc ADDED
Binary file (1.98 kB).
 
models/__pycache__/nllb.cpython-311.pyc ADDED
Binary file (3.84 kB).
 
models/__pycache__/nllb.cpython-38.pyc ADDED
Binary file (2.49 kB).
 
models/__pycache__/noise_red.cpython-311.pyc ADDED
Binary file (1.3 kB).
 
models/__pycache__/parakeet.cpython-311.pyc ADDED
Binary file (2.08 kB).
 
models/__pycache__/parakeet.cpython-38.pyc ADDED
Binary file (1.69 kB).
 
models/es_fastconformer.py ADDED
@@ -0,0 +1,37 @@
+ import nemo.collections.asr as nemo_asr
+ import torch
+
+ def stt_es_model():
+     """
+     Load and return the pre-trained Spanish ASR model.
+
+     This function loads the pre-trained EncDecCTCModelBPE model from NVIDIA's NeMo collection.
+     The model is configured to use a GPU if available, otherwise it defaults to CPU.
+
+     Returns:
+         nemo_asr.models.EncDecCTCModelBPE: The loaded ASR model.
+     Example usage:
+         asr_model = stt_es_model()
+     """
+     # Load the pre-trained model
+     asr_model = nemo_asr.models.EncDecCTCModelBPE.from_pretrained("nvidia/stt_es_fastconformer_hybrid_large_pc")
+     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+     asr_model = asr_model.to(device)
+     return asr_model
+
+ def stt_es_process(asr_model, audio_file):
+     """
+     Transcribe an audio file using the given ASR model.
+
+     Args:
+         asr_model (nemo_asr.models.EncDecCTCModelBPE): The ASR model to use for transcription.
+             Example: asr_model = stt_es_model()
+         audio_file (str): Path to the audio file to be transcribed.
+             Example: "path/to/audio_file.wav"
+
+     Returns:
+         list: A list containing the transcribed text.
+             Example: ["transcribed text"]
+     """
+     text = asr_model.transcribe(paths2audio_files=[audio_file], batch_size=1)
+     return text
models/nllb.py ADDED
@@ -0,0 +1,72 @@
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+ import torch
+
+ def nllb():
+     """
+     Load and return the NLLB (No Language Left Behind) model and tokenizer.
+
+     This function loads the NLLB-200-distilled-1.3B model and tokenizer from Hugging Face's Transformers library.
+     The model is pinned to CPU in this configuration (the GPU line is left commented out below).
+
+     Returns:
+         tuple: A tuple containing the loaded model and tokenizer.
+             - model (transformers.AutoModelForSeq2SeqLM): The loaded NLLB model.
+             - tokenizer (transformers.AutoTokenizer): The loaded tokenizer.
+
+     Example usage:
+         model, tokenizer = nllb()
+     """
+     #device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+     device = torch.device("cpu")
+     # Load the tokenizer and model
+     tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-1.3B")
+     model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-1.3B").to(device)
+     # write "done" to the file named status.txt so other processes know loading finished
+     with open("status.txt", 'w') as f:
+         f.write("done")
+     return model, tokenizer
+
+ def nllb_translate(model, tokenizer, article, language):
+     """
+     Translate an article using the NLLB model and tokenizer.
+
+     Args:
+         model (transformers.AutoModelForSeq2SeqLM): The NLLB model to use for translation.
+             Example: model, tokenizer = nllb()
+         tokenizer (transformers.AutoTokenizer): The tokenizer to use with the NLLB model.
+             Example: model, tokenizer = nllb()
+         article (str): The article text to be translated.
+             Example: "This is a sample article."
+         language (str): The target language code. Must be either 'es' (Spanish) or 'en' (English).
+             Example: "es"
+
+     Returns:
+         str: The translated text.
+             Example: "Este es un artículo de muestra."
+     """
+     try:
+         # Tokenize the text
+         inputs = tokenizer(article, return_tensors="pt")
+
+         # Move the tokenized inputs to the same device as the model
+         inputs = {k: v.to(model.device) for k, v in inputs.items()}
+
+         if language == "es":
+             translated_tokens = model.generate(
+                 **inputs, forced_bos_token_id=tokenizer.lang_code_to_id["spa_Latn"], max_length=30
+             )
+         elif language == "en":
+             translated_tokens = model.generate(
+                 **inputs, forced_bos_token_id=tokenizer.lang_code_to_id["eng_Latn"], max_length=30
+             )
+         else:
+             raise ValueError("Unsupported language. Use 'es' or 'en'.")
+
+         # Decode the translation
+         text = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
+         return text
+
+     except Exception as e:
+         print(f"Error during translation: {e}")
+         return "Translation failed"
+
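
End to end, translation is two calls; note the target is passed as a code ('es'/'en'), not a language name:

    from models.nllb import nllb, nllb_translate

    model, tokenizer = nllb()  # loads facebook/nllb-200-distilled-1.3B on CPU
    print(nllb_translate(model, tokenizer, "This is a sample article.", "es"))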
models/noise_red.py ADDED
@@ -0,0 +1,28 @@
+ from scipy.io import wavfile
+ import noisereduce as nr
+
+ def noise_reduction(path, new_path):
+     """
+     Perform noise reduction on an audio file and save the output.
+
+     This function reads an audio file from the given path, performs noise reduction using the noisereduce library,
+     and saves the processed audio to a new file.
+
+     Args:
+         path (str): Path to the input audio file.
+             Example: "path/to/input_audio.wav"
+         new_path (str): Path to save the processed audio file.
+             Example: "path/to/output_audio.wav"
+
+     Returns:
+         None
+
+     Example usage:
+         noise_reduction("input.wav", "output.wav")
+     """
+     # Load the data
+     rate, data = wavfile.read(path)
+     # Perform noise reduction
+     reduced_noise = nr.reduce_noise(y=data, sr=rate)
+     wavfile.write(new_path, rate, reduced_noise)
+     print("Noise reduction done!")
models/parakeet.py ADDED
@@ -0,0 +1,43 @@
+ import nemo.collections.asr as nemo_asr
+ import torch
+
+
+ def parakeet_ctc_model():
+     """
+     Load and return the pre-trained Parakeet CTC model.
+
+     This function loads the pre-trained EncDecCTCModelBPE model from NVIDIA's NeMo collection.
+     The model is configured to use a GPU if available, otherwise it defaults to CPU.
+
+     Returns:
+         nemo_asr.models.EncDecCTCModelBPE: The loaded ASR model.
+
+     Example usage:
+         asr_model = parakeet_ctc_model()
+     """
+     # Load the pre-trained model
+     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+     asr_model = nemo_asr.models.EncDecCTCModelBPE.from_pretrained("nvidia/parakeet-ctc-0.6b")
+     asr_model = asr_model.to(device)
+     return asr_model
+
+ def parakeet_ctc_process(asr_model, audio_file):
+     """
+     Transcribe an audio file using the given Parakeet CTC ASR model.
+
+     Args:
+         asr_model (nemo_asr.models.EncDecCTCModelBPE): The ASR model to use for transcription.
+             Example: asr_model = parakeet_ctc_model()
+         audio_file (str): Path to the audio file to be transcribed.
+             Example: "path/to/audio_file.wav"
+
+     Returns:
+         list: A list containing the transcribed text.
+             Example: ["transcribed text"]
+
+     Example usage:
+         text = parakeet_ctc_process(asr_model, "path/to/audio_file.wav")
+     """
+     text = asr_model.transcribe(paths2audio_files=[audio_file], batch_size=1)
+
+     return text
models/status.txt ADDED
@@ -0,0 +1 @@
+ done
record_per.json ADDED
@@ -0,0 +1 @@
+ {"text": ["Hola, \u00bf c\u00f3mo est\u00e1s?", "Est\u00e1 bien, intent\u00e9moslo de nuevo.", "As\u00ed que este modelo debe capturar esto y traducir inmediatamente", "As\u00ed que este modelo debe capturar esto y traducir inmediatamente", "Hola, \u00bf qu\u00e9 pasa?", "Hola, \u00bf qu\u00e9 pasa?", "Est\u00e1 bien, est\u00e1 bien, est\u00e1 bien, est\u00e1 bien, est\u00e1 bien, est\u00e1 bien, est\u00e1 bien, est\u00e1 bien."], "original_path": ["audio-transcription/uploads/5\\audio.wav", "audio-transcription/uploads/1\\audio.wav", "audio-transcription/uploads/2\\audio.wav", "audio-transcription/uploads/3\\audio.wav", "audio-transcription/uploads/4\\audio.wav", "audio-transcription/uploads/5\\audio.wav", "audio-transcription/uploads/6\\audio.wav"], "order": [0, 0, 0, 0, 0, 0, 0], "path_to_save": ["results", "results", "results", "results", "results/", "results/", "results/"], "language": ["es", "es", "es", "es", "es", "es", "es"], "original_text": ["hello how are you", "okay let's try it again", " so this model should capture this and translate right away", " so this model should capture this and translate right away", "hello", "hello", "okay okay okay okay okay okay okay okay okay okay okay okay okay okay"]}
record_temp.json ADDED
@@ -0,0 +1 @@
+ {"text": ["Est\u00e1 bien, intent\u00e9moslo de nuevo.", "As\u00ed que este modelo debe capturar esto y traducir inmediatamente", "As\u00ed que este modelo debe capturar esto y traducir inmediatamente", "Hola, \u00bf qu\u00e9 pasa?", "Hola, \u00bf qu\u00e9 pasa?", "Est\u00e1 bien, est\u00e1 bien, est\u00e1 bien, est\u00e1 bien, est\u00e1 bien, est\u00e1 bien, est\u00e1 bien, est\u00e1 bien."], "original_path": ["audio-transcription/uploads/1\\audio.wav", "audio-transcription/uploads/2\\audio.wav", "audio-transcription/uploads/3\\audio.wav", "audio-transcription/uploads/4\\audio.wav", "audio-transcription/uploads/5\\audio.wav", "audio-transcription/uploads/6\\audio.wav"], "order": [0, 0, 0, 0, 0, 0], "path_to_save": ["results/", "results/", "results/", "results/", "results/", "results/"], "language": ["es", "es", "es", "es", "es", "es"], "original_text": ["okay let's try it again", " so this model should capture this and translate right away", " so this model should capture this and translate right away", "hello", "hello", "okay okay okay okay okay okay okay okay okay okay okay okay okay okay"]}
requirements.txt ADDED
@@ -0,0 +1,25 @@
+ language_tool_python
+ noisereduce
+ numpy
+ pandas
+ pydub
+ #scikit_learn==1.4.0
+ scipy
+ speechbrain
+ webrtcvad==2.0.10
+ deepspeed==0.14.0
+ transformers==4.40.2
+ hydra-core
+ pytorch_lightning
+ streamlit
+ sounddevice
+ playsound
+ streamlit-webrtc
+ pybind11
+ fasttext
+ Cython
+ # nemo_toolkit[all]==1.21
+ fastapi
+ uvicorn
+ pydantic==1.10.9
+ spacy
results/readme ADDED
File without changes
run.py ADDED
@@ -0,0 +1,73 @@
+ import threading
+ import argparse
+ import subprocess
+ from models.nllb import nllb
+ from models.parakeet import parakeet_ctc_model
+ from models.es_fastconformer import stt_es_model
+ from models.TTS_utils import load_manual_xtts_v2
+ from stream_VAD import stream
+
+ def main(xtts_path, xtts_config_path, language="en", record_temp="record_temp.json", record_per="record_per.json", record_path="audio_segments/", result_dir="results", segments_dir="audio_segments"):
+     """
+     Main function to run the ASR stream and initiate the TTS stream production.
+
+     Args:
+         xtts_path (str): Path to the xtts model file.
+             Example: "path/to/xtts_model.pt"
+         xtts_config_path (str): Path to the xtts configuration file.
+             Example: "path/to/xtts_config.json"
+         language (str, optional): Language for the ASR model. Must be either 'en' for English or 'es' for Spanish.
+             Default: 'en'
+             Example: "en"
+         record_temp (str, optional): Path to the temporary record JSON file.
+             Default: "record_temp.json"
+             Example: "path/to/record_temp.json"
+         record_per (str, optional): Path to the periodic record JSON file.
+             Default: "record_per.json"
+             Example: "path/to/record_per.json"
+         record_path (str, optional): Path to the directory where audio segments are recorded.
+             Default: "audio_segments/"
+             Example: "path/to/audio_segments/"
+         result_dir (str, optional): Path to the directory where results are stored.
+             Default: "results"
+             Example: "path/to/results"
+         segments_dir (str, optional): Path to the directory where audio segments are stored.
+             Default: "audio_segments"
+             Example: "path/to/audio_segments"
+     """
+     model_nllb, tokenizer_nllb = nllb()
+
+     if language == "en":
+         asr = parakeet_ctc_model()
+         stream_thread = threading.Thread(target=stream, args=(asr, model_nllb, tokenizer_nllb, "english", "spanish", record_temp, record_per, result_dir, segments_dir))
+
+     elif language == "es":
+         asr = stt_es_model()
+         stream_thread = threading.Thread(target=stream, args=(asr, model_nllb, tokenizer_nllb, "spanish", "english", record_temp, record_per, result_dir, segments_dir))
+
+     else:
+         raise ValueError("Language not supported")
+
+     # Start the stream thread
+     stream_thread.start()
+
+     # Call the other script to start stream_prod
+     subprocess.Popen(['python', 'stream_prod_main.py', xtts_path, xtts_config_path, record_temp, record_path])
+
+     # Wait for the stream thread to complete
+     stream_thread.join()
+
+ if __name__ == "__main__":
+     parser = argparse.ArgumentParser(description="Run stream and initiate stream_prod.")
+     parser.add_argument("xtts_path", type=str, help="Path to the xtts model.")
+     parser.add_argument("xtts_config_path", type=str, help="Path to the xtts config.")
+     parser.add_argument("language", type=str, choices=["en", "es"], help="Language (en or es).")
+     parser.add_argument("--record_temp", type=str, default="record_temp.json", help="Path to the record temp file.")
+     parser.add_argument("--record_per", type=str, default="record_per.json", help="Path to the record per file.")
+     parser.add_argument("--record_path", type=str, default="audio_segments/", help="Path to the record directory.")
+     parser.add_argument("--result_dir", type=str, default="results", help="Path to the result directory.")
+     parser.add_argument("--segments_dir", type=str, default="audio_segments", help="Path to the segments directory.")
+
+     args = parser.parse_args()
+
+     main(args.xtts_path, args.xtts_config_path, args.language, args.record_temp, args.record_per, args.record_path, args.result_dir, args.segments_dir)
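run.py is the entry point: it loads the NLLB translator, picks the ASR model for the chosen language, runs the VAD/ASR/translation stream in a thread, and launches stream_prod_main.py as a subprocess for TTS. A minimal usage sketch, with placeholder checkpoint paths:

    # Command line: python run.py path/to/xtts_model.pt path/to/xtts_config.json en
    from run import main

    main(
        xtts_path="path/to/xtts_model.pt",            # placeholder XTTS checkpoint
        xtts_config_path="path/to/xtts_config.json",  # placeholder XTTS config
        language="en",                                # "en" or "es"
    )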
setup.sh ADDED
@@ -0,0 +1,19 @@
+ #!/bin/bash
+ git clone https://github.com/coqui-ai/TTS/ && \
+ cd TTS && \
+ make install
+ cd ..  # return to the repo root so the remaining steps use this repo's files
+
+ pip install PyAudio-0.2.11-cp37-cp37m-win_amd64.whl  # Windows-only wheel; on Linux install portaudio19-dev and run `pip install pyaudio` instead
+ pip install pybind11
+ pip install wheel setuptools pip --upgrade
+ pip install fasttext
+ apt-get update && apt-get install -y libsndfile1 ffmpeg
+ pip install Cython
+ # pip install nemo_toolkit['all']
+
+ # show the version of nemo in python
+ python -c "import nemo; print(nemo.__version__)"
+ pip install torch==2.2.2+cu121 -f https://download.pytorch.org/whl/cu121/torch_stable.html
+ pip install torchaudio==2.2.2+cu121 -f https://download.pytorch.org/whl/cu121/torch_stable.html
+ pip install -r requirements.txt
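After setup.sh completes, a quick sanity check that the CUDA build of torch installed above is active (standard torch API only, nothing repo-specific):

    import torch

    # Expect "2.2.2+cu121" and True on a correctly provisioned GPU machine.
    print(torch.__version__)
    print(torch.cuda.is_available())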
status.txt ADDED
@@ -0,0 +1 @@
+ done
stream_VAD.py ADDED
@@ -0,0 +1,246 @@
+ import collections
+ import contextlib
+ import wave
+ import webrtcvad
+ import pyaudio
+ import os
+ import librosa
+ import numpy as np
+ from models.nllb import nllb_translate
+ from models.TTS_utils import append_text_order
+ from models.parakeet import parakeet_ctc_process
+ from models.es_fastconformer import stt_es_process
+ from concurrent.futures import ThreadPoolExecutor
+ import time
+ from models.noise_red import noise_reduction
+
+ class Frame(object):
+     """
+     Represents a "frame" of audio data.
+
+     Args:
+         bytes (bytes): The audio data.
+         timestamp (float): The timestamp of the frame.
+         duration (float): The duration of the frame.
+     """
+     def __init__(self, bytes, timestamp, duration):
+         self.bytes = bytes
+         self.timestamp = timestamp
+         self.duration = duration
+
+ def read_audio(stream, frame_duration_ms, rate):
+     """
+     Generates audio frames from the input stream.
+
+     Args:
+         stream (pyaudio.Stream): The audio stream.
+         frame_duration_ms (int): Duration of each frame in milliseconds.
+         rate (int): The sample rate of the audio.
+
+     Yields:
+         bytes: The audio frames.
+     """
+     frames_per_buffer = int(rate * frame_duration_ms / 1000)
+     while True:
+         yield stream.read(frames_per_buffer)
+
+ def vad_collector(sample_rate, frame_duration_ms, padding_duration_ms, vad, frames):
+     """
+     Filters out non-voiced audio frames.
+
+     Args:
+         sample_rate (int): The sample rate of the audio.
+         frame_duration_ms (int): Duration of each frame in milliseconds.
+         padding_duration_ms (int): Duration of padding in milliseconds.
+         vad (webrtcvad.Vad): The VAD object.
+         frames (generator): A generator yielding audio frames.
+
+     Yields:
+         bytes: Voiced audio frames.
+     """
+     num_padding_frames = int(padding_duration_ms / frame_duration_ms)
+     ring_buffer = collections.deque(maxlen=num_padding_frames)
+     triggered = False
+
+     voiced_frames = []
+     for frame in frames:
+         is_speech = vad.is_speech(frame.bytes, sample_rate)
+
+         if not triggered:
+             ring_buffer.append((frame, is_speech))
+             num_voiced = len([f for f, speech in ring_buffer if speech])
+             # Open a segment once more than 90% of the buffered frames are voiced.
+             if num_voiced > 0.9 * ring_buffer.maxlen:
+                 triggered = True
+                 voiced_frames.extend(f for f, speech in ring_buffer)
+                 ring_buffer.clear()
+         else:
+             voiced_frames.append(frame)
+             ring_buffer.append((frame, is_speech))
+             num_unvoiced = len([f for f, speech in ring_buffer if not speech])
+             # Close the segment once more than 90% of the buffered frames are silence.
+             if num_unvoiced > 0.9 * ring_buffer.maxlen:
+                 yield b''.join([f.bytes for f in voiced_frames])
+                 ring_buffer.clear()
+                 voiced_frames = []
+                 triggered = False
+     if voiced_frames:
+         yield b''.join([f.bytes for f in voiced_frames])
+
+
+ def is_segment_empty(file_path):
+     """
+     Check if the audio segment is empty.
+
+     Args:
+         file_path (str): Path to the audio file.
+
+     Returns:
+         bool: True if the segment is empty, False otherwise.
+     """
+     audio, _ = librosa.load(file_path)
+     rms = librosa.feature.rms(y=audio)  # frame-wise root-mean-square energy
+     rms_mean = np.mean(rms)
+     print("Segment RMS:", rms_mean)
+
+     # Empirically chosen silence threshold.
+     return rms_mean < 0.015
+
+
+ def process_segment(asr_model, model_nllb, tokenizer_nllb, path_segments, path_results, target_lang, order, json_path_temp, json_path_record):
+     """
+     Process an audio segment: noise reduction, transcription, translation, and append results.
+
+     Args:
+         asr_model: The ASR model for transcription.
+         model_nllb: The NLLB model for translation.
+         tokenizer_nllb: The tokenizer for the NLLB model.
+         path_segments (str): Path to the audio segment.
+         path_results (str): Path to save the results.
+         target_lang (str): Target language for translation.
+         order (int): Order index of the segment.
+         json_path_temp (str): Path to the temporary JSON file.
+         json_path_record (str): Path to the record JSON file.
+     """
+     print("Processing segment...")
+     if is_segment_empty(path_segments):
+         print("No speech detected.")
+         # Remove the empty segment.
+         os.remove(path_segments)
+         return
+     # Noise reduction
+     start_time = time.time()
+     noise_reduction(path_segments, path_segments)
+     print("Noise removed. Time:", time.time() - start_time)
+
+     # Transcription
+     transcription = transcribe(asr_model, path_segments, target_lang)
+     #if not transcription.strip():
+     #    print("No speech detected.")
+     #    return
+
+     # Translation
+     print("Translating...")
+     translation = translate(model_nllb, tokenizer_nllb, transcription, target_lang)
+
+     # Text-to-speech is handled by the separate stream_prod process;
+     # here we only append the result to the two JSON records.
+     # process_tts(tts_model, translation, path_segments, target_lang, path_results)
+     append_text_order(json_path_temp, translation, order, path_segments, path_results, "es" if target_lang == "spanish" else "en", transcription)
+     append_text_order(json_path_record, translation, order, path_segments, path_results, "es" if target_lang == "spanish" else "en", transcription)
+
+ def transcribe(asr_model, path_segments, target_lang):
+     """
+     Transcribe an audio segment using the specified ASR model.
+
+     Args:
+         asr_model: The ASR model for transcription.
+         path_segments (str): Path to the audio segment.
+         target_lang (str): Target language for transcription.
+
+     Returns:
+         str: The transcription of the audio segment.
+     """
+     start_time = time.time()
+     # Dispatch is keyed by the *translation* target: if we translate into
+     # Spanish the incoming speech is English (parakeet), and vice versa.
+     transcription_func = {
+         "spanish": parakeet_ctc_process,
+         "english": stt_es_process
+     }[target_lang]
+     transcription = transcription_func(asr_model, path_segments)
+     print("Transcription:", transcription[0])
+     print("Transcription time:", time.time() - start_time)
+     return transcription[0]
+
+ def translate(model_nllb, tokenizer_nllb, text, target_lang):
+     """
+     Translate text using the specified NLLB model and tokenizer.
+
+     Args:
+         model_nllb: The NLLB model for translation.
+         tokenizer_nllb: The tokenizer for the NLLB model.
+         text (str): The text to translate.
+         target_lang (str): Target language for translation.
+
+     Returns:
+         str: The translated text.
+     """
+     print("Processing translation...")
+     start_time = time.time()
+     translation = nllb_translate(model_nllb, tokenizer_nllb, text, target_lang)
+     print("Translation:", translation)
+     print("Translation time:", time.time() - start_time)
+     return translation
+
+
+ def stream(asr_model, model_nllb, tokenizer_nllb, source_lang, target_lang, json_file_temp, json_file_record, result_dir="results", segments_dir="audio_segments"):
+     """
+     Stream audio input, process segments, and save the results.
+
+     Args:
+         asr_model: The ASR model for transcription.
+         model_nllb: The NLLB model for translation.
+         tokenizer_nllb: The tokenizer for the NLLB model.
+         source_lang (str): Source language of the audio.
+         target_lang (str): Target language for translation.
+         json_file_temp (str): Path to the temporary JSON file.
+         json_file_record (str): Path to the record JSON file.
+         result_dir (str, optional): Directory to save the results. Default is "results".
+         segments_dir (str, optional): Directory to save the audio segments. Default is "audio_segments".
+     """
+     FORMAT = pyaudio.paInt16
+     CHANNELS = 1
+     RATE = 16000
+     CHUNK_DURATION_MS = 30  # webrtcvad supports 10, 20 and 30 ms frames
+     PADDING_DURATION_MS = 300
+     vad = webrtcvad.Vad(1)  # aggressiveness 0 (least) to 3 (most)
+
+     audio = pyaudio.PyAudio()
+     stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=160)  # internal buffer; reads below pull 480-sample (30 ms) chunks
+     frames = read_audio(stream, CHUNK_DURATION_MS, RATE)
+     frames = (Frame(f, None, None) for f in frames)
+
+     if not os.path.exists(segments_dir):
+         os.makedirs(segments_dir)
+     if not os.path.exists(result_dir):
+         os.makedirs(result_dir)
+
+     executor = ThreadPoolExecutor(max_workers=2)  # Adjust the number of workers as per your requirement
+
+     for i, segment in enumerate(vad_collector(RATE, CHUNK_DURATION_MS, PADDING_DURATION_MS, vad, frames)):
+         path_segments = os.path.join(segments_dir, f"segment_{i}.wav")
+         path_results = os.path.join(result_dir, f"result_{i}.wav")
+         print(f"Writing {path_segments}...")
+         with contextlib.closing(wave.open(path_segments, 'wb')) as wf:
+             wf.setnchannels(CHANNELS)
+             wf.setsampwidth(audio.get_sample_size(FORMAT))
+             wf.setframerate(RATE)
+             wf.writeframes(segment)
+
+         executor.submit(process_segment, asr_model, model_nllb, tokenizer_nllb, path_segments, path_results, target_lang, i, json_file_temp, json_file_record)
+
+     # Cleanup (reached only if the frame generator is exhausted).
+     stream.stop_stream()
+     stream.close()
+     audio.terminate()
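For concreteness, the frame arithmetic implied by the constants in stream() (pure arithmetic, mirroring the code above):

    RATE = 16000
    CHUNK_DURATION_MS = 30
    PADDING_DURATION_MS = 300

    samples_per_frame = RATE * CHUNK_DURATION_MS // 1000           # 480 samples
    bytes_per_frame = samples_per_frame * 2                        # 960 bytes of 16-bit mono audio
    num_padding_frames = PADDING_DURATION_MS // CHUNK_DURATION_MS  # 10-frame ring buffer

    # vad_collector opens a segment only when num_voiced > 0.9 * 10 = 9,
    # i.e. when all 10 buffered frames are voiced, and closes it symmetrically.
    print(samples_per_frame, bytes_per_frame, num_padding_frames)

Raising the webrtcvad.Vad(1) aggressiveness toward 3 makes the detector stricter about what counts as speech, trading missed quiet speech for fewer false triggers.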