started link transcription
Browse files
- App/Transcription/TranscriptionRoutes.py +22 -9
- App/Transcription/Utils/audio_transcription.py +1 -1
- App/Worker.py +14 -1
- requirements.txt +1 -0
App/Transcription/TranscriptionRoutes.py
CHANGED
@@ -2,9 +2,10 @@ from fastapi import APIRouter, status, Form, UploadFile, File, Query, Background
|
|
2 |
from typing_extensions import Annotated
|
3 |
from .Schemas import UserDetails
|
4 |
from App import bot
|
5 |
-
import aiofiles
|
|
|
6 |
from celery.result import AsyncResult
|
7 |
-
from App.Worker import transcription_task
|
8 |
from App.Users.Model import User
|
9 |
from .Model import Transcriptions
|
10 |
from .Utils.fastapi_tasks import perform_background_task
|
@@ -16,6 +17,25 @@ from .Utils.fastapi_tasks import perform_background_task
|
|
16 |
transcription_router = APIRouter(tags=["User"])
|
17 |
|
18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
@transcription_router.post("/uploadfile/")
|
20 |
async def create_file(
|
21 |
background_tasks: BackgroundTasks,
|
@@ -42,13 +62,6 @@ async def create_file(
|
|
42 |
finally:
|
43 |
await file.close()
|
44 |
|
45 |
-
# telegram
|
46 |
-
# data = await bot.send_file(
|
47 |
-
# -1001925049183,
|
48 |
-
# file_size=file.size,
|
49 |
-
# caption=file.filename,
|
50 |
-
# file=f"./{file.filename}",
|
51 |
-
# )
|
52 |
# celery task
|
53 |
task = transcription_task.delay(file.filename, model)
|
54 |
|
|
|
2 |
from typing_extensions import Annotated
|
3 |
from .Schemas import UserDetails
|
4 |
from App import bot
|
5 |
+
import aiofiles, os
|
6 |
+
import tempfile
|
7 |
from celery.result import AsyncResult
|
8 |
+
from App.Worker import transcription_task, downloadfile
|
9 |
from App.Users.Model import User
|
10 |
from .Model import Transcriptions
|
11 |
from .Utils.fastapi_tasks import perform_background_task
|
|
|
17 |
transcription_router = APIRouter(tags=["User"])
|
18 |
|
19 |
|
20 |
+
@transcription_router.get("/download-audio")
async def download_audio(url: str):
    """Queue a background download of the audio found at ``url``.

    Assembles the yt-dlp options (best available audio stream, extracted
    to MP3 by the FFmpegExtractAudio post-processor, written to the system
    temp directory and named after the media title), submits them to the
    ``downloadfile`` Celery task and returns the id of the queued task.
    """
    extract_mp3 = {
        "key": "FFmpegExtractAudio",
        "preferredcodec": "mp3",
        "preferredquality": "192",
    }
    output_template = os.path.join(tempfile.gettempdir(), "%(title)s.%(ext)s")
    ydl_opts = {
        "format": "bestaudio/best",
        "postprocessors": [extract_mp3],
        "outtmpl": output_template,
    }

    queued = downloadfile.delay(url, ydl_opts)
    return {"task_id": queued.id}
|
37 |
+
|
38 |
+
|
39 |
@transcription_router.post("/uploadfile/")
|
40 |
async def create_file(
|
41 |
background_tasks: BackgroundTasks,
|
|
|
62 |
finally:
|
63 |
await file.close()
|
64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
# celery task
|
66 |
task = transcription_task.delay(file.filename, model)
|
67 |
|
App/Transcription/Utils/audio_transcription.py
CHANGED
@@ -9,7 +9,7 @@ model_size = "tiny"
|
|
9 |
def transcribe_file(state, file_path, model_size="tiny"):
|
10 |
result = {}
|
11 |
metadata = TranscriptionMetadata()
|
12 |
-
metadata.logs = "STARTING"
|
13 |
state.update_state(
|
14 |
state="PROGRESS",
|
15 |
meta=metadata.dict(),
|
|
|
9 |
def transcribe_file(state, file_path, model_size="tiny"):
|
10 |
result = {}
|
11 |
metadata = TranscriptionMetadata()
|
12 |
+
metadata.logs = "Transcription STARTING"
|
13 |
state.update_state(
|
14 |
state="PROGRESS",
|
15 |
meta=metadata.dict(),
|
App/Worker.py
CHANGED
@@ -2,12 +2,25 @@ from celery import Celery
|
|
2 |
import os
|
3 |
import time
|
4 |
from App import celery_config
|
|
|
|
|
5 |
from App.Transcription.Utils.audio_transcription import transcribe_file
|
6 |
|
7 |
celery = Celery()
|
8 |
celery.config_from_object(celery_config)
|
9 |
|
10 |
|
11 |
-
@celery.task(name="
|
12 |
def transcription_task(self, file_path, model_size="tiny"):
|
13 |
return transcribe_file(state=self, file_path=file_path, model_size=model_size)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
import os
|
3 |
import time
|
4 |
from App import celery_config
|
5 |
+
import yt_dlp
|
6 |
+
import tempfile
|
7 |
from App.Transcription.Utils.audio_transcription import transcribe_file
|
8 |
|
9 |
celery = Celery()
|
10 |
celery.config_from_object(celery_config)
|
11 |
|
12 |
|
13 |
+
@celery.task(name="transcription", bind=True)
def transcription_task(self, file_path, model_size="tiny"):
    """Celery task that transcribes the audio file at ``file_path``.

    The bound task instance is handed to ``transcribe_file`` as ``state``
    so that it can report progress through ``update_state``.
    """
    transcript = transcribe_file(
        state=self,
        file_path=file_path,
        model_size=model_size,
    )
    return transcript
|
16 |
+
|
17 |
+
|
18 |
+
@celery.task(name="download", bind=True)
def downloadfile(self, url, ydl_opts, model_size="tiny"):
    """Download the media at ``url`` with yt-dlp, then transcribe it.

    Args:
        url: Address of the media to download.
        ydl_opts: Options dict passed unchanged to ``yt_dlp.YoutubeDL``.
        model_size: Model size forwarded to ``transcribe_file``; defaults
            to ``"tiny"``, the value that used to be hard-coded here.

    Returns:
        Whatever ``transcribe_file`` returns for the downloaded audio.
    """
    self.update_state(state="Downloading File..", meta={})
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=True)
        audio_file = ydl.prepare_filename(info)

    # prepare_filename() renders the output template with the extension of
    # the stream that was downloaded (e.g. ".webm").  When a post-processor
    # such as FFmpegExtractAudio converts the file, the name on disk differs,
    # so prefer the final path yt-dlp records for the finished download.
    downloads = info.get("requested_downloads") or []
    final_path = downloads[0].get("filepath") if downloads else None
    if final_path:
        audio_file = final_path
    else:
        # Fallback: derive the converted extension from the configured
        # FFmpegExtractAudio codec.  A few codecs use a container whose
        # extension is not the codec name.
        codec_extensions = {"aac": "m4a", "alac": "m4a", "vorbis": "ogg"}
        for postprocessor in ydl_opts.get("postprocessors") or []:
            if postprocessor.get("key") != "FFmpegExtractAudio":
                continue
            codec = postprocessor.get("preferredcodec")
            if codec and codec != "best":
                extension = codec_extensions.get(codec, codec)
                audio_file = f"{os.path.splitext(audio_file)[0]}.{extension}"
            break

    self.update_state(state="Downloading complete", meta={})

    return transcribe_file(
        state=self, file_path=audio_file, model_size=model_size
    )
|
requirements.txt
CHANGED
@@ -18,6 +18,7 @@ pydantic[email]
|
|
18 |
uvicorn==0.21.1
|
19 |
gunicorn
|
20 |
ujson
|
|
|
21 |
psutil
|
22 |
orm[mysql]
|
23 |
celery
|
|
|
18 |
uvicorn==0.21.1
|
19 |
gunicorn
|
20 |
ujson
|
21 |
+
yt-dlp
|
22 |
psutil
|
23 |
orm[mysql]
|
24 |
celery
|