test
Browse files
App/Transcription/Model.py
CHANGED
@@ -10,7 +10,13 @@ class Transcriptions(orm.Model):
|
|
10 |
fields = {
|
11 |
"id": orm.Integer(primary_key=True),
|
12 |
"task_id": orm.String(max_length=100, index=True, default=""),
|
13 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
"user": orm.ForeignKey(User, on_delete=orm.CASCADE),
|
15 |
"createdAt": orm.DateTime(index=True, default=datetime.datetime.now),
|
16 |
"content": orm.JSON(default={}),
|
|
|
10 |
fields = {
|
11 |
"id": orm.Integer(primary_key=True),
|
12 |
"task_id": orm.String(max_length=100, index=True, default=""),
|
13 |
+
"file_name": orm.String(max_length=100, index=True, default=""),
|
14 |
+
"language": orm.String(max_length=100, index=True, default="-"),
|
15 |
+
"youtubeLink": orm.String(max_length=100, index=True, allow_null=True),
|
16 |
+
"tl_file_id": orm.String(
|
17 |
+
max_length=100, index=True, default="", allow_null=True
|
18 |
+
),
|
19 |
+
"duration": orm.Integer(index=True, default=0),
|
20 |
"user": orm.ForeignKey(User, on_delete=orm.CASCADE),
|
21 |
"createdAt": orm.DateTime(index=True, default=datetime.datetime.now),
|
22 |
"content": orm.JSON(default={}),
|
App/Transcription/Schemas.py
CHANGED
@@ -1,14 +1,27 @@
|
|
1 |
-
from pydantic import BaseModel
|
|
|
2 |
|
3 |
|
4 |
class TranscriptionMetadata(BaseModel):
|
5 |
duration: int = 0
|
6 |
-
language: str = "
|
7 |
-
logs: str =
|
8 |
-
percentage: str = "
|
9 |
transcription: dict = {}
|
10 |
state: str = "PENDING"
|
11 |
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
class UserDetails(BaseModel):
|
14 |
userId: str
|
|
|
1 |
+
from pydantic import BaseModel, Field
|
2 |
+
from datetime import datetime
|
3 |
|
4 |
|
5 |
class TranscriptionMetadata(BaseModel):
|
6 |
duration: int = 0
|
7 |
+
language: str = "-"
|
8 |
+
logs: str = ""
|
9 |
+
percentage: str = "-"
|
10 |
transcription: dict = {}
|
11 |
state: str = "PENDING"
|
12 |
|
13 |
|
14 |
+
class TranscriptionResult(BaseModel):
|
15 |
+
created_at: datetime = Field(default_factory=datetime.utcnow)
|
16 |
+
duration: int = 0
|
17 |
+
language: str = "-"
|
18 |
+
transcription_state: str = "SUCCESS"
|
19 |
+
transcript: list = []
|
20 |
+
|
21 |
+
@property
|
22 |
+
def content(self):
|
23 |
+
return self.transcript
|
24 |
+
|
25 |
+
|
26 |
class UserDetails(BaseModel):
|
27 |
userId: str
|
App/Transcription/TranscriptionRoutes.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
from fastapi import APIRouter, status, Depends, UploadFile, File, Query, BackgroundTasks
|
2 |
from typing_extensions import Annotated
|
3 |
-
from .Schemas import UserDetails
|
4 |
from App import bot
|
5 |
import aiofiles, os, re
|
6 |
import tempfile
|
@@ -54,10 +54,12 @@ async def download_audio(
|
|
54 |
}
|
55 |
|
56 |
task = downloadfile.delay(url, ydl_opts, model)
|
|
|
57 |
transcription_enrty = await Transcriptions.objects.create(
|
58 |
-
|
59 |
)
|
60 |
-
|
|
|
61 |
|
62 |
|
63 |
@transcription_router.post("/uploadfile/")
|
@@ -101,13 +103,16 @@ async def create_file(
|
|
101 |
@transcription_router.get("/tasks/{task_id}")
|
102 |
async def get_status(task_id):
|
103 |
task_result = AsyncResult(task_id)
|
|
|
104 |
entry: Transcriptions = await Transcriptions.objects.filter(task_id=task_id).first()
|
105 |
if task_result.status == "SUCCESS":
|
106 |
-
|
107 |
-
|
108 |
-
)
|
109 |
else:
|
110 |
-
|
|
|
|
|
111 |
result = {
|
112 |
"task_id": task_id,
|
113 |
"task_status": task_result.status,
|
|
|
1 |
from fastapi import APIRouter, status, Depends, UploadFile, File, Query, BackgroundTasks
|
2 |
from typing_extensions import Annotated
|
3 |
+
from .Schemas import UserDetails, TranscriptionMetadata, TranscriptionResult
|
4 |
from App import bot
|
5 |
import aiofiles, os, re
|
6 |
import tempfile
|
|
|
54 |
}
|
55 |
|
56 |
task = downloadfile.delay(url, ydl_opts, model)
|
57 |
+
response = {"task_id": task.id, "file_name": f"{video_title}.mp3"}
|
58 |
transcription_enrty = await Transcriptions.objects.create(
|
59 |
+
user=user, youtubeLink=url, **response
|
60 |
)
|
61 |
+
|
62 |
+
return response
|
63 |
|
64 |
|
65 |
@transcription_router.post("/uploadfile/")
|
|
|
103 |
@transcription_router.get("/tasks/{task_id}")
|
104 |
async def get_status(task_id):
|
105 |
task_result = AsyncResult(task_id)
|
106 |
+
|
107 |
entry: Transcriptions = await Transcriptions.objects.filter(task_id=task_id).first()
|
108 |
if task_result.status == "SUCCESS":
|
109 |
+
trans = TranscriptionResult(**task_result.result)
|
110 |
+
trans
|
111 |
+
await entry.update(**trans.dict(exclude={"transcript"}, content=trans.content))
|
112 |
else:
|
113 |
+
_trans = TranscriptionMetadata(**task_result.result)
|
114 |
+
await entry.update(**_trans.dict(exclude={"logs", "transcription"}))
|
115 |
+
|
116 |
result = {
|
117 |
"task_id": task_id,
|
118 |
"task_status": task_result.status,
|
App/Transcription/Utils/audio_transcription.py
CHANGED
@@ -1,7 +1,8 @@
|
|
1 |
from faster_whisper import WhisperModel
|
|
|
2 |
from tqdm import tqdm
|
3 |
import os, time
|
4 |
-
from App.Transcription.Schemas import TranscriptionMetadata
|
5 |
|
6 |
current_time = time.localtime()
|
7 |
model_size = "tiny"
|
@@ -11,6 +12,7 @@ def transcribe_file(state, file_path, model_size="tiny"):
|
|
11 |
result = []
|
12 |
metadata = TranscriptionMetadata()
|
13 |
metadata.logs = "Transcription STARTING"
|
|
|
14 |
state.update_state(
|
15 |
state="PROGRESS",
|
16 |
meta=metadata.dict(),
|
@@ -43,8 +45,8 @@ def transcribe_file(state, file_path, model_size="tiny"):
|
|
43 |
"text": word.word,
|
44 |
}
|
45 |
result.append(temp)
|
46 |
-
|
47 |
-
if
|
48 |
metadata.logs = "Transcribing.."
|
49 |
metadata.percentage = f"{((word.end / total_duration)*100)}"
|
50 |
state.update_state(state="PROGRESS", meta=metadata.dict())
|
@@ -61,4 +63,6 @@ def transcribe_file(state, file_path, model_size="tiny"):
|
|
61 |
# delete file
|
62 |
os.remove(file_path)
|
63 |
|
64 |
-
|
|
|
|
|
|
1 |
from faster_whisper import WhisperModel
|
2 |
+
from datetime import datetime, timedelta
|
3 |
from tqdm import tqdm
|
4 |
import os, time
|
5 |
+
from App.Transcription.Schemas import TranscriptionMetadata, TranscriptionResult
|
6 |
|
7 |
current_time = time.localtime()
|
8 |
model_size = "tiny"
|
|
|
12 |
result = []
|
13 |
metadata = TranscriptionMetadata()
|
14 |
metadata.logs = "Transcription STARTING"
|
15 |
+
start_time = datetime.now()
|
16 |
state.update_state(
|
17 |
state="PROGRESS",
|
18 |
meta=metadata.dict(),
|
|
|
45 |
"text": word.word,
|
46 |
}
|
47 |
result.append(temp)
|
48 |
+
time_difference = datetime.now() - start_time
|
49 |
+
if time_difference >= timedelta(seconds=30):
|
50 |
metadata.logs = "Transcribing.."
|
51 |
metadata.percentage = f"{((word.end / total_duration)*100)}"
|
52 |
state.update_state(state="PROGRESS", meta=metadata.dict())
|
|
|
63 |
# delete file
|
64 |
os.remove(file_path)
|
65 |
|
66 |
+
transcription_result = TranscriptionResult(**metadata.dict())
|
67 |
+
transcription_result.transcript = result
|
68 |
+
return transcription_result.dict()
|
App/app.py
CHANGED
@@ -25,14 +25,6 @@ logging.basicConfig(
|
|
25 |
)
|
26 |
|
27 |
|
28 |
-
async def create_async_model(model):
|
29 |
-
import asyncio
|
30 |
-
|
31 |
-
# until something better comes along
|
32 |
-
url = model._get_database_url()
|
33 |
-
await asyncio.gather(asyncio.create_task(model._create_all(url)))
|
34 |
-
|
35 |
-
|
36 |
app = FastAPI()
|
37 |
origins = ["*"]
|
38 |
|
|
|
25 |
)
|
26 |
|
27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
app = FastAPI()
|
29 |
origins = ["*"]
|
30 |
|