Mbonea committed on
Commit
b8952b7
1 Parent(s): 9ad8e24
App/Transcription/Model.py CHANGED
@@ -10,7 +10,13 @@ class Transcriptions(orm.Model):
10
  fields = {
11
  "id": orm.Integer(primary_key=True),
12
  "task_id": orm.String(max_length=100, index=True, default=""),
13
- "tl_file_id": orm.String(max_length=100, index=True, default=""),
 
 
 
 
 
 
14
  "user": orm.ForeignKey(User, on_delete=orm.CASCADE),
15
  "createdAt": orm.DateTime(index=True, default=datetime.datetime.now),
16
  "content": orm.JSON(default={}),
 
10
  fields = {
11
  "id": orm.Integer(primary_key=True),
12
  "task_id": orm.String(max_length=100, index=True, default=""),
13
+ "file_name": orm.String(max_length=100, index=True, default=""),
14
+ "language": orm.String(max_length=100, index=True, default="-"),
15
+ "youtubeLink": orm.String(max_length=100, index=True, allow_null=True),
16
+ "tl_file_id": orm.String(
17
+ max_length=100, index=True, default="", allow_null=True
18
+ ),
19
+ "duration": orm.Integer(index=True, default=0),
20
  "user": orm.ForeignKey(User, on_delete=orm.CASCADE),
21
  "createdAt": orm.DateTime(index=True, default=datetime.datetime.now),
22
  "content": orm.JSON(default={}),
App/Transcription/Schemas.py CHANGED
@@ -1,14 +1,27 @@
1
- from pydantic import BaseModel
 
2
 
3
 
4
  class TranscriptionMetadata(BaseModel):
5
  duration: int = 0
6
- language: str = "0"
7
- logs: str = 0
8
- percentage: str = "0"
9
  transcription: dict = {}
10
  state: str = "PENDING"
11
 
12
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  class UserDetails(BaseModel):
14
  userId: str
 
1
+ from pydantic import BaseModel, Field
2
+ from datetime import datetime
3
 
4
 
5
  class TranscriptionMetadata(BaseModel):
6
  duration: int = 0
7
+ language: str = "-"
8
+ logs: str = ""
9
+ percentage: str = "-"
10
  transcription: dict = {}
11
  state: str = "PENDING"
12
 
13
 
14
+ class TranscriptionResult(BaseModel):
15
+ created_at: datetime = Field(default_factory=datetime.utcnow)
16
+ duration: int = 0
17
+ language: str = "-"
18
+ transcription_state: str = "SUCCESS"
19
+ transcript: list = []
20
+
21
+ @property
22
+ def content(self):
23
+ return self.transcript
24
+
25
+
26
  class UserDetails(BaseModel):
27
  userId: str
App/Transcription/TranscriptionRoutes.py CHANGED
@@ -1,6 +1,6 @@
1
  from fastapi import APIRouter, status, Depends, UploadFile, File, Query, BackgroundTasks
2
  from typing_extensions import Annotated
3
- from .Schemas import UserDetails
4
  from App import bot
5
  import aiofiles, os, re
6
  import tempfile
@@ -54,10 +54,12 @@ async def download_audio(
54
  }
55
 
56
  task = downloadfile.delay(url, ydl_opts, model)
 
57
  transcription_enrty = await Transcriptions.objects.create(
58
- task_id=task.id, user=user
59
  )
60
- return {"task_id": task.id, "file_name": filename}
 
61
 
62
 
63
  @transcription_router.post("/uploadfile/")
@@ -101,13 +103,16 @@ async def create_file(
101
  @transcription_router.get("/tasks/{task_id}")
102
  async def get_status(task_id):
103
  task_result = AsyncResult(task_id)
 
104
  entry: Transcriptions = await Transcriptions.objects.filter(task_id=task_id).first()
105
  if task_result.status == "SUCCESS":
106
- await entry.update(
107
- content=task_result.result, transcription_state=task_result.status
108
- )
109
  else:
110
- await entry.update(transcription_state=task_result.status)
 
 
111
  result = {
112
  "task_id": task_id,
113
  "task_status": task_result.status,
 
1
  from fastapi import APIRouter, status, Depends, UploadFile, File, Query, BackgroundTasks
2
  from typing_extensions import Annotated
3
+ from .Schemas import UserDetails, TranscriptionMetadata, TranscriptionResult
4
  from App import bot
5
  import aiofiles, os, re
6
  import tempfile
 
54
  }
55
 
56
  task = downloadfile.delay(url, ydl_opts, model)
57
+ response = {"task_id": task.id, "file_name": f"{video_title}.mp3"}
58
  transcription_enrty = await Transcriptions.objects.create(
59
+ user=user, youtubeLink=url, **response
60
  )
61
+
62
+ return response
63
 
64
 
65
  @transcription_router.post("/uploadfile/")
 
103
  @transcription_router.get("/tasks/{task_id}")
104
  async def get_status(task_id):
105
  task_result = AsyncResult(task_id)
106
+
107
  entry: Transcriptions = await Transcriptions.objects.filter(task_id=task_id).first()
108
  if task_result.status == "SUCCESS":
109
+ trans = TranscriptionResult(**task_result.result)
110
+ trans
111
+ await entry.update(**trans.dict(exclude={"transcript"}, content=trans.content))
112
  else:
113
+ _trans = TranscriptionMetadata(**task_result.result)
114
+ await entry.update(**_trans.dict(exclude={"logs", "transcription"}))
115
+
116
  result = {
117
  "task_id": task_id,
118
  "task_status": task_result.status,
App/Transcription/Utils/audio_transcription.py CHANGED
@@ -1,7 +1,8 @@
1
  from faster_whisper import WhisperModel
 
2
  from tqdm import tqdm
3
  import os, time
4
- from App.Transcription.Schemas import TranscriptionMetadata
5
 
6
  current_time = time.localtime()
7
  model_size = "tiny"
@@ -11,6 +12,7 @@ def transcribe_file(state, file_path, model_size="tiny"):
11
  result = []
12
  metadata = TranscriptionMetadata()
13
  metadata.logs = "Transcription STARTING"
 
14
  state.update_state(
15
  state="PROGRESS",
16
  meta=metadata.dict(),
@@ -43,8 +45,8 @@ def transcribe_file(state, file_path, model_size="tiny"):
43
  "text": word.word,
44
  }
45
  result.append(temp)
46
-
47
- if current_time.tm_sec % 5 == 0:
48
  metadata.logs = "Transcribing.."
49
  metadata.percentage = f"{((word.end / total_duration)*100)}"
50
  state.update_state(state="PROGRESS", meta=metadata.dict())
@@ -61,4 +63,6 @@ def transcribe_file(state, file_path, model_size="tiny"):
61
  # delete file
62
  os.remove(file_path)
63
 
64
- return result
 
 
 
1
  from faster_whisper import WhisperModel
2
+ from datetime import datetime, timedelta
3
  from tqdm import tqdm
4
  import os, time
5
+ from App.Transcription.Schemas import TranscriptionMetadata, TranscriptionResult
6
 
7
  current_time = time.localtime()
8
  model_size = "tiny"
 
12
  result = []
13
  metadata = TranscriptionMetadata()
14
  metadata.logs = "Transcription STARTING"
15
+ start_time = datetime.now()
16
  state.update_state(
17
  state="PROGRESS",
18
  meta=metadata.dict(),
 
45
  "text": word.word,
46
  }
47
  result.append(temp)
48
+ time_difference = datetime.now() - start_time
49
+ if time_difference >= timedelta(seconds=30):
50
  metadata.logs = "Transcribing.."
51
  metadata.percentage = f"{((word.end / total_duration)*100)}"
52
  state.update_state(state="PROGRESS", meta=metadata.dict())
 
63
  # delete file
64
  os.remove(file_path)
65
 
66
+ transcription_result = TranscriptionResult(**metadata.dict())
67
+ transcription_result.transcript = result
68
+ return transcription_result.dict()
App/app.py CHANGED
@@ -25,14 +25,6 @@ logging.basicConfig(
25
  )
26
 
27
 
28
- async def create_async_model(model):
29
- import asyncio
30
-
31
- # until something better comes along
32
- url = model._get_database_url()
33
- await asyncio.gather(asyncio.create_task(model._create_all(url)))
34
-
35
-
36
  app = FastAPI()
37
  origins = ["*"]
38
 
 
25
  )
26
 
27
 
 
 
 
 
 
 
 
 
28
  app = FastAPI()
29
  origins = ["*"]
30