|
from celery import Celery, chain |
|
import os |
|
import time,subprocess |
|
import cgi |
|
from App import celery_config |
|
import yt_dlp |
|
import tempfile |
|
from App.Transcription.Utils.audio_transcription import transcribe_file |
|
from App.Embedding.utils.Initialize import encode, generateChunks |
|
|
|
# Celery application that the task decorators in this module attach to.
# Its settings are loaded from the project's ``celery_config`` module.
celery = Celery()
celery.config_from_object(celery_config)

# Applied on top of whatever ``celery_config`` provided.
celery.conf.update(CELERYD_LOG_LEVEL='INFO')
|
|
|
@celery.task(name="embbeding", bind=True)
def generate_store(self, data, task_id):
    """Split ``data`` into chunks and hand the chunks to ``encode``.

    Args:
        data: Content passed to ``generateChunks``.
        task_id: Identifier passed to ``generateChunks`` together with
            ``data``.

    Returns:
        None.
    """
    print('generating chunks')
    # Chunking and encoding happen back to back; nothing else uses the chunks.
    encode(generateChunks(data, task_id))
    print("hellooo")
|
|
|
def download_with_wget(link, download_dir, filename):
    """Download ``link`` into ``download_dir`` as ``filename`` using aria2c.

    Despite its name, this helper runs the ``aria2c`` executable, not
    ``wget``.

    Args:
        link: URL handed to aria2c.
        download_dir: Directory passed to aria2c's ``-d`` option.
        filename: Output file name passed to aria2c's ``-o`` option.

    Raises:
        subprocess.CalledProcessError: If aria2c exits with a non-zero status.
        FileNotFoundError: If the ``aria2c`` executable cannot be found.
    """
    # check=True makes a failed download raise here instead of being
    # silently ignored, so callers never proceed with a missing file.
    subprocess.run(
        ["aria2c", link, "-d", download_dir, "-o", filename],
        check=True,
    )
|
|
|
@celery.task(name="download", bind=True)
def downloadUrl(self, link, download_dir, filename, model_size="base"):
    """Download ``link`` with ``download_with_wget`` and transcribe the file.

    Args:
        link: URL to download.
        download_dir: Directory the file is downloaded into.
        filename: Name of the downloaded file inside ``download_dir``.
        model_size: Model size forwarded to ``transcribe_file``.

    Returns:
        Whatever ``transcribe_file`` returns for the downloaded file.
    """
    target_path = os.path.join(download_dir, filename)
    download_with_wget(link, download_dir, filename)

    # The bound task instance is passed as ``state`` to ``transcribe_file``.
    return transcribe_file(
        state=self,
        file_path=target_path,
        model_size=model_size,
    )
|
|
|
|
|
|
|
@celery.task(name="transcription", bind=True)
def transcription_task(self, file_path, model_size="tiny"):
    """Transcribe ``file_path`` and queue the embedding task for the result.

    Args:
        file_path: Path of the file handed to ``transcribe_file``.
        model_size: Model size forwarded to ``transcribe_file``.

    Returns:
        The value returned by ``transcribe_file``.
    """
    result = transcribe_file(
        state=self,
        file_path=file_path,
        model_size=model_size,
    )

    # Queue ``generate_store`` with the result's "content" entry and this
    # task's own id.
    current_task_id = self.request.id
    generate_store.delay(result["content"], current_task_id)

    return result
|
|
|
|
|
# NOTE: this task used to be registered as "download", the same name as
# ``downloadUrl``. Celery keys its task registry by name, so the two
# definitions collided and a worker could not tell them apart. It now has
# its own name; callers that go through ``downloadfile.delay(...)`` /
# ``apply_async`` pick the new name up automatically.
@celery.task(name="download_file", bind=True)
def downloadfile(self, url, ydl_opts, model_size="base"):
    """Download ``url`` with yt-dlp, transcribe it and queue embedding.

    Args:
        url: URL handed to ``yt_dlp.YoutubeDL.download``.
        ydl_opts: Options for ``yt_dlp.YoutubeDL``. Must contain an
            ``"outtmpl"`` entry, either a plain string or a mapping with a
            ``"default"`` key; that value is used as the path of the file
            to transcribe.
        model_size: Model size forwarded to ``transcribe_file``.

    Returns:
        The value returned by ``transcribe_file``.

    Raises:
        KeyError: If ``ydl_opts`` has no ``"outtmpl"`` entry, or the entry
            is a mapping without a ``"default"`` key.
    """
    self.update_state(state="Downloading File..", meta={})

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])

    self.update_state(state="Downloading complete", meta={})

    # Accept a plain-string template as well as the mapping form; indexing
    # a string with "default" would raise TypeError.
    outtmpl = ydl_opts["outtmpl"]
    audio_path = outtmpl if isinstance(outtmpl, str) else outtmpl["default"]
    print(model_size, "worker after")

    data = transcribe_file(
        state=self, file_path=audio_path, model_size=model_size
    )

    # Queue ``generate_store`` with the result's "content" entry and this
    # task's own id.
    generate_store.delay(data["content"], self.request.id)
    return data
|
|