File size: 2,035 Bytes
d7d0d8e
252d749
4fceeff
88c5489
252d749
385a3d3
 
252d749
d7d0d8e
252d749
 
 
647122b
 
9d130ca
647122b
252d749
d7d0d8e
 
88c5489
d7d0d8e
 
 
 
4fceeff
 
 
 
 
 
 
 
 
136640b
4fceeff
 
 
d7d0d8e
385a3d3
78f1df3
7a1123a
 
d7d0d8e
385a3d3
 
 
af60251
385a3d3
7306982
dd2695b
af5c58a
7306982
 
385a3d3
dd2695b
7a1123a
88c5489
d7d0d8e
dd2695b
 
d7d0d8e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from celery import Celery, chain
import os
import time,subprocess
import cgi
from App import celery_config
import yt_dlp
import tempfile
from App.Transcription.Utils.audio_transcription import transcribe_file
from App.Embedding.utils.Initialize import encode, generateChunks

# Celery application used by the task decorators below; its settings are
# loaded from the project's celery_config module.
celery = Celery()
celery.config_from_object(celery_config)
celery.conf.update(
    # Override applied on top of the settings loaded from celery_config.
    # NOTE(review): the value below is INFO, although this comment previously
    # said DEBUG - confirm which worker log level is actually intended.
    CELERYD_LOG_LEVEL='INFO',  # Worker log level
)

@celery.task(name="embbeding", bind=True)
def generate_store(self, data, task_id):
    """Split ``data`` into chunks and hand the chunks to ``encode``.

    Args:
        self: Bound Celery task instance (not used in the body).
        data: Content passed to ``generateChunks``.
        task_id: Identifier passed to ``generateChunks`` alongside ``data``.
    """
    print('generating chunks')
    encode(generateChunks(data, task_id))
    print("hellooo")

def download_with_wget(link, download_dir, filename):
    """Download ``link`` to ``download_dir/filename`` with ``aria2c``.

    Despite the function name, the external tool that is run is ``aria2c``.

    Args:
        link: URL handed to ``aria2c``.
        download_dir: Target directory, passed via ``-d``.
        filename: Output file name, passed via ``-o``.

    Raises:
        subprocess.CalledProcessError: If ``aria2c`` exits with a non-zero
            status.
    """
    # check=True surfaces a failed download here instead of letting callers
    # continue with a missing or partially written file.
    subprocess.run(
        ["aria2c", link, "-d", download_dir, "-o", filename],
        check=True,
    )

@celery.task(name="download", bind=True)
def downloadUrl(self, link, download_dir, filename, model_size="base"):
    """Fetch ``link`` into ``download_dir`` and transcribe the saved file.

    Args:
        self: Bound Celery task instance, passed to ``transcribe_file`` as
            ``state``.
        link: URL to download.
        download_dir: Directory the file is downloaded into.
        filename: Name of the downloaded file inside ``download_dir``.
        model_size: Model size forwarded to ``transcribe_file``.

    Returns:
        The value returned by ``transcribe_file`` for the downloaded file.
    """
    target = os.path.join(download_dir, filename)
    download_with_wget(link, download_dir, filename)

    # NOTE: the result is not forwarded to generate_store here; that call is
    # currently disabled for this task.
    return transcribe_file(state=self, file_path=target, model_size=model_size)



@celery.task(name="transcription", bind=True)
def transcription_task(self, file_path, model_size="tiny"):
    """Transcribe ``file_path`` and queue the embedding task for the result.

    Args:
        self: Bound Celery task instance, passed to ``transcribe_file`` as
            ``state``; its request id is forwarded to ``generate_store``.
        file_path: Path of the file to transcribe.
        model_size: Model size forwarded to ``transcribe_file``.

    Returns:
        The value returned by ``transcribe_file``.
    """
    result = transcribe_file(
        state=self,
        file_path=file_path,
        model_size=model_size,
    )
    content, request_id = result["content"], self.request.id
    generate_store.delay(content, request_id)
    return result


# Registered under its own task name: Celery keys its task registry by name,
# and "download" is already taken by downloadUrl in this file, so sharing it
# would prevent both tasks from being registered. Callers that dispatch by
# name must use "download_file" for this task.
@celery.task(name="download_file", bind=True)
def downloadfile(self, url, ydl_opts, model_size="base"):
    """Download ``url`` with yt-dlp, transcribe it and queue the embedding task.

    Args:
        self: Bound Celery task instance; used to publish progress states,
            passed to ``transcribe_file`` as ``state``, and its request id is
            forwarded to ``generate_store``.
        url: URL handed to ``yt_dlp.YoutubeDL.download``.
        ydl_opts: Options for ``yt_dlp.YoutubeDL``. Must contain ``"outtmpl"``,
            either as a plain string or as a dict with a ``"default"`` entry;
            that value is used as the path of the file to transcribe.
        model_size: Model size forwarded to ``transcribe_file``.

    Returns:
        The value returned by ``transcribe_file``.

    Raises:
        KeyError: If ``ydl_opts`` has no ``"outtmpl"`` entry, or it is a dict
            without a ``"default"`` key.
    """
    self.update_state(state="Downloading File..", meta={})

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])

    self.update_state(state="Downloading complete", meta={})

    # yt-dlp accepts "outtmpl" either as a single template string or as a
    # dict of templates keyed by type; support both forms.
    outtmpl = ydl_opts["outtmpl"]
    audio_path = outtmpl if isinstance(outtmpl, str) else outtmpl["default"]
    print(model_size, "worker after")

    data = transcribe_file(
        state=self, file_path=audio_path, model_size=model_size
    )
    generate_store.delay(data["content"], self.request.id)
    return data