File size: 1,424 Bytes
a8b3f00
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import datetime
import time

import click
from sqlalchemy import text
from werkzeug.exceptions import NotFound

import app
from configs import dify_config
from extensions.ext_database import db
from models.dataset import Embedding


@app.celery.task(queue="dataset")
def clean_embedding_cache_task():
    """Delete cached ``Embedding`` rows older than the configured retention.

    The retention window, in days, is read from
    ``dify_config.PLAN_SANDBOX_CLEAN_DAY_SETTING``. Rows whose ``created_at``
    is earlier than ``now - retention`` are removed in batches of 100, with a
    commit after every batch, until no matching rows remain.

    Start and completion messages (the latter including the elapsed time in
    seconds) are written with ``click.echo``.
    """
    click.echo(click.style("Start clean embedding cache.", fg="green"))
    clean_days = int(dify_config.PLAN_SANDBOX_CLEAN_DAY_SETTING)
    batch_size = 100
    start_at = time.perf_counter()
    # The cutoff is computed once so every batch uses the same boundary.
    cutoff = datetime.datetime.now() - datetime.timedelta(days=clean_days)

    while True:
        # ``.all()`` yields an empty list when nothing matches; it never
        # raises ``NotFound``, so no exception handler is needed here.
        rows = (
            db.session.query(Embedding.id)
            .filter(Embedding.created_at < cutoff)
            .order_by(Embedding.created_at.desc())
            .limit(batch_size)
            .all()
        )
        embedding_ids = [row[0] for row in rows]
        if not embedding_ids:
            break

        # One DELETE ... WHERE id IN (...) per batch instead of one statement
        # per row. ``synchronize_session=False`` skips reconciling in-session
        # objects: only ids were loaded, so there is nothing to synchronize.
        db.session.query(Embedding).filter(Embedding.id.in_(embedding_ids)).delete(synchronize_session=False)
        db.session.commit()

    end_at = time.perf_counter()
    click.echo(click.style(f"Cleaned embedding cache from db success latency: {end_at - start_at}", fg="green"))