leadr64 committed on
Commit
ebec1bd
1 Parent(s): ee99c34
Files changed (5) hide show
  1. .env +4 -0
  2. .gitignore +2 -0
  3. app.py +66 -0
  4. requirements.txt +170 -0
  5. s3_utils.py +66 -0
.env ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ QDRANT_URL=https://006817a4-0b45-4db8-a4e5-1f916808e19b.us-east4-0.gcp.cloud.qdrant.io:6333
2
+ QDRANT_KEY=<redacted>
3
+ AWS_ACCESS_KEY_ID=<redacted>
4
+ AWS_SECRET_ACCESS_KEY=<redacted>
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # Ignore le fichier .env
2
+ .env
app.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from qdrant_client import QdrantClient
4
+ from transformers import ClapModel, ClapProcessor
5
+ from dotenv import load_dotenv
6
+ import requests
7
+
8
# Load environment variables from the .env file.
load_dotenv()

# Read the Qdrant connection settings from the environment.
QDRANT_URL = os.getenv('QDRANT_URL')
QDRANT_KEY = os.getenv('QDRANT_KEY')

# Report what was found. The API key is a secret, so only its presence is
# logged -- never its value, which would otherwise end up in the app logs.
print(f"QDRANT_URL: {QDRANT_URL}")
print(f"QDRANT_KEY: {'<set>' if QDRANT_KEY else '<missing>'}")
18
+
19
try:
    # Validate the configuration before touching the network, so a missing
    # variable is reported as such instead of as an obscure requests error.
    if not QDRANT_URL or not QDRANT_KEY:
        raise ValueError("Les variables d'environnement QDRANT_URL ou QDRANT_KEY ne sont pas définies")

    # Probe the Qdrant URL. The timeout keeps startup from hanging
    # indefinitely when the host is unreachable.
    response = requests.get(QDRANT_URL, timeout=10)
    print(f"Test de la connexion à Qdrant: {response.status_code}")

    # Connect the Qdrant client.
    client = QdrantClient(QDRANT_URL, api_key=QDRANT_KEY)
    print("[INFO] Client created...")

    # Load the CLAP model and its processor.
    print("[INFO] Loading the model...")
    model_name = "laion/larger_clap_general"
    model = ClapModel.from_pretrained(model_name)
    processor = ClapProcessor.from_pretrained(model_name)

    # Gradio interface: number of result slots shown in the UI.
    max_results = 10

    def sound_search(query):
        """Search the Qdrant collection for sounds matching a text query.

        The query is embedded with the CLAP text encoder and used as the
        search vector against the ``demo_spaces_db`` collection.

        :param query: text typed in the search box
        :return: a list of exactly ``max_results`` ``gr.Audio`` updates; the
            first ones carry the hits (reading ``audio_path`` and ``style``
            from each hit's payload), the rest are empty placeholders
        """
        text_inputs = processor(text=query, return_tensors="pt")
        # Hand the client a plain list of floats rather than a tensor that is
        # still attached to the autograd graph.
        text_embed = model.get_text_features(**text_inputs)[0].detach().tolist()

        hits = client.search(
            collection_name="demo_spaces_db",
            query_vector=text_embed,
            limit=max_results,
        )
        results = [
            gr.Audio(
                hit.payload['audio_path'],
                label=f"style: {hit.payload['style']} -- score: {hit.score}")
            for hit in hits
        ]
        # The event is wired to max_results output components, so one value
        # must be returned per component even when fewer hits come back.
        results.extend(
            gr.Audio(value=None, label=f"{slot}")
            for slot in range(len(results), max_results)
        )
        return results

    with gr.Blocks() as demo:
        gr.Markdown("# Sound search database")
        inp = gr.Textbox(placeholder="What sound are you looking for ?")
        out = [gr.Audio(label=f"{x}") for x in range(max_results)]
        inp.change(sound_search, inp, out)

    demo.launch()

except Exception as e:
    # Top-level boundary: any startup failure (configuration, network, model
    # loading, UI launch) lands here, so the message must not blame Qdrant only.
    print(f"[ERROR] Failed to start the sound search app: {e}")
requirements.txt ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aioboto3==12.3.0
2
+ aiobotocore==2.11.2
3
+ aiofiles==23.2.1
4
+ aiohttp==3.9.3
5
+ aioitertools==0.11.0
6
+ aiosignal==1.3.1
7
+ altair==5.3.0
8
+ annotated-types==0.6.0
9
+ antlr4-python3-runtime==4.9.3
10
+ anyio==4.3.0
11
+ appdirs==1.4.4
12
+ async-timeout==4.0.3
13
+ attrs==23.2.0
14
+ audioread==3.0.1
15
+ boto3==1.34.34
16
+ botocore==1.34.34
17
+ braceexpand==0.1.7
18
+ certifi==2024.2.2
19
+ cffi==1.16.0
20
+ charset-normalizer==3.3.2
21
+ click==8.1.7
22
+ cloudpickle==3.0.0
23
+ contourpy==1.2.1
24
+ cycler==0.12.1
25
+ datasets==2.19.1
26
+ decorator==5.1.1
27
+ demucs==4.0.1
28
+ dill==0.3.8
29
+ diskcache==5.6.3
30
+ docker-pycreds==0.4.0
31
+ dora_search==0.1.12
32
+ einops==0.7.0
33
+ essentia==2.1b6.dev1110
34
+ exceptiongroup==1.2.0
35
+ fastapi==0.110.1
36
+ ffmpy==0.3.2
37
+ filelock==3.13.3
38
+ filetype==1.2.0
39
+ fonttools==4.51.0
40
+ frozenlist==1.4.1
41
+ fsspec==2024.3.1
42
+ ftfy==6.2.0
43
+ gitdb==4.0.11
44
+ GitPython==3.1.43
45
+ gradio==4.26.0
46
+ gradio_client==0.15.1
47
+ grpcio==1.62.1
48
+ grpcio-tools==1.62.1
49
+ h11==0.14.0
50
+ h2==4.1.0
51
+ h5py==3.10.0
52
+ hpack==4.0.0
53
+ httpcore==1.0.5
54
+ httpx==0.27.0
55
+ huggingface-hub==0.22.2
56
+ hyperframe==6.0.1
57
+ idna==3.6
58
+ importlib_resources==6.4.0
59
+ Jinja2==3.1.3
60
+ jmespath==1.0.1
61
+ joblib==1.3.2
62
+ jsonschema==4.21.1
63
+ jsonschema-specifications==2023.12.1
64
+ julius==0.2.7
65
+ kiwisolver==1.4.5
66
+ laion-clap==1.1.4
67
+ lameenc==1.7.0
68
+ lazy_loader==0.4
69
+ librosa==0.10.1
70
+ llvmlite==0.42.0
71
+ markdown-it-py==3.0.0
72
+ MarkupSafe==2.1.5
73
+ matplotlib==3.8.4
74
+ mdurl==0.1.2
75
+ miditoolkit==1.0.1
76
+ mido==1.3.2
77
+ mpmath==1.3.0
78
+ msgpack==1.0.8
79
+ multidict==6.0.5
80
+ multiprocess==0.70.16
81
+ networkx==3.3
82
+ numba==0.59.1
83
+ numpy==1.23.5
84
+ nvidia-cublas-cu12==12.1.3.1
85
+ nvidia-cuda-cupti-cu12==12.1.105
86
+ nvidia-cuda-nvrtc-cu12==12.1.105
87
+ nvidia-cuda-runtime-cu12==12.1.105
88
+ nvidia-cudnn-cu12==8.9.2.26
89
+ nvidia-cufft-cu12==11.0.2.54
90
+ nvidia-curand-cu12==10.3.2.106
91
+ nvidia-cusolver-cu12==11.4.5.107
92
+ nvidia-cusparse-cu12==12.1.0.106
93
+ nvidia-nccl-cu12==2.19.3
94
+ nvidia-nvjitlink-cu12==12.4.127
95
+ nvidia-nvtx-cu12==12.1.105
96
+ omegaconf==2.3.0
97
+ openunmix==1.2.1
98
+ orjson==3.10.0
99
+ packaging==23.2
100
+ pandas==2.2.1
101
+ pillow==10.3.0
102
+ platformdirs==4.2.0
103
+ pooch==1.8.1
104
+ portalocker==2.8.2
105
+ progressbar==2.5
106
+ protobuf==4.25.3
107
+ psutil==5.9.8
108
+ pyarrow==16.1.0
109
+ pyarrow-hotfix==0.6
110
+ pycparser==2.22
111
+ pydantic==2.6.4
112
+ pydantic_core==2.16.3
113
+ pydub==0.25.1
114
+ Pygments==2.17.2
115
+ pyparsing==3.1.2
116
+ python-dateutil==2.9.0.post0
117
+ python-dotenv==1.0.1
118
+ python-multipart==0.0.9
119
+ pytz==2024.1
120
+ PyYAML==6.0.1
121
+ qdrant-client==1.8.2
122
+ referencing==0.34.0
123
+ regex==2023.12.25
124
+ requests==2.31.0
125
+ retrying==1.3.4
126
+ rich==13.7.1
127
+ rpds-py==0.18.0
128
+ ruff==0.3.5
129
+ s3transfer==0.10.1
130
+ safetensors==0.4.2
131
+ scikit-learn==1.4.1.post1
132
+ scipy==1.13.0
133
+ semantic-version==2.10.0
134
+ sentry-sdk==1.44.1
135
+ setproctitle==1.3.3
136
+ sf_segmenter==0.0.2
137
+ shellingham==1.5.4
138
+ six==1.16.0
139
+ smmap==5.0.1
140
+ sniffio==1.3.1
141
+ soundfile==0.12.1
142
+ soxr==0.3.7
143
+ starlette==0.37.2
144
+ submitit==1.5.1
145
+ sympy==1.12
146
+ threadpoolctl==3.4.0
147
+ tokenizers==0.13.3
148
+ tomlkit==0.12.0
149
+ toolz==0.12.1
150
+ torch==2.2.2
151
+ torchaudio==2.2.2
152
+ torchlibrosa==0.1.0
153
+ torchvision==0.17.2
154
+ tqdm==4.66.2
155
+ transformers==4.30.0
156
+ treetable==0.2.5
157
+ triton==2.2.0
158
+ typer==0.12.2
159
+ typing_extensions==4.11.0
160
+ tzdata==2024.1
161
+ urllib3==2.0.7
162
+ uvicorn==0.29.0
163
+ wandb==0.16.6
164
+ wcwidth==0.2.13
165
+ webdataset==0.2.86
166
+ websockets==11.0.3
167
+ wget==3.2
168
+ wrapt==1.16.0
169
+ xxhash==3.4.1
170
+ yarl==1.9.4
s3_utils.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import hashlib
2
+ from enum import Enum
3
+
4
+ import boto3
5
+ from botocore.client import BaseClient
6
+
7
+
8
+ # S3 HANDLING ######################################################################################
9
def get_md5(fpath):
    """Return the hexadecimal MD5 digest of the file at *fpath*.

    The file is opened in binary mode and consumed in 8192-byte chunks, so
    it is never loaded into memory in one piece.
    """
    digest = hashlib.md5()
    with open(fpath, "rb") as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for block in iter(lambda: stream.read(8192), b""):
            digest.update(block)
    return digest.hexdigest()
15
+
16
+
17
def upload_file_to_bucket(s3_client, file_obj, bucket, s3key, content_type="audio/mpeg"):
    """Upload a file object to an S3 bucket with a public-read ACL.

    :param s3_client: client exposing ``upload_fileobj`` (e.g. a boto3 S3 client)
    :param file_obj: file object to upload
    :param bucket: bucket to upload to
    :param s3key: object key to store the upload under
    :param content_type: MIME type recorded on the object; defaults to
        ``audio/mpeg``
    :return: whatever ``s3_client.upload_fileobj`` returns
    """
    # ContentType must be the bare MIME type: a value that includes the
    # "Content-Type:" header name is stored verbatim and is not a valid type.
    # NOTE(review): "public-read" makes every uploaded object world-readable.
    extra_args = {"ACL": "public-read", "ContentType": content_type}
    return s3_client.upload_fileobj(file_obj, bucket, s3key, ExtraArgs=extra_args)
30
+
31
+
32
def s3_auth(aws_access_key_id, aws_secret_access_key, region_name) -> BaseClient:
    """Create a boto3 S3 client from explicit credentials and a region.

    :param aws_access_key_id: AWS access key id passed to ``boto3.client``
    :param aws_secret_access_key: AWS secret access key passed to ``boto3.client``
    :param region_name: AWS region name passed to ``boto3.client``
    :return: the client returned by ``boto3.client``
    """
    credentials = {
        "aws_access_key_id": aws_access_key_id,
        "aws_secret_access_key": aws_secret_access_key,
    }
    return boto3.client(service_name='s3', region_name=region_name, **credentials)
40
+
41
+
42
def get_list_of_buckets(s3: "BaseClient"):
    """Build an ``Enum`` class listing the buckets returned by *s3*.

    :param s3: client exposing ``list_buckets()``; the response is expected
        to hold a ``'Buckets'`` list whose entries each have a ``'Name'`` key
    :return: an ``Enum`` class named ``BucketName`` with one member per
        bucket, where both the member name and its value are the bucket name
    """
    response = s3.list_buckets()
    # Read the name from each bucket entry, not from the response itself,
    # and keep the loop variable distinct from the mapping being built.
    bucket_names = {bucket['Name']: bucket['Name'] for bucket in response['Buckets']}
    return Enum('BucketName', bucket_names)
51
+
52
+
53
if __name__ == '__main__':
    # Manual smoke test: authenticate with the credentials found in the
    # environment and print the raw list_buckets() response.
    import os

    S3_BUCKET = "synthia-research"
    S3_FOLDER = "huggingface_spaces_demo"
    AWS_REGION = "eu-west-3"

    # Direct indexing: a missing variable raises KeyError immediately.
    AWS_ACCESS_KEY_ID = os.environ['AWS_ACCESS_KEY_ID']
    AWS_SECRET_ACCESS_KEY = os.environ['AWS_SECRET_ACCESS_KEY']

    s3 = s3_auth(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION)
    buckets_response = s3.list_buckets()
    print(buckets_response)

    s3key = "/".join((S3_FOLDER, "015.WAV"))
    # The upload call is left disabled; no file_obj is defined in this block.
    # print(upload_file_to_bucket(s3, file_obj, S3_BUCKET, s3key))