Diego-0121
committed on
Commit
•
2d39e80
1
Parent(s):
5178166
Create vectorial_representation.py
Browse files- vectorial_representation.py +19 -0
vectorial_representation.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
from vectorization import model, spotify_data
|
3 |
+
|
4 |
+
# Convert a song into the average vector of its words.
def song_vector(tokens, model):
    """Return the mean embedding of the tokens known to *model*.

    Args:
        tokens: Iterable of word tokens for one song. ``None`` or a float
            NaN (how pandas represents a missing cell) is treated as a
            song without tokens.
        model: Object exposing ``wv`` (supports ``wv[word]`` lookup and a
            ``wv.key_to_index`` mapping) and ``vector_size``.

    Returns:
        A 1-D NumPy array of length ``model.vector_size``: the element-wise
        mean of the vectors of the in-vocabulary tokens, or all zeros when
        no token is in the vocabulary.
    """
    # A missing lyrics cell is not iterable; treat it like an empty song
    # instead of raising TypeError in the comprehension below.
    if tokens is None or (isinstance(tokens, float) and np.isnan(tokens)):
        return np.zeros(model.vector_size)

    keyed_vectors = model.wv

    # Keep only the words that are present in the model's vocabulary.
    known_words = [
        word for word in tokens if word in keyed_vectors.key_to_index
    ]

    if not known_words:
        return np.zeros(model.vector_size)

    # Average the word vectors component-wise.
    return np.mean([keyed_vectors[word] for word in known_words], axis=0)
|
15 |
+
|
16 |
+
# Compute one vector per song from the 'cleaned_text' column, store it in a
# new 'song_vector' column, then write the whole table to disk.
cleaned_texts = spotify_data['cleaned_text']
spotify_data['song_vector'] = cleaned_texts.apply(song_vector, args=(model,))
spotify_data.to_csv('dataset_modificado.csv', index=False)
|
19 |
+
|