|
from sklearn.metrics.pairwise import cosine_similarity |
|
import pandas as pd |
|
import numpy as np |
|
from vectorization import spotify_data |
|
import json |
|
import gradio as gr |
|
from gradio.components import Textbox |
|
from ast import literal_eval |
|
# Load the preprocessed catalogue from the working directory. The code below
# reads its 'song', 'artist' and 'song_vector' columns.
_DATASET_CSV = 'dataset_modificado.csv'
spotify_data_processed = pd.read_csv(_DATASET_CSV)
|
|
|
def convert_string_to_array(str_vector, fallback_dim=100):
    """Turn the text form of a numeric vector into a 1-D float array.

    Accepts bracketed text whose numbers are separated by whitespace
    (including line breaks) and/or commas, e.g. ``"[0.1 0.2 0.3]"`` or
    ``"[0.1, 0.2, 0.3]"``.

    Args:
        str_vector: The value to convert. ``numpy.ndarray`` inputs are
            returned unchanged; lists and tuples of numbers are converted
            element by element.
        fallback_dim: Length of the all-zero vector returned when the value
            cannot be converted. Defaults to 100.

    Returns:
        A ``numpy.ndarray`` of floats. When the input is missing or
        malformed, the problem is printed and ``numpy.zeros((fallback_dim,))``
        is returned instead of raising.
    """
    if isinstance(str_vector, np.ndarray):
        return str_vector

    if isinstance(str_vector, str):
        # Brackets and commas are only punctuation; str.split() with no
        # arguments already treats spaces, '\n' and '\r' as separators.
        cleaned_str = str_vector.replace('[', ' ').replace(']', ' ').replace(',', ' ')
        tokens = cleaned_str.split()
    elif isinstance(str_vector, (list, tuple)):
        tokens = str_vector
    else:
        # Empty CSV cells are read back as float NaN (or None), which have no
        # string methods; fall back instead of raising AttributeError.
        print("Error:", "cannot convert {!r} to a vector".format(str_vector))
        return np.zeros((fallback_dim,))

    try:
        return np.array([float(item) for item in tokens])
    except (TypeError, ValueError) as e:
        print("Error:", e)
        return np.zeros((fallback_dim,))
|
|
|
|
|
# Replace the 'song_vector' cells with the arrays produced by
# convert_string_to_array, overwriting the column in place.
_raw_song_vectors = spotify_data_processed['song_vector']
spotify_data_processed['song_vector'] = _raw_song_vectors.apply(convert_string_to_array)

# Import-time smoke check: pass the first rows (``head()``) through the
# converter once more and print the result so the parsed values can be inspected.
sample_data = spotify_data_processed['song_vector'].head()
converted_vectors = sample_data.apply(convert_string_to_array)
print(converted_vectors)
|
|
|
|
|
|
|
def recommend_song(song_name, artist_name, spotify_data_processed, top_n=4):
    """Return the songs whose vectors are most similar to the requested song.

    Args:
        song_name: Exact value to match against the 'song' column.
        artist_name: Exact value to match against the 'artist' column.
        spotify_data_processed: DataFrame with 'song', 'artist' and
            'song_vector' columns.
        top_n: Maximum number of recommendations to return. Values below
            zero are treated as zero.

    Returns:
        A DataFrame with the 'song' and 'artist' columns of up to ``top_n``
        rows, ordered from most to least similar by cosine similarity. Rows
        that match the requested song/artist pair are never included. If no
        row matches the request, a one-row DataFrame with a single 'Error'
        column is returned instead.
    """
    match_mask = (
        (spotify_data_processed['song'] == song_name)
        & (spotify_data_processed['artist'] == artist_name)
    ).to_numpy()

    if not match_mask.any():
        return pd.DataFrame({"Error": ["Canción no encontrada en la base de datos."]})

    # Use the first matching row as the query vector.
    song_vec = spotify_data_processed.loc[match_mask, 'song_vector'].iloc[0]
    if isinstance(song_vec, str):
        song_vec = convert_string_to_array(song_vec)

    all_song_vectors = np.array(spotify_data_processed['song_vector'].tolist())
    similarities = cosine_similarity([song_vec], all_song_vectors)[0]

    # Stable descending sort: ties keep dataset order, so results are
    # reproducible from one call to the next.
    ranked = np.argsort(-similarities, kind="stable")

    # Drop the query rows explicitly. Skipping "whatever ranked first" is
    # wrong when another row has an identical vector, the song appears more
    # than once, or the query vector is all zeros (similarity 0 everywhere).
    candidates = ranked[~match_mask[ranked]]
    top_indices = candidates[:max(top_n, 0)]

    return spotify_data_processed.iloc[top_indices][['song', 'artist']]
|
|
|
|
|
|
|
|
|
def _format_recommendations(recommendations_df, slots=4):
    """Render a recommendations frame as exactly ``slots`` display strings.

    Returns ``None`` when the argument is not a non-empty DataFrame that has
    both a 'song' and an 'artist' column; otherwise a list of
    "<song> by <artist>" strings, truncated or padded with "" to ``slots``.
    """
    if not isinstance(recommendations_df, pd.DataFrame) or recommendations_df.empty:
        return None
    if not {'song', 'artist'}.issubset(recommendations_df.columns):
        return None

    pairs = recommendations_df[['song', 'artist']].values.tolist()
    lines = ["{} by {}".format(song, artist) for song, artist in pairs][:slots]
    return lines + [""] * (slots - len(lines))


def recommend_song_interface(song_name, artist_name):
    """Return four display strings with recommendations for a song.

    Looks the song up in the module-level ``spotify_data_processed`` via
    ``recommend_song``. If that yields no usable recommendations (for example
    the song is not in the dataset), recommendations for one randomly sampled
    song from the dataset are returned instead.

    Args:
        song_name: Song title to look up.
        artist_name: Artist of that song.

    Returns:
        A list of exactly four strings, each "<song> by <artist>" or "" for
        an unused slot.
    """
    blank_result = [""] * 4

    formatted = _format_recommendations(
        recommend_song(song_name, artist_name, spotify_data_processed)
    )
    if formatted is not None:
        return formatted

    # Fallback: recommend around a random song. An empty dataset has nothing
    # to sample from, so answer with blanks rather than letting sample() raise.
    if spotify_data_processed.empty:
        return blank_result

    random_song = spotify_data_processed.sample()
    random_song_name = random_song['song'].iloc[0]
    random_artist_name = random_song['artist'].iloc[0]

    # The fallback lookup can fail too (e.g. a sampled row with a NaN name
    # never equals itself), so its result is validated the same way instead
    # of being indexed blindly.
    formatted = _format_recommendations(
        recommend_song(random_song_name, random_artist_name, spotify_data_processed)
    )
    return formatted if formatted is not None else blank_result
|
|
|
|
|
|
|
# Runs one lookup at import time with the literal placeholder strings below.
# NOTE(review): unless the dataset has a row with exactly these names, this
# takes the random-fallback path of recommend_song_interface — confirm that
# this call is intended to stay.
recommendations = recommend_song_interface(
    song_name="song_name",
    artist_name="artist_name",
)