|
import numpy as np
|
|
import pandas as pd
|
|
|
|
# Load the JSON data set. The loop below treats every column as one entry and
# reads its "filename" and "file_attributes" rows.
df = pd.read_json("/kaggle/input/50memejson/data_set_50.json")

# Cleaned meme name -> cleaned label text.
meme_attribute = {}
# Original file names of the labelled entries, in column order.
meme_filename = []
# Cleaned meme names, index-aligned with meme_filename.
meme_list = []

for col in df.columns:
    # Look the file name up once: it is stored verbatim in meme_filename and
    # also used to derive the cleaned name.
    file_name = df.loc["filename", col]
    name = file_name.replace(".jpg", "").replace("High-Quality-", "")

    attribute_tmp = df.loc["file_attributes", col]

    # Skip entries that carry no usable label. An empty attribute dict raises
    # KeyError here (the case the original `== {}` check handled); a dict
    # without "image_label" does too, and a missing / non-dict value raises
    # TypeError. All of them are treated as "unlabelled" instead of aborting
    # the whole pass.
    try:
        raw_label = attribute_tmp["image_label"]
    except (KeyError, TypeError):
        continue

    # Drop surrounding whitespace and double quotes, then remove every comma.
    attribute_str = raw_label.strip().strip('"')
    cleaned_attribute = attribute_str.replace(",", "")

    meme_attribute[name] = cleaned_attribute
    meme_filename.append(file_name)
    meme_list.append(name)