memegen / preprocess.py
vam
Update preprocess.py
c9ef990 verified
raw
history blame
639 Bytes
import numpy as np
import pandas as pd
# Load the annotation export. The loop below treats every column as one image
# record and reads its "filename" and "file_attributes" rows.
# NOTE(review): the column-per-image layout is what the indexing below
# relies on -- confirm against the actual JSON file.
df = pd.read_json("data_set_50.json")

# Module-level results built by the loop below:
#   meme_attribute: cleaned name -> cleaned image label
#   meme_filename:  original filenames of the labelled records, in column order
#   meme_list:      cleaned names, parallel to meme_filename
meme_attribute = {}
meme_filename = []
meme_list = []

for col in df.columns:
    attribute_tmp = df.loc["file_attributes", col]
    # Skip records without a usable label: an empty attribute dict, a
    # non-dict value (e.g. a missing entry), or a dict that has no
    # "image_label" key. Checking first avoids cleaning names we discard.
    if not isinstance(attribute_tmp, dict):
        continue
    if "image_label" not in attribute_tmp:
        continue

    # Look the filename up once; the short name is derived from it by
    # removing the ".jpg" and "High-Quality-" substrings.
    file_name = df.loc["filename", col]
    name = file_name.replace(".jpg", "").replace("High-Quality-", "")

    # Trim surrounding whitespace and wrapping double quotes, then drop commas.
    attribute_str = attribute_tmp["image_label"].strip().strip('"')
    cleaned_attribute = attribute_str.replace(",", "")

    meme_attribute[name] = cleaned_attribute
    meme_filename.append(file_name)
    meme_list.append(name)