vam
committed on
Commit
•
9e7ea31
1
Parent(s):
2b7242f
Upload preprocess.py
Browse files- preprocess.py +22 -0
preprocess.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import pandas as pd
|
3 |
+
|
4 |
+
# Load the annotation export. The loop below treats every column as one image
# record and reads its "filename" and "file_attributes" rows.
# NOTE(review): this layout looks like a VGG Image Annotator export - confirm.
df = pd.read_json("/kaggle/input/50memejson/data_set_50.json")

# Results, filled in column order for every image that carries a label.
meme_attribute = {}  # cleaned name -> label text with commas removed
meme_filename = []   # original file names, parallel to meme_list
meme_list = []       # cleaned names (no ".jpg", no "High-Quality-")

for col in df.columns:
    attribute_tmp = df.loc["file_attributes", col]
    # Skip records that have no label to record: an empty attribute dict, a
    # missing cell (NaN/None after loading), or a dict without "image_label".
    # Checking this first avoids cleaning names for records that are dropped
    # and keeps one incomplete record from aborting the whole run.
    if not isinstance(attribute_tmp, dict) or "image_label" not in attribute_tmp:
        continue

    # Look the file name up once; the cleaned name is derived from it.
    file_name = df.loc["filename", col]
    name = file_name.replace(".jpg", "").replace("High-Quality-", "")

    # Trim surrounding whitespace, then surrounding double quotes, then drop
    # every comma from the label text.
    attribute_str = attribute_tmp["image_label"].strip().strip('"')
    cleaned_attribute = attribute_str.replace(",", "")

    # NOTE: if two files clean to the same name, the dict keeps only the last
    # label while both lists still receive an entry for each file.
    meme_attribute[name] = cleaned_attribute
    meme_filename.append(file_name)
    meme_list.append(name)
|