vam committed on
Commit
9e7ea31
1 Parent(s): 2b7242f

Upload preprocess.py

Browse files
Files changed (1) hide show
  1. preprocess.py +22 -0
preprocess.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import numpy as np
import pandas as pd

# Load the annotation export. The loop below expects the resulting frame to
# have one column per annotated image and rows labelled "filename" and
# "file_attributes".
# NOTE(review): this looks like a VGG Image Annotator style export -- confirm.
df = pd.read_json("/kaggle/input/50memejson/data_set_50.json")

meme_attribute = {}  # cleaned meme name -> cleaned image label
meme_filename = []   # original filenames of labelled memes, in column order
meme_list = []       # cleaned meme names, parallel to meme_filename

for col in df.columns:
    attributes = df.loc["file_attributes", col]

    # Skip images that carry no usable label: an empty attribute dict, a
    # missing value, or a dict without an "image_label" entry. Checking the
    # key up front avoids a KeyError on partially annotated records.
    if not isinstance(attributes, dict) or "image_label" not in attributes:
        continue

    # Read the filename once; it is both stored verbatim and used as the
    # basis for the shortened meme name.
    file_name = df.loc["filename", col]
    name = file_name.replace(".jpg", "").replace("High-Quality-", "")

    # Trim surrounding whitespace and double quotes, then drop commas so the
    # label is a single comma-free string.
    label = attributes["image_label"].strip().strip('"').replace(",", "")

    meme_attribute[name] = label
    meme_filename.append(file_name)
    meme_list.append(name)