khulnasoft committed on
Commit
589c7b6
1 Parent(s): cb204bf

Create Malware-Prediction.py

Browse files
Files changed (1) hide show
  1. Malware-Prediction.py +22 -0
Malware-Prediction.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Concatenate `train` and `test` (both defined outside this excerpt;
# presumably DataFrames) so the encoding below sees the values of both.
frames = [train, test]
df = pd.concat(frames)

# Columns that get replaced by their frequency-rank label further down.
list_frequency_encoding = [
    'AppVersion',
    'AvSigVersion',
    'Census_OSVersion',
    'EngineVersion',
    'OsBuildLab',
]
9
+
10
def frequency_encoding(feature):
    """Build a frequency-rank label for every distinct value of a column.

    Reads the module-level ``df``. Distinct values of ``df[feature]`` are
    ranked by how often they occur (0 = most frequent, in the order returned
    by ``Series.value_counts``). Every value that occurs exactly once shares
    a single extra label: one more than the highest rank given to a value
    that occurs more than once, or 0 when no such value exists.

    Missing values are not counted by ``value_counts`` and therefore do not
    appear in the result.

    Args:
        feature: Name of the column of ``df`` to encode.

    Returns:
        A dict mapping each distinct value to its integer label.
    """
    # Work on the counts Series directly instead of going through
    # reset_index(): the column names reset_index() produces differ between
    # pandas versions ('index'/<feature> before 2.0, <feature>/'count' after).
    counts = df[feature].value_counts()
    ranks = np.arange(len(counts))
    is_singleton = counts.to_numpy() == 1

    # Shared label for all values seen exactly once.
    frequent_ranks = ranks[~is_singleton]
    rare_label = int(frequent_ranks.max()) + 1 if frequent_ranks.size else 0

    labels = np.where(is_singleton, rare_label, ranks)
    return dict(zip(counts.index, labels.tolist()))
18
+
19
# Replace each listed column of `df` with its frequency-rank label.
for feature in tqdm(list_frequency_encoding):
    freq_enc_dict = frequency_encoding(feature)
    # Values missing from the mapping become NaN here; the int64 cast on the
    # next line raises if any NaN is left in the column.
    df[feature] = df[feature].map(
        lambda value: freq_enc_dict.get(value, np.nan)
    )
    df[feature] = df[feature].astype('int64')