khulnasoft
committed on
Commit
•
589c7b6
1
Parent(s):
cb204bf
Create Malware-Prediction.py
Browse files- Malware-Prediction.py +22 -0
Malware-Prediction.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Stack the train and test frames into a single DataFrame.
frames = [train, test]
df = pd.concat(frames)

# Columns that are replaced by frequency-rank codes.
list_frequency_encoding = [
    'AppVersion',
    'AvSigVersion',
    'Census_OSVersion',
    'EngineVersion',
    'OsBuildLab',
]
|
9 |
+
|
10 |
+
def frequency_encoding(feature):
    """Build a frequency-rank encoding for one column of the global ``df``.

    Distinct values are ranked by how often they occur, following the order
    of ``Series.value_counts`` (0 is the most frequent value).  Every value
    that occurs exactly once shares a single label: one past the last rank
    given to a value that occurs more than once.

    Args:
        feature: Name of the column of ``df`` to encode.

    Returns:
        A dict mapping each distinct value counted by ``value_counts``
        (nulls are not counted) to its integer label.
    """
    # Work on the counts Series itself.  The column names produced by
    # ``value_counts().reset_index()`` changed in pandas 2.0, so selecting
    # 'index' / 'level_0' by name does not behave the same across versions.
    counts = df[feature].value_counts()

    # Counts come back in descending order, so values seen more than once
    # occupy ranks 0 .. repeated - 1 and the singletons follow them.
    repeated = int((counts > 1).sum())

    # Capping the rank folds all singletons into one shared label.  It also
    # stays well defined when no value repeats (everything gets label 0).
    return {
        value: min(rank, repeated)
        for rank, value in enumerate(counts.index)
    }
|
18 |
+
|
19 |
+
# Overwrite each listed column of df with its integer frequency-rank code.
for feature in tqdm(list_frequency_encoding):
    freq_enc_dict = frequency_encoding(feature)

    # Values absent from the mapping become NaN here, so the int64 cast
    # below will fail if any value is left unmapped.
    df[feature] = df[feature].map(
        lambda value: freq_enc_dict.get(value, np.nan)
    )
    df[feature] = df[feature].astype('int64')
|