Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Alec
committed on
Commit
•
8101eff
1
Parent(s):
284240f
update to gridsearch
Browse files - baseline.py +8 -5
baseline.py
CHANGED
@@ -5,8 +5,8 @@ import fire
|
|
5 |
import numpy as np
|
6 |
from scipy import sparse
|
7 |
|
8 |
-
from sklearn.model_selection import PredefinedSplit
|
9 |
-
from sklearn.linear_model import
|
10 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
11 |
|
12 |
def _load_split(data_dir, source, split, n=np.inf):
|
@@ -35,10 +35,13 @@ def main(data_dir, log_dir, source='xl-1542M-k40', n_train=500000, n_valid=10000
|
|
35 |
valid_features = vect.transform(valid_texts)
|
36 |
test_features = vect.transform(test_texts)
|
37 |
|
38 |
-
|
|
|
39 |
split = PredefinedSplit([-1]*n_train+[0]*n_valid)
|
40 |
-
|
41 |
-
|
|
|
|
|
42 |
valid_accuracy = model.score(valid_features, valid_labels)*100.
|
43 |
test_accuracy = model.score(test_features, test_labels)*100.
|
44 |
data = {
|
|
|
5 |
import numpy as np
|
6 |
from scipy import sparse
|
7 |
|
8 |
+
from sklearn.model_selection import PredefinedSplit, GridSearchCV
|
9 |
+
from sklearn.linear_model import LogisticRegression
|
10 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
11 |
|
12 |
def _load_split(data_dir, source, split, n=np.inf):
|
|
|
35 |
valid_features = vect.transform(valid_texts)
|
36 |
test_features = vect.transform(test_texts)
|
37 |
|
38 |
+
model = LogisticRegression(solver='liblinear')
|
39 |
+
params = {'C': [1/64, 1/32, 1/16, 1/8, 1/4, 1/2, 1, 2, 4, 8, 16, 32, 64]}
|
40 |
split = PredefinedSplit([-1]*n_train+[0]*n_valid)
|
41 |
+
search = GridSearchCV(model, params, cv=split, n_jobs=n_jobs, verbose=verbose, refit=False)
|
42 |
+
search.fit(sparse.vstack([train_features, valid_features]), train_labels+valid_labels)
|
43 |
+
model = model.set_params(**search.best_params_)
|
44 |
+
model.fit(train_features, train_labels)
|
45 |
valid_accuracy = model.score(valid_features, valid_labels)*100.
|
46 |
test_accuracy = model.score(test_features, test_labels)*100.
|
47 |
data = {
|