Code snippets for page Node List¶

Download node_list.py. Browse the code snippet index.

# -*- coding: utf-8 -*-
# Generated by codesnippet sphinx extension on 2019-02-02

import mdp
import numpy as np
np.random.seed(0)
# MinMaxScaler demo: fit on four 2-column samples, then map values onto the
# fitted (0, 1) feature range; values outside the fitted range land outside it.
from sklearn.preprocessing import MinMaxScaler

data = [
    [-1, 2],
    [-0.5, 6],
    [0, 10],
    [1, 18],
]
scaler = MinMaxScaler()
fit_result = scaler.fit(data)
print(fit_result)
# Expected:
## MinMaxScaler(copy=True, feature_range=(0, 1))
print(scaler.data_max_)
# Expected:
## [  1.  18.]
scaled_training_data = scaler.transform(data)
print(scaled_training_data)
# Expected:
## [[ 0.    0.  ]
##  [ 0.25  0.25]
##  [ 0.5   0.5 ]
##  [ 1.    1.  ]]
unseen_sample = [[2, 2]]
print(scaler.transform(unseen_sample))
# Expected:
## [[ 1.5  0. ]]

# StandardScaler demo: fit on four 2-column samples, inspect the learned
# per-column mean, then standardise both the training data and a new sample.
from sklearn.preprocessing import StandardScaler

data = [
    [0, 0],
    [0, 0],
    [1, 1],
    [1, 1],
]
scaler = StandardScaler()
fit_result = scaler.fit(data)
print(fit_result)
# Expected:
## StandardScaler(copy=True, with_mean=True, with_std=True)
print(scaler.mean_)
# Expected:
## [ 0.5  0.5]
standardised_training_data = scaler.transform(data)
print(standardised_training_data)
# Expected:
## [[-1. -1.]
##  [-1. -1.]
##  [ 1.  1.]
##  [ 1.  1.]]
unseen_sample = [[2, 2]]
print(scaler.transform(unseen_sample))
# Expected:
## [[ 3.  3.]]

# PolynomialFeatures demo: expand the two columns of X into polynomial terms,
# first with all degree-2 terms, then with interaction terms only.
# The class has to be imported here: nothing earlier in the script brings
# PolynomialFeatures into scope, so the snippet would otherwise raise NameError.
from sklearn.preprocessing import PolynomialFeatures

X = np.arange(6).reshape(3, 2)
X
# Expected:
## array([[0, 1],
##        [2, 3],
##        [4, 5]])
poly = PolynomialFeatures(2)
poly.fit_transform(X)
# Expected:
## array([[  1.,   0.,   1.,   0.,   0.,   1.],
##        [  1.,   2.,   3.,   4.,   6.,   9.],
##        [  1.,   4.,   5.,  16.,  20.,  25.]])
poly = PolynomialFeatures(interaction_only=True)
poly.fit_transform(X)
# Expected:
## array([[  1.,   0.,   1.,   0.],
##        [  1.,   2.,   3.,   6.],
##        [  1.,   4.,   5.,  20.]])

# OneHotEncoder demo: fit on four rows of three integer-coded columns, inspect
# the learned layout, then encode one new row.
from sklearn.preprocessing import OneHotEncoder

enc = OneHotEncoder()
training_rows = [
    [0, 0, 3],
    [1, 1, 0],
    [0, 2, 1],
    [1, 0, 2],
]
enc.fit(training_rows)
# Expected:
## OneHotEncoder(categorical_features='all', dtype=<... 'numpy.float64'>,
##        handle_unknown='error', n_values='auto', sparse=True)
enc.n_values_
# Expected:
## array([2, 3, 4])
enc.feature_indices_
# Expected:
## array([0, 2, 5, 9])
encoded_row = enc.transform([[0, 1, 1]])
encoded_row.toarray()
# Expected:
## array([[ 1.,  0.,  0.,  1.,  0.,  0.,  1.,  0.,  0.]])

# QuantileTransformer demo: transform 25 sorted, normally distributed samples
# using 10 quantiles. Both the data generator and the transformer are seeded.
import numpy as np
from sklearn.preprocessing import QuantileTransformer

rng = np.random.RandomState(0)
normal_samples = rng.normal(loc=0.5, scale=0.25, size=(25, 1))
X = np.sort(normal_samples, axis=0)
qt = QuantileTransformer(n_quantiles=10, random_state=0)
qt.fit_transform(X)
# Expected:
## array([...])

# LabelEncoder demo (numeric labels): learn the distinct labels, encode a
# sequence as class indices, and decode the indices back to labels.
from sklearn import preprocessing

le = preprocessing.LabelEncoder()
numeric_labels = [1, 2, 2, 6]
le.fit(numeric_labels)
# Expected:
## LabelEncoder()
le.classes_
# Expected:
## array([1, 2, 6])
labels_to_encode = [1, 1, 2, 6]
le.transform(labels_to_encode)
# Expected:
## array([0, 0, 1, 2]...)
indices_to_decode = [0, 0, 1, 2]
le.inverse_transform(indices_to_decode)
# Expected:
## array([1, 1, 2, 6])

# LabelEncoder demo (string labels): same round trip as above-style numeric
# encoding, but with city names as the labels.
le = preprocessing.LabelEncoder()
city_labels = ["paris", "paris", "tokyo", "amsterdam"]
le.fit(city_labels)
# Expected:
## LabelEncoder()
list(le.classes_)
# Expected:
## ['amsterdam', 'paris', 'tokyo']
cities_to_encode = ["tokyo", "tokyo", "paris"]
le.transform(cities_to_encode)
# Expected:
## array([2, 2, 1]...)
decoded_cities = le.inverse_transform([2, 2, 1])
list(decoded_cities)
# Expected:
## ['tokyo', 'tokyo', 'paris']

# LabelBinarizer demo: learn four distinct integer labels, then turn two of
# them into one-hot indicator rows.
from sklearn import preprocessing

lb = preprocessing.LabelBinarizer()
observed_labels = [1, 2, 6, 4, 2]
lb.fit(observed_labels)
# Expected:
## LabelBinarizer(neg_label=0, pos_label=1, sparse_output=False)
lb.classes_
# Expected:
## array([1, 2, 4, 6])
labels_to_binarize = [1, 6]
lb.transform(labels_to_binarize)
# Expected:
## array([[1, 0, 0, 0],
##        [0, 0, 0, 1]])

# LabelBinarizer demo (two classes): a yes/no target becomes a single
# 0/1 indicator column.
lb = preprocessing.LabelBinarizer()
answers = ['yes', 'no', 'no', 'yes']
lb.fit_transform(answers)
# Expected:
## array([[1],
##        [0],
##        [0],
##        [1]])

# LabelBinarizer demo (indicator-matrix input): refit the existing `lb` on a
# 2x3 0/1 matrix, then binarize a sequence of column indices.
import numpy as np

indicator_matrix = np.array([[0, 1, 1], [1, 0, 0]])
lb.fit(indicator_matrix)
# Expected:
## LabelBinarizer(neg_label=0, pos_label=1, sparse_output=False)
lb.classes_
# Expected:
## array([0, 1, 2])
labels_to_binarize = [0, 1, 2, 1]
lb.transform(labels_to_binarize)
# Expected:
## array([[1, 0, 0],
##        [0, 1, 0],
##        [0, 0, 1],
##        [0, 1, 0]])

# MultiLabelBinarizer demo: turn per-sample label collections into a 0/1
# indicator matrix, first for integer tuples, then for sets of strings.
from sklearn.preprocessing import MultiLabelBinarizer

mlb = MultiLabelBinarizer()
integer_label_sets = [(1, 2), (3,)]
mlb.fit_transform(integer_label_sets)
# Expected:
## array([[1, 1, 0],
##        [0, 0, 1]])
mlb.classes_
# Expected:
## array([1, 2, 3])

# Refit the same binarizer on string labels.
genre_label_sets = [{'sci-fi', 'thriller'}, {'comedy'}]
mlb.fit_transform(genre_label_sets)
# Expected:
## array([[0, 1, 1],
##        [1, 0, 0]])
list(mlb.classes_)
# Expected:
## ['comedy', 'sci-fi', 'thriller']

# GridSearchCV demo: search over SVC kernel and C values on the iris data set.
# GridSearchCV is imported from sklearn.model_selection, the module this
# script already uses for cross_val_score, instead of the deprecated
# sklearn.grid_search module.
from sklearn import svm, datasets
from sklearn.model_selection import GridSearchCV

iris = datasets.load_iris()
parameters = {'kernel': ('linear', 'rbf'), 'C': [1, 10]}
svr = svm.SVC()
clf = GridSearchCV(svr, parameters)
clf.fit(iris.data, iris.target)

# Expected (the exact repr varies with the installed scikit-learn version):
## GridSearchCV(cv=None, error_score=...,
##        estimator=SVC(C=1.0, cache_size=..., class_weight=..., coef0=...,
##                      decision_function_shape='ovr', degree=..., gamma=...,
##                      kernel='rbf', max_iter=-1, probability=False,
##                      random_state=None, shrinking=True, tol=...,
##                      verbose=False),
##        fit_params=None, iid=..., n_jobs=1,
##        param_grid=..., pre_dispatch=..., refit=..., return_train_score=...,
##        scoring=..., verbose=...)

# KNeighborsClassifier demo: 3-nearest-neighbour classification of four
# one-dimensional points, with a label prediction and class probabilities.
from sklearn.neighbors import KNeighborsClassifier

X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]
neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(X, y)
# Expected:
## KNeighborsClassifier(...)
predicted_label = neigh.predict([[1.1]])
print(predicted_label)
# Expected:
## [0]
class_probabilities = neigh.predict_proba([[0.9]])
print(class_probabilities)
# Expected:
## [[ 0.66666667  0.33333333]]

# RadiusNeighborsClassifier demo: classify a query point from the training
# points found with radius=1.0.
from sklearn.neighbors import RadiusNeighborsClassifier

X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]
neigh = RadiusNeighborsClassifier(radius=1.0)
neigh.fit(X, y)
# Expected:
## RadiusNeighborsClassifier(...)
predicted_label = neigh.predict([[1.5]])
print(predicted_label)
# Expected:
## [0]

# KNeighborsRegressor demo: 2-nearest-neighbour regression on four
# one-dimensional points.
from sklearn.neighbors import KNeighborsRegressor

X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]
neigh = KNeighborsRegressor(n_neighbors=2)
neigh.fit(X, y)
# Expected:
## KNeighborsRegressor(...)
predicted_value = neigh.predict([[1.5]])
print(predicted_value)
# Expected:
## [ 0.5]

# RadiusNeighborsRegressor demo: regression from the training points found
# with radius=1.0 around the query point.
from sklearn.neighbors import RadiusNeighborsRegressor

X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]
neigh = RadiusNeighborsRegressor(radius=1.0)
neigh.fit(X, y)
# Expected:
## RadiusNeighborsRegressor(...)
predicted_value = neigh.predict([[1.5]])
print(predicted_value)
# Expected:
## [ 0.5]

# NearestCentroid demo: fit on two groups of three 2-D points and classify a
# new point.
# The class is imported from the public sklearn.neighbors package rather than
# the internal nearest_centroid submodule, whose path is not a stable API.
from sklearn.neighbors import NearestCentroid
import numpy as np
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])
clf = NearestCentroid()
clf.fit(X, y)
# Expected:
## NearestCentroid(metric='euclidean', shrink_threshold=None)
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]

# RFE demo: recursive feature elimination with a linear SVR on the Friedman #1
# data set, keeping 5 of the 10 features and removing one feature per step.
from sklearn.datasets import make_friedman1
from sklearn.feature_selection import RFE
from sklearn.svm import SVR
X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)
estimator = SVR(kernel="linear")
# Pass n_features_to_select by keyword: it is clearer than a bare positional
# 5 and does not depend on the constructor's parameter order.
selector = RFE(estimator, n_features_to_select=5, step=1)
selector = selector.fit(X, y)
selector.support_
# Expected:
## array([ True,  True,  True,  True,  True,
##         False, False, False, False, False], dtype=bool)
selector.ranking_
# Expected:
## array([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])

# RFECV demo: recursive feature elimination with 5-fold cross-validation,
# using a linear SVR on the Friedman #1 data set.
from sklearn.datasets import make_friedman1
from sklearn.feature_selection import RFECV
from sklearn.svm import SVR

X, y = make_friedman1(
    n_samples=50,
    n_features=10,
    random_state=0,
)
estimator = SVR(kernel="linear")
selector = RFECV(estimator, step=1, cv=5)
selector = selector.fit(X, y)
selector.support_
# Expected:
## array([ True,  True,  True,  True,  True,
##         False, False, False, False, False], dtype=bool)
selector.ranking_
# Expected:
## array([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])

from sklearn import linear_model
clf = linear_model.BayesianRidge()
clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])

# Expected:
## BayesianRidge(alpha_1=1e-06, alpha_2=1e-06, compute_score=False,
##         copy_X=True, fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06,
##         n_iter=300, normalize=False, tol=0.001, verbose=False)
clf.predict([[1, 1]])
# Expected:
## array([ 1.])

from sklearn import linear_model
clf = linear_model.ARDRegression()
clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])

# Expected:
## ARDRegression(alpha_1=1e-06, alpha_2=1e-06, compute_score=False,
##         copy_X=True, fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06,
##         n_iter=300, normalize=False, threshold_lambda=10000.0, tol=0.001,
##         verbose=False)
clf.predict([[1, 1]])
# Expected:
## array([ 1.])

from sklearn import linear_model
reg = linear_model.Lars(n_nonzero_coefs=1)
reg.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111])

# Expected:
## Lars(copy_X=True, eps=..., fit_intercept=True, fit_path=True,
##    n_nonzero_coefs=1, normalize=True, positive=False, precompute='auto',
##    verbose=False)
print(reg.coef_)
# Expected:
## [ 0. -1.11...]

from sklearn import linear_model
reg = linear_model.LassoLars(alpha=0.01)
reg.fit([[-1, 1], [0, 0], [1, 1]], [-1, 0, -1])

# Expected:
## LassoLars(alpha=0.01, copy_X=True, eps=..., fit_intercept=True,
##      fit_path=True, max_iter=500, normalize=True, positive=False,
##      precompute='auto', verbose=False)
print(reg.coef_)
# Expected:
## [ 0.         -0.963257...]

from sklearn import linear_model
reg = linear_model.LassoLarsIC(criterion='bic')
reg.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111])

# Expected:
## LassoLarsIC(copy_X=True, criterion='bic', eps=..., fit_intercept=True,
##       max_iter=500, normalize=True, positive=False, precompute='auto',
##       verbose=False)
print(reg.coef_)
# Expected:
## [ 0.  -1.11...]

from sklearn.linear_model import ElasticNet
from sklearn.datasets import make_regression
X, y = make_regression(n_features=2, random_state=0)
regr = ElasticNet(random_state=0)
regr.fit(X, y)
# Expected:
## ElasticNet(alpha=1.0, copy_X=True, fit_intercept=True, l1_ratio=0.5,
##       max_iter=1000, normalize=False, positive=False, precompute=False,
##       random_state=0, selection='cyclic', tol=0.0001, warm_start=False)
print(regr.coef_)
# Expected:
## [ 18.83816048  64.55968825]
print(regr.intercept_)
# Expected:
## 1.45126075617
print(regr.predict([[0, 0]]))
# Expected:
## [ 1.45126076]

from sklearn import linear_model
clf = linear_model.Lasso(alpha=0.1)
clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])
# Expected:
## Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
##    normalize=False, positive=False, precompute=False, random_state=None,
##    selection='cyclic', tol=0.0001, warm_start=False)
print(clf.coef_)
# Expected:
## [ 0.85  0.  ]
print(clf.intercept_)
# Expected:
## 0.15

from sklearn.linear_model import ElasticNetCV
from sklearn.datasets import make_regression
X, y = make_regression(n_features=2, random_state=0)
regr = ElasticNetCV(cv=5, random_state=0)
regr.fit(X, y)
# Expected:
## ElasticNetCV(alphas=None, copy_X=True, cv=5, eps=0.001, fit_intercept=True,
##        l1_ratio=0.5, max_iter=1000, n_alphas=100, n_jobs=1,
##        normalize=False, positive=False, precompute='auto', random_state=0,
##        selection='cyclic', tol=0.0001, verbose=0)
print(regr.alpha_)
# Expected:
## 0.19947279427
print(regr.intercept_)
# Expected:
## 0.398882965428
print(regr.predict([[0, 0]]))
# Expected:
## [ 0.39888297]

from sklearn import linear_model
clf = linear_model.MultiTaskElasticNet(alpha=0.1)
clf.fit([[0,0], [1, 1], [2, 2]], [[0, 0], [1, 1], [2, 2]])

# Expected:
## MultiTaskElasticNet(alpha=0.1, copy_X=True, fit_intercept=True,
##         l1_ratio=0.5, max_iter=1000, normalize=False, random_state=None,
##         selection='cyclic', tol=0.0001, warm_start=False)
print(clf.coef_)
# Expected:
## [[ 0.45663524  0.45612256]
##  [ 0.45663524  0.45612256]]
print(clf.intercept_)
# Expected:
## [ 0.0872422  0.0872422]

from sklearn import linear_model
clf = linear_model.MultiTaskLasso(alpha=0.1)
clf.fit([[0,0], [1, 1], [2, 2]], [[0, 0], [1, 1], [2, 2]])
# Expected:
## MultiTaskLasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
##         normalize=False, random_state=None, selection='cyclic', tol=0.0001,
##         warm_start=False)
print(clf.coef_)
# Expected:
## [[ 0.89393398  0.        ]
##  [ 0.89393398  0.        ]]
print(clf.intercept_)
# Expected:
## [ 0.10606602  0.10606602]

from sklearn import linear_model
clf = linear_model.MultiTaskElasticNetCV()
clf.fit([[0,0], [1, 1], [2, 2]],
        [[0, 0], [1, 1], [2, 2]])

# Expected:
## MultiTaskElasticNetCV(alphas=None, copy_X=True, cv=None, eps=0.001,
##        fit_intercept=True, l1_ratio=0.5, max_iter=1000, n_alphas=100,
##        n_jobs=1, normalize=False, random_state=None, selection='cyclic',
##        tol=0.0001, verbose=0)
print(clf.coef_)
# Expected:
## [[ 0.52875032  0.46958558]
##  [ 0.52875032  0.46958558]]
print(clf.intercept_)
# Expected:
## [ 0.00166409  0.00166409]

import numpy as np
from sklearn import linear_model
X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
Y = np.array([1, 1, 2, 2])
clf = linear_model.SGDClassifier()
clf.fit(X, Y)

# Expected:
## SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,
##        eta0=0.0, fit_intercept=True, l1_ratio=0.15,
##        learning_rate='optimal', loss='hinge', max_iter=None, n_iter=None,
##        n_jobs=1, penalty='l2', power_t=0.5, random_state=None,
##        shuffle=True, tol=None, verbose=0, warm_start=False)

print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]

import numpy as np
from sklearn import linear_model
n_samples, n_features = 10, 5
np.random.seed(0)
y = np.random.randn(n_samples)
X = np.random.randn(n_samples, n_features)
clf = linear_model.SGDRegressor()
clf.fit(X, y)

# Expected:
## SGDRegressor(alpha=0.0001, average=False, epsilon=0.1, eta0=0.01,
##        fit_intercept=True, l1_ratio=0.15, learning_rate='invscaling',
##        loss='squared_loss', max_iter=None, n_iter=None, penalty='l2',
##        power_t=0.25, random_state=None, shuffle=True, tol=None,
##        verbose=0, warm_start=False)

from sklearn.linear_model import Ridge
import numpy as np
n_samples, n_features = 10, 5
np.random.seed(0)
y = np.random.randn(n_samples)
X = np.random.randn(n_samples, n_features)
clf = Ridge(alpha=1.0)
clf.fit(X, y)
# Expected:
## Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
##       normalize=False, random_state=None, solver='auto', tol=0.001)

from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.datasets import make_classification
X, y = make_classification(n_features=4, random_state=0)
clf = PassiveAggressiveClassifier(random_state=0)
clf.fit(X, y)
# Expected:
## PassiveAggressiveClassifier(C=1.0, average=False, class_weight=None,
##               fit_intercept=True, loss='hinge', max_iter=None, n_iter=None,
##               n_jobs=1, random_state=0, shuffle=True, tol=None, verbose=0,
##               warm_start=False)
print(clf.coef_)
# Expected:
## [[ 0.49324685  1.0552176   1.49519589  1.33798314]]
print(clf.intercept_)
# Expected:
## [ 2.18438388]
print(clf.predict([[0, 0, 0, 0]]))
# Expected:
## [1]

from sklearn.linear_model import PassiveAggressiveRegressor
from sklearn.datasets import make_regression
X, y = make_regression(n_features=4, random_state=0)
regr = PassiveAggressiveRegressor(random_state=0)
regr.fit(X, y)
# Expected:
## PassiveAggressiveRegressor(C=1.0, average=False, epsilon=0.1,
##               fit_intercept=True, loss='epsilon_insensitive',
##               max_iter=None, n_iter=None, random_state=0, shuffle=True,
##               tol=None, verbose=0, warm_start=False)
print(regr.coef_)
# Expected:
## [ 20.48736655  34.18818427  67.59122734  87.94731329]
print(regr.intercept_)
# Expected:
## [-0.02306214]
print(regr.predict([[0, 0, 0, 0]]))
# Expected:
## [-0.02306214]

# Construct a RandomizedLasso with its default arguments; nothing is fitted.
# NOTE(review): RandomizedLasso appears to be deprecated/removed in newer
# scikit-learn releases — confirm against the installed version.
from sklearn.linear_model import RandomizedLasso
randomized_lasso = RandomizedLasso()

# Construct a RandomizedLogisticRegression with its default arguments; nothing
# is fitted.
# NOTE(review): presumably deprecated alongside RandomizedLasso — confirm.
from sklearn.linear_model import RandomizedLogisticRegression
randomized_logistic = RandomizedLogisticRegression()

from sklearn.svm import LinearSVC
from sklearn.datasets import make_classification
X, y = make_classification(n_features=4, random_state=0)
clf = LinearSVC(random_state=0)
clf.fit(X, y)
# Expected:
## LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
##      intercept_scaling=1, loss='squared_hinge', max_iter=1000,
##      multi_class='ovr', penalty='l2', random_state=0, tol=0.0001,
##      verbose=0)
print(clf.coef_)
# Expected:
## [[ 0.08551385  0.39414796  0.49847831  0.37513797]]
print(clf.intercept_)
# Expected:
## [ 0.28418066]
print(clf.predict([[0, 0, 0, 0]]))
# Expected:
## [1]

from sklearn.svm import LinearSVR
from sklearn.datasets import make_regression
X, y = make_regression(n_features=4, random_state=0)
regr = LinearSVR(random_state=0)
regr.fit(X, y)
# Expected:
## LinearSVR(C=1.0, dual=True, epsilon=0.0, fit_intercept=True,
##      intercept_scaling=1.0, loss='epsilon_insensitive', max_iter=1000,
##      random_state=0, tol=0.0001, verbose=0)
print(regr.coef_)
# Expected:
## [ 16.35750999  26.91499923  42.30652207  60.47843124]
print(regr.intercept_)
# Expected:
## [-4.29756543]
print(regr.predict([[0, 0, 0, 0]]))
# Expected:
## [-4.29756543]

import numpy as np
X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
y = np.array([1, 1, 2, 2])
from sklearn.svm import SVC
clf = SVC()
clf.fit(X, y)
# Expected:
## SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
##     decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
##     max_iter=-1, probability=False, random_state=None, shrinking=True,
##     tol=0.001, verbose=False)
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]

import numpy as np
X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
y = np.array([1, 1, 2, 2])
from sklearn.svm import NuSVC
clf = NuSVC()
clf.fit(X, y)
# Expected:
## NuSVC(cache_size=200, class_weight=None, coef0=0.0,
##       decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
##       max_iter=-1, nu=0.5, probability=False, random_state=None,
##       shrinking=True, tol=0.001, verbose=False)
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]

from sklearn.svm import SVR
import numpy as np
n_samples, n_features = 10, 5
np.random.seed(0)
y = np.random.randn(n_samples)
X = np.random.randn(n_samples, n_features)
clf = SVR(C=1.0, epsilon=0.2)
clf.fit(X, y)
# Expected:
## SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.2, gamma='auto',
##     kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

from sklearn.svm import NuSVR
import numpy as np
n_samples, n_features = 10, 5
np.random.seed(0)
y = np.random.randn(n_samples)
X = np.random.randn(n_samples, n_features)
clf = NuSVR(C=1.0, nu=0.1)
clf.fit(X, y)
# Expected:
## NuSVR(C=1.0, cache_size=200, coef0=0.0, degree=3, gamma='auto',
##       kernel='rbf', max_iter=-1, nu=0.1, shrinking=True, tol=0.001,
##       verbose=False)

import numpy as np
X = np.array([[1, 1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])
from sklearn.decomposition import NMF
model = NMF(n_components=2, init='random', random_state=0)
W = model.fit_transform(X)
H = model.components_

import numpy as np
from sklearn.decomposition import PCA
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
pca = PCA(n_components=2)
pca.fit(X)
# Expected:
## PCA(copy=True, iterated_power='auto', n_components=2, random_state=None,
##   svd_solver='auto', tol=0.0, whiten=False)
print(pca.explained_variance_ratio_)
# Expected:
## [ 0.99244...  0.00755...]
print(pca.singular_values_)
# Expected:
## [ 6.30061...  0.54980...]

pca = PCA(n_components=2, svd_solver='full')
pca.fit(X)
# Expected:
## PCA(copy=True, iterated_power='auto', n_components=2, random_state=None,
##   svd_solver='full', tol=0.0, whiten=False)
print(pca.explained_variance_ratio_)
# Expected:
## [ 0.99244...  0.00755...]
print(pca.singular_values_)
# Expected:
## [ 6.30061...  0.54980...]

pca = PCA(n_components=1, svd_solver='arpack')
pca.fit(X)
# Expected:
## PCA(copy=True, iterated_power='auto', n_components=1, random_state=None,
##   svd_solver='arpack', tol=0.0, whiten=False)
print(pca.explained_variance_ratio_)
# Expected:
## [ 0.99244...]
print(pca.singular_values_)
# Expected:
## [ 6.30061...]

# RandomizedPCA demo: fit a 2-component model on six 2-D points, then print
# the explained-variance ratios and the singular values.
# NOTE(review): RandomizedPCA looks to have been superseded by
# PCA(svd_solver='randomized') in newer scikit-learn releases — confirm
# against the installed version before relying on this import.
import numpy as np
from sklearn.decomposition import RandomizedPCA
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
pca = RandomizedPCA(n_components=2)
pca.fit(X)
# Expected:
## RandomizedPCA(copy=True, iterated_power=2, n_components=2,
##        random_state=None, whiten=False)
print(pca.explained_variance_ratio_)
# Expected:
## [ 0.99244...  0.00755...]
print(pca.singular_values_)
# Expected:
## [ 6.30061...  0.54980...]

from sklearn.decomposition import TruncatedSVD
from sklearn.random_projection import sparse_random_matrix
X = sparse_random_matrix(100, 100, density=0.01, random_state=42)
svd = TruncatedSVD(n_components=5, n_iter=7, random_state=42)
svd.fit(X)
# Expected:
## TruncatedSVD(algorithm='randomized', n_components=5, n_iter=7,
##         random_state=42, tol=0.0)
print(svd.explained_variance_ratio_)
# Expected:
## [ 0.0606... 0.0584... 0.0497... 0.0434... 0.0372...]
print(svd.explained_variance_ratio_.sum())
# Expected:
## 0.249...
print(svd.singular_values_)
# Expected:
## [ 2.5841... 2.5245... 2.3201... 2.1753... 2.0443...]

# DecisionTreeClassifier demo: 10-fold cross-validation scores on the iris
# data set, with a seeded tree.
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier

iris = load_iris()
clf = DecisionTreeClassifier(random_state=0)
cross_val_score(
    clf,
    iris.data,
    iris.target,
    cv=10,
)

# Expected:
## array([ 1.     ,  0.93...,  0.86...,  0.93...,  0.93...,
##         0.93...,  0.93...,  1.     ,  0.93...,  1.      ])

from sklearn.datasets import load_boston
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeRegressor
boston = load_boston()
regressor = DecisionTreeRegressor(random_state=0)
cross_val_score(regressor, boston.data, boston.target, cv=10)

# Expected:
## array([ 0.61..., 0.57..., -0.34..., 0.41..., 0.75...,
##         0.07..., 0.29..., 0.33..., -1.42..., -1.77...])

# KMeans demo: split six 2-D points into two clusters with a seeded model,
# then inspect labels, predictions for new points, and the cluster centres.
import numpy as np
from sklearn.cluster import KMeans

X = np.array([
    [1, 2], [1, 4], [1, 0],
    [4, 2], [4, 4], [4, 0],
])
kmeans = KMeans(n_clusters=2, random_state=0)
kmeans = kmeans.fit(X)
kmeans.labels_
# Expected:
## array([0, 0, 0, 1, 1, 1], dtype=int32)
new_points = [[0, 0], [4, 4]]
kmeans.predict(new_points)
# Expected:
## array([0, 1], dtype=int32)
kmeans.cluster_centers_
# Expected:
## array([[ 1.,  2.],
##        [ 4.,  2.]])

# Birch demo: cluster six 2-D points without a final global clustering step
# (n_clusters=None) and predict the cluster of each training point.
from sklearn.cluster import Birch

X = [
    [0, 1], [0.3, 1], [-0.3, 1],
    [0, -1], [0.3, -1], [-0.3, -1],
]
brc = Birch(
    branching_factor=50,
    n_clusters=None,
    threshold=0.5,
    compute_labels=True,
)
brc.fit(X)
# Expected:
## Birch(branching_factor=50, compute_labels=True, copy=True, n_clusters=None,
##    threshold=0.5)
brc.predict(X)
# Expected:
## array([0, 0, 0, 1, 1, 1])

from sklearn.cross_decomposition import PLSRegression
X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]
Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
pls2 = PLSRegression(n_components=2)
pls2.fit(X, Y)

# Expected:
## PLSRegression(copy=True, max_iter=500, n_components=2, scale=True,
##         tol=1e-06)
Y_pred = pls2.predict(X)

from sklearn.cross_decomposition import PLSCanonical
X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]
Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
plsca = PLSCanonical(n_components=2)
plsca.fit(X, Y)

# Expected:
## PLSCanonical(algorithm='nipals', copy=True, max_iter=500, n_components=2,
##              scale=True, tol=1e-06)
X_c, Y_c = plsca.transform(X, Y)

from sklearn.cross_decomposition import CCA
X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [3.,5.,4.]]
Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
cca = CCA(n_components=1)
cca.fit(X, Y)

# Expected:
## CCA(copy=True, max_iter=500, n_components=1, scale=True, tol=1e-06)
X_c, Y_c = cca.transform(X, Y)

# GaussianProcess demo: fit the model to six samples of f(x) = x * sin(x).
# NOTE(review): GaussianProcess (with theta0/thetaL/thetaU) looks like the
# legacy API that newer scikit-learn releases replaced with
# GaussianProcessRegressor — confirm against the installed version.
import numpy as np
from sklearn.gaussian_process import GaussianProcess
# Column vector of the six sample locations.
X = np.array([[1., 3., 5., 6., 7., 8.]]).T
# Flatten the targets to one dimension.
y = (X * np.sin(X)).ravel()
gp = GaussianProcess(theta0=0.1, thetaL=.001, thetaU=1.)
gp.fit(X, y)
# Expected:
## GaussianProcess(beta0=None...
##         ...

import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])
clf = LinearDiscriminantAnalysis()
clf.fit(X, y)
# Expected:
## LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=None,
##               solver='svd', store_covariance=False, tol=0.0001)
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]

from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
import numpy as np
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])
clf = QuadraticDiscriminantAnalysis()
clf.fit(X, y)

# Expected:
## QuadraticDiscriminantAnalysis(priors=None, reg_param=0.0,
##                               store_covariance=False,
##                               store_covariances=None, tol=0.0001)
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]

from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
X, y = make_classification(n_samples=1000, n_features=4,
                           n_informative=2, n_redundant=0,
                           random_state=0, shuffle=False)
clf = RandomForestClassifier(max_depth=2, random_state=0)
clf.fit(X, y)
# Expected:
## RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
##             max_depth=2, max_features='auto', max_leaf_nodes=None,
##             min_impurity_decrease=0.0, min_impurity_split=None,
##             min_samples_leaf=1, min_samples_split=2,
##             min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
##             oob_score=False, random_state=0, verbose=0, warm_start=False)
print(clf.feature_importances_)
# Expected:
## [ 0.17287856  0.80608704  0.01884792  0.00218648]
print(clf.predict([[0, 0, 0, 0]]))
# Expected:
## [1]

from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import make_regression
X, y = make_regression(n_features=4, n_informative=2,
                       random_state=0, shuffle=False)
regr = RandomForestRegressor(max_depth=2, random_state=0)
regr.fit(X, y)
# Expected:
## RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=2,
##            max_features='auto', max_leaf_nodes=None,
##            min_impurity_decrease=0.0, min_impurity_split=None,
##            min_samples_leaf=1, min_samples_split=2,
##            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
##            oob_score=False, random_state=0, verbose=0, warm_start=False)
print(regr.feature_importances_)
# Expected:
## [ 0.17339552  0.81594114  0.          0.01066333]
print(regr.predict([[0, 0, 0, 0]]))
# Expected:
## [-2.50699856]

# VotingClassifier demo: combine logistic regression, a random forest and
# Gaussian naive Bayes using hard voting, soft voting, and weighted soft
# voting with a flattened transform output.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier, VotingClassifier

clf1 = LogisticRegression(random_state=1)
clf2 = RandomForestClassifier(random_state=1)
clf3 = GaussianNB()
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])

# voting='hard'
eclf1 = VotingClassifier(
    estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)],
    voting='hard')
eclf1 = eclf1.fit(X, y)
hard_vote_predictions = eclf1.predict(X)
print(hard_vote_predictions)
# Expected:
## [1 1 1 2 2 2]

# voting='soft'
eclf2 = VotingClassifier(
    estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)],
    voting='soft')
eclf2 = eclf2.fit(X, y)
soft_vote_predictions = eclf2.predict(X)
print(soft_vote_predictions)
# Expected:
## [1 1 1 2 2 2]

# voting='soft' with per-estimator weights and flatten_transform=True
eclf3 = VotingClassifier(
    estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)],
    voting='soft',
    weights=[2, 1, 1],
    flatten_transform=True)
eclf3 = eclf3.fit(X, y)
weighted_vote_predictions = eclf3.predict(X)
print(weighted_vote_predictions)
# Expected:
## [1 1 1 2 2 2]
transformed = eclf3.transform(X)
print(transformed.shape)
# Expected:
## (6, 6)

# DictVectorizer demo: turn feature dicts into a dense array, map the array
# back to dicts, and transform a dict containing a feature not seen in fit.
from sklearn.feature_extraction import DictVectorizer

v = DictVectorizer(sparse=False)
D = [
    {'foo': 1, 'bar': 2},
    {'foo': 3, 'baz': 1},
]
X = v.fit_transform(D)
X
# Expected:
## array([[ 2.,  0.,  1.],
##        [ 0.,  1.,  3.]])
expected_round_trip = [
    {'bar': 2.0, 'foo': 1.0},
    {'baz': 1.0, 'foo': 3.0},
]
v.inverse_transform(X) == expected_round_trip
# Expected:
## True
sample_with_unseen_feature = {'foo': 4, 'unseen_feature': 3}
v.transform(sample_with_unseen_feature)
# Expected:
## array([[ 0.,  0.,  4.]])

from sklearn.feature_extraction import FeatureHasher
h = FeatureHasher(n_features=10)
D = [{'dog': 1, 'cat':2, 'elephant':4},{'dog': 2, 'run': 5}]
f = h.transform(D)
f.toarray()
# Expected:
## array([[ 0.,  0., -4., -1.,  0.,  0.,  0.,  0.,  0.,  2.],
##        [ 0.,  0.,  0., -2., -5.,  0.,  0.,  0.,  0.,  0.]])

from sklearn.kernel_ridge import KernelRidge
import numpy as np
n_samples, n_features = 10, 5
rng = np.random.RandomState(0)
y = rng.randn(n_samples)
X = rng.randn(n_samples, n_features)
clf = KernelRidge(alpha=1.0)
clf.fit(X, y)
# Expected:
## KernelRidge(alpha=1.0, coef0=1, degree=3, gamma=None, kernel='linear',
##             kernel_params=None)

import numpy as np
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
Y = np.array([1, 1, 1, 2, 2, 2])
from sklearn.naive_bayes import GaussianNB
clf = GaussianNB()
clf.fit(X, Y)
# Expected:
## GaussianNB(priors=None)
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]
clf_pf = GaussianNB()
clf_pf.partial_fit(X, Y, np.unique(Y))
# Expected:
## GaussianNB(priors=None)
print(clf_pf.predict([[-0.8, -1]]))
# Expected:
## [1]

import numpy as np
X = np.random.randint(5, size=(6, 100))
y = np.array([1, 2, 3, 4, 5, 6])
from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB()
clf.fit(X, y)
# Expected:
## MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)
print(clf.predict(X[2:3]))
# Expected:
## [3]

import numpy as np
X = np.random.randint(2, size=(6, 100))
Y = np.array([1, 2, 3, 4, 4, 5])
from sklearn.naive_bayes import BernoulliNB
clf = BernoulliNB()
clf.fit(X, Y)
# Expected:
## BernoulliNB(alpha=1.0, binarize=0.0, class_prior=None, fit_prior=True)
print(clf.predict(X[2:3]))
# Expected:
## [3]

import numpy as np
from sklearn.neural_network import BernoulliRBM
X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])
model = BernoulliRBM(n_components=2)
model.fit(X)
# Expected:
## BernoulliRBM(batch_size=10, learning_rate=0.1, n_components=2, n_iter=10,
##        random_state=None, verbose=0)

# LabelPropagation demo: mark roughly 30% of the iris labels as -1 and fit
# the model on the partially labelled data.
from sklearn import datasets
from sklearn.semi_supervised import LabelPropagation

label_prop_model = LabelPropagation()
iris = datasets.load_iris()

# Seeded generator, so the same samples are relabelled on every run.
rng = np.random.RandomState(42)
n_iris_samples = len(iris.target)
random_unlabeled_points = rng.rand(n_iris_samples) < 0.3
labels = np.copy(iris.target)
labels[random_unlabeled_points] = -1

label_prop_model.fit(iris.data, labels)

# Expected:
## LabelPropagation(...)

# LabelSpreading demo: mark roughly 30% of the iris labels as -1 and fit the
# model on the partially labelled data.
from sklearn import datasets
from sklearn.semi_supervised import LabelSpreading

label_prop_model = LabelSpreading()
iris = datasets.load_iris()

# Seeded generator, so the same samples are relabelled on every run.
rng = np.random.RandomState(42)
n_iris_samples = len(iris.target)
random_unlabeled_points = rng.rand(n_iris_samples) < 0.3
labels = np.copy(iris.target)
labels[random_unlabeled_points] = -1

label_prop_model.fit(iris.data, labels)

# Expected:
## LabelSpreading(...)