Code snippets for page Node List

Download node_list.py. Browse the code snippet index.

# -*- coding: utf-8 -*-
# Generated by codesnippet sphinx extension on 2020-12-16

import mdp
import numpy as np
np.random.seed(0)
import mdp
from mdp import numx

def identity(x):
    """Return the input unchanged (identity expansion function)."""
    return x

def u3(x):
    """Cube of the absolute value of x — a simple nonlinear transformation."""
    return numx.absolute(x) ** 3

def norm2(x):
    """Return an Nx1 array holding the Euclidean norm of each row of x."""
    squared_sums = (x ** 2).sum(axis=1)
    return (squared_sums ** 0.5).reshape((-1, 1))

# Expand a small data set with identity, |x|**3 and row-norm features.
x = numx.array([[-2., 2.], [0.2, 0.3], [0.6, 1.2]])
gen = mdp.nodes.GeneralExpansionNode(funcs=[identity, u3, norm2])
print(gen.execute(x))
# The output below was pasted verbatim as bare lines in the original, which
# made the whole file a SyntaxError; it is commented out here in the page's
# usual "# Expected:" convention.
# Expected:
## [[-2.          2.          8.          8.          2.82842712]
##  [ 0.2         0.3         0.008       0.027       0.36055513]
##  [ 0.6         1.2         0.216       1.728       1.34164079]]

# Fit a linear regressor by stochastic gradient descent on small random data.
import numpy as np
from sklearn import linear_model
n_samples, n_features = 10, 5
np.random.seed(0)  # seeded so the fitted model is reproducible
y = np.random.randn(n_samples)
X = np.random.randn(n_samples, n_features)
clf = linear_model.SGDRegressor(max_iter=1000, tol=1e-3)
clf.fit(X, y)

# The echoed repr below matches scikit-learn ~0.20 (note n_iter=None).
# Expected:
## SGDRegressor(alpha=0.0001, average=False, early_stopping=False,
##        epsilon=0.1, eta0=0.01, fit_intercept=True, l1_ratio=0.15,
##        learning_rate='invscaling', loss='squared_loss', max_iter=1000,
##        n_iter=None, n_iter_no_change=5, penalty='l2', power_t=0.25,
##        random_state=None, shuffle=True, tol=0.001, validation_fraction=0.1,
##        verbose=0, warm_start=False)

from sklearn.linear_model import TheilSenRegressor
from sklearn.datasets import make_regression
X, y = make_regression(
    n_samples=200, n_features=2, noise=4.0, random_state=0)
reg = TheilSenRegressor(random_state=0).fit(X, y)
reg.score(X, y)
# Expected:
## 0.9884...
reg.predict(X[:1,])
# Expected:
## array([-31.5871...])

import numpy as np
from sklearn.random_projection import SparseRandomProjection
np.random.seed(42)
X = np.random.rand(100, 10000)
transformer = SparseRandomProjection()
X_new = transformer.fit_transform(X)
X_new.shape
# Expected:
## (100, 3947)
np.mean(transformer.components_ != 0)
# Expected:
## 0.0100...

from sklearn.preprocessing import MinMaxScaler

data = [[-1, 2], [-0.5, 6], [0, 10], [1, 18]]
scaler = MinMaxScaler()
print(scaler.fit(data))
# Expected:
## MinMaxScaler(copy=True, feature_range=(0, 1))
print(scaler.data_max_)
# Expected:
## [ 1. 18.]
print(scaler.transform(data))
# Expected:
## [[0.   0.  ]
##  [0.25 0.25]
##  [0.5  0.5 ]
##  [1.   1.  ]]
print(scaler.transform([[2, 2]]))
# Expected:
## [[1.5 0. ]]

from sklearn.linear_model import ElasticNetCV
from sklearn.datasets import make_regression

X, y = make_regression(n_features=2, random_state=0)
regr = ElasticNetCV(cv=5, random_state=0)
regr.fit(X, y)
# Expected:
## ElasticNetCV(alphas=None, copy_X=True, cv=5, eps=0.001, fit_intercept=True,
##        l1_ratio=0.5, max_iter=1000, n_alphas=100, n_jobs=None,
##        normalize=False, positive=False, precompute='auto', random_state=0,
##        selection='cyclic', tol=0.0001, verbose=0)
print(regr.alpha_)
# Expected:
## 0.1994727942696716
print(regr.intercept_)
# Expected:
## 0.398...
print(regr.predict([[0, 0]]))
# Expected:
## [0.398...]

from sklearn.kernel_approximation import RBFSampler
from sklearn.linear_model import SGDClassifier
X = [[0, 0], [1, 1], [1, 0], [0, 1]]
y = [0, 0, 1, 1]
rbf_feature = RBFSampler(gamma=1, random_state=1)
X_features = rbf_feature.fit_transform(X)
clf = SGDClassifier(max_iter=5, tol=1e-3)
clf.fit(X_features, y)

# Expected:
## SGDClassifier(alpha=0.0001, average=False, class_weight=None,
##        early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
##        l1_ratio=0.15, learning_rate='optimal', loss='hinge', max_iter=5,
##        n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',
##        power_t=0.5, random_state=None, shuffle=True, tol=0.001,
##        validation_fraction=0.1, verbose=0, warm_start=False)
clf.score(X_features, y)
# Expected:
## 1.0

from sklearn.linear_model import OrthogonalMatchingPursuitCV
from sklearn.datasets import make_regression
X, y = make_regression(n_features=100, n_informative=10,
                       noise=4, random_state=0)
reg = OrthogonalMatchingPursuitCV(cv=5).fit(X, y)
reg.score(X, y)
# Expected:
## 0.9991...
reg.n_nonzero_coefs_
# Expected:
## 10
reg.predict(X[:1,])
# Expected:
## array([-78.3854...])

from sklearn.kernel_approximation import SkewedChi2Sampler
from sklearn.linear_model import SGDClassifier
X = [[0, 0], [1, 1], [1, 0], [0, 1]]
y = [0, 0, 1, 1]
chi2_feature = SkewedChi2Sampler(skewedness=.01,
                                 n_components=10,
                                 random_state=0)
X_features = chi2_feature.fit_transform(X, y)
clf = SGDClassifier(max_iter=10, tol=1e-3)
clf.fit(X_features, y)
# Expected:
## SGDClassifier(alpha=0.0001, average=False, class_weight=None,
##        early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
##        l1_ratio=0.15, learning_rate='optimal', loss='hinge', max_iter=10,
##        n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',
##        power_t=0.5, random_state=None, shuffle=True, tol=0.001,
##        validation_fraction=0.1, verbose=0, warm_start=False)
clf.score(X_features, y)
# Expected:
## 1.0

from sklearn.datasets import load_digits
from sklearn.linear_model import Perceptron
X, y = load_digits(return_X_y=True)
clf = Perceptron(tol=1e-3, random_state=0)
clf.fit(X, y)
# Expected:
## Perceptron(alpha=0.0001, class_weight=None, early_stopping=False, eta0=1.0,
##       fit_intercept=True, max_iter=None, n_iter=None, n_iter_no_change=5,
##       n_jobs=None, penalty=None, random_state=0, shuffle=True, tol=0.001,
##       validation_fraction=0.1, verbose=0, warm_start=False)
clf.score(X, y)
# Expected:
## 0.946...

from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import RidgeClassifier
X, y = load_breast_cancer(return_X_y=True)
clf = RidgeClassifier().fit(X, y)
clf.score(X, y)
# Expected:
## 0.9595...

from sklearn.svm import LinearSVR
from sklearn.datasets import make_regression
X, y = make_regression(n_features=4, random_state=0)
regr = LinearSVR(random_state=0, tol=1e-5)
regr.fit(X, y)
# Expected:
## LinearSVR(C=1.0, dual=True, epsilon=0.0, fit_intercept=True,
##      intercept_scaling=1.0, loss='epsilon_insensitive', max_iter=1000,
##      random_state=0, tol=1e-05, verbose=0)
print(regr.coef_)
# Expected:
## [16.35... 26.91... 42.30... 60.47...]
print(regr.intercept_)
# Expected:
## [-4.29...]
print(regr.predict([[0, 0, 0, 0]]))
# Expected:
## [-4.29...]

from sklearn.preprocessing import OrdinalEncoder
enc = OrdinalEncoder()
X = [['Male', 1], ['Female', 3], ['Female', 2]]
enc.fit(X)

# Expected:
## OrdinalEncoder(categories='auto', dtype=<... 'numpy.float64'>)
enc.categories_
# Expected:
## [array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]
enc.transform([['Female', 3], ['Male', 1]])
# Expected:
## array([[0., 2.],
##        [1., 0.]])

enc.inverse_transform([[1, 0], [0, 1]])
# Expected:
## array([['Male', 1],
##        ['Female', 2]], dtype=object)

from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
import numpy as np
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])
clf = QuadraticDiscriminantAnalysis()
clf.fit(X, y)

# Expected:
## QuadraticDiscriminantAnalysis(priors=None, reg_param=0.0,
##                               store_covariance=False,
##                               store_covariances=None, tol=0.0001)
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]

X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]
from sklearn.neighbors import KNeighborsClassifier
neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(X, y)
# Expected:
## KNeighborsClassifier(...)
print(neigh.predict([[1.1]]))
# Expected:
## [0]
print(neigh.predict_proba([[0.9]]))
# Expected:
## [[0.66666667 0.33333333]]

import numpy as np
from sklearn.preprocessing import PowerTransformer
pt = PowerTransformer()
data = [[1, 2], [3, 2], [4, 5]]
print(pt.fit(data))
# Expected:
## PowerTransformer(copy=True, method='yeo-johnson', standardize=True)
print(pt.lambdas_)
# Expected:
## [ 1.386... -3.100...]
print(pt.transform(data))
# Expected:
## [[-1.316... -0.707...]
##  [ 0.209... -0.707...]
##  [ 1.106...  1.414...]]

import numpy as np
from sklearn.datasets import make_friedman1
from sklearn.decomposition import SparsePCA
X, _ = make_friedman1(n_samples=200, n_features=30, random_state=0)
transformer = SparsePCA(n_components=5,
        normalize_components=True,
        random_state=0)
transformer.fit(X)
# Expected:
## SparsePCA(...)
X_transformed = transformer.transform(X)
X_transformed.shape
# Expected:
## (200, 5)
np.mean(transformer.components_ == 0)
# Expected:
## 0.9666...

from sklearn import svm, datasets
from sklearn.model_selection import GridSearchCV
iris = datasets.load_iris()
parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}
svc = svm.SVC(gamma="scale")
clf = GridSearchCV(svc, parameters, cv=5)
clf.fit(iris.data, iris.target)

# Expected:
## GridSearchCV(cv=5, error_score=...,
##        estimator=SVC(C=1.0, cache_size=..., class_weight=..., coef0=...,
##                      decision_function_shape='ovr', degree=..., gamma=...,
##                      kernel='rbf', max_iter=-1, probability=False,
##                      random_state=None, shrinking=True, tol=...,
##                      verbose=False),
##        fit_params=None, iid=..., n_jobs=None,
##        param_grid=..., pre_dispatch=..., refit=..., return_train_score=...,
##        scoring=..., verbose=...)
sorted(clf.cv_results_.keys())

# Expected:
## ['mean_fit_time', 'mean_score_time', 'mean_test_score',...
##  'mean_train_score', 'param_C', 'param_kernel', 'params',...
##  'rank_test_score', 'split0_test_score',...
##  'split0_train_score', 'split1_test_score', 'split1_train_score',...
##  'split2_test_score', 'split2_train_score',...
##  'std_fit_time', 'std_score_time', 'std_test_score', 'std_train_score'...]

from sklearn.linear_model import LassoCV
from sklearn.datasets import make_regression
X, y = make_regression(noise=4, random_state=0)
reg = LassoCV(cv=5, random_state=0).fit(X, y)
reg.score(X, y)
# Expected:
## 0.9993...
reg.predict(X[:1,])
# Expected:
## array([-78.4951...])

from sklearn.datasets import load_diabetes
from sklearn.linear_model import RidgeCV
X, y = load_diabetes(return_X_y=True)
clf = RidgeCV(alphas=[1e-3, 1e-2, 1e-1, 1]).fit(X, y)
clf.score(X, y)
# Expected:
## 0.5166...

import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])
clf = LinearDiscriminantAnalysis()
clf.fit(X, y)
# Expected:
## LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=None,
##               solver='svd', store_covariance=False, tol=0.0001)
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]

from sklearn import linear_model
clf = linear_model.ARDRegression()
clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])

# Expected:
## ARDRegression(alpha_1=1e-06, alpha_2=1e-06, compute_score=False,
##         copy_X=True, fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06,
##         n_iter=300, normalize=False, threshold_lambda=10000.0, tol=0.001,
##         verbose=False)
clf.predict([[1, 1]])
# Expected:
## array([1.])

from sklearn.linear_model import OrthogonalMatchingPursuit
from sklearn.datasets import make_regression
X, y = make_regression(noise=4, random_state=0)
reg = OrthogonalMatchingPursuit().fit(X, y)
reg.score(X, y)
# Expected:
## 0.9991...
reg.predict(X[:1,])
# Expected:
## array([-78.3854...])

from sklearn.cross_decomposition import PLSCanonical
X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]
Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
plsca = PLSCanonical(n_components=2)
plsca.fit(X, Y)

# Expected:
## PLSCanonical(algorithm='nipals', copy=True, max_iter=500, n_components=2,
##              scale=True, tol=1e-06)
X_c, Y_c = plsca.transform(X, Y)

import numpy as np
from sklearn import datasets, cluster
digits = datasets.load_digits()
images = digits.images
X = np.reshape(images, (len(images), -1))
agglo = cluster.FeatureAgglomeration(n_clusters=32)
agglo.fit(X)
# Expected:
## FeatureAgglomeration(affinity='euclidean', compute_full_tree='auto',
##            connectivity=None, linkage='ward', memory=None, n_clusters=32,
##            pooling_func=...)
X_reduced = agglo.transform(X)
X_reduced.shape
# Expected:
## (1797, 32)

from sklearn.datasets import load_digits
from sklearn.feature_selection import SelectPercentile, chi2
X, y = load_digits(return_X_y=True)
X.shape
# Expected:
## (1797, 64)
X_new = SelectPercentile(chi2, percentile=10).fit_transform(X, y)
X_new.shape
# Expected:
## (1797, 7)

from sklearn.kernel_ridge import KernelRidge
import numpy as np
n_samples, n_features = 10, 5
rng = np.random.RandomState(0)
y = rng.randn(n_samples)
X = rng.randn(n_samples, n_features)
clf = KernelRidge(alpha=1.0)
clf.fit(X, y)
# Expected:
## KernelRidge(alpha=1.0, coef0=1, degree=3, gamma=None, kernel='linear',
##             kernel_params=None)

from sklearn.linear_model import MultiTaskLassoCV
from sklearn.datasets import make_regression
X, y = make_regression(n_targets=2, noise=4, random_state=0)
reg = MultiTaskLassoCV(cv=5, random_state=0).fit(X, y)
reg.score(X, y)
# Expected:
## 0.9994...
reg.alpha_
# Expected:
## 0.5713...
reg.predict(X[:1,])
# Expected:
## array([[153.7971...,  94.9015...]])

import numpy as np
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
Y = np.array([1, 1, 1, 2, 2, 2])
from sklearn.naive_bayes import GaussianNB
clf = GaussianNB()
clf.fit(X, Y)
# Expected:
## GaussianNB(priors=None, var_smoothing=1e-09)
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]
clf_pf = GaussianNB()
clf_pf.partial_fit(X, Y, np.unique(Y))
# Expected:
## GaussianNB(priors=None, var_smoothing=1e-09)
print(clf_pf.predict([[-0.8, -1]]))
# Expected:
## [1]

import numpy as np
from sklearn import datasets
from sklearn.semi_supervised import LabelSpreading
label_prop_model = LabelSpreading()
iris = datasets.load_iris()
rng = np.random.RandomState(42)
random_unlabeled_points = rng.rand(len(iris.target)) < 0.3
labels = np.copy(iris.target)
labels[random_unlabeled_points] = -1
label_prop_model.fit(iris.data, labels)

# Expected:
## LabelSpreading(...)

from sklearn.decomposition import LatentDirichletAllocation
from sklearn.datasets import make_multilabel_classification
X, _ = make_multilabel_classification(random_state=0)
lda = LatentDirichletAllocation(n_components=5,
    random_state=0)
lda.fit(X)
# Expected:
## LatentDirichletAllocation(...)

lda.transform(X[-2:])
# Expected:
## array([[0.00360392, 0.25499205, 0.0036211 , 0.64236448, 0.09541846],
##        [0.15297572, 0.00362644, 0.44412786, 0.39568399, 0.003586  ]])

import numpy as np
X = np.array([[1, 1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])
from sklearn.decomposition import NMF
model = NMF(n_components=2, init='random', random_state=0)
W = model.fit_transform(X)
H = model.components_

from sklearn.preprocessing import MaxAbsScaler
X = [[ 1., -1.,  2.],
     [ 2.,  0.,  0.],
     [ 0.,  1., -1.]]
transformer = MaxAbsScaler().fit(X)
transformer
# Expected:
## MaxAbsScaler(copy=True)
transformer.transform(X)
# Expected:
## array([[ 0.5, -1. ,  1. ],
##        [ 1. ,  0. ,  0. ],
##        [ 0. ,  1. , -0.5]])

from sklearn.feature_extraction.text import HashingVectorizer
corpus = [
    'This is the first document.',
    'This document is the second document.',
    'And this is the third one.',
    'Is this the first document?',
]
vectorizer = HashingVectorizer(n_features=2**4)
X = vectorizer.fit_transform(corpus)
print(X.shape)
# Expected:
## (4, 16)

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegressionCV
X, y = load_iris(return_X_y=True)
clf = LogisticRegressionCV(cv=5, random_state=0,
                           multi_class='multinomial').fit(X, y)
clf.predict(X[:2, :])
# Expected:
## array([0, 0])
clf.predict_proba(X[:2, :]).shape
# Expected:
## (2, 3)
clf.score(X, y)
# Expected:
## 0.98...

import numpy as np
X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
y = np.array([1, 1, 2, 2])
from sklearn.svm import SVC
clf = SVC(gamma='auto')
clf.fit(X, y)
# Expected:
## SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
##     decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
##     max_iter=-1, probability=False, random_state=None, shrinking=True,
##     tol=0.001, verbose=False)
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]

# Vectorize feature dicts into a dense array; unseen features are dropped.
from sklearn.feature_extraction import DictVectorizer
v = DictVectorizer(sparse=False)
D = [{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}]
X = v.fit_transform(D)
X
# Expected:
## array([[2., 0., 1.],
##        [0., 1., 3.]])
# Rejoined onto one line: the original transcript lost a line continuation,
# leaving a run of stray spaces around '=='.
v.inverse_transform(X) == [{'bar': 2.0, 'foo': 1.0}, {'baz': 1.0, 'foo': 3.0}]
# Expected:
## True
v.transform({'foo': 4, 'unseen_feature': 3})
# Expected:
## array([[0., 0., 4.]])

from sklearn.svm import LinearSVC
from sklearn.datasets import make_classification
X, y = make_classification(n_features=4, random_state=0)
clf = LinearSVC(random_state=0, tol=1e-5)
clf.fit(X, y)
# Expected:
## LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
##      intercept_scaling=1, loss='squared_hinge', max_iter=1000,
##      multi_class='ovr', penalty='l2', random_state=0, tol=1e-05, verbose=0)
print(clf.coef_)
# Expected:
## [[0.085... 0.394... 0.498... 0.375...]]
print(clf.intercept_)
# Expected:
## [0.284...]
print(clf.predict([[0, 0, 0, 0]]))
# Expected:
## [1]

# RandomizedLasso (stability selection) was deprecated in scikit-learn 0.19
# and removed in 0.21, so on the sklearn releases the rest of this page
# targets (0.20+) this import raises ImportError and aborts the whole script.
# Guard it so the remaining snippets still run.
try:
    from sklearn.linear_model import RandomizedLasso
    randomized_lasso = RandomizedLasso()
except ImportError:  # sklearn >= 0.21: class no longer exists
    randomized_lasso = None

from sklearn.preprocessing import MultiLabelBinarizer
mlb = MultiLabelBinarizer()
mlb.fit_transform([(1, 2), (3,)])
# Expected:
## array([[1, 1, 0],
##        [0, 0, 1]])
mlb.classes_
# Expected:
## array([1, 2, 3])

mlb.fit_transform([set(['sci-fi', 'thriller']), set(['comedy'])])
# Expected:
## array([[0, 1, 1],
##        [1, 0, 0]])
list(mlb.classes_)
# Expected:
## ['comedy', 'sci-fi', 'thriller']

from sklearn.datasets import load_digits
from sklearn.decomposition import FastICA
X, _ = load_digits(return_X_y=True)
transformer = FastICA(n_components=7,
        random_state=0)
X_transformed = transformer.fit_transform(X)
X_transformed.shape
# Expected:
## (1797, 7)

from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import make_regression

X, y = make_regression(n_features=4, n_informative=2,
                       random_state=0, shuffle=False)
regr = RandomForestRegressor(max_depth=2, random_state=0,
                             n_estimators=100)
regr.fit(X, y)
# Expected:
## RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=2,
##            max_features='auto', max_leaf_nodes=None,
##            min_impurity_decrease=0.0, min_impurity_split=None,
##            min_samples_leaf=1, min_samples_split=2,
##            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=None,
##            oob_score=False, random_state=0, verbose=0, warm_start=False)
print(regr.feature_importances_)
# Expected:
## [0.18146984 0.81473937 0.00145312 0.00233767]
print(regr.predict([[0, 0, 0, 0]]))
# Expected:
## [-8.32987858]

import numpy as np
X = np.random.randint(5, size=(6, 100))
y = np.array([1, 2, 3, 4, 5, 6])
from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB()
clf.fit(X, y)
# Expected:
## MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)
print(clf.predict(X[2:3]))
# Expected:
## [3]

from sklearn import preprocessing
le = preprocessing.LabelEncoder()
le.fit([1, 2, 2, 6])
# Expected:
## LabelEncoder()
le.classes_
# Expected:
## array([1, 2, 6])
le.transform([1, 1, 2, 6])
# Expected:
## array([0, 0, 1, 2]...)
le.inverse_transform([0, 0, 1, 2])
# Expected:
## array([1, 1, 2, 6])

le = preprocessing.LabelEncoder()
le.fit(["paris", "paris", "tokyo", "amsterdam"])
# Expected:
## LabelEncoder()
list(le.classes_)
# Expected:
## ['amsterdam', 'paris', 'tokyo']
le.transform(["tokyo", "tokyo", "paris"])
# Expected:
## array([2, 2, 1]...)
list(le.inverse_transform([2, 2, 1]))
# Expected:
## ['tokyo', 'tokyo', 'paris']

from sklearn.datasets import load_digits
from sklearn.manifold import LocallyLinearEmbedding
X, _ = load_digits(return_X_y=True)
X.shape
# Expected:
## (1797, 64)
embedding = LocallyLinearEmbedding(n_components=2)
X_transformed = embedding.fit_transform(X[:100])
X_transformed.shape
# Expected:
## (100, 2)

from sklearn.datasets import load_iris
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
X, y = load_iris(return_X_y=True)
kernel = 1.0 * RBF(1.0)
gpc = GaussianProcessClassifier(kernel=kernel,
        random_state=0).fit(X, y)
gpc.score(X, y)
# Expected:
## 0.9866...
gpc.predict_proba(X[:2,:])
# Expected:
## array([[0.83548752, 0.03228706, 0.13222543],
##        [0.79064206, 0.06525643, 0.14410151]])

from sklearn.linear_model import LarsCV
from sklearn.datasets import make_regression
X, y = make_regression(n_samples=200, noise=4.0, random_state=0)
reg = LarsCV(cv=5).fit(X, y)
reg.score(X, y)
# Expected:
## 0.9996...
reg.alpha_
# Expected:
## 0.0254...
reg.predict(X[:1,])
# Expected:
## array([154.0842...])

from sklearn.datasets import load_digits
from sklearn.linear_model import SGDClassifier
from sklearn.kernel_approximation import AdditiveChi2Sampler
X, y = load_digits(return_X_y=True)
chi2sampler = AdditiveChi2Sampler(sample_steps=2)
X_transformed = chi2sampler.fit_transform(X, y)
clf = SGDClassifier(max_iter=5, random_state=0, tol=1e-3)
clf.fit(X_transformed, y)
# Expected:
## SGDClassifier(alpha=0.0001, average=False, class_weight=None,
##        early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
##        l1_ratio=0.15, learning_rate='optimal', loss='hinge', max_iter=5,
##        n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',
##        power_t=0.5, random_state=0, shuffle=True, tol=0.001,
##        validation_fraction=0.1, verbose=0, warm_start=False)
clf.score(X_transformed, y)
# Expected:
## 0.9543...

from sklearn.cluster import Birch
X = [[0, 1], [0.3, 1], [-0.3, 1], [0, -1], [0.3, -1], [-0.3, -1]]
brc = Birch(branching_factor=50, n_clusters=None, threshold=0.5,
compute_labels=True)
brc.fit(X)
# Expected:
## Birch(branching_factor=50, compute_labels=True, copy=True, n_clusters=None,
##    threshold=0.5)
brc.predict(X)
# Expected:
## array([0, 0, 0, 1, 1, 1])

import numpy as np
from sklearn.preprocessing import QuantileTransformer
rng = np.random.RandomState(0)
X = np.sort(rng.normal(loc=0.5, scale=0.25, size=(25, 1)), axis=0)
qt = QuantileTransformer(n_quantiles=10, random_state=0)
qt.fit_transform(X)
# Expected:
## array([...])

from sklearn.feature_extraction.text import CountVectorizer
corpus = [
    'This is the first document.',
    'This document is the second document.',
    'And this is the third one.',
    'Is this the first document?',
]
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(corpus)
print(vectorizer.get_feature_names())
# Expected:
## ['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third', 'this']
print(X.toarray())
# Expected:
## [[0 1 1 1 0 0 1 0 1]
##  [0 2 0 1 0 1 1 0 1]
##  [1 0 0 1 1 0 1 1 1]
##  [0 1 1 1 0 0 1 0 1]]

import numpy as np
from sklearn import datasets
from sklearn.semi_supervised import LabelPropagation
label_prop_model = LabelPropagation()
iris = datasets.load_iris()
rng = np.random.RandomState(42)
random_unlabeled_points = rng.rand(len(iris.target)) < 0.3
labels = np.copy(iris.target)
labels[random_unlabeled_points] = -1
label_prop_model.fit(iris.data, labels)

# Expected:
## LabelPropagation(...)

X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]
from sklearn.neighbors import RadiusNeighborsRegressor
neigh = RadiusNeighborsRegressor(radius=1.0)
neigh.fit(X, y)
# Expected:
## RadiusNeighborsRegressor(...)
print(neigh.predict([[1.5]]))
# Expected:
## [0.5]

import numpy as np
from sklearn.cross_decomposition import PLSSVD
X = np.array([[0., 0., 1.],
    [1.,0.,0.],
    [2.,2.,2.],
    [2.,5.,4.]])
Y = np.array([[0.1, -0.2],
    [0.9, 1.1],
    [6.2, 5.9],
    [11.9, 12.3]])
plsca = PLSSVD(n_components=2)
plsca.fit(X, Y)
# Expected:
## PLSSVD(copy=True, n_components=2, scale=True)
X_c, Y_c = plsca.transform(X, Y)
X_c.shape, Y_c.shape
# Expected:
## ((4, 2), (4, 2))

import numpy as np
from sklearn.random_projection import GaussianRandomProjection
X = np.random.rand(100, 10000)
transformer = GaussianRandomProjection()
X_new = transformer.fit_transform(X)
X_new.shape
# Expected:
## (100, 3947)

from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(handle_unknown='ignore')
X = [['Male', 1], ['Female', 3], ['Female', 2]]
enc.fit(X)

# Expected:
## OneHotEncoder(categorical_features=None, categories=None,
##        dtype=<... 'numpy.float64'>, handle_unknown='ignore',
##        n_values=None, sparse=True)

enc.categories_
# Expected:
## [array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]
enc.transform([['Female', 1], ['Male', 4]]).toarray()
# Expected:
## array([[1., 0., 1., 0., 0.],
##        [0., 1., 0., 0., 0.]])
enc.inverse_transform([[0, 1, 1, 0, 0], [0, 0, 0, 1, 0]])
# Expected:
## array([['Male', 1],
##        [None, 2]], dtype=object)
enc.get_feature_names()
# Expected:
## array(['x0_Female', 'x0_Male', 'x1_1', 'x1_2', 'x1_3'], dtype=object)

X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]
from sklearn.neighbors import KNeighborsRegressor
neigh = KNeighborsRegressor(n_neighbors=2)
neigh.fit(X, y)
# Expected:
## KNeighborsRegressor(...)
print(neigh.predict([[1.5]]))
# Expected:
## [0.5]

from sklearn.datasets import make_friedman2
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
kernel = DotProduct() + WhiteKernel()
gpr = GaussianProcessRegressor(kernel=kernel,
        random_state=0).fit(X, y)
gpr.score(X, y)
# Expected:
## 0.3680...
gpr.predict(X[:2,:], return_std=True)
# Expected:
## (array([653.0..., 592.1...]), array([316.6..., 316.6...]))

from sklearn.cluster import KMeans
import numpy as np
X = np.array([[1, 2], [1, 4], [1, 0],
              [10, 2], [10, 4], [10, 0]])
kmeans = KMeans(n_clusters=2, random_state=0).fit(X)
kmeans.labels_
# Expected:
## array([1, 1, 1, 0, 0, 0], dtype=int32)
kmeans.predict([[0, 0], [12, 3]])
# Expected:
## array([1, 0], dtype=int32)
kmeans.cluster_centers_
# Expected:
## array([[10.,  2.],
##        [ 1.,  2.]])

from sklearn.linear_model import PassiveAggressiveRegressor
from sklearn.datasets import make_regression

X, y = make_regression(n_features=4, random_state=0)
regr = PassiveAggressiveRegressor(max_iter=100, random_state=0,
tol=1e-3)
regr.fit(X, y)
# Expected:
## PassiveAggressiveRegressor(C=1.0, average=False, early_stopping=False,
##               epsilon=0.1, fit_intercept=True, loss='epsilon_insensitive',
##               max_iter=100, n_iter=None, n_iter_no_change=5,
##               random_state=0, shuffle=True, tol=0.001,
##               validation_fraction=0.1, verbose=0, warm_start=False)
print(regr.coef_)
# Expected:
## [20.48736655 34.18818427 67.59122734 87.94731329]
print(regr.intercept_)
# Expected:
## [-0.02306214]
print(regr.predict([[0, 0, 0, 0]]))
# Expected:
## [-0.02306214]

from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=1000, n_features=4,
                           n_informative=2, n_redundant=0,
                           random_state=0, shuffle=False)
clf = RandomForestClassifier(n_estimators=100, max_depth=2,
                             random_state=0)
clf.fit(X, y)
# Expected:
## RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
##             max_depth=2, max_features='auto', max_leaf_nodes=None,
##             min_impurity_decrease=0.0, min_impurity_split=None,
##             min_samples_leaf=1, min_samples_split=2,
##             min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=None,
##             oob_score=False, random_state=0, verbose=0, warm_start=False)
print(clf.feature_importances_)
# Expected:
## [0.14205973 0.76664038 0.0282433  0.06305659]
print(clf.predict([[0, 0, 0, 0]]))
# Expected:
## [1]

from sklearn.linear_model import Ridge
import numpy as np
n_samples, n_features = 10, 5
np.random.seed(0)
y = np.random.randn(n_samples)
X = np.random.randn(n_samples, n_features)
clf = Ridge(alpha=1.0)
clf.fit(X, y)
# Expected:
## Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
##       normalize=False, random_state=None, solver='auto', tol=0.001)

from sklearn.linear_model import ElasticNet
from sklearn.datasets import make_regression

X, y = make_regression(n_features=2, random_state=0)
regr = ElasticNet(random_state=0)
regr.fit(X, y)
# Expected:
## ElasticNet(alpha=1.0, copy_X=True, fit_intercept=True, l1_ratio=0.5,
##       max_iter=1000, normalize=False, positive=False, precompute=False,
##       random_state=0, selection='cyclic', tol=0.0001, warm_start=False)
print(regr.coef_)
# Expected:
## [18.83816048 64.55968825]
print(regr.intercept_)
# Expected:
## 1.451...
print(regr.predict([[0, 0]]))
# Expected:
## [1.451...]

from sklearn.datasets import load_digits
from sklearn.manifold import Isomap
X, _ = load_digits(return_X_y=True)
X.shape
# Expected:
## (1797, 64)
embedding = Isomap(n_components=2)
X_transformed = embedding.fit_transform(X[:100])
X_transformed.shape
# Expected:
## (100, 2)

from sklearn.preprocessing import Binarizer
X = [[ 1., -1.,  2.],
     [ 2.,  0.,  0.],
     [ 0.,  1., -1.]]
transformer = Binarizer().fit(X) # fit does nothing.
transformer
# Expected:
## Binarizer(copy=True, threshold=0.0)
transformer.transform(X)
# Expected:
## array([[1., 0., 1.],
##        [1., 0., 0.],
##        [0., 1., 0.]])

from sklearn.feature_extraction.text import TfidfVectorizer
corpus = [
    'This is the first document.',
    'This document is the second document.',
    'And this is the third one.',
    'Is this the first document?',
]
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(corpus)
print(vectorizer.get_feature_names())
# Expected:
## ['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third', 'this']
print(X.shape)
# Expected:
## (4, 9)

# Discretize each feature into 3 equal-width ordinal bins.
# This import was missing in the original snippet, so it raised NameError.
from sklearn.preprocessing import KBinsDiscretizer
X = [[-2, 1, -4,   -1],
     [-1, 2, -3, -0.5],
     [ 0, 3, -2,  0.5],
     [ 1, 4, -1,    2]]
est = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')
est.fit(X)
# Expected:
## KBinsDiscretizer(...)
Xt = est.transform(X)
Xt
# Expected:
## array([[ 0., 0., 0., 0.],
##        [ 1., 1., 1., 0.],
##        [ 2., 2., 2., 1.],
##        [ 2., 2., 2., 2.]])

est.bin_edges_[0]
# Expected:
## array([-2., -1.,  0.,  1.])
# inverse_transform maps each ordinal code back to its bin center.
est.inverse_transform(Xt)
# Expected:
## array([[-1.5,  1.5, -3.5, -0.5],
##        [-0.5,  2.5, -2.5, -0.5],
##        [ 0.5,  3.5, -1.5,  0.5],
##        [ 0.5,  3.5, -1.5,  1.5]])

# IncrementalPCA: PCA fit in mini-batches (supports partial_fit for
# out-of-core data).
from sklearn.datasets import load_digits
from sklearn.decomposition import IncrementalPCA
X, _ = load_digits(return_X_y=True)
transformer = IncrementalPCA(n_components=7, batch_size=200)
transformer.partial_fit(X[:100, :])
# Expected:
## IncrementalPCA(batch_size=200, copy=True, n_components=7, whiten=False)
X_transformed = transformer.fit_transform(X)
X_transformed.shape
# Expected:
## (1797, 7)

# MiniBatchSparsePCA: sparse PCA solved in mini-batches; most learned
# component entries end up exactly zero.
import numpy as np
from sklearn.datasets import make_friedman1
from sklearn.decomposition import MiniBatchSparsePCA
X, _ = make_friedman1(n_samples=200, n_features=30, random_state=0)
# NOTE(review): normalize_components was deprecated in scikit-learn 0.22
# and later removed (the normalized behaviour became the only one) --
# confirm against the target version.
transformer = MiniBatchSparsePCA(n_components=5,
        batch_size=50,
        normalize_components=True,
        random_state=0)
transformer.fit(X)
# Expected:
## MiniBatchSparsePCA(...)
X_transformed = transformer.transform(X)
X_transformed.shape
# Expected:
## (200, 5)
# Fraction of exactly-zero entries in the sparse components.
np.mean(transformer.components_ == 0)
# Expected:
## 0.94

# FactorAnalysis: linear latent-variable model with 7 factors.
from sklearn.datasets import load_digits
from sklearn.decomposition import FactorAnalysis
X, _ = load_digits(return_X_y=True)
transformer = FactorAnalysis(n_components=7, random_state=0)
X_transformed = transformer.fit_transform(X)
X_transformed.shape
# Expected:
## (1797, 7)

# LassoLarsIC: LARS-based lasso with the regularization strength chosen by
# an information criterion (here BIC).
from sklearn import linear_model
reg = linear_model.LassoLarsIC(criterion='bic')
reg.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111])

# Expected:
## LassoLarsIC(copy_X=True, criterion='bic', eps=..., fit_intercept=True,
##       max_iter=500, normalize=True, positive=False, precompute='auto',
##       verbose=False)
print(reg.coef_)
# Expected:
## [ 0.  -1.11...]

# RFE: recursive feature elimination keeping the 5 best features for a
# linear SVR.
from sklearn.datasets import make_friedman1
from sklearn.feature_selection import RFE
from sklearn.svm import SVR
X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)
estimator = SVR(kernel="linear")
# Pass n_features_to_select by keyword: the positional form used here was
# deprecated in scikit-learn 1.0 and removed in 1.2. The keyword call is
# accepted by older versions too.
selector = RFE(estimator, n_features_to_select=5, step=1)
selector = selector.fit(X, y)
selector.support_
# Expected:
## array([ True,  True,  True,  True,  True, False, False, False, False,
##        False])
selector.ranking_
# Expected:
## array([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])

# PCA: default solver on a tiny 2-D dataset.
import numpy as np
from sklearn.decomposition import PCA
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
pca = PCA(n_components=2)
pca.fit(X)
# Expected:
## PCA(copy=True, iterated_power='auto', n_components=2, random_state=None,
##   svd_solver='auto', tol=0.0, whiten=False)
print(pca.explained_variance_ratio_)
# Expected:
## [0.9924... 0.0075...]
print(pca.singular_values_)
# Expected:
## [6.30061... 0.54980...]

# Same data, full (LAPACK) SVD solver -- identical results here.
pca = PCA(n_components=2, svd_solver='full')
pca.fit(X)
# Expected:
## PCA(copy=True, iterated_power='auto', n_components=2, random_state=None,
##   svd_solver='full', tol=0.0, whiten=False)
print(pca.explained_variance_ratio_)
# Expected:
## [0.9924... 0.00755...]
print(pca.singular_values_)
# Expected:
## [6.30061... 0.54980...]

# ARPACK solver requires n_components strictly less than n_features.
pca = PCA(n_components=1, svd_solver='arpack')
pca.fit(X)
# Expected:
## PCA(copy=True, iterated_power='auto', n_components=1, random_state=None,
##   svd_solver='arpack', tol=0.0, whiten=False)
print(pca.explained_variance_ratio_)
# Expected:
## [0.99244...]
print(pca.singular_values_)
# Expected:
## [6.30061...]

# MultiTaskLasso: L1/L2-penalized regression sharing sparsity across tasks.
from sklearn import linear_model
clf = linear_model.MultiTaskLasso(alpha=0.1)
clf.fit([[0,0], [1, 1], [2, 2]], [[0, 0], [1, 1], [2, 2]])
# Expected:
## MultiTaskLasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
##         normalize=False, random_state=None, selection='cyclic', tol=0.0001,
##         warm_start=False)
print(clf.coef_)
# Expected:
## [[0.89393398 0.        ]
##  [0.89393398 0.        ]]
print(clf.intercept_)
# Expected:
## [0.10606602 0.10606602]

# NOTE(review): RandomizedLogisticRegression was removed from scikit-learn
# in 0.21; this snippet fails on any modern release. There is no drop-in
# replacement (stability selection lives in third-party packages) --
# consider dropping or rewriting this snippet.
from sklearn.linear_model import RandomizedLogisticRegression
randomized_logistic = RandomizedLogisticRegression()

# SelectFwe: univariate selection controlling the family-wise error rate.
from sklearn.datasets import load_breast_cancer
from sklearn.feature_selection import SelectFwe, chi2
X, y = load_breast_cancer(return_X_y=True)
X.shape
# Expected:
## (569, 30)
X_new = SelectFwe(chi2, alpha=0.01).fit_transform(X, y)
X_new.shape
# Expected:
## (569, 15)

# MultiTaskElasticNet: elastic-net with joint sparsity across tasks.
from sklearn import linear_model
clf = linear_model.MultiTaskElasticNet(alpha=0.1)
clf.fit([[0,0], [1, 1], [2, 2]], [[0, 0], [1, 1], [2, 2]])

# Expected:
## MultiTaskElasticNet(alpha=0.1, copy_X=True, fit_intercept=True,
##         l1_ratio=0.5, max_iter=1000, normalize=False, random_state=None,
##         selection='cyclic', tol=0.0001, warm_start=False)
print(clf.coef_)
# Expected:
## [[0.45663524 0.45612256]
##  [0.45663524 0.45612256]]
print(clf.intercept_)
# Expected:
## [0.0872422 0.0872422]

# StandardScaler: zero-mean / unit-variance scaling learned from data.
from sklearn.preprocessing import StandardScaler
data = [[0, 0], [0, 0], [1, 1], [1, 1]]
scaler = StandardScaler()
print(scaler.fit(data))
# Expected:
## StandardScaler(copy=True, with_mean=True, with_std=True)
print(scaler.mean_)
# Expected:
## [0.5 0.5]
print(scaler.transform(data))
# Expected:
## [[-1. -1.]
##  [-1. -1.]
##  [ 1.  1.]
##  [ 1.  1.]]
print(scaler.transform([[2, 2]]))
# Expected:
## [[3. 3.]]

# DecisionTreeClassifier scored with 10-fold cross-validation on iris.
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier
clf = DecisionTreeClassifier(random_state=0)
iris = load_iris()
cross_val_score(clf, iris.data, iris.target, cv=10)

# Expected:
## array([ 1.     ,  0.93...,  0.86...,  0.93...,  0.93...,
##         0.93...,  0.93...,  1.     ,  0.93...,  1.      ])

# GenericUnivariateSelect: configurable univariate selection (k-best mode).
from sklearn.datasets import load_breast_cancer
from sklearn.feature_selection import GenericUnivariateSelect, chi2
X, y = load_breast_cancer(return_X_y=True)
X.shape
# Expected:
## (569, 30)
transformer = GenericUnivariateSelect(chi2, 'k_best', param=20)
X_new = transformer.fit_transform(X, y)
X_new.shape
# Expected:
## (569, 20)

# BernoulliNB: naive Bayes for binary/boolean features.
import numpy as np
X = np.random.randint(2, size=(6, 100))
Y = np.array([1, 2, 3, 4, 4, 5])
from sklearn.naive_bayes import BernoulliNB
clf = BernoulliNB()
clf.fit(X, Y)
# Expected:
## BernoulliNB(alpha=1.0, binarize=0.0, class_prior=None, fit_prior=True)
print(clf.predict(X[2:3]))
# Expected:
## [3]

# Multinomial logistic regression on iris with the lbfgs solver.
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
X, y = load_iris(return_X_y=True)
clf = LogisticRegression(random_state=0, solver='lbfgs',
                         multi_class='multinomial').fit(X, y)
clf.predict(X[:2, :])
# Expected:
## array([0, 0])
clf.predict_proba(X[:2, :])
# Expected:
## array([[9.8...e-01, 1.8...e-02, 1.4...e-08],
##        [9.7...e-01, 2.8...e-02, ...e-08]])
clf.score(X, y)
# Expected:
## 0.97...

# ComplementNB: naive Bayes variant suited to imbalanced count data.
import numpy as np
X = np.random.randint(5, size=(6, 100))
y = np.array([1, 2, 3, 4, 5, 6])
from sklearn.naive_bayes import ComplementNB
clf = ComplementNB()
clf.fit(X, y)
# Expected:
## ComplementNB(alpha=1.0, class_prior=None, fit_prior=True, norm=False)
print(clf.predict(X[2:3]))
# Expected:
## [3]

# NuSVC: nu-parameterized support vector classification (RBF kernel).
import numpy as np
X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
y = np.array([1, 1, 2, 2])
from sklearn.svm import NuSVC
clf = NuSVC(gamma='scale')
clf.fit(X, y)
# Expected:
## NuSVC(cache_size=200, class_weight=None, coef0=0.0,
##       decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
##       max_iter=-1, nu=0.5, probability=False, random_state=None,
##       shrinking=True, tol=0.001, verbose=False)
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]

# NearestCentroid: classify each sample by its closest class centroid.
# Import from the public package path: the private module
# sklearn.neighbors.nearest_centroid was deprecated in scikit-learn 0.22
# and removed in 0.24; the public path works on old versions too.
from sklearn.neighbors import NearestCentroid
import numpy as np
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])
clf = NearestCentroid()
clf.fit(X, y)
# Expected:
## NearestCentroid(metric='euclidean', shrink_threshold=None)
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]

# MiniBatchKMeans: k-means updated in mini-batches; shown with both
# partial_fit (streaming) and a plain fit.
from sklearn.cluster import MiniBatchKMeans
import numpy as np
X = np.array([[1, 2], [1, 4], [1, 0],
              [4, 2], [4, 0], [4, 4],
              [4, 5], [0, 1], [2, 2],
              [3, 2], [5, 5], [1, -1]])
kmeans = MiniBatchKMeans(n_clusters=2,
        random_state=0,
        batch_size=6)
kmeans = kmeans.partial_fit(X[0:6,:])
kmeans = kmeans.partial_fit(X[6:12,:])
kmeans.cluster_centers_
# Expected:
## array([[1, 1],
##        [3, 4]])
kmeans.predict([[0, 0], [4, 4]])
# Expected:
## array([0, 1], dtype=int32)
kmeans = MiniBatchKMeans(n_clusters=2,
        random_state=0,
        batch_size=6,
        max_iter=10).fit(X)
kmeans.cluster_centers_
# Expected:
## array([[3.95918367, 2.40816327],
##        [1.12195122, 1.3902439 ]])
kmeans.predict([[0, 0], [4, 4]])
# Expected:
## array([1, 0], dtype=int32)

# LassoLars: lasso fit with the LARS algorithm.
from sklearn import linear_model
reg = linear_model.LassoLars(alpha=0.01)
reg.fit([[-1, 1], [0, 0], [1, 1]], [-1, 0, -1])

# Expected:
## LassoLars(alpha=0.01, copy_X=True, eps=..., fit_intercept=True,
##      fit_path=True, max_iter=500, normalize=True, positive=False,
##      precompute='auto', verbose=False)
print(reg.coef_)
# Expected:
## [ 0.         -0.963257...]

# Lasso: coordinate-descent L1-penalized regression.
from sklearn import linear_model
clf = linear_model.Lasso(alpha=0.1)
clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])
# Expected:
## Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
##    normalize=False, positive=False, precompute=False, random_state=None,
##    selection='cyclic', tol=0.0001, warm_start=False)
print(clf.coef_)
# Expected:
## [0.85 0.  ]
print(clf.intercept_)
# Expected:
## 0.15...

# RANSACRegressor: robust regression by fitting on random inlier subsets.
from sklearn.linear_model import RANSACRegressor
from sklearn.datasets import make_regression
X, y = make_regression(
    n_samples=200, n_features=2, noise=4.0, random_state=0)
reg = RANSACRegressor(random_state=0).fit(X, y)
reg.score(X, y)
# Expected:
## 0.9885...
reg.predict(X[:1,])
# Expected:
## array([-31.9417...])

# HuberRegressor vs LinearRegression: the first 4 samples are replaced by
# outliers; Huber's coefficients stay near the true ones while OLS is
# pulled away (hence the negative R^2 on the corrupted data).
import numpy as np
from sklearn.linear_model import HuberRegressor, LinearRegression
from sklearn.datasets import make_regression
np.random.seed(0)
X, y, coef = make_regression(
    n_samples=200, n_features=2, noise=4.0, coef=True, random_state=0)
X[:4] = np.random.uniform(10, 20, (4, 2))
y[:4] = np.random.uniform(10, 20, 4)
huber = HuberRegressor().fit(X, y)
huber.score(X, y)
# Expected:
## -7.284608623514573
huber.predict(X[:1,])
# Expected:
## array([806.7200...])
linear = LinearRegression().fit(X, y)
print("True coefficients:", coef)
# Expected:
## True coefficients: [20.4923...  34.1698...]
print("Huber coefficients:", huber.coef_)
# Expected:
## Huber coefficients: [17.7906... 31.0106...]
print("Linear Regression coefficients:", linear.coef_)
# Expected:
## Linear Regression coefficients: [-1.9221...  7.0226...]

# TruncatedSVD (LSA) on a sparse random matrix.
# NOTE(review): sparse_random_matrix was deprecated in scikit-learn 1.1
# and removed in 1.3 -- confirm the target version.
from sklearn.decomposition import TruncatedSVD
from sklearn.random_projection import sparse_random_matrix
X = sparse_random_matrix(100, 100, density=0.01, random_state=42)
svd = TruncatedSVD(n_components=5, n_iter=7, random_state=42)
svd.fit(X)
# Expected:
## TruncatedSVD(algorithm='randomized', n_components=5, n_iter=7,
##         random_state=42, tol=0.0)
print(svd.explained_variance_ratio_)
# Expected:
## [0.0606... 0.0584... 0.0497... 0.0434... 0.0372...]
print(svd.explained_variance_ratio_.sum())
# Expected:
## 0.249...
print(svd.singular_values_)
# Expected:
## [2.5841... 2.5245... 2.3201... 2.1753... 2.0443...]

# SelectFpr: univariate selection controlling the false positive rate.
from sklearn.datasets import load_breast_cancer
from sklearn.feature_selection import SelectFpr, chi2
X, y = load_breast_cancer(return_X_y=True)
X.shape
# Expected:
## (569, 30)
X_new = SelectFpr(chi2, alpha=0.01).fit_transform(X, y)
X_new.shape
# Expected:
## (569, 16)

# NuSVR: nu-parameterized support vector regression.
from sklearn.svm import NuSVR
import numpy as np
n_samples, n_features = 10, 5
np.random.seed(0)
y = np.random.randn(n_samples)
X = np.random.randn(n_samples, n_features)
clf = NuSVR(gamma='scale', C=1.0, nu=0.1)
clf.fit(X, y)
# Expected:
## NuSVR(C=1.0, cache_size=200, coef0=0.0, degree=3, gamma='scale',
##       kernel='rbf', max_iter=-1, nu=0.1, shrinking=True, tol=0.001,
##       verbose=False)

# SGDClassifier: linear classifier trained with stochastic gradient descent.
import numpy as np
from sklearn import linear_model
X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
Y = np.array([1, 1, 2, 2])
clf = linear_model.SGDClassifier(max_iter=1000, tol=1e-3)
clf.fit(X, Y)

# Expected:
## SGDClassifier(alpha=0.0001, average=False, class_weight=None,
##        early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
##        l1_ratio=0.15, learning_rate='optimal', loss='hinge', max_iter=1000,
##        n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',
##        power_t=0.5, random_state=None, shuffle=True, tol=0.001,
##        validation_fraction=0.1, verbose=0, warm_start=False)

print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]

# MeanShift: mode-seeking clustering with a fixed bandwidth.
from sklearn.cluster import MeanShift
import numpy as np
X = np.array([[1, 1], [2, 1], [1, 0],
              [4, 7], [3, 5], [3, 6]])
clustering = MeanShift(bandwidth=2).fit(X)
clustering.labels_
# Expected:
## array([1, 1, 1, 0, 0, 0])
clustering.predict([[0, 0], [5, 5]])
# Expected:
## array([1, 0])
clustering
# Expected:
## MeanShift(bandwidth=2, bin_seeding=False, cluster_all=True, min_bin_freq=1,
##      n_jobs=None, seeds=None)

# PassiveAggressiveClassifier: online margin-based linear classifier.
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.datasets import make_classification

X, y = make_classification(n_features=4, random_state=0)
clf = PassiveAggressiveClassifier(max_iter=1000, random_state=0,
tol=1e-3)
clf.fit(X, y)
# Expected:
## PassiveAggressiveClassifier(C=1.0, average=False, class_weight=None,
##               early_stopping=False, fit_intercept=True, loss='hinge',
##               max_iter=1000, n_iter=None, n_iter_no_change=5, n_jobs=None,
##               random_state=0, shuffle=True, tol=0.001,
##               validation_fraction=0.1, verbose=0, warm_start=False)
print(clf.coef_)
# Expected:
## [[-0.6543424   1.54603022  1.35361642  0.22199435]]
print(clf.intercept_)
# Expected:
## [0.63310933]
print(clf.predict([[0, 0, 0, 0]]))
# Expected:
## [1]

# KernelPCA: PCA in a kernel-induced feature space (linear kernel here,
# so equivalent to ordinary PCA).
from sklearn.datasets import load_digits
from sklearn.decomposition import KernelPCA
X, _ = load_digits(return_X_y=True)
transformer = KernelPCA(n_components=7, kernel='linear')
X_transformed = transformer.fit_transform(X)
X_transformed.shape
# Expected:
## (1797, 7)

# AffinityPropagation: clustering by message passing between samples;
# exemplars (cluster centers) are actual data points.
from sklearn.cluster import AffinityPropagation
import numpy as np
X = np.array([[1, 2], [1, 4], [1, 0],
              [4, 2], [4, 4], [4, 0]])
clustering = AffinityPropagation().fit(X)
clustering
# Expected:
## AffinityPropagation(affinity='euclidean', convergence_iter=15, copy=True,
##           damping=0.5, max_iter=200, preference=None, verbose=False)
clustering.labels_
# Expected:
## array([0, 0, 0, 1, 1, 1])
clustering.predict([[0, 0], [4, 4]])
# Expected:
## array([0, 1])
clustering.cluster_centers_
# Expected:
## array([[1, 2],
##        [4, 2]])

# CCA: canonical correlation analysis projecting X and Y onto 1 component.
from sklearn.cross_decomposition import CCA
X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [3.,5.,4.]]
Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
cca = CCA(n_components=1)
cca.fit(X, Y)

# Expected:
## CCA(copy=True, max_iter=500, n_components=1, scale=True, tol=1e-06)
X_c, Y_c = cca.transform(X, Y)

# KernelCenterer: center a precomputed kernel matrix (equivalent to
# centering the data in feature space).
from sklearn.preprocessing import KernelCenterer
from sklearn.metrics.pairwise import pairwise_kernels
X = [[ 1., -2.,  2.],
     [ -2.,  1.,  3.],
     [ 4.,  1., -2.]]
K = pairwise_kernels(X, metric='linear')
K
# Expected:
## array([[  9.,   2.,  -2.],
##        [  2.,  14., -13.],
##        [ -2., -13.,  21.]])
transformer = KernelCenterer().fit(K)
transformer
# Expected:
## KernelCenterer()
transformer.transform(K)
# Expected:
## array([[  5.,   0.,  -5.],
##        [  0.,  14., -14.],
##        [ -5., -14.,  19.]])

# PolynomialFeatures: expand features with polynomial and interaction
# terms. Both imports below were missing from the snippet as scraped, so
# it raised NameError on `PolynomialFeatures` (and on `np` when run alone).
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
X = np.arange(6).reshape(3, 2)
X
# Expected:
## array([[0, 1],
##        [2, 3],
##        [4, 5]])
# Degree-2 expansion: [1, a, b, a^2, ab, b^2].
poly = PolynomialFeatures(2)
poly.fit_transform(X)
# Expected:
## array([[ 1.,  0.,  1.,  0.,  0.,  1.],
##        [ 1.,  2.,  3.,  4.,  6.,  9.],
##        [ 1.,  4.,  5., 16., 20., 25.]])
# interaction_only drops the pure powers: [1, a, b, ab].
poly = PolynomialFeatures(interaction_only=True)
poly.fit_transform(X)
# Expected:
## array([[ 1.,  0.,  1.,  0.],
##        [ 1.,  2.,  3.,  6.],
##        [ 1.,  4.,  5., 20.]])

# SelectFdr: univariate selection controlling the false discovery rate.
from sklearn.datasets import load_breast_cancer
from sklearn.feature_selection import SelectFdr, chi2
X, y = load_breast_cancer(return_X_y=True)
X.shape
# Expected:
## (569, 30)
X_new = SelectFdr(chi2, alpha=0.01).fit_transform(X, y)
X_new.shape
# Expected:
## (569, 16)

# SelectKBest: keep the 20 features with the highest chi2 scores.
from sklearn.datasets import load_digits
from sklearn.feature_selection import SelectKBest, chi2
X, y = load_digits(return_X_y=True)
X.shape
# Expected:
## (1797, 64)
X_new = SelectKBest(chi2, k=20).fit_transform(X, y)
X_new.shape
# Expected:
## (1797, 20)

# Normalizer: scale each sample (row) to unit L2 norm.
from sklearn.preprocessing import Normalizer
X = [[4, 1, 2, 2],
     [1, 3, 9, 3],
     [5, 7, 5, 1]]
transformer = Normalizer().fit(X) # fit does nothing.
transformer
# Expected:
## Normalizer(copy=True, norm='l2')
transformer.transform(X)
# Expected:
## array([[0.8, 0.2, 0.4, 0.4],
##        [0.1, 0.3, 0.9, 0.3],
##        [0.5, 0.7, 0.5, 0.1]])

# Nystroem: low-rank RBF kernel approximation feeding a linear SVM.
from sklearn import datasets, svm
from sklearn.kernel_approximation import Nystroem
digits = datasets.load_digits(n_class=9)
data = digits.data / 16.
clf = svm.LinearSVC()
feature_map_nystroem = Nystroem(gamma=.2,
                                random_state=1,
                                n_components=300)
data_transformed = feature_map_nystroem.fit_transform(data)
clf.fit(data_transformed, digits.target)

# Expected:
## LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
##      intercept_scaling=1, loss='squared_hinge', max_iter=1000,
##      multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
##      verbose=0)
clf.score(data_transformed, digits.target)
# Expected:
## 0.9987...

# VotingClassifier: hard voting, soft voting, and weighted soft voting
# over logistic regression, random forest, and Gaussian naive Bayes.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
clf1 = LogisticRegression(solver='lbfgs', multi_class='multinomial',
                          random_state=1)
clf2 = RandomForestClassifier(n_estimators=50, random_state=1)
clf3 = GaussianNB()
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])
eclf1 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='hard')
eclf1 = eclf1.fit(X, y)
print(eclf1.predict(X))
# Expected:
## [1 1 1 2 2 2]
# Fitted sub-estimators are reachable by attribute or by key.
np.array_equal(eclf1.named_estimators_.lr.predict(X),
               eclf1.named_estimators_['lr'].predict(X))
# Expected:
## True
eclf2 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
        voting='soft')
eclf2 = eclf2.fit(X, y)
print(eclf2.predict(X))
# Expected:
## [1 1 1 2 2 2]
eclf3 = VotingClassifier(estimators=[
       ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
       voting='soft', weights=[2,1,1],
       flatten_transform=True)
eclf3 = eclf3.fit(X, y)
print(eclf3.predict(X))
# Expected:
## [1 1 1 2 2 2]
# flatten_transform=True gives (n_samples, n_classifiers * n_classes).
print(eclf3.transform(X).shape)
# Expected:
## (6, 6)

# LassoLarsCV: LARS-based lasso with alpha chosen by cross-validation.
from sklearn.linear_model import LassoLarsCV
from sklearn.datasets import make_regression
X, y = make_regression(noise=4.0, random_state=0)
reg = LassoLarsCV(cv=5).fit(X, y)
reg.score(X, y)
# Expected:
## 0.9992...
reg.alpha_
# Expected:
## 0.0484...
reg.predict(X[:1,])
# Expected:
## array([-77.8723...])

# BernoulliRBM: restricted Boltzmann machine on binary inputs.
import numpy as np
from sklearn.neural_network import BernoulliRBM
X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])
model = BernoulliRBM(n_components=2)
model.fit(X)
# Expected:
## BernoulliRBM(batch_size=10, learning_rate=0.1, n_components=2, n_iter=10,
##        random_state=None, verbose=0)

# RobustScaler: center by the median and scale by the IQR, so outliers
# have limited influence.
from sklearn.preprocessing import RobustScaler
X = [[ 1., -2.,  2.],
     [ -2.,  1.,  3.],
     [ 4.,  1., -2.]]
transformer = RobustScaler().fit(X)
transformer
# Expected:
## RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
##        with_scaling=True)
transformer.transform(X)
# Expected:
## array([[ 0. , -2. ,  0. ],
##        [-1. ,  0. ,  0.4],
##        [ 1. ,  0. , -1.6]])

# DecisionTreeRegressor scored with 10-fold cross-validation on Boston.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed
# in 1.2 (ethical concerns with the dataset) -- this snippet needs a
# replacement dataset on modern releases.
from sklearn.datasets import load_boston
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeRegressor
boston = load_boston()
regressor = DecisionTreeRegressor(random_state=0)
cross_val_score(regressor, boston.data, boston.target, cv=10)

# Expected:
## array([ 0.61..., 0.57..., -0.34..., 0.41..., 0.75...,
##         0.07..., 0.29..., 0.33..., -1.42..., -1.77...])

# Lars: least-angle regression stopped after 1 nonzero coefficient.
from sklearn import linear_model
reg = linear_model.Lars(n_nonzero_coefs=1)
reg.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111])

# Expected:
## Lars(copy_X=True, eps=..., fit_intercept=True, fit_path=True,
##    n_nonzero_coefs=1, normalize=True, positive=False, precompute='auto',
##    verbose=False)
print(reg.coef_)
# Expected:
## [ 0. -1.11...]

# SVR: epsilon-insensitive support vector regression (RBF kernel).
from sklearn.svm import SVR
import numpy as np
n_samples, n_features = 10, 5
np.random.seed(0)
y = np.random.randn(n_samples)
X = np.random.randn(n_samples, n_features)
clf = SVR(gamma='scale', C=1.0, epsilon=0.2)
clf.fit(X, y)
# Expected:
## SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.2, gamma='scale',
##     kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

# RFECV: recursive feature elimination with the number of features chosen
# by cross-validation.
from sklearn.datasets import make_friedman1
from sklearn.feature_selection import RFECV
from sklearn.svm import SVR
X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)
estimator = SVR(kernel="linear")
selector = RFECV(estimator, step=1, cv=5)
selector = selector.fit(X, y)
selector.support_
# Expected:
## array([ True,  True,  True,  True,  True, False, False, False, False,
##        False])
selector.ranking_
# Expected:
## array([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])

# BayesianRidge: Bayesian linear regression with gamma hyperpriors.
from sklearn import linear_model
clf = linear_model.BayesianRidge()
clf.fit([[0,0], [1, 1], [2, 2]], [0, 1, 2])

# Expected:
## BayesianRidge(alpha_1=1e-06, alpha_2=1e-06, compute_score=False,
##         copy_X=True, fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06,
##         n_iter=300, normalize=False, tol=0.001, verbose=False)
clf.predict([[1, 1]])
# Expected:
## array([1.])

# MultiTaskElasticNetCV: multi-task elastic net with cross-validated alpha.
from sklearn import linear_model
clf = linear_model.MultiTaskElasticNetCV(cv=3)
clf.fit([[0,0], [1, 1], [2, 2]],
        [[0, 0], [1, 1], [2, 2]])

# Expected:
## MultiTaskElasticNetCV(alphas=None, copy_X=True, cv=3, eps=0.001,
##        fit_intercept=True, l1_ratio=0.5, max_iter=1000, n_alphas=100,
##        n_jobs=None, normalize=False, random_state=None, selection='cyclic',
##        tol=0.0001, verbose=0)
print(clf.coef_)
# Expected:
## [[0.52875032 0.46958558]
##  [0.52875032 0.46958558]]
print(clf.intercept_)
# Expected:
## [0.00166409 0.00166409]

# PLSRegression: partial least squares with 2 components.
from sklearn.cross_decomposition import PLSRegression
X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]
Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
pls2 = PLSRegression(n_components=2)
pls2.fit(X, Y)

# Expected:
## PLSRegression(copy=True, max_iter=500, n_components=2, scale=True,
##         tol=1e-06)
Y_pred = pls2.predict(X)

# FeatureHasher: hash dict features into a fixed-width sparse matrix
# (signs come from the hash, so collisions partially cancel).
from sklearn.feature_extraction import FeatureHasher
h = FeatureHasher(n_features=10)
D = [{'dog': 1, 'cat':2, 'elephant':4},{'dog': 2, 'run': 5}]
f = h.transform(D)
f.toarray()
# Expected:
## array([[ 0.,  0., -4., -1.,  0.,  0.,  0.,  0.,  0.,  2.],
##        [ 0.,  0.,  0., -2., -5.,  0.,  0.,  0.,  0.,  0.]])

# LinearRegression: exact fit on noiseless data y = 1*x1 + 2*x2 + 3.
import numpy as np
from sklearn.linear_model import LinearRegression
X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
y = np.dot(X, np.array([1, 2])) + 3
reg = LinearRegression().fit(X, y)
reg.score(X, y)
# Expected:
## 1.0
reg.coef_
# Expected:
## array([1., 2.])
reg.intercept_
# Expected:
## 3.0000...
reg.predict(np.array([[3, 5]]))
# Expected:
## array([16.])

# LabelBinarizer: one-hot encode multiclass labels.
from sklearn import preprocessing
lb = preprocessing.LabelBinarizer()
lb.fit([1, 2, 6, 4, 2])
# Expected:
## LabelBinarizer(neg_label=0, pos_label=1, sparse_output=False)
lb.classes_
# Expected:
## array([1, 2, 4, 6])
lb.transform([1, 6])
# Expected:
## array([[1, 0, 0, 0],
##        [0, 0, 0, 1]])

# Binary labels produce a single 0/1 output column.
lb = preprocessing.LabelBinarizer()
lb.fit_transform(['yes', 'no', 'no', 'yes'])
# Expected:
## array([[1],
##        [0],
##        [0],
##        [1]])

# Fitting on a 2-D indicator matrix: classes are the column indices.
import numpy as np
lb.fit(np.array([[0, 1, 1], [1, 0, 0]]))
# Expected:
## LabelBinarizer(neg_label=0, pos_label=1, sparse_output=False)
lb.classes_
# Expected:
## array([0, 1, 2])
lb.transform([0, 1, 2, 1])
# Expected:
## array([[1, 0, 0],
##        [0, 1, 0],
##        [0, 0, 1],
##        [0, 1, 0]])

# RadiusNeighborsClassifier: vote among neighbors within radius 1.0.
X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]
from sklearn.neighbors import RadiusNeighborsClassifier
neigh = RadiusNeighborsClassifier(radius=1.0)
neigh.fit(X, y)
# Expected:
## RadiusNeighborsClassifier(...)
print(neigh.predict([[1.5]]))
# Expected:
## [0]

# RidgeClassifierCV: ridge classifier with alpha chosen by built-in CV.
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import RidgeClassifierCV
X, y = load_breast_cancer(return_X_y=True)
clf = RidgeClassifierCV(alphas=[1e-3, 1e-2, 1e-1, 1]).fit(X, y)
clf.score(X, y)
# Expected:
## 0.9630...