Code snippets for page Node List
Download node_list.py.
# -*- coding: utf-8 -*-
# Generated by codesnippet sphinx extension on 2020-12-16
import mdp
import numpy as np
np.random.seed(0)
from mdp import numx
def identity(x): return x
def u3(x): return numx.absolute(x)**3  # a simple nonlinear transformation
def norm2(x):  # compute the norm of each sample, returning an Nx1 array
    return ((x**2).sum(axis=1)**0.5).reshape((-1, 1))
x = numx.array([[-2., 2.], [0.2, 0.3], [0.6, 1.2]])
gen = mdp.nodes.GeneralExpansionNode(funcs=[identity, u3, norm2])
print(gen.execute(x))
# Expected:
## [[-2.          2.          8.          8.          2.82842712]
##  [ 0.2         0.3         0.008       0.027       0.36055513]
##  [ 0.6         1.2         0.216       1.728       1.34164079]]
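# Added sanity check (not part of the original snippet): the last
# expansion column should match the row-wise Euclidean norm computed
# directly with numpy.
print(np.allclose(gen.execute(x)[:, -1], norm2(x).ravel()))
# Expected:
## True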
import numpy as np
from sklearn import linear_model
n_samples, n_features = 10, 5
np.random.seed(0)
y = np.random.randn(n_samples)
X = np.random.randn(n_samples, n_features)
clf = linear_model.SGDRegressor(max_iter=1000, tol=1e-3)
clf.fit(X, y)
# Expected:
## SGDRegressor(alpha=0.0001, average=False, early_stopping=False,
## epsilon=0.1, eta0=0.01, fit_intercept=True, l1_ratio=0.15,
## learning_rate='invscaling', loss='squared_loss', max_iter=1000,
## n_iter=None, n_iter_no_change=5, penalty='l2', power_t=0.25,
## random_state=None, shuffle=True, tol=0.001, validation_fraction=0.1,
## verbose=0, warm_start=False)
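# Hedged follow-up (added sketch): score the fitted model on its training
# data; the exact value depends on the scikit-learn version, so no output
# is pinned here.
print(clf.score(X, y))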
from sklearn.linear_model import TheilSenRegressor
from sklearn.datasets import make_regression
X, y = make_regression(
n_samples=200, n_features=2, noise=4.0, random_state=0)
reg = TheilSenRegressor(random_state=0).fit(X, y)
reg.score(X, y)
# Expected:
## 0.9884...
reg.predict(X[:1,])
# Expected:
## array([-31.5871...])
import numpy as np
from sklearn.random_projection import SparseRandomProjection
np.random.seed(42)
X = np.random.rand(100, 10000)
transformer = SparseRandomProjection()
X_new = transformer.fit_transform(X)
X_new.shape
# Expected:
## (100, 3947)
np.mean(transformer.components_ != 0)
# Expected:
## 0.0100...
from sklearn.preprocessing import MinMaxScaler
data = [[-1, 2], [-0.5, 6], [0, 10], [1, 18]]
scaler = MinMaxScaler()
print(scaler.fit(data))
# Expected:
## MinMaxScaler(copy=True, feature_range=(0, 1))
print(scaler.data_max_)
# Expected:
## [ 1. 18.]
print(scaler.transform(data))
# Expected:
## [[0. 0. ]
## [0.25 0.25]
## [0.5 0.5 ]
## [1. 1. ]]
print(scaler.transform([[2, 2]]))
# Expected:
## [[1.5 0. ]]
from sklearn.linear_model import ElasticNetCV
from sklearn.datasets import make_regression
X, y = make_regression(n_features=2, random_state=0)
regr = ElasticNetCV(cv=5, random_state=0)
regr.fit(X, y)
# Expected:
## ElasticNetCV(alphas=None, copy_X=True, cv=5, eps=0.001, fit_intercept=True,
## l1_ratio=0.5, max_iter=1000, n_alphas=100, n_jobs=None,
## normalize=False, positive=False, precompute='auto', random_state=0,
## selection='cyclic', tol=0.0001, verbose=0)
print(regr.alpha_)
# Expected:
## 0.1994727942696716
print(regr.intercept_)
# Expected:
## 0.398...
print(regr.predict([[0, 0]]))
# Expected:
## [0.398...]
from sklearn.kernel_approximation import RBFSampler
from sklearn.linear_model import SGDClassifier
X = [[0, 0], [1, 1], [1, 0], [0, 1]]
y = [0, 0, 1, 1]
rbf_feature = RBFSampler(gamma=1, random_state=1)
X_features = rbf_feature.fit_transform(X)
clf = SGDClassifier(max_iter=5, tol=1e-3)
clf.fit(X_features, y)
# Expected:
## SGDClassifier(alpha=0.0001, average=False, class_weight=None,
## early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
## l1_ratio=0.15, learning_rate='optimal', loss='hinge', max_iter=5,
## n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',
## power_t=0.5, random_state=None, shuffle=True, tol=0.001,
## validation_fraction=0.1, verbose=0, warm_start=False)
clf.score(X_features, y)
# Expected:
## 1.0
from sklearn.linear_model import OrthogonalMatchingPursuitCV
from sklearn.datasets import make_regression
X, y = make_regression(n_features=100, n_informative=10,
noise=4, random_state=0)
reg = OrthogonalMatchingPursuitCV(cv=5).fit(X, y)
reg.score(X, y)
# Expected:
## 0.9991...
reg.n_nonzero_coefs_
# Expected:
## 10
reg.predict(X[:1,])
# Expected:
## array([-78.3854...])
from sklearn.kernel_approximation import SkewedChi2Sampler
from sklearn.linear_model import SGDClassifier
X = [[0, 0], [1, 1], [1, 0], [0, 1]]
y = [0, 0, 1, 1]
chi2_feature = SkewedChi2Sampler(skewedness=.01,
n_components=10,
random_state=0)
X_features = chi2_feature.fit_transform(X, y)
clf = SGDClassifier(max_iter=10, tol=1e-3)
clf.fit(X_features, y)
# Expected:
## SGDClassifier(alpha=0.0001, average=False, class_weight=None,
## early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
## l1_ratio=0.15, learning_rate='optimal', loss='hinge', max_iter=10,
## n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',
## power_t=0.5, random_state=None, shuffle=True, tol=0.001,
## validation_fraction=0.1, verbose=0, warm_start=False)
clf.score(X_features, y)
# Expected:
## 1.0
from sklearn.datasets import load_digits
from sklearn.linear_model import Perceptron
X, y = load_digits(return_X_y=True)
clf = Perceptron(tol=1e-3, random_state=0)
clf.fit(X, y)
# Expected:
## Perceptron(alpha=0.0001, class_weight=None, early_stopping=False, eta0=1.0,
## fit_intercept=True, max_iter=None, n_iter=None, n_iter_no_change=5,
## n_jobs=None, penalty=None, random_state=0, shuffle=True, tol=0.001,
## validation_fraction=0.1, verbose=0, warm_start=False)
clf.score(X, y)
# Expected:
## 0.946...
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import RidgeClassifier
X, y = load_breast_cancer(return_X_y=True)
clf = RidgeClassifier().fit(X, y)
clf.score(X, y)
# Expected:
## 0.9595...
from sklearn.svm import LinearSVR
from sklearn.datasets import make_regression
X, y = make_regression(n_features=4, random_state=0)
regr = LinearSVR(random_state=0, tol=1e-5)
regr.fit(X, y)
# Expected:
## LinearSVR(C=1.0, dual=True, epsilon=0.0, fit_intercept=True,
## intercept_scaling=1.0, loss='epsilon_insensitive', max_iter=1000,
## random_state=0, tol=1e-05, verbose=0)
print(regr.coef_)
# Expected:
## [16.35... 26.91... 42.30... 60.47...]
print(regr.intercept_)
# Expected:
## [-4.29...]
print(regr.predict([[0, 0, 0, 0]]))
# Expected:
## [-4.29...]
from sklearn.preprocessing import OrdinalEncoder
enc = OrdinalEncoder()
X = [['Male', 1], ['Female', 3], ['Female', 2]]
enc.fit(X)
# Expected:
## OrdinalEncoder(categories='auto', dtype=<... 'numpy.float64'>)
enc.categories_
# Expected:
## [array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]
enc.transform([['Female', 3], ['Male', 1]])
# Expected:
## array([[0., 2.],
## [1., 0.]])
enc.inverse_transform([[1, 0], [0, 1]])
# Expected:
## array([['Male', 1],
## ['Female', 2]], dtype=object)
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
import numpy as np
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])
clf = QuadraticDiscriminantAnalysis()
clf.fit(X, y)
# Expected:
## QuadraticDiscriminantAnalysis(priors=None, reg_param=0.0,
## store_covariance=False,
## store_covariances=None, tol=0.0001)
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]
X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]
from sklearn.neighbors import KNeighborsClassifier
neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(X, y)
# Expected:
## KNeighborsClassifier(...)
print(neigh.predict([[1.1]]))
# Expected:
## [0]
print(neigh.predict_proba([[0.9]]))
# Expected:
## [[0.66666667 0.33333333]]
import numpy as np
from sklearn.preprocessing import PowerTransformer
pt = PowerTransformer()
data = [[1, 2], [3, 2], [4, 5]]
print(pt.fit(data))
# Expected:
## PowerTransformer(copy=True, method='yeo-johnson', standardize=True)
print(pt.lambdas_)
# Expected:
## [ 1.386... -3.100...]
print(pt.transform(data))
# Expected:
## [[-1.316... -0.707...]
## [ 0.209... -0.707...]
## [ 1.106... 1.414...]]
import numpy as np
from sklearn.datasets import make_friedman1
from sklearn.decomposition import SparsePCA
X, _ = make_friedman1(n_samples=200, n_features=30, random_state=0)
transformer = SparsePCA(n_components=5,
normalize_components=True,
random_state=0)
transformer.fit(X)
# Expected:
## SparsePCA(...)
X_transformed = transformer.transform(X)
X_transformed.shape
# Expected:
## (200, 5)
np.mean(transformer.components_ == 0)
# Expected:
## 0.9666...
from sklearn import svm, datasets
from sklearn.model_selection import GridSearchCV
iris = datasets.load_iris()
parameters = {'kernel': ('linear', 'rbf'), 'C': [1, 10]}
svc = svm.SVC(gamma="scale")
clf = GridSearchCV(svc, parameters, cv=5)
clf.fit(iris.data, iris.target)
# Expected:
## GridSearchCV(cv=5, error_score=...,
## estimator=SVC(C=1.0, cache_size=..., class_weight=..., coef0=...,
## decision_function_shape='ovr', degree=..., gamma=...,
## kernel='rbf', max_iter=-1, probability=False,
## random_state=None, shrinking=True, tol=...,
## verbose=False),
## fit_params=None, iid=..., n_jobs=None,
## param_grid=..., pre_dispatch=..., refit=..., return_train_score=...,
## scoring=..., verbose=...)
sorted(clf.cv_results_.keys())
# Expected:
## ['mean_fit_time', 'mean_score_time', 'mean_test_score',...
## 'mean_train_score', 'param_C', 'param_kernel', 'params',...
## 'rank_test_score', 'split0_test_score',...
## 'split0_train_score', 'split1_test_score', 'split1_train_score',...
## 'split2_test_score', 'split2_train_score',...
## 'std_fit_time', 'std_score_time', 'std_test_score', 'std_train_score'...]
from sklearn.linear_model import LassoCV
from sklearn.datasets import make_regression
X, y = make_regression(noise=4, random_state=0)
reg = LassoCV(cv=5, random_state=0).fit(X, y)
reg.score(X, y)
# Expected:
## 0.9993...
reg.predict(X[:1,])
# Expected:
## array([-78.4951...])
from sklearn.datasets import load_diabetes
from sklearn.linear_model import RidgeCV
X, y = load_diabetes(return_X_y=True)
clf = RidgeCV(alphas=[1e-3, 1e-2, 1e-1, 1]).fit(X, y)
clf.score(X, y)
# Expected:
## 0.5166...
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])
clf = LinearDiscriminantAnalysis()
clf.fit(X, y)
# Expected:
## LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=None,
## solver='svd', store_covariance=False, tol=0.0001)
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]
from sklearn import linear_model
clf = linear_model.ARDRegression()
clf.fit([[0, 0], [1, 1], [2, 2]], [0, 1, 2])
# Expected:
## ARDRegression(alpha_1=1e-06, alpha_2=1e-06, compute_score=False,
## copy_X=True, fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06,
## n_iter=300, normalize=False, threshold_lambda=10000.0, tol=0.001,
## verbose=False)
clf.predict([[1, 1]])
# Expected:
## array([1.])
from sklearn.linear_model import OrthogonalMatchingPursuit
from sklearn.datasets import make_regression
X, y = make_regression(noise=4, random_state=0)
reg = OrthogonalMatchingPursuit().fit(X, y)
reg.score(X, y)
# Expected:
## 0.9991...
reg.predict(X[:1,])
# Expected:
## array([-78.3854...])
from sklearn.cross_decomposition import PLSCanonical
X = [[0., 0., 1.], [1., 0., 0.], [2., 2., 2.], [2., 5., 4.]]
Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
plsca = PLSCanonical(n_components=2)
plsca.fit(X, Y)
# Expected:
## PLSCanonical(algorithm='nipals', copy=True, max_iter=500, n_components=2,
## scale=True, tol=1e-06)
X_c, Y_c = plsca.transform(X, Y)
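# Added check (mirroring the PLSSVD snippet below): each score matrix has
# n_components columns.
X_c.shape, Y_c.shape
# Expected:
## ((4, 2), (4, 2))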
import numpy as np
from sklearn import datasets, cluster
digits = datasets.load_digits()
images = digits.images
X = np.reshape(images, (len(images), -1))
agglo = cluster.FeatureAgglomeration(n_clusters=32)
agglo.fit(X)
# Expected:
## FeatureAgglomeration(affinity='euclidean', compute_full_tree='auto',
## connectivity=None, linkage='ward', memory=None, n_clusters=32,
## pooling_func=...)
X_reduced = agglo.transform(X)
X_reduced.shape
# Expected:
## (1797, 32)
from sklearn.datasets import load_digits
from sklearn.feature_selection import SelectPercentile, chi2
X, y = load_digits(return_X_y=True)
X.shape
# Expected:
## (1797, 64)
X_new = SelectPercentile(chi2, percentile=10).fit_transform(X, y)
X_new.shape
# Expected:
## (1797, 7)
from sklearn.kernel_ridge import KernelRidge
import numpy as np
n_samples, n_features = 10, 5
rng = np.random.RandomState(0)
y = rng.randn(n_samples)
X = rng.randn(n_samples, n_features)
clf = KernelRidge(alpha=1.0)
clf.fit(X, y)
# Expected:
## KernelRidge(alpha=1.0, coef0=1, degree=3, gamma=None, kernel='linear',
## kernel_params=None)
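# Hedged follow-up (added sketch): predictions on the training inputs,
# one value per sample; the values themselves depend on the random data
# above, so only the shape is pinned.
clf.predict(X).shape
# Expected:
## (10,)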
from sklearn.linear_model import MultiTaskLassoCV
from sklearn.datasets import make_regression
X, y = make_regression(n_targets=2, noise=4, random_state=0)
reg = MultiTaskLassoCV(cv=5, random_state=0).fit(X, y)
reg.score(X, y)
# Expected:
## 0.9994...
reg.alpha_
# Expected:
## 0.5713...
reg.predict(X[:1,])
# Expected:
## array([[153.7971..., 94.9015...]])
import numpy as np
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
Y = np.array([1, 1, 1, 2, 2, 2])
from sklearn.naive_bayes import GaussianNB
clf = GaussianNB()
clf.fit(X, Y)
# Expected:
## GaussianNB(priors=None, var_smoothing=1e-09)
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]
clf_pf = GaussianNB()
clf_pf.partial_fit(X, Y, np.unique(Y))
# Expected:
## GaussianNB(priors=None, var_smoothing=1e-09)
print(clf_pf.predict([[-0.8, -1]]))
# Expected:
## [1]
import numpy as np
from sklearn import datasets
from sklearn.semi_supervised import LabelSpreading
label_prop_model = LabelSpreading()
iris = datasets.load_iris()
rng = np.random.RandomState(42)
random_unlabeled_points = rng.rand(len(iris.target)) < 0.3
labels = np.copy(iris.target)
labels[random_unlabeled_points] = -1
label_prop_model.fit(iris.data, labels)
# Expected:
## LabelSpreading(...)
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.datasets import make_multilabel_classification
X, _ = make_multilabel_classification(random_state=0)
lda = LatentDirichletAllocation(n_components=5,
random_state=0)
lda.fit(X)
# Expected:
## LatentDirichletAllocation(...)
lda.transform(X[-2:])
# Expected:
## array([[0.00360392, 0.25499205, 0.0036211 , 0.64236448, 0.09541846],
## [0.15297572, 0.00362644, 0.44412786, 0.39568399, 0.003586 ]])
import numpy as np
X = np.array([[1, 1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])
from sklearn.decomposition import NMF
model = NMF(n_components=2, init='random', random_state=0)
W = model.fit_transform(X)
H = model.components_
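# Added sketch (not in the original snippet): the product W @ H
# approximately reconstructs X and shares its shape.
(W @ H).shape
# Expected:
## (6, 2)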
from sklearn.preprocessing import MaxAbsScaler
X = [[ 1., -1., 2.],
[ 2., 0., 0.],
[ 0., 1., -1.]]
transformer = MaxAbsScaler().fit(X)
transformer
# Expected:
## MaxAbsScaler(copy=True)
transformer.transform(X)
# Expected:
## array([[ 0.5, -1. , 1. ],
## [ 1. , 0. , 0. ],
## [ 0. , 1. , -0.5]])
from sklearn.feature_extraction.text import HashingVectorizer
corpus = [
'This is the first document.',
'This document is the second document.',
'And this is the third one.',
'Is this the first document?',
]
vectorizer = HashingVectorizer(n_features=2**4)
X = vectorizer.fit_transform(corpus)
print(X.shape)
# Expected:
## (4, 16)
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegressionCV
X, y = load_iris(return_X_y=True)
clf = LogisticRegressionCV(cv=5, random_state=0,
multi_class='multinomial').fit(X, y)
clf.predict(X[:2, :])
# Expected:
## array([0, 0])
clf.predict_proba(X[:2, :]).shape
# Expected:
## (2, 3)
clf.score(X, y)
# Expected:
## 0.98...
import numpy as np
X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
y = np.array([1, 1, 2, 2])
from sklearn.svm import SVC
clf = SVC(gamma='auto')
clf.fit(X, y)
# Expected:
## SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
## decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
## max_iter=-1, probability=False, random_state=None, shrinking=True,
## tol=0.001, verbose=False)
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]
from sklearn.feature_extraction import DictVectorizer
v = DictVectorizer(sparse=False)
D = [{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}]
X = v.fit_transform(D)
X
# Expected:
## array([[2., 0., 1.],
## [0., 1., 3.]])
v.inverse_transform(X) == [{'bar': 2.0, 'foo': 1.0}, {'baz': 1.0, 'foo': 3.0}]
# Expected:
## True
v.transform({'foo': 4, 'unseen_feature': 3})
# Expected:
## array([[0., 0., 4.]])
from sklearn.svm import LinearSVC
from sklearn.datasets import make_classification
X, y = make_classification(n_features=4, random_state=0)
clf = LinearSVC(random_state=0, tol=1e-5)
clf.fit(X, y)
# Expected:
## LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
## intercept_scaling=1, loss='squared_hinge', max_iter=1000,
## multi_class='ovr', penalty='l2', random_state=0, tol=1e-05, verbose=0)
print(clf.coef_)
# Expected:
## [[0.085... 0.394... 0.498... 0.375...]]
print(clf.intercept_)
# Expected:
## [0.284...]
print(clf.predict([[0, 0, 0, 0]]))
# Expected:
## [1]
# Note: RandomizedLasso was deprecated in scikit-learn 0.19 and removed in
# 0.21, so this snippet only runs against older releases.
from sklearn.linear_model import RandomizedLasso
randomized_lasso = RandomizedLasso()
from sklearn.preprocessing import MultiLabelBinarizer
mlb = MultiLabelBinarizer()
mlb.fit_transform([(1, 2), (3,)])
# Expected:
## array([[1, 1, 0],
## [0, 0, 1]])
mlb.classes_
# Expected:
## array([1, 2, 3])
mlb.fit_transform([set(['sci-fi', 'thriller']), set(['comedy'])])
# Expected:
## array([[0, 1, 1],
## [1, 0, 0]])
list(mlb.classes_)
# Expected:
## ['comedy', 'sci-fi', 'thriller']
from sklearn.datasets import load_digits
from sklearn.decomposition import FastICA
X, _ = load_digits(return_X_y=True)
transformer = FastICA(n_components=7,
random_state=0)
X_transformed = transformer.fit_transform(X)
X_transformed.shape
# Expected:
## (1797, 7)
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import make_regression
X, y = make_regression(n_features=4, n_informative=2,
random_state=0, shuffle=False)
regr = RandomForestRegressor(max_depth=2, random_state=0,
n_estimators=100)
regr.fit(X, y)
# Expected:
## RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=2,
## max_features='auto', max_leaf_nodes=None,
## min_impurity_decrease=0.0, min_impurity_split=None,
## min_samples_leaf=1, min_samples_split=2,
## min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=None,
## oob_score=False, random_state=0, verbose=0, warm_start=False)
print(regr.feature_importances_)
# Expected:
## [0.18146984 0.81473937 0.00145312 0.00233767]
print(regr.predict([[0, 0, 0, 0]]))
# Expected:
## [-8.32987858]
import numpy as np
X = np.random.randint(5, size=(6, 100))
y = np.array([1, 2, 3, 4, 5, 6])
from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB()
clf.fit(X, y)
# Expected:
## MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)
print(clf.predict(X[2:3]))
# Expected:
## [3]
from sklearn import preprocessing
le = preprocessing.LabelEncoder()
le.fit([1, 2, 2, 6])
# Expected:
## LabelEncoder()
le.classes_
# Expected:
## array([1, 2, 6])
le.transform([1, 1, 2, 6])
# Expected:
## array([0, 0, 1, 2]...)
le.inverse_transform([0, 0, 1, 2])
# Expected:
## array([1, 1, 2, 6])
le = preprocessing.LabelEncoder()
le.fit(["paris", "paris", "tokyo", "amsterdam"])
# Expected:
## LabelEncoder()
list(le.classes_)
# Expected:
## ['amsterdam', 'paris', 'tokyo']
le.transform(["tokyo", "tokyo", "paris"])
# Expected:
## array([2, 2, 1]...)
list(le.inverse_transform([2, 2, 1]))
# Expected:
## ['tokyo', 'tokyo', 'paris']
from sklearn.datasets import load_digits
from sklearn.manifold import LocallyLinearEmbedding
X, _ = load_digits(return_X_y=True)
X.shape
# Expected:
## (1797, 64)
embedding = LocallyLinearEmbedding(n_components=2)
X_transformed = embedding.fit_transform(X[:100])
X_transformed.shape
# Expected:
## (100, 2)
from sklearn.datasets import load_iris
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
X, y = load_iris(return_X_y=True)
kernel = 1.0 * RBF(1.0)
gpc = GaussianProcessClassifier(kernel=kernel,
random_state=0).fit(X, y)
gpc.score(X, y)
# Expected:
## 0.9866...
gpc.predict_proba(X[:2,:])
# Expected:
## array([[0.83548752, 0.03228706, 0.13222543],
## [0.79064206, 0.06525643, 0.14410151]])
from sklearn.linear_model import LarsCV
from sklearn.datasets import make_regression
X, y = make_regression(n_samples=200, noise=4.0, random_state=0)
reg = LarsCV(cv=5).fit(X, y)
reg.score(X, y)
# Expected:
## 0.9996...
reg.alpha_
# Expected:
## 0.0254...
reg.predict(X[:1,])
# Expected:
## array([154.0842...])
from sklearn.datasets import load_digits
from sklearn.linear_model import SGDClassifier
from sklearn.kernel_approximation import AdditiveChi2Sampler
X, y = load_digits(return_X_y=True)
chi2sampler = AdditiveChi2Sampler(sample_steps=2)
X_transformed = chi2sampler.fit_transform(X, y)
clf = SGDClassifier(max_iter=5, random_state=0, tol=1e-3)
clf.fit(X_transformed, y)
# Expected:
## SGDClassifier(alpha=0.0001, average=False, class_weight=None,
## early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
## l1_ratio=0.15, learning_rate='optimal', loss='hinge', max_iter=5,
## n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',
## power_t=0.5, random_state=0, shuffle=True, tol=0.001,
## validation_fraction=0.1, verbose=0, warm_start=False)
clf.score(X_transformed, y)
# Expected:
## 0.9543...
from sklearn.cluster import Birch
X = [[0, 1], [0.3, 1], [-0.3, 1], [0, -1], [0.3, -1], [-0.3, -1]]
brc = Birch(branching_factor=50, n_clusters=None, threshold=0.5,
compute_labels=True)
brc.fit(X)
# Expected:
## Birch(branching_factor=50, compute_labels=True, copy=True, n_clusters=None,
## threshold=0.5)
brc.predict(X)
# Expected:
## array([0, 0, 0, 1, 1, 1])
import numpy as np
from sklearn.preprocessing import QuantileTransformer
rng = np.random.RandomState(0)
X = np.sort(rng.normal(loc=0.5, scale=0.25, size=(25, 1)), axis=0)
qt = QuantileTransformer(n_quantiles=10, random_state=0)
qt.fit_transform(X)
# Expected:
## array([...])
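# Added sanity check: with the default uniform output distribution, the
# transformed values lie in the closed interval [0, 1]. (Xt is a new name
# introduced for this check.)
Xt = qt.fit_transform(X)
print(Xt.min() >= 0.0 and Xt.max() <= 1.0)
# Expected:
## True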
from sklearn.feature_extraction.text import CountVectorizer
corpus = [
'This is the first document.',
'This document is the second document.',
'And this is the third one.',
'Is this the first document?',
]
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(corpus)
print(vectorizer.get_feature_names())
# Expected:
## ['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third', 'this']
print(X.toarray())
# Expected:
## [[0 1 1 1 0 0 1 0 1]
## [0 2 0 1 0 1 1 0 1]
## [1 0 0 1 1 0 1 1 1]
## [0 1 1 1 0 0 1 0 1]]
import numpy as np
from sklearn import datasets
from sklearn.semi_supervised import LabelPropagation
label_prop_model = LabelPropagation()
iris = datasets.load_iris()
rng = np.random.RandomState(42)
random_unlabeled_points = rng.rand(len(iris.target)) < 0.3
labels = np.copy(iris.target)
labels[random_unlabeled_points] = -1
label_prop_model.fit(iris.data, labels)
# Expected:
## LabelPropagation(...)
X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]
from sklearn.neighbors import RadiusNeighborsRegressor
neigh = RadiusNeighborsRegressor(radius=1.0)
neigh.fit(X, y)
# Expected:
## RadiusNeighborsRegressor(...)
print(neigh.predict([[1.5]]))
# Expected:
## [0.5]
import numpy as np
from sklearn.cross_decomposition import PLSSVD
X = np.array([[0., 0., 1.],
[1.,0.,0.],
[2.,2.,2.],
[2.,5.,4.]])
Y = np.array([[0.1, -0.2],
[0.9, 1.1],
[6.2, 5.9],
[11.9, 12.3]])
plsca = PLSSVD(n_components=2)
plsca.fit(X, Y)
# Expected:
## PLSSVD(copy=True, n_components=2, scale=True)
X_c, Y_c = plsca.transform(X, Y)
X_c.shape, Y_c.shape
# Expected:
## ((4, 2), (4, 2))
import numpy as np
from sklearn.random_projection import GaussianRandomProjection
X = np.random.rand(100, 10000)
transformer = GaussianRandomProjection()
X_new = transformer.fit_transform(X)
X_new.shape
# Expected:
## (100, 3947)
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(handle_unknown='ignore')
X = [['Male', 1], ['Female', 3], ['Female', 2]]
enc.fit(X)
# Expected:
## OneHotEncoder(categorical_features=None, categories=None,
## dtype=<... 'numpy.float64'>, handle_unknown='ignore',
## n_values=None, sparse=True)
enc.categories_
# Expected:
## [array(['Female', 'Male'], dtype=object), array([1, 2, 3], dtype=object)]
enc.transform([['Female', 1], ['Male', 4]]).toarray()
# Expected:
## array([[1., 0., 1., 0., 0.],
## [0., 1., 0., 0., 0.]])
enc.inverse_transform([[0, 1, 1, 0, 0], [0, 0, 0, 1, 0]])
# Expected:
## array([['Male', 1],
## [None, 2]], dtype=object)
enc.get_feature_names()
# Expected:
## array(['x0_Female', 'x0_Male', 'x1_1', 'x1_2', 'x1_3'], dtype=object)
X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]
from sklearn.neighbors import KNeighborsRegressor
neigh = KNeighborsRegressor(n_neighbors=2)
neigh.fit(X, y)
# Expected:
## KNeighborsRegressor(...)
print(neigh.predict([[1.5]]))
# Expected:
## [0.5]
from sklearn.datasets import make_friedman2
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
kernel = DotProduct() + WhiteKernel()
gpr = GaussianProcessRegressor(kernel=kernel,
random_state=0).fit(X, y)
gpr.score(X, y)
# Expected:
## 0.3680...
gpr.predict(X[:2,:], return_std=True)
# Expected:
## (array([653.0..., 592.1...]), array([316.6..., 316.6...]))
from sklearn.cluster import KMeans
import numpy as np
X = np.array([[1, 2], [1, 4], [1, 0],
[10, 2], [10, 4], [10, 0]])
kmeans = KMeans(n_clusters=2, random_state=0).fit(X)
kmeans.labels_
# Expected:
## array([1, 1, 1, 0, 0, 0], dtype=int32)
kmeans.predict([[0, 0], [12, 3]])
# Expected:
## array([1, 0], dtype=int32)
kmeans.cluster_centers_
# Expected:
## array([[10., 2.],
## [ 1., 2.]])
from sklearn.linear_model import PassiveAggressiveRegressor
from sklearn.datasets import make_regression
X, y = make_regression(n_features=4, random_state=0)
regr = PassiveAggressiveRegressor(max_iter=100, random_state=0,
tol=1e-3)
regr.fit(X, y)
# Expected:
## PassiveAggressiveRegressor(C=1.0, average=False, early_stopping=False,
## epsilon=0.1, fit_intercept=True, loss='epsilon_insensitive',
## max_iter=100, n_iter=None, n_iter_no_change=5,
## random_state=0, shuffle=True, tol=0.001,
## validation_fraction=0.1, verbose=0, warm_start=False)
print(regr.coef_)
# Expected:
## [20.48736655 34.18818427 67.59122734 87.94731329]
print(regr.intercept_)
# Expected:
## [-0.02306214]
print(regr.predict([[0, 0, 0, 0]]))
# Expected:
## [-0.02306214]
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
X, y = make_classification(n_samples=1000, n_features=4,
n_informative=2, n_redundant=0,
random_state=0, shuffle=False)
clf = RandomForestClassifier(n_estimators=100, max_depth=2,
random_state=0)
clf.fit(X, y)
# Expected:
## RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
## max_depth=2, max_features='auto', max_leaf_nodes=None,
## min_impurity_decrease=0.0, min_impurity_split=None,
## min_samples_leaf=1, min_samples_split=2,
## min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=None,
## oob_score=False, random_state=0, verbose=0, warm_start=False)
print(clf.feature_importances_)
# Expected:
## [0.14205973 0.76664038 0.0282433 0.06305659]
print(clf.predict([[0, 0, 0, 0]]))
# Expected:
## [1]
from sklearn.linear_model import Ridge
import numpy as np
n_samples, n_features = 10, 5
np.random.seed(0)
y = np.random.randn(n_samples)
X = np.random.randn(n_samples, n_features)
clf = Ridge(alpha=1.0)
clf.fit(X, y)
# Expected:
## Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
## normalize=False, random_state=None, solver='auto', tol=0.001)
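# Added check: a single-target fit yields one coefficient per feature.
clf.coef_.shape
# Expected:
## (5,)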
from sklearn.linear_model import ElasticNet
from sklearn.datasets import make_regression
X, y = make_regression(n_features=2, random_state=0)
regr = ElasticNet(random_state=0)
regr.fit(X, y)
# Expected:
## ElasticNet(alpha=1.0, copy_X=True, fit_intercept=True, l1_ratio=0.5,
## max_iter=1000, normalize=False, positive=False, precompute=False,
## random_state=0, selection='cyclic', tol=0.0001, warm_start=False)
print(regr.coef_)
# Expected:
## [18.83816048 64.55968825]
print(regr.intercept_)
# Expected:
## 1.451...
print(regr.predict([[0, 0]]))
# Expected:
## [1.451...]
from sklearn.datasets import load_digits
from sklearn.manifold import Isomap
X, _ = load_digits(return_X_y=True)
X.shape
# Expected:
## (1797, 64)
embedding = Isomap(n_components=2)
X_transformed = embedding.fit_transform(X[:100])
X_transformed.shape
# Expected:
## (100, 2)
from sklearn.preprocessing import Binarizer
X = [[ 1., -1., 2.],
[ 2., 0., 0.],
[ 0., 1., -1.]]
transformer = Binarizer().fit(X) # fit does nothing.
transformer
# Expected:
## Binarizer(copy=True, threshold=0.0)
transformer.transform(X)
# Expected:
## array([[1., 0., 1.],
## [1., 0., 0.],
## [0., 1., 0.]])
from sklearn.feature_extraction.text import TfidfVectorizer
corpus = [
'This is the first document.',
'This document is the second document.',
'And this is the third one.',
'Is this the first document?',
]
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(corpus)
print(vectorizer.get_feature_names())
# Expected:
## ['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third', 'this']
print(X.shape)
# Expected:
## (4, 9)
from sklearn.preprocessing import KBinsDiscretizer
X = [[-2, 1, -4,   -1],
     [-1, 2, -3, -0.5],
     [ 0, 3, -2,  0.5],
     [ 1, 4, -1,    2]]
est = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')
est.fit(X)
# Expected:
## KBinsDiscretizer(...)
Xt = est.transform(X)
Xt
# Expected:
## array([[ 0., 0., 0., 0.],
## [ 1., 1., 1., 0.],
## [ 2., 2., 2., 1.],
## [ 2., 2., 2., 2.]])
est.bin_edges_[0]
# Expected:
## array([-2., -1., 0., 1.])
est.inverse_transform(Xt)
# Expected:
## array([[-1.5, 1.5, -3.5, -0.5],
## [-0.5, 2.5, -2.5, -0.5],
## [ 0.5, 3.5, -1.5, 0.5],
## [ 0.5, 3.5, -1.5, 1.5]])
from sklearn.datasets import load_digits
from sklearn.decomposition import IncrementalPCA
X, _ = load_digits(return_X_y=True)
transformer = IncrementalPCA(n_components=7, batch_size=200)
transformer.partial_fit(X[:100, :])
# Expected:
## IncrementalPCA(batch_size=200, copy=True, n_components=7, whiten=False)
X_transformed = transformer.fit_transform(X)
X_transformed.shape
# Expected:
## (1797, 7)
import numpy as np
from sklearn.datasets import make_friedman1
from sklearn.decomposition import MiniBatchSparsePCA
X, _ = make_friedman1(n_samples=200, n_features=30, random_state=0)
transformer = MiniBatchSparsePCA(n_components=5,
batch_size=50,
normalize_components=True,
random_state=0)
transformer.fit(X)
# Expected:
## MiniBatchSparsePCA(...)
X_transformed = transformer.transform(X)
X_transformed.shape
# Expected:
## (200, 5)
np.mean(transformer.components_ == 0)
# Expected:
## 0.94
from sklearn.datasets import load_digits
from sklearn.decomposition import FactorAnalysis
X, _ = load_digits(return_X_y=True)
transformer = FactorAnalysis(n_components=7, random_state=0)
X_transformed = transformer.fit_transform(X)
X_transformed.shape
# Expected:
## (1797, 7)
from sklearn import linear_model
reg = linear_model.LassoLarsIC(criterion='bic')
reg.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111])
# Expected:
## LassoLarsIC(copy_X=True, criterion='bic', eps=..., fit_intercept=True,
## max_iter=500, normalize=True, positive=False, precompute='auto',
## verbose=False)
print(reg.coef_)
# Expected:
## [ 0. -1.11...]
from sklearn.datasets import make_friedman1
from sklearn.feature_selection import RFE
from sklearn.svm import SVR
X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)
estimator = SVR(kernel="linear")
selector = RFE(estimator, n_features_to_select=5, step=1)
selector = selector.fit(X, y)
selector.support_
# Expected:
## array([ True, True, True, True, True, False, False, False, False,
## False])
selector.ranking_
# Expected:
## array([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])
import numpy as np
from sklearn.decomposition import PCA
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
pca = PCA(n_components=2)
pca.fit(X)
# Expected:
## PCA(copy=True, iterated_power='auto', n_components=2, random_state=None,
## svd_solver='auto', tol=0.0, whiten=False)
print(pca.explained_variance_ratio_)
# Expected:
## [0.9924... 0.0075...]
print(pca.singular_values_)
# Expected:
## [6.30061... 0.54980...]
pca = PCA(n_components=2, svd_solver='full')
pca.fit(X)
# Expected:
## PCA(copy=True, iterated_power='auto', n_components=2, random_state=None,
## svd_solver='full', tol=0.0, whiten=False)
print(pca.explained_variance_ratio_)
# Expected:
## [0.9924... 0.00755...]
print(pca.singular_values_)
# Expected:
## [6.30061... 0.54980...]
pca = PCA(n_components=1, svd_solver='arpack')
pca.fit(X)
# Expected:
## PCA(copy=True, iterated_power='auto', n_components=1, random_state=None,
## svd_solver='arpack', tol=0.0, whiten=False)
print(pca.explained_variance_ratio_)
# Expected:
## [0.99244...]
print(pca.singular_values_)
# Expected:
## [6.30061...]
from sklearn import linear_model
clf = linear_model.MultiTaskLasso(alpha=0.1)
clf.fit([[0, 0], [1, 1], [2, 2]], [[0, 0], [1, 1], [2, 2]])
# Expected:
## MultiTaskLasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
## normalize=False, random_state=None, selection='cyclic', tol=0.0001,
## warm_start=False)
print(clf.coef_)
# Expected:
## [[0.89393398 0. ]
## [0.89393398 0. ]]
print(clf.intercept_)
# Expected:
## [0.10606602 0.10606602]
# Note: RandomizedLogisticRegression was likewise deprecated in
# scikit-learn 0.19 and removed in 0.21.
from sklearn.linear_model import RandomizedLogisticRegression
randomized_logistic = RandomizedLogisticRegression()
from sklearn.datasets import load_breast_cancer
from sklearn.feature_selection import SelectFwe, chi2
X, y = load_breast_cancer(return_X_y=True)
X.shape
# Expected:
## (569, 30)
X_new = SelectFwe(chi2, alpha=0.01).fit_transform(X, y)
X_new.shape
# Expected:
## (569, 15)
from sklearn import linear_model
clf = linear_model.MultiTaskElasticNet(alpha=0.1)
clf.fit([[0, 0], [1, 1], [2, 2]], [[0, 0], [1, 1], [2, 2]])
# Expected:
## MultiTaskElasticNet(alpha=0.1, copy_X=True, fit_intercept=True,
## l1_ratio=0.5, max_iter=1000, normalize=False, random_state=None,
## selection='cyclic', tol=0.0001, warm_start=False)
print(clf.coef_)
# Expected:
## [[0.45663524 0.45612256]
## [0.45663524 0.45612256]]
print(clf.intercept_)
# Expected:
## [0.0872422 0.0872422]
from sklearn.preprocessing import StandardScaler
data = [[0, 0], [0, 0], [1, 1], [1, 1]]
scaler = StandardScaler()
print(scaler.fit(data))
# Expected:
## StandardScaler(copy=True, with_mean=True, with_std=True)
print(scaler.mean_)
# Expected:
## [0.5 0.5]
print(scaler.transform(data))
# Expected:
## [[-1. -1.]
## [-1. -1.]
## [ 1. 1.]
## [ 1. 1.]]
print(scaler.transform([[2, 2]]))
# Expected:
## [[3. 3.]]
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier
clf = DecisionTreeClassifier(random_state=0)
iris = load_iris()
cross_val_score(clf, iris.data, iris.target, cv=10)
# Expected:
## array([ 1. , 0.93..., 0.86..., 0.93..., 0.93...,
## 0.93..., 0.93..., 1. , 0.93..., 1. ])
from sklearn.datasets import load_breast_cancer
from sklearn.feature_selection import GenericUnivariateSelect, chi2
X, y = load_breast_cancer(return_X_y=True)
X.shape
# Expected:
## (569, 30)
transformer = GenericUnivariateSelect(chi2, mode='k_best', param=20)
X_new = transformer.fit_transform(X, y)
X_new.shape
# Expected:
## (569, 20)
import numpy as np
X = np.random.randint(2, size=(6, 100))
Y = np.array([1, 2, 3, 4, 4, 5])
from sklearn.naive_bayes import BernoulliNB
clf = BernoulliNB()
clf.fit(X, Y)
# Expected:
## BernoulliNB(alpha=1.0, binarize=0.0, class_prior=None, fit_prior=True)
print(clf.predict(X[2:3]))
# Expected:
## [3]
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
X, y = load_iris(return_X_y=True)
clf = LogisticRegression(random_state=0, solver='lbfgs',
multi_class='multinomial').fit(X, y)
clf.predict(X[:2, :])
# Expected:
## array([0, 0])
clf.predict_proba(X[:2, :])
# Expected:
## array([[9.8...e-01, 1.8...e-02, 1.4...e-08],
## [9.7...e-01, 2.8...e-02, ...e-08]])
clf.score(X, y)
# Expected:
## 0.97...
import numpy as np
X = np.random.randint(5, size=(6, 100))
y = np.array([1, 2, 3, 4, 5, 6])
from sklearn.naive_bayes import ComplementNB
clf = ComplementNB()
clf.fit(X, y)
# Expected:
## ComplementNB(alpha=1.0, class_prior=None, fit_prior=True, norm=False)
print(clf.predict(X[2:3]))
# Expected:
## [3]
import numpy as np
X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
y = np.array([1, 1, 2, 2])
from sklearn.svm import NuSVC
clf = NuSVC(gamma='scale')
clf.fit(X, y)
# Expected:
## NuSVC(cache_size=200, class_weight=None, coef0=0.0,
## decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
## max_iter=-1, nu=0.5, probability=False, random_state=None,
## shrinking=True, tol=0.001, verbose=False)
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]
from sklearn.neighbors import NearestCentroid
import numpy as np
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])
clf = NearestCentroid()
clf.fit(X, y)
# Expected:
## NearestCentroid(metric='euclidean', shrink_threshold=None)
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]
from sklearn.cluster import MiniBatchKMeans
import numpy as np
X = np.array([[1, 2], [1, 4], [1, 0],
[4, 2], [4, 0], [4, 4],
[4, 5], [0, 1], [2, 2],
[3, 2], [5, 5], [1, -1]])
kmeans = MiniBatchKMeans(n_clusters=2,
random_state=0,
batch_size=6)
kmeans = kmeans.partial_fit(X[0:6,:])
kmeans = kmeans.partial_fit(X[6:12,:])
kmeans.cluster_centers_
# Expected:
## array([[1, 1],
## [3, 4]])
kmeans.predict([[0, 0], [4, 4]])
# Expected:
## array([0, 1], dtype=int32)
kmeans = MiniBatchKMeans(n_clusters=2,
random_state=0,
batch_size=6,
max_iter=10).fit(X)
kmeans.cluster_centers_
# Expected:
## array([[3.95918367, 2.40816327],
## [1.12195122, 1.3902439 ]])
kmeans.predict([[0, 0], [4, 4]])
# Expected:
## array([1, 0], dtype=int32)
from sklearn import linear_model
reg = linear_model.LassoLars(alpha=0.01)
reg.fit([[-1, 1], [0, 0], [1, 1]], [-1, 0, -1])
# Expected:
## LassoLars(alpha=0.01, copy_X=True, eps=..., fit_intercept=True,
## fit_path=True, max_iter=500, normalize=True, positive=False,
## precompute='auto', verbose=False)
print(reg.coef_)
# Expected:
## [ 0. -0.963257...]
from sklearn import linear_model
clf = linear_model.Lasso(alpha=0.1)
clf.fit([[0, 0], [1, 1], [2, 2]], [0, 1, 2])
# Expected:
## Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
## normalize=False, positive=False, precompute=False, random_state=None,
## selection='cyclic', tol=0.0001, warm_start=False)
print(clf.coef_)
# Expected:
## [0.85 0. ]
print(clf.intercept_)
# Expected:
## 0.15...
from sklearn.linear_model import RANSACRegressor
from sklearn.datasets import make_regression
X, y = make_regression(
n_samples=200, n_features=2, noise=4.0, random_state=0)
reg = RANSACRegressor(random_state=0).fit(X, y)
reg.score(X, y)
# Expected:
## 0.9885...
reg.predict(X[:1,])
# Expected:
## array([-31.9417...])
import numpy as np
from sklearn.linear_model import HuberRegressor, LinearRegression
from sklearn.datasets import make_regression
np.random.seed(0)
X, y, coef = make_regression(
n_samples=200, n_features=2, noise=4.0, coef=True, random_state=0)
X[:4] = np.random.uniform(10, 20, (4, 2))
y[:4] = np.random.uniform(10, 20, 4)
huber = HuberRegressor().fit(X, y)
huber.score(X, y)
# Expected:
## -7.284608623514573
huber.predict(X[:1,])
# Expected:
## array([806.7200...])
linear = LinearRegression().fit(X, y)
print("True coefficients:", coef)
# Expected:
## True coefficients: [20.4923... 34.1698...]
print("Huber coefficients:", huber.coef_)
# Expected:
## Huber coefficients: [17.7906... 31.0106...]
print("Linear Regression coefficients:", linear.coef_)
# Expected:
## Linear Regression coefficients: [-1.9221... 7.0226...]
from sklearn.decomposition import TruncatedSVD
from sklearn.random_projection import sparse_random_matrix
X = sparse_random_matrix(100, 100, density=0.01, random_state=42)
svd = TruncatedSVD(n_components=5, n_iter=7, random_state=42)
svd.fit(X)
# Expected:
## TruncatedSVD(algorithm='randomized', n_components=5, n_iter=7,
## random_state=42, tol=0.0)
print(svd.explained_variance_ratio_)
# Expected:
## [0.0606... 0.0584... 0.0497... 0.0434... 0.0372...]
print(svd.explained_variance_ratio_.sum())
# Expected:
## 0.249...
print(svd.singular_values_)
# Expected:
## [2.5841... 2.5245... 2.3201... 2.1753... 2.0443...]
from sklearn.datasets import load_breast_cancer
from sklearn.feature_selection import SelectFpr, chi2
X, y = load_breast_cancer(return_X_y=True)
X.shape
# Expected:
## (569, 30)
X_new = SelectFpr(chi2, alpha=0.01).fit_transform(X, y)
X_new.shape
# Expected:
## (569, 16)
from sklearn.svm import NuSVR
import numpy as np
n_samples, n_features = 10, 5
np.random.seed(0)
y = np.random.randn(n_samples)
X = np.random.randn(n_samples, n_features)
clf = NuSVR(gamma='scale', C=1.0, nu=0.1)
clf.fit(X, y)
# Expected:
## NuSVR(C=1.0, cache_size=200, coef0=0.0, degree=3, gamma='scale',
## kernel='rbf', max_iter=-1, nu=0.1, shrinking=True, tol=0.001,
## verbose=False)
import numpy as np
from sklearn import linear_model
X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
Y = np.array([1, 1, 2, 2])
clf = linear_model.SGDClassifier(max_iter=1000, tol=1e-3)
clf.fit(X, Y)
# Expected:
## SGDClassifier(alpha=0.0001, average=False, class_weight=None,
## early_stopping=False, epsilon=0.1, eta0=0.0, fit_intercept=True,
## l1_ratio=0.15, learning_rate='optimal', loss='hinge', max_iter=1000,
## n_iter=None, n_iter_no_change=5, n_jobs=None, penalty='l2',
## power_t=0.5, random_state=None, shuffle=True, tol=0.001,
## validation_fraction=0.1, verbose=0, warm_start=False)
print(clf.predict([[-0.8, -1]]))
# Expected:
## [1]
from sklearn.cluster import MeanShift
import numpy as np
X = np.array([[1, 1], [2, 1], [1, 0],
[4, 7], [3, 5], [3, 6]])
clustering = MeanShift(bandwidth=2).fit(X)
clustering.labels_
# Expected:
## array([1, 1, 1, 0, 0, 0])
clustering.predict([[0, 0], [5, 5]])
# Expected:
## array([1, 0])
clustering
# Expected:
## MeanShift(bandwidth=2, bin_seeding=False, cluster_all=True, min_bin_freq=1,
## n_jobs=None, seeds=None)
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.datasets import make_classification
X, y = make_classification(n_features=4, random_state=0)
clf = PassiveAggressiveClassifier(max_iter=1000, random_state=0,
tol=1e-3)
clf.fit(X, y)
# Expected:
## PassiveAggressiveClassifier(C=1.0, average=False, class_weight=None,
## early_stopping=False, fit_intercept=True, loss='hinge',
## max_iter=1000, n_iter=None, n_iter_no_change=5, n_jobs=None,
## random_state=0, shuffle=True, tol=0.001,
## validation_fraction=0.1, verbose=0, warm_start=False)
print(clf.coef_)
# Expected:
## [[-0.6543424 1.54603022 1.35361642 0.22199435]]
print(clf.intercept_)
# Expected:
## [0.63310933]
print(clf.predict([[0, 0, 0, 0]]))
# Expected:
## [1]
from sklearn.datasets import load_digits
from sklearn.decomposition import KernelPCA
X, _ = load_digits(return_X_y=True)
transformer = KernelPCA(n_components=7, kernel='linear')
X_transformed = transformer.fit_transform(X)
X_transformed.shape
# Expected:
## (1797, 7)
from sklearn.cluster import AffinityPropagation
import numpy as np
X = np.array([[1, 2], [1, 4], [1, 0],
[4, 2], [4, 4], [4, 0]])
clustering = AffinityPropagation().fit(X)
clustering
# Expected:
## AffinityPropagation(affinity='euclidean', convergence_iter=15, copy=True,
## damping=0.5, max_iter=200, preference=None, verbose=False)
clustering.labels_
# Expected:
## array([0, 0, 0, 1, 1, 1])
clustering.predict([[0, 0], [4, 4]])
# Expected:
## array([0, 1])
clustering.cluster_centers_
# Expected:
## array([[1, 2],
## [4, 2]])
from sklearn.cross_decomposition import CCA
X = [[0., 0., 1.], [1., 0., 0.], [2., 2., 2.], [3., 5., 4.]]
Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
cca = CCA(n_components=1)
cca.fit(X, Y)
# Expected:
## CCA(copy=True, max_iter=500, n_components=1, scale=True, tol=1e-06)
X_c, Y_c = cca.transform(X, Y)
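# Added check: with n_components=1, each score matrix has one column.
X_c.shape, Y_c.shape
# Expected:
## ((4, 1), (4, 1))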
from sklearn.preprocessing import KernelCenterer
from sklearn.metrics.pairwise import pairwise_kernels
X = [[ 1., -2., 2.],
[ -2., 1., 3.],
[ 4., 1., -2.]]
K = pairwise_kernels(X, metric='linear')
K
# Expected:
## array([[ 9., 2., -2.],
## [ 2., 14., -13.],
## [ -2., -13., 21.]])
transformer = KernelCenterer().fit(K)
transformer
# Expected:
## KernelCenterer()
transformer.transform(K)
# Expected:
## array([[ 5., 0., -5.],
## [ 0., 14., -14.],
## [ -5., -14., 19.]])
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
X = np.arange(6).reshape(3, 2)
X
# Expected:
## array([[0, 1],
## [2, 3],
## [4, 5]])
poly = PolynomialFeatures(2)
poly.fit_transform(X)
# Expected:
## array([[ 1., 0., 1., 0., 0., 1.],
## [ 1., 2., 3., 4., 6., 9.],
## [ 1., 4., 5., 16., 20., 25.]])
poly = PolynomialFeatures(interaction_only=True)
poly.fit_transform(X)
# Expected:
## array([[ 1., 0., 1., 0.],
## [ 1., 2., 3., 6.],
## [ 1., 4., 5., 20.]])
from sklearn.datasets import load_breast_cancer
from sklearn.feature_selection import SelectFdr, chi2
X, y = load_breast_cancer(return_X_y=True)
X.shape
# Expected:
## (569, 30)
X_new = SelectFdr(chi2, alpha=0.01).fit_transform(X, y)
X_new.shape
# Expected:
## (569, 16)
from sklearn.datasets import load_digits
from sklearn.feature_selection import SelectKBest, chi2
X, y = load_digits(return_X_y=True)
X.shape
# Expected:
## (1797, 64)
X_new = SelectKBest(chi2, k=20).fit_transform(X, y)
X_new.shape
# Expected:
## (1797, 20)
from sklearn.preprocessing import Normalizer
X = [[4, 1, 2, 2],
[1, 3, 9, 3],
[5, 7, 5, 1]]
transformer = Normalizer().fit(X) # fit does nothing.
transformer
# Expected:
## Normalizer(copy=True, norm='l2')
transformer.transform(X)
# Expected:
## array([[0.8, 0.2, 0.4, 0.4],
## [0.1, 0.3, 0.9, 0.3],
## [0.5, 0.7, 0.5, 0.1]])
from sklearn import datasets, svm
from sklearn.kernel_approximation import Nystroem
digits = datasets.load_digits(n_class=9)
data = digits.data / 16.
clf = svm.LinearSVC()
feature_map_nystroem = Nystroem(gamma=.2,
random_state=1,
n_components=300)
data_transformed = feature_map_nystroem.fit_transform(data)
clf.fit(data_transformed, digits.target)
# Expected:
## LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
## intercept_scaling=1, loss='squared_hinge', max_iter=1000,
## multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
## verbose=0)
clf.score(data_transformed, digits.target)
# Expected:
## 0.9987...
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
clf1 = LogisticRegression(solver='lbfgs', multi_class='multinomial',
random_state=1)
clf2 = RandomForestClassifier(n_estimators=50, random_state=1)
clf3 = GaussianNB()
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])
eclf1 = VotingClassifier(estimators=[
('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='hard')
eclf1 = eclf1.fit(X, y)
print(eclf1.predict(X))
# Expected:
## [1 1 1 2 2 2]
np.array_equal(eclf1.named_estimators_.lr.predict(X),
eclf1.named_estimators_['lr'].predict(X))
# Expected:
## True
eclf2 = VotingClassifier(estimators=[
('lr', clf1), ('rf', clf2), ('gnb', clf3)],
voting='soft')
eclf2 = eclf2.fit(X, y)
print(eclf2.predict(X))
# Expected:
## [1 1 1 2 2 2]
eclf3 = VotingClassifier(estimators=[
('lr', clf1), ('rf', clf2), ('gnb', clf3)],
voting='soft', weights=[2,1,1],
flatten_transform=True)
eclf3 = eclf3.fit(X, y)
print(eclf3.predict(X))
# Expected:
## [1 1 1 2 2 2]
print(eclf3.transform(X).shape)
# Expected:
## (6, 6)
from sklearn.linear_model import LassoLarsCV
from sklearn.datasets import make_regression
X, y = make_regression(noise=4.0, random_state=0)
reg = LassoLarsCV(cv=5).fit(X, y)
reg.score(X, y)
# Expected:
## 0.9992...
reg.alpha_
# Expected:
## 0.0484...
reg.predict(X[:1,])
# Expected:
## array([-77.8723...])
import numpy as np
from sklearn.neural_network import BernoulliRBM
X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])
model = BernoulliRBM(n_components=2)
model.fit(X)
# Expected:
## BernoulliRBM(batch_size=10, learning_rate=0.1, n_components=2, n_iter=10,
## random_state=None, verbose=0)
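# Hedged follow-up (added sketch): transform() returns the latent
# hidden-unit representation, one column per component.
model.transform(X).shape
# Expected:
## (4, 2)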
from sklearn.preprocessing import RobustScaler
X = [[ 1., -2., 2.],
[ -2., 1., 3.],
[ 4., 1., -2.]]
transformer = RobustScaler().fit(X)
transformer
# Expected:
## RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
## with_scaling=True)
transformer.transform(X)
# Expected:
## array([[ 0. , -2. , 0. ],
## [-1. , 0. , 0.4],
## [ 1. , 0. , -1.6]])
from sklearn.datasets import load_boston
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeRegressor
boston = load_boston()
regressor = DecisionTreeRegressor(random_state=0)
cross_val_score(regressor, boston.data, boston.target, cv=10)
# Expected:
## array([ 0.61..., 0.57..., -0.34..., 0.41..., 0.75...,
## 0.07..., 0.29..., 0.33..., -1.42..., -1.77...])
from sklearn import linear_model
reg = linear_model.Lars(n_nonzero_coefs=1)
reg.fit([[-1, 1], [0, 0], [1, 1]], [-1.1111, 0, -1.1111])
# Expected:
## Lars(copy_X=True, eps=..., fit_intercept=True, fit_path=True,
## n_nonzero_coefs=1, normalize=True, positive=False, precompute='auto',
## verbose=False)
print(reg.coef_)
# Expected:
## [ 0. -1.11...]
from sklearn.svm import SVR
import numpy as np
n_samples, n_features = 10, 5
np.random.seed(0)
y = np.random.randn(n_samples)
X = np.random.randn(n_samples, n_features)
clf = SVR(gamma='scale', C=1.0, epsilon=0.2)
clf.fit(X, y)
# Expected:
## SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.2, gamma='scale',
## kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)
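# Hedged follow-up (added sketch): the fitted regressor produces one
# prediction per sample; values depend on the random data above.
clf.predict(X).shape
# Expected:
## (10,)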
from sklearn.datasets import make_friedman1
from sklearn.feature_selection import RFECV
from sklearn.svm import SVR
X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)
estimator = SVR(kernel="linear")
selector = RFECV(estimator, step=1, cv=5)
selector = selector.fit(X, y)
selector.support_
# Expected:
## array([ True, True, True, True, True, False, False, False, False,
## False])
selector.ranking_
# Expected:
## array([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])
from sklearn import linear_model
clf = linear_model.BayesianRidge()
clf.fit([[0, 0], [1, 1], [2, 2]], [0, 1, 2])
# Expected:
## BayesianRidge(alpha_1=1e-06, alpha_2=1e-06, compute_score=False,
## copy_X=True, fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06,
## n_iter=300, normalize=False, tol=0.001, verbose=False)
clf.predict([[1, 1]])
# Expected:
## array([1.])
from sklearn import linear_model
clf = linear_model.MultiTaskElasticNetCV(cv=3)
clf.fit([[0, 0], [1, 1], [2, 2]],
        [[0, 0], [1, 1], [2, 2]])
# Expected:
## MultiTaskElasticNetCV(alphas=None, copy_X=True, cv=3, eps=0.001,
## fit_intercept=True, l1_ratio=0.5, max_iter=1000, n_alphas=100,
## n_jobs=None, normalize=False, random_state=None, selection='cyclic',
## tol=0.0001, verbose=0)
print(clf.coef_)
# Expected:
## [[0.52875032 0.46958558]
## [0.52875032 0.46958558]]
print(clf.intercept_)
# Expected:
## [0.00166409 0.00166409]
from sklearn.cross_decomposition import PLSRegression
X = [[0., 0., 1.], [1., 0., 0.], [2., 2., 2.], [2., 5., 4.]]
Y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
pls2 = PLSRegression(n_components=2)
pls2.fit(X, Y)
# Expected:
## PLSRegression(copy=True, max_iter=500, n_components=2, scale=True,
## tol=1e-06)
Y_pred = pls2.predict(X)
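# Added check: predictions have one row per sample and one column per
# target.
Y_pred.shape
# Expected:
## (4, 2)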
from sklearn.feature_extraction import FeatureHasher
h = FeatureHasher(n_features=10)
D = [{'dog': 1, 'cat': 2, 'elephant': 4}, {'dog': 2, 'run': 5}]
f = h.transform(D)
f.toarray()
# Expected:
## array([[ 0., 0., -4., -1., 0., 0., 0., 0., 0., 2.],
## [ 0., 0., 0., -2., -5., 0., 0., 0., 0., 0.]])
import numpy as np
from sklearn.linear_model import LinearRegression
X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
y = np.dot(X, np.array([1, 2])) + 3
reg = LinearRegression().fit(X, y)
reg.score(X, y)
# Expected:
## 1.0
reg.coef_
# Expected:
## array([1., 2.])
reg.intercept_
# Expected:
## 3.0000...
reg.predict(np.array([[3, 5]]))
# Expected:
## array([16.])
from sklearn import preprocessing
lb = preprocessing.LabelBinarizer()
lb.fit([1, 2, 6, 4, 2])
# Expected:
## LabelBinarizer(neg_label=0, pos_label=1, sparse_output=False)
lb.classes_
# Expected:
## array([1, 2, 4, 6])
lb.transform([1, 6])
# Expected:
## array([[1, 0, 0, 0],
## [0, 0, 0, 1]])
lb = preprocessing.LabelBinarizer()
lb.fit_transform(['yes', 'no', 'no', 'yes'])
# Expected:
## array([[1],
## [0],
## [0],
## [1]])
import numpy as np
lb.fit(np.array([[0, 1, 1], [1, 0, 0]]))
# Expected:
## LabelBinarizer(neg_label=0, pos_label=1, sparse_output=False)
lb.classes_
# Expected:
## array([0, 1, 2])
lb.transform([0, 1, 2, 1])
# Expected:
## array([[1, 0, 0],
## [0, 1, 0],
## [0, 0, 1],
## [0, 1, 0]])
X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]
from sklearn.neighbors import RadiusNeighborsClassifier
neigh = RadiusNeighborsClassifier(radius=1.0)
neigh.fit(X, y)
# Expected:
## RadiusNeighborsClassifier(...)
print(neigh.predict([[1.5]]))
# Expected:
## [0]
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import RidgeClassifierCV
X, y = load_breast_cancer(return_X_y=True)
clf = RidgeClassifierCV(alphas=[1e-3, 1e-2, 1e-1, 1]).fit(X, y)
clf.score(X, y)
# Expected:
## 0.9630...