
Commit fbb643d

Pushing the docs to dev/ for branch: main, commit c5aa12b68c59f01eba50ef64329081f8163342ce
1 parent f73a13b commit fbb643d

1,344 files changed: +7184, -7066 lines


dev/.buildinfo

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
 # Sphinx build info version 1
 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: 01e9c99c2249d7137cd4f880cabae922
+config: 66f716243d05619d0b1b890cf46a456c
 tags: 645f666f9bcd5a90fca523b33c5a78b7

dev/_downloads/07960f9087d379e9d0da6350d6ee3f41/plot_classification_probability.ipynb

Lines changed: 2 additions & 2 deletions
@@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
-
"\n# Plot classification probability\n\nPlot the classification probability for different classifiers. We use a 3 class\ndataset, and we classify it with a Support Vector classifier, L1 and L2\npenalized logistic regression with either a One-Vs-Rest or multinomial setting,\nand Gaussian process classification.\n\nLinear SVC is not a probabilistic classifier by default but it has a built-in\ncalibration option enabled in this example (`probability=True`).\n\nThe logistic regression with One-Vs-Rest is not a multiclass classifier out of\nthe box. As a result it has more trouble in separating class 2 and 3 than the\nother estimators.\n"
+
"\n# Plot classification probability\n\nPlot the classification probability for different classifiers. We use a 3 class\ndataset, and we classify it with a Support Vector classifier, L1 and L2\npenalized logistic regression (multinomial multiclass), a One-Vs-Rest version with\nlogistic regression, and Gaussian process classification.\n\nLinear SVC is not a probabilistic classifier by default but it has a built-in\ncalibration option enabled in this example (`probability=True`).\n\nThe logistic regression with One-Vs-Rest is not a multiclass classifier out of\nthe box. As a result it has more trouble in separating class 2 and 3 than the\nother estimators.\n"
]
},
{
@@ -15,7 +15,7 @@
},
"outputs": [],
"source": [
-
"# Author: Alexandre Gramfort <[email protected]>\n# License: BSD 3 clause\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nfrom matplotlib import cm\n\nfrom sklearn import datasets\nfrom sklearn.gaussian_process import GaussianProcessClassifier\nfrom sklearn.gaussian_process.kernels import RBF\nfrom sklearn.inspection import DecisionBoundaryDisplay\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.metrics import accuracy_score\nfrom sklearn.svm import SVC\n\niris = datasets.load_iris()\nX = iris.data[:, 0:2] # we only take the first two features for visualization\ny = iris.target\n\nn_features = X.shape[1]\n\nC = 10\nkernel = 1.0 * RBF([1.0, 1.0]) # for GPC\n\n# Create different classifiers.\nclassifiers = {\n \"L1 logistic\": LogisticRegression(\n C=C, penalty=\"l1\", solver=\"saga\", multi_class=\"multinomial\", max_iter=10000\n ),\n \"L2 logistic (Multinomial)\": LogisticRegression(\n C=C, penalty=\"l2\", solver=\"saga\", multi_class=\"multinomial\", max_iter=10000\n ),\n \"L2 logistic (OvR)\": LogisticRegression(\n C=C, penalty=\"l2\", solver=\"saga\", multi_class=\"ovr\", max_iter=10000\n ),\n \"Linear SVC\": SVC(kernel=\"linear\", C=C, probability=True, random_state=0),\n \"GPC\": GaussianProcessClassifier(kernel),\n}\n\nn_classifiers = len(classifiers)\n\nfig, axes = plt.subplots(\n nrows=n_classifiers,\n ncols=len(iris.target_names),\n figsize=(3 * 2, n_classifiers * 2),\n)\nfor classifier_idx, (name, classifier) in enumerate(classifiers.items()):\n y_pred = classifier.fit(X, y).predict(X)\n accuracy = accuracy_score(y, y_pred)\n print(f\"Accuracy (train) for {name}: {accuracy:0.1%}\")\n for label in np.unique(y):\n # plot the probability estimate provided by the classifier\n disp = DecisionBoundaryDisplay.from_estimator(\n classifier,\n X,\n response_method=\"predict_proba\",\n class_of_interest=label,\n ax=axes[classifier_idx, label],\n vmin=0,\n vmax=1,\n )\n axes[classifier_idx, label].set_title(f\"Class {label}\")\n # plot data predicted to belong to given class\n mask_y_pred = y_pred == label\n axes[classifier_idx, label].scatter(\n X[mask_y_pred, 0], X[mask_y_pred, 1], marker=\"o\", c=\"w\", edgecolor=\"k\"\n )\n axes[classifier_idx, label].set(xticks=(), yticks=())\n axes[classifier_idx, 0].set_ylabel(name)\n\nax = plt.axes([0.15, 0.04, 0.7, 0.02])\nplt.title(\"Probability\")\n_ = plt.colorbar(\n cm.ScalarMappable(norm=None, cmap=\"viridis\"), cax=ax, orientation=\"horizontal\"\n)\n\nplt.show()"
+
"# Author: Alexandre Gramfort <[email protected]>\n# License: BSD 3 clause\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nfrom matplotlib import cm\n\nfrom sklearn import datasets\nfrom sklearn.gaussian_process import GaussianProcessClassifier\nfrom sklearn.gaussian_process.kernels import RBF\nfrom sklearn.inspection import DecisionBoundaryDisplay\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.metrics import accuracy_score\nfrom sklearn.multiclass import OneVsRestClassifier\nfrom sklearn.svm import SVC\n\niris = datasets.load_iris()\nX = iris.data[:, 0:2] # we only take the first two features for visualization\ny = iris.target\n\nn_features = X.shape[1]\n\nC = 10\nkernel = 1.0 * RBF([1.0, 1.0]) # for GPC\n\n# Create different classifiers.\nclassifiers = {\n \"L1 logistic\": LogisticRegression(C=C, penalty=\"l1\", solver=\"saga\", max_iter=10000),\n \"L2 logistic (Multinomial)\": LogisticRegression(\n C=C, penalty=\"l2\", solver=\"saga\", max_iter=10000\n ),\n \"L2 logistic (OvR)\": OneVsRestClassifier(\n LogisticRegression(C=C, penalty=\"l2\", solver=\"saga\", max_iter=10000)\n ),\n \"Linear SVC\": SVC(kernel=\"linear\", C=C, probability=True, random_state=0),\n \"GPC\": GaussianProcessClassifier(kernel),\n}\n\nn_classifiers = len(classifiers)\n\nfig, axes = plt.subplots(\n nrows=n_classifiers,\n ncols=len(iris.target_names),\n figsize=(3 * 2, n_classifiers * 2),\n)\nfor classifier_idx, (name, classifier) in enumerate(classifiers.items()):\n y_pred = classifier.fit(X, y).predict(X)\n accuracy = accuracy_score(y, y_pred)\n print(f\"Accuracy (train) for {name}: {accuracy:0.1%}\")\n for label in np.unique(y):\n # plot the probability estimate provided by the classifier\n disp = DecisionBoundaryDisplay.from_estimator(\n classifier,\n X,\n response_method=\"predict_proba\",\n class_of_interest=label,\n ax=axes[classifier_idx, label],\n vmin=0,\n vmax=1,\n )\n axes[classifier_idx, label].set_title(f\"Class {label}\")\n # plot data predicted to belong to given class\n mask_y_pred = y_pred == label\n axes[classifier_idx, label].scatter(\n X[mask_y_pred, 0], X[mask_y_pred, 1], marker=\"o\", c=\"w\", edgecolor=\"k\"\n )\n axes[classifier_idx, label].set(xticks=(), yticks=())\n axes[classifier_idx, 0].set_ylabel(name)\n\nax = plt.axes([0.15, 0.04, 0.7, 0.02])\nplt.title(\"Probability\")\n_ = plt.colorbar(\n cm.ScalarMappable(norm=None, cmap=\"viridis\"), cax=ax, orientation=\"horizontal\"\n)\n\nplt.show()"
]
}
],
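Not part of this commit: a minimal, self-contained sketch of the wrapping pattern the diff above introduces, i.e. replacing `multi_class="ovr"` on `LogisticRegression` with an explicit `OneVsRestClassifier` wrapper (dataset and parameters chosen only for illustration).

# Sketch only: wrap a binary-capable estimator in OneVsRestClassifier instead of
# passing multi_class="ovr" to LogisticRegression.
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier

X, y = load_iris(return_X_y=True)

# One binary logistic regression is fitted per class behind the scenes.
ovr_clf = OneVsRestClassifier(
    LogisticRegression(C=10, penalty="l2", solver="saga", max_iter=10000)
)
ovr_clf.fit(X, y)

# predict_proba still yields one column per class (normalised to sum to 1), so
# DecisionBoundaryDisplay.from_estimator(..., response_method="predict_proba")
# should keep working as in the example.
print(ovr_clf.predict_proba(X[:3]).round(3))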

dev/_downloads/4c1663175b07cf9608b07331aa180eb7/plot_logistic_multinomial.ipynb

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@
},
"outputs": [],
"source": [
-
"# Authors: Tom Dupre la Tour <[email protected]>\n# License: BSD 3 clause\n\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom sklearn.datasets import make_blobs\nfrom sklearn.inspection import DecisionBoundaryDisplay\nfrom sklearn.linear_model import LogisticRegression\n\n# make 3-class dataset for classification\ncenters = [[-5, 0], [0, 1.5], [5, -1]]\nX, y = make_blobs(n_samples=1000, centers=centers, random_state=40)\ntransformation = [[0.4, 0.2], [-0.4, 1.2]]\nX = np.dot(X, transformation)\n\nfor multi_class in (\"multinomial\", \"ovr\"):\n clf = LogisticRegression(\n solver=\"sag\", max_iter=100, random_state=42, multi_class=multi_class\n ).fit(X, y)\n\n # print the training scores\n print(\"training score : %.3f (%s)\" % (clf.score(X, y), multi_class))\n\n _, ax = plt.subplots()\n DecisionBoundaryDisplay.from_estimator(\n clf, X, response_method=\"predict\", cmap=plt.cm.Paired, ax=ax\n )\n plt.title(\"Decision surface of LogisticRegression (%s)\" % multi_class)\n plt.axis(\"tight\")\n\n # Plot also the training points\n colors = \"bry\"\n for i, color in zip(clf.classes_, colors):\n idx = np.where(y == i)\n plt.scatter(\n X[idx, 0], X[idx, 1], c=color, cmap=plt.cm.Paired, edgecolor=\"black\", s=20\n )\n\n # Plot the three one-against-all classifiers\n xmin, xmax = plt.xlim()\n ymin, ymax = plt.ylim()\n coef = clf.coef_\n intercept = clf.intercept_\n\n def plot_hyperplane(c, color):\n def line(x0):\n return (-(x0 * coef[c, 0]) - intercept[c]) / coef[c, 1]\n\n plt.plot([xmin, xmax], [line(xmin), line(xmax)], ls=\"--\", color=color)\n\n for i, color in zip(clf.classes_, colors):\n plot_hyperplane(i, color)\n\nplt.show()"
+
"# Authors: Tom Dupre la Tour <[email protected]>\n# License: BSD 3 clause\n\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom sklearn.datasets import make_blobs\nfrom sklearn.inspection import DecisionBoundaryDisplay\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.multiclass import OneVsRestClassifier\n\n# make 3-class dataset for classification\ncenters = [[-5, 0], [0, 1.5], [5, -1]]\nX, y = make_blobs(n_samples=1000, centers=centers, random_state=40)\ntransformation = [[0.4, 0.2], [-0.4, 1.2]]\nX = np.dot(X, transformation)\n\nfor multi_class in (\"multinomial\", \"ovr\"):\n clf = LogisticRegression(solver=\"sag\", max_iter=100, random_state=42)\n if multi_class == \"ovr\":\n clf = OneVsRestClassifier(clf)\n clf.fit(X, y)\n\n # print the training scores\n print(\"training score : %.3f (%s)\" % (clf.score(X, y), multi_class))\n\n _, ax = plt.subplots()\n DecisionBoundaryDisplay.from_estimator(\n clf, X, response_method=\"predict\", cmap=plt.cm.Paired, ax=ax\n )\n plt.title(\"Decision surface of LogisticRegression (%s)\" % multi_class)\n plt.axis(\"tight\")\n\n # Plot also the training points\n colors = \"bry\"\n for i, color in zip(clf.classes_, colors):\n idx = np.where(y == i)\n plt.scatter(\n X[idx, 0], X[idx, 1], c=color, cmap=plt.cm.Paired, edgecolor=\"black\", s=20\n )\n\n # Plot the three one-against-all classifiers\n xmin, xmax = plt.xlim()\n ymin, ymax = plt.ylim()\n if multi_class == \"ovr\":\n coef = np.concatenate([est.coef_ for est in clf.estimators_])\n intercept = np.concatenate([est.intercept_ for est in clf.estimators_])\n else:\n coef = clf.coef_\n intercept = clf.intercept_\n\n def plot_hyperplane(c, color):\n def line(x0):\n return (-(x0 * coef[c, 0]) - intercept[c]) / coef[c, 1]\n\n plt.plot([xmin, xmax], [line(xmin), line(xmax)], ls=\"--\", color=color)\n\n for i, color in zip(clf.classes_, colors):\n plot_hyperplane(i, color)\n\nplt.show()"
]
}
],
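Because the OvR wrapper no longer exposes `coef_` and `intercept_` directly, the diff above stacks them from the fitted binary estimators. A hedged sketch of that recovery step on synthetic blobs (illustrative data, not from the commit):

import numpy as np
from sklearn.datasets import make_blobs
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier

# Illustrative 3-class, 2-feature data.
X, y = make_blobs(n_samples=1000, centers=3, random_state=40)

clf = OneVsRestClassifier(
    LogisticRegression(solver="sag", max_iter=100, random_state=42)
)
clf.fit(X, y)

# clf.estimators_ holds one fitted binary LogisticRegression per class, in the
# order of clf.classes_; stacking their rows reproduces the
# (n_classes, n_features) coef_ that multi_class="ovr" used to return.
coef = np.concatenate([est.coef_ for est in clf.estimators_])
intercept = np.concatenate([est.intercept_ for est in clf.estimators_])
print(coef.shape, intercept.shape)  # (3, 2), (3,)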

dev/_downloads/583de4ea98c6544c52ea4c57e62b1813/plot_sparse_logistic_regression_20newsgroups.ipynb

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@
},
"outputs": [],
"source": [
-
"# Author: Arthur Mensch\n\nimport timeit\nimport warnings\n\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom sklearn.datasets import fetch_20newsgroups_vectorized\nfrom sklearn.exceptions import ConvergenceWarning\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.model_selection import train_test_split\n\nwarnings.filterwarnings(\"ignore\", category=ConvergenceWarning, module=\"sklearn\")\nt0 = timeit.default_timer()\n\n# We use SAGA solver\nsolver = \"saga\"\n\n# Turn down for faster run time\nn_samples = 5000\n\nX, y = fetch_20newsgroups_vectorized(subset=\"all\", return_X_y=True)\nX = X[:n_samples]\ny = y[:n_samples]\n\nX_train, X_test, y_train, y_test = train_test_split(\n X, y, random_state=42, stratify=y, test_size=0.1\n)\ntrain_samples, n_features = X_train.shape\nn_classes = np.unique(y).shape[0]\n\nprint(\n \"Dataset 20newsgroup, train_samples=%i, n_features=%i, n_classes=%i\"\n % (train_samples, n_features, n_classes)\n)\n\nmodels = {\n \"ovr\": {\"name\": \"One versus Rest\", \"iters\": [1, 2, 3]},\n \"multinomial\": {\"name\": \"Multinomial\", \"iters\": [1, 2, 5]},\n}\n\nfor model in models:\n # Add initial chance-level values for plotting purpose\n accuracies = [1 / n_classes]\n times = [0]\n densities = [1]\n\n model_params = models[model]\n\n # Small number of epochs for fast runtime\n for this_max_iter in model_params[\"iters\"]:\n print(\n \"[model=%s, solver=%s] Number of epochs: %s\"\n % (model_params[\"name\"], solver, this_max_iter)\n )\n lr = LogisticRegression(\n solver=solver,\n multi_class=model,\n penalty=\"l1\",\n max_iter=this_max_iter,\n random_state=42,\n )\n t1 = timeit.default_timer()\n lr.fit(X_train, y_train)\n train_time = timeit.default_timer() - t1\n\n y_pred = lr.predict(X_test)\n accuracy = np.sum(y_pred == y_test) / y_test.shape[0]\n density = np.mean(lr.coef_ != 0, axis=1) * 100\n accuracies.append(accuracy)\n densities.append(density)\n times.append(train_time)\n models[model][\"times\"] = times\n models[model][\"densities\"] = densities\n models[model][\"accuracies\"] = accuracies\n print(\"Test accuracy for model %s: %.4f\" % (model, accuracies[-1]))\n print(\n \"%% non-zero coefficients for model %s, per class:\\n %s\"\n % (model, densities[-1])\n )\n print(\n \"Run time (%i epochs) for model %s:%.2f\"\n % (model_params[\"iters\"][-1], model, times[-1])\n )\n\nfig = plt.figure()\nax = fig.add_subplot(111)\n\nfor model in models:\n name = models[model][\"name\"]\n times = models[model][\"times\"]\n accuracies = models[model][\"accuracies\"]\n ax.plot(times, accuracies, marker=\"o\", label=\"Model: %s\" % name)\n ax.set_xlabel(\"Train time (s)\")\n ax.set_ylabel(\"Test accuracy\")\nax.legend()\nfig.suptitle(\"Multinomial vs One-vs-Rest Logistic L1\\nDataset %s\" % \"20newsgroups\")\nfig.tight_layout()\nfig.subplots_adjust(top=0.85)\nrun_time = timeit.default_timer() - t0\nprint(\"Example run in %.3f s\" % run_time)\nplt.show()"
+
"# Author: Arthur Mensch\n\nimport timeit\nimport warnings\n\nimport matplotlib.pyplot as plt\nimport numpy as np\n\nfrom sklearn.datasets import fetch_20newsgroups_vectorized\nfrom sklearn.exceptions import ConvergenceWarning\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.multiclass import OneVsRestClassifier\n\nwarnings.filterwarnings(\"ignore\", category=ConvergenceWarning, module=\"sklearn\")\nt0 = timeit.default_timer()\n\n# We use SAGA solver\nsolver = \"saga\"\n\n# Turn down for faster run time\nn_samples = 5000\n\nX, y = fetch_20newsgroups_vectorized(subset=\"all\", return_X_y=True)\nX = X[:n_samples]\ny = y[:n_samples]\n\nX_train, X_test, y_train, y_test = train_test_split(\n X, y, random_state=42, stratify=y, test_size=0.1\n)\ntrain_samples, n_features = X_train.shape\nn_classes = np.unique(y).shape[0]\n\nprint(\n \"Dataset 20newsgroup, train_samples=%i, n_features=%i, n_classes=%i\"\n % (train_samples, n_features, n_classes)\n)\n\nmodels = {\n \"ovr\": {\"name\": \"One versus Rest\", \"iters\": [1, 2, 3]},\n \"multinomial\": {\"name\": \"Multinomial\", \"iters\": [1, 2, 5]},\n}\n\nfor model in models:\n # Add initial chance-level values for plotting purpose\n accuracies = [1 / n_classes]\n times = [0]\n densities = [1]\n\n model_params = models[model]\n\n # Small number of epochs for fast runtime\n for this_max_iter in model_params[\"iters\"]:\n print(\n \"[model=%s, solver=%s] Number of epochs: %s\"\n % (model_params[\"name\"], solver, this_max_iter)\n )\n clf = LogisticRegression(\n solver=solver,\n penalty=\"l1\",\n max_iter=this_max_iter,\n random_state=42,\n )\n if model == \"ovr\":\n clf = OneVsRestClassifier(clf)\n t1 = timeit.default_timer()\n clf.fit(X_train, y_train)\n train_time = timeit.default_timer() - t1\n\n y_pred = clf.predict(X_test)\n accuracy = np.sum(y_pred == y_test) / y_test.shape[0]\n if model == \"ovr\":\n coef = np.concatenate([est.coef_ for est in clf.estimators_])\n else:\n coef = clf.coef_\n density = np.mean(coef != 0, axis=1) * 100\n accuracies.append(accuracy)\n densities.append(density)\n times.append(train_time)\n models[model][\"times\"] = times\n models[model][\"densities\"] = densities\n models[model][\"accuracies\"] = accuracies\n print(\"Test accuracy for model %s: %.4f\" % (model, accuracies[-1]))\n print(\n \"%% non-zero coefficients for model %s, per class:\\n %s\"\n % (model, densities[-1])\n )\n print(\n \"Run time (%i epochs) for model %s:%.2f\"\n % (model_params[\"iters\"][-1], model, times[-1])\n )\n\nfig = plt.figure()\nax = fig.add_subplot(111)\n\nfor model in models:\n name = models[model][\"name\"]\n times = models[model][\"times\"]\n accuracies = models[model][\"accuracies\"]\n ax.plot(times, accuracies, marker=\"o\", label=\"Model: %s\" % name)\n ax.set_xlabel(\"Train time (s)\")\n ax.set_ylabel(\"Test accuracy\")\nax.legend()\nfig.suptitle(\"Multinomial vs One-vs-Rest Logistic L1\\nDataset %s\" % \"20newsgroups\")\nfig.tight_layout()\nfig.subplots_adjust(top=0.85)\nrun_time = timeit.default_timer() - t0\nprint(\"Example run in %.3f s\" % run_time)\nplt.show()"
]
}
],

dev/_downloads/7e30d5b899fc588cbc75553c65cb76ed/plot_sparse_logistic_regression_20newsgroups.py

Lines changed: 11 additions & 5 deletions
@@ -32,6 +32,7 @@
 from sklearn.exceptions import ConvergenceWarning
 from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import train_test_split
+from sklearn.multiclass import OneVsRestClassifier
 
 warnings.filterwarnings("ignore", category=ConvergenceWarning, module="sklearn")
 t0 = timeit.default_timer()
@@ -76,20 +77,25 @@
             "[model=%s, solver=%s] Number of epochs: %s"
             % (model_params["name"], solver, this_max_iter)
         )
-        lr = LogisticRegression(
+        clf = LogisticRegression(
             solver=solver,
-            multi_class=model,
             penalty="l1",
             max_iter=this_max_iter,
             random_state=42,
         )
+        if model == "ovr":
+            clf = OneVsRestClassifier(clf)
         t1 = timeit.default_timer()
-        lr.fit(X_train, y_train)
+        clf.fit(X_train, y_train)
         train_time = timeit.default_timer() - t1
 
-        y_pred = lr.predict(X_test)
+        y_pred = clf.predict(X_test)
         accuracy = np.sum(y_pred == y_test) / y_test.shape[0]
-        density = np.mean(lr.coef_ != 0, axis=1) * 100
+        if model == "ovr":
+            coef = np.concatenate([est.coef_ for est in clf.estimators_])
+        else:
+            coef = clf.coef_
+        density = np.mean(coef != 0, axis=1) * 100
         accuracies.append(accuracy)
         densities.append(density)
         times.append(train_time)
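For reference, a small sketch of what the new density computation measures: the per-class percentage of non-zero L1-penalised coefficients, gathered from the OvR estimators. It uses the digits dataset instead of 20newsgroups purely to keep the illustration small; the numbers it prints are not from this commit.

import numpy as np
from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier

X, y = load_digits(return_X_y=True)

clf = OneVsRestClassifier(
    LogisticRegression(penalty="l1", solver="saga", max_iter=50, random_state=42)
)
# A ConvergenceWarning may appear with this small max_iter; it is harmless here.
clf.fit(X, y)

# Stack one coefficient row per class, then report the fraction of weights the
# L1 penalty left non-zero, as a percentage.
coef = np.concatenate([est.coef_ for est in clf.estimators_])
density = np.mean(coef != 0, axis=1) * 100
print(density.round(1))  # one value per class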
