Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
21c8029
Trying out multioutput PCovC
rvasav26 Jun 1, 2025
87a3091
Furthering multioutput support for decision_function
rvasav26 Jun 4, 2025
90277a3
Starting on docstrings
rvasav26 Jun 10, 2025
a7ea950
Score function and tests
rvasav26 Jun 22, 2025
71cfc19
Fixing _version.py tracking
rvasav26 Jun 23, 2025
febab6d
Continuing multiouput work
rvasav26 Jun 25, 2025
880aa65
Cleaning things up and adding more tests
rvasav26 Jun 27, 2025
d0dc35c
Adding multioutput support for KPCovC
rvasav26 Jun 30, 2025
aef9d91
Fixes after rebase
rvasav26 Aug 26, 2025
02a9edf
Remembering to add TODOs after last week conversation
rvasav26 Sep 4, 2025
e008a65
Adding example
Sep 11, 2025
9957180
Adding `score` methods for multiclass-multilabel problems
Sep 12, 2025
9276860
Fix typo
Sep 12, 2025
31abefb
Making linter happy
Sep 12, 2025
d0e1adc
Example touch-ups
Sep 12, 2025
c308bf2
Linting again
Sep 12, 2025
c20262c
New tests
Sep 15, 2025
1ff4fed
Fixing ptz and pxz for multioutput
Sep 15, 2025
7b345f3
fix linting
Sep 15, 2025
c00b77a
Merge remote-tracking branch 'origin/main' into multioutput-pcovc
Oct 3, 2025
35c1b0f
fixing docs
Oct 6, 2025
13d266a
Merge branch 'main' into multioutput-pcovc
cajchristian Oct 6, 2025
ff58c8d
Deleting duplicate scale_z
cajchristian Oct 6, 2025
5fdba2a
Deleting duplicate again
Oct 6, 2025
314e6d3
Deleting another duplicate
Oct 6, 2025
cdb33b5
Fixing a test with kpcovc
Oct 6, 2025
701b6ca
Fixing opacity in example
Oct 6, 2025
156fa22
Fix linting
Oct 6, 2025
861a66a
Modifying pyproject.toml
Nov 20, 2025
72dad2d
Reverting pyproject.toml
Nov 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 20 additions & 7 deletions examples/pcovc/KPCovC_Comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@

mixing = 0.5
alpha_d = 0.5
alpha_p = 0.4
alpha_train = 0.2
alpha_test = 0.8

models = {
PCA(n_components=n_components): "PCA",
Expand All @@ -107,8 +108,10 @@
t_train = model.fit_transform(X_train_scaled, y_train)
t_test = model.transform(X_test_scaled)

ax.scatter(t_test[:, 0], t_test[:, 1], alpha=alpha_p, cmap=cm_bright, c=y_test)
ax.scatter(t_train[:, 0], t_train[:, 1], cmap=cm_bright, c=y_train)
ax.scatter(
t_train[:, 0], t_train[:, 1], alpha=alpha_train, cmap=cm_bright, c=y_train
)
ax.scatter(t_test[:, 0], t_test[:, 1], alpha=alpha_test, cmap=cm_bright, c=y_test)

ax.set_title(models[model])
plt.tight_layout()
Expand Down Expand Up @@ -166,8 +169,10 @@
eps=models[model]["eps"],
grid_resolution=resolution,
)
ax.scatter(t_test[:, 0], t_test[:, 1], alpha=alpha_p, cmap=cm_bright, c=y_test)
ax.scatter(t_train[:, 0], t_train[:, 1], cmap=cm_bright, c=y_train)
ax.scatter(
t_train[:, 0], t_train[:, 1], alpha=alpha_train, cmap=cm_bright, c=y_train
)
ax.scatter(t_test[:, 0], t_test[:, 1], alpha=alpha_test, cmap=cm_bright, c=y_test)
ax.set_title(models[model]["title"])

ax.text(
Expand Down Expand Up @@ -241,14 +246,22 @@
grid_resolution=resolution,
)

ax.scatter(
t_kpcovc_train[:, 0],
t_kpcovc_train[:, 1],
alpha=alpha_train,
cmap=cm_bright,
c=y_train,
)

ax.scatter(
t_kpcovc_test[:, 0],
t_kpcovc_test[:, 1],
cmap=cm_bright,
alpha=alpha_p,
alpha=alpha_test,
c=y_test,
)
ax.scatter(t_kpcovc_train[:, 0], t_kpcovc_train[:, 1], cmap=cm_bright, c=y_train)

ax.text(
0.70,
0.03,
Expand Down
131 changes: 131 additions & 0 deletions examples/pcovc/PCovC_multioutput.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
#!/usr/bin/env python
# coding: utf-8

"""
Multioutput PCovC
=================
"""
# %%
#

import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import load_digits
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegressionCV
from sklearn.multioutput import MultiOutputClassifier

from skmatter.decomposition import PCovC

# Global plot styling for every figure in this example: default colormap and
# marker edge color for scatter plots.
plt.rcParams.update(
    {
        "image.cmap": "tab10",
        "scatter.edgecolors": "k",
    }
)
# %%
# For this, we will use the `sklearn.datasets.load_digits` dataset.
# This dataset contains 8x8 images of handwritten digits (0-9).
X, y = load_digits(return_X_y=True)

# Standardize the features; keeping the fitted scaler in ``x_scaler`` lets the
# same transform be reused later.
x_scaler = StandardScaler()
X_scaled = x_scaler.fit_transform(X)

# Show which class labels are present in the target vector.
np.unique(y)
# %%
# Let's begin by trying to make a PCovC map to separate the digits.
# This is a one-label, ten-class classification problem.

# Two-component projections: PCA and PCovC with ``mixing=0.5``.
pca = PCA(n_components=2)
pcovc = PCovC(n_components=2, mixing=0.5)

T_pca = pca.fit_transform(X_scaled, y)
T_pcovc = pcovc.fit_transform(X_scaled, y)

# One panel per method, points colored by digit.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(10, 6))
ax_pca, ax_pcovc = axs

scat_pca = ax_pca.scatter(T_pca[:, 0], T_pca[:, 1], c=y)
scat_pcovc = ax_pcovc.scatter(T_pcovc[:, 0], T_pcovc[:, 1], c=y)

# A single horizontal colorbar is attached to both panels.
fig.colorbar(scat_pca, ax=axs, orientation="horizontal")
fig.suptitle("Multiclass PCovC with One Label")

# %%
# Next, let's try a two-label classification problem, with both labels
# being binary classification tasks.

# NOTE: ``is_even`` stores ``y % 2``, so it is 0 for even digits and 1 for odd
# digits. ``is_less_than_five`` is True (1 once stacked) for digits below 5.
is_even = np.mod(y, 2).reshape(-1, 1)
is_less_than_five = np.less(y, 5).reshape(-1, 1)

# Place the two label columns side by side to form the two-column target.
y2 = np.concatenate((is_even, is_less_than_five), axis=1)
y2.shape
# %%
# Here, we can build a map that considers both of these labels simultaneously.

# Wrap the base classifier so that one classifier is fit per label column.
clf = MultiOutputClassifier(estimator=LogisticRegressionCV())
pcovc = PCovC(n_components=2, mixing=0.5, classifier=clf)

T_pcovc = pcovc.fit_transform(X_scaled, y2)

# Top row: PCA map, bottom row: PCovC map. One column per coloring.
fig, axs = plt.subplots(2, 3, figsize=(15, 10))
cmap1 = "Set1"
cmap2 = "Set2"
cmap3 = "tab10"

# Legend entries for the two binary panels, listed for encoded values 0 then 1.
labels_list = [["Even", "Odd"], [">= 5", "< 5"]]

colorings = [is_even, is_less_than_five, y]
cmaps = [cmap1, cmap2, cmap3]

for i, (c, cmap) in enumerate(zip(colorings, cmaps)):
    scat_pca = axs[0, i].scatter(T_pca[:, 0], T_pca[:, 1], c=c, cmap=cmap)
    axs[1, i].scatter(T_pcovc[:, 0], T_pcovc[:, 1], c=c, cmap=cmap)

    # Only the panels that have an entry in ``labels_list`` get a legend; the
    # digit panel is described by the colorbar instead.
    if i < len(labels_list):
        handles, _ = scat_pca.legend_elements()
        labels = labels_list[i]
        axs[0, i].legend(handles, labels)

axs[0, 0].set_title("Even/Odd")
axs[0, 1].set_title("Greater/Less than 5")
axs[0, 2].set_title("Digit")

axs[0, 0].set_ylabel("PCA")
axs[1, 0].set_ylabel("PCovC")

# ``scat_pca`` still refers to the last scatter drawn in the loop (colored by
# digit), so the colorbar shows the digit colors.
fig.colorbar(scat_pca, ax=axs, orientation="horizontal")
fig.suptitle("Multilabel PCovC with Binary Labels")
# %%
# Let's try a more complicated example: labeling each digit by a "number of
# holes" class (0, 1 or 2).

# Lookup table indexed by digit: ``holes_by_digit[d]`` is the class of digit
# ``d`` (digits 1, 2, 3, 5, 7 -> 0; digits 0, 4, 6, 9 -> 1; digit 8 -> 2).
holes_by_digit = np.array([1, 0, 0, 0, 1, 0, 1, 0, 2, 1])

# Index the table with the whole label vector at once instead of looping over
# the digits in Python.
num_holes = holes_by_digit[y].reshape(-1, 1)

y3 = np.hstack([is_even, num_holes])
# %%
# Now, we have a two-label classification problem, with one binary label
# and one label with three possible classes.

# Wrap the base classifier so that one classifier is fit per label column.
clf = MultiOutputClassifier(estimator=LogisticRegressionCV())
pcovc = PCovC(n_components=2, mixing=0.5, classifier=clf)

T_pcovc = pcovc.fit_transform(X_scaled, y3)

# Top row: PCA map, bottom row: PCovC map. One column per coloring.
fig, axs = plt.subplots(2, 3, figsize=(15, 10))
cmap1 = "Set1"
cmap2 = "Set3"
cmap3 = "tab10"

# Legend entries for the first two panels, listed in increasing encoded value.
labels_list = [["Even", "Odd"], ["0", "1", "2"]]

colorings = [is_even, num_holes, y]
cmaps = [cmap1, cmap2, cmap3]

for i, (c, cmap) in enumerate(zip(colorings, cmaps)):
    scat_pca = axs[0, i].scatter(T_pca[:, 0], T_pca[:, 1], c=c, cmap=cmap)
    axs[1, i].scatter(T_pcovc[:, 0], T_pcovc[:, 1], c=c, cmap=cmap)

    # Only the panels that have an entry in ``labels_list`` get a legend; the
    # digit panel is described by the colorbar instead.
    if i < len(labels_list):
        handles, _ = scat_pca.legend_elements()
        labels = labels_list[i]
        axs[0, i].legend(handles, labels)

axs[0, 0].set_title("Even/Odd")
axs[0, 1].set_title("Number of Holes")
axs[0, 2].set_title("Digit")

axs[0, 0].set_ylabel("PCA")
axs[1, 0].set_ylabel("PCovC")

# ``scat_pca`` still refers to the last scatter drawn in the loop (colored by
# digit), so the colorbar shows the digit colors.
fig.colorbar(scat_pca, ax=axs, orientation="horizontal")
fig.suptitle("Multiclass-Multilabel PCovC")

# %%
Loading