79 lines
2.6 KiB
Python
79 lines
2.6 KiB
Python
import torch
|
|
import torch.nn as nn
|
|
import torch.optim as optim
|
|
from torch.utils.data import DataLoader, TensorDataset
|
|
|
|
|
|
class MLP(nn.Module):
    """Fully connected feed-forward network with ReLU activations.

    The network is a stack of ``Linear -> ReLU`` pairs, one per entry in
    ``hidden_sizes``, followed by a final ``Linear`` layer that produces one
    raw score (logit) per class. No softmax is applied here.

    Args:
        hidden_sizes: Width of each hidden layer, in order. An empty sequence
            yields a single linear layer from input to output.
        input_size: Number of input features per sample. Defaults to 784,
            the value that was previously hard-coded.
        num_classes: Number of output logits. Defaults to 10, the value that
            was previously hard-coded.
    """

    def __init__(self, hidden_sizes=(128, 64), *, input_size=784, num_classes=10):
        super().__init__()
        layers = []
        in_size = input_size
        for h in hidden_sizes:
            layers.append(nn.Linear(in_size, h))
            layers.append(nn.ReLU())
            # The next layer consumes this layer's output.
            in_size = h
        layers.append(nn.Linear(in_size, num_classes))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the logits produced by passing ``x`` through the layer stack.

        ``x`` must have ``input_size`` as its last dimension, because the
        first layer is ``nn.Linear(input_size, ...)``.
        """
        return self.net(x)
|
|
|
|
|
|
class MLPClassifier:
    """Estimator-style wrapper that trains an ``MLP`` and predicts with it.

    ``fit`` trains with Adam and cross-entropy loss, printing the mean batch
    loss and hold-out accuracy after every epoch. ``predict_proba`` returns
    softmax probabilities and ``predict`` returns the arg-max class index.

    The network is built as ``MLP(hidden_sizes=...)``, so the number of input
    features and classes must match that model's default sizes.

    Args:
        hidden_sizes: Hidden layer widths forwarded to ``MLP``; stored as a
            tuple.
        epochs: Number of passes over the training split.
        batch_size: Mini-batch size used for training (default 64, the value
            that was previously hard-coded).
        lr: Adam learning rate (default 1e-3, the value that was previously
            hard-coded).
    """

    def __init__(self, hidden_sizes=(128, 64), epochs=10, batch_size=64, lr=1e-3):
        self.hidden_sizes = tuple(hidden_sizes)
        self.epochs = epochs
        self.batch_size = batch_size
        self.lr = lr

    def fit(self, X, y):
        """Train a fresh ``MLP`` on ``X`` / ``y`` and return ``self``.

        The first ``len(X) // 10`` samples are held out for per-epoch
        accuracy reporting and are never trained on. The split is positional
        (no shuffling), so callers whose data is ordered (for example,
        sorted by label) should shuffle it beforehand.

        Args:
            X: Array-like of samples; converted to a float32 tensor.
            y: Array-like of integer class labels; converted to an int64
                tensor.

        Returns:
            This estimator, with the trained model stored for prediction.

        Raises:
            ValueError: If ``X`` is empty or ``X`` and ``y`` differ in length.
        """
        X_all = torch.tensor(X, dtype=torch.float32)
        y_all = torch.tensor(y, dtype=torch.long)

        # Fail early with a clear message instead of a ZeroDivisionError in
        # the loss average or an assertion deep inside TensorDataset.
        if len(X_all) == 0:
            raise ValueError("fit() requires at least one training sample")
        if len(X_all) != len(y_all):
            raise ValueError(
                f"X and y have inconsistent lengths: {len(X_all)} != {len(y_all)}"
            )

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self._device = device

        # Hold out 10% of the training data to track progress each epoch.
        n_val = len(X_all) // 10
        X_val, X_tr = X_all[:n_val], X_all[n_val:]
        y_val, y_tr = y_all[:n_val], y_all[n_val:]

        loader = DataLoader(
            TensorDataset(X_tr, y_tr), batch_size=self.batch_size, shuffle=True
        )

        model = MLP(hidden_sizes=self.hidden_sizes).to(device)
        optimizer = optim.Adam(model.parameters(), lr=self.lr)
        loss_fn = nn.CrossEntropyLoss()

        print(f"\nTraining MLP (hidden_sizes={self.hidden_sizes}, epochs={self.epochs})")
        for epoch in range(1, self.epochs + 1):
            model.train()
            total_loss = 0.0
            for xb, yb in loader:
                xb, yb = xb.to(device), yb.to(device)
                optimizer.zero_grad()
                loss = loss_fn(model(xb), yb)
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
            mean_loss = total_loss / len(loader)

            if n_val:
                model.eval()
                with torch.no_grad():
                    val_pred = model(X_val.to(device)).argmax(dim=1).cpu()
                val_accuracy = (val_pred == y_val).float().mean().item()
                val_text = f"{val_accuracy:.3f}"
            else:
                # Fewer than 10 samples leaves the hold-out empty; the mean
                # of an empty tensor would be reported as "nan".
                val_text = "n/a"

            print(f" epoch {epoch:2d}/{self.epochs} loss={mean_loss:.3f} val_accuracy={val_text}")
        print()

        self._model = model
        return self

    def predict_proba(self, X):
        """Return class probabilities for ``X`` as a NumPy array.

        Args:
            X: Array-like of samples; converted to a float32 tensor.

        Returns:
            Array with one row per sample, holding the softmax of the model's
            logits along dimension 1.

        Raises:
            AttributeError: If called before ``fit``.
        """
        model = getattr(self, "_model", None)
        if model is None:
            # Same exception type as the bare attribute lookup raised before,
            # but with a message that says what to do about it.
            raise AttributeError(
                "MLPClassifier is not fitted yet; call fit() before predicting"
            )

        X_te = torch.tensor(X, dtype=torch.float32)
        model.eval()
        with torch.no_grad():
            logits = model(X_te.to(self._device))
            probabilities = torch.softmax(logits, dim=1).cpu().numpy()
        return probabilities

    def predict(self, X):
        """Return the index of the most probable class for each sample in ``X``."""
        return self.predict_proba(X).argmax(axis=1)
|