From 255c189d2fbffe537f213c647fd7e421d30ebce2 Mon Sep 17 00:00:00 2001 From: Chris Proctor Date: Mon, 22 Jun 2026 16:08:23 -0400 Subject: [PATCH] Updates --- .commit_template | 9 +++++ cli/data.py | 4 +- cli/main.py | 22 +++++++---- cli/output.py | 7 ++++ cli/persistence.py | 16 ++++++-- cli/webcam.py | 8 +++- models/cnn.py | 14 ++++--- models/mlp.py | 91 ++++++++++++++++++++++++---------------------- questions.md | 8 +--- 9 files changed, 111 insertions(+), 68 deletions(-) create mode 100644 .commit_template diff --git a/.commit_template b/.commit_template new file mode 100644 index 0000000..ae43eb1 --- /dev/null +++ b/.commit_template @@ -0,0 +1,9 @@ + + +# ----------------------------------------------------------------- +# Write your commit message above this line. +# +# The first line should be a quick description of what you changed. +# Then leave a blank line. +# Then write a few sentences describing an idea or a question you +# have been thinking about. diff --git a/cli/data.py b/cli/data.py index 4112556..3bbf294 100644 --- a/cli/data.py +++ b/cli/data.py @@ -3,7 +3,7 @@ from sklearn.datasets import fetch_openml from sklearn.model_selection import train_test_split -def load_mnist(n_train=10000, n_test=2000): +def load_mnist(n_train=10000, n_test=2000, full=False): """Load MNIST from sklearn (downloads on first run). For speed, uses a subset of the data by default. Set n_train=60000 @@ -18,6 +18,8 @@ def load_mnist(n_train=10000, n_test=2000): X = mnist.data.astype(np.float32) / 255.0 y = mnist.target.astype(int) + if full: + n_train, n_test = 60000, 10000 return train_test_split( X, y, train_size=n_train, test_size=n_test, random_state=42, stratify=y ) diff --git a/cli/main.py b/cli/main.py index b3f6c9f..6630565 100644 --- a/cli/main.py +++ b/cli/main.py @@ -10,9 +10,10 @@ Usage: digits models.mlp.MLPClassifier -a digits models.cnn.CNNClassifier --epochs 3 digits models.cnn.CNNClassifier -a 5 - digits models.cnn.CNNClassifier --save weights/cnn - digits weights/cnn - digits weights/cnn --run + digits models.cnn.CNNClassifier --save cnn + digits cnn + digits cnn --run + digits models.cnn.CNNClassifier --full """ import argparse @@ -77,8 +78,13 @@ def main(): ) parser.add_argument( "--save", - metavar="DIR", - help="After training, save the model's configuration and weights to DIR", + metavar="NAME", + help="After training, save the model to weights/NAME (e.g. --save cnn)", + ) + parser.add_argument( + "--full", + action="store_true", + help="Train on the full MNIST dataset (60,000 examples) instead of the default 10,000-example subset", ) parser.add_argument( "--run", @@ -91,7 +97,7 @@ def main(): parser.print_help() return - X_train, X_test, y_train, y_test = load_mnist() + X_train, X_test, y_train, y_test = load_mnist(full=args.full) if args.explore is not None: out.explore(X_train, y_train, args.explore) @@ -102,7 +108,7 @@ def main(): if is_saved_model(args.classifier): clf = load_model(args.classifier) - print(f"Loaded saved model from {args.classifier}\n") + print(f"Loaded saved model: {args.classifier}\n") else: clf = load_classifier( args.classifier, @@ -112,7 +118,7 @@ def main(): clf.fit(X_train, y_train) if args.save: save_model(clf, args.save) - print(f"Saved model to {args.save}\n") + print(f"Saved model: {args.save}\n") y_pred = clf.predict(X_test) diff --git a/cli/output.py b/cli/output.py index 717ac7a..056d82a 100644 --- a/cli/output.py +++ b/cli/output.py @@ -54,6 +54,13 @@ def evaluation(y_true, y_pred, clf_name): print(f" {digit}: {acc:.3f} {bar}") print() + print("Confusion matrix (row=actual, col=predicted):") + header = " " + "".join(f"{d:5d}" for d in range(10)) + print(header) + for actual, row in enumerate(cm): + print(f" {actual:3d} " + "".join(f"{v:5d}" for v in row)) + print() + def error_analysis(X, y_true, y_pred, n): errors = [ diff --git a/cli/persistence.py b/cli/persistence.py index 3d9c5b9..417eafa 100644 --- a/cli/persistence.py +++ b/cli/persistence.py @@ -3,16 +3,26 @@ import os import joblib MODEL_FILE = "model.joblib" +WEIGHTS_DIR = "weights" + + +def _resolve(name): + if name.startswith(WEIGHTS_DIR + os.sep) or name.startswith(WEIGHTS_DIR + "/"): + return name + return os.path.join(WEIGHTS_DIR, name) def is_saved_model(path): - return os.path.isdir(path) and os.path.exists(os.path.join(path, MODEL_FILE)) + directory = _resolve(path) + return os.path.isdir(directory) and os.path.exists(os.path.join(directory, MODEL_FILE)) -def save_model(clf, directory): +def save_model(clf, name): + directory = _resolve(name) os.makedirs(directory, exist_ok=True) joblib.dump(clf, os.path.join(directory, MODEL_FILE)) -def load_model(directory): +def load_model(path): + directory = _resolve(path) return joblib.load(os.path.join(directory, MODEL_FILE)) diff --git a/cli/webcam.py b/cli/webcam.py index 301484b..f1d4a36 100644 --- a/cli/webcam.py +++ b/cli/webcam.py @@ -32,6 +32,12 @@ def run(clf): print("Could not open the webcam.") return + capture.set(cv2.CAP_PROP_BUFFERSIZE, 1) + + # Discard the first several frames while the camera warms up + for _ in range(10): + capture.read() + print("Hold a handwritten digit up to the camera, inside the box.") print("Press 'q' (with the video window focused) to quit.\n") @@ -56,7 +62,7 @@ def run(clf): cv2.putText(frame, label, (left, top - 12), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 200, 0), 2) cv2.imshow(WINDOW_TITLE, frame) - if cv2.waitKey(1) & 0xFF == ord("q"): + if cv2.waitKey(30) & 0xFF == ord("q"): break except KeyboardInterrupt: pass diff --git a/models/cnn.py b/models/cnn.py index db8f75a..60ff530 100644 --- a/models/cnn.py +++ b/models/cnn.py @@ -1,3 +1,5 @@ +import time + import torch import torch.nn as nn import torch.optim as optim @@ -8,16 +10,14 @@ class CNN(nn.Module): def __init__(self): super().__init__() self.conv = nn.Sequential( - nn.Conv2d(1, 32, kernel_size=3), # 28x28 -> 26x26 + nn.Conv2d(1, 32, kernel_size=3, stride=2), # 28x28 -> 13x13 nn.ReLU(), - nn.MaxPool2d(2), # 26x26 -> 13x13 - nn.Conv2d(32, 64, kernel_size=3), # 13x13 -> 11x11 + nn.Conv2d(32, 64, kernel_size=3, stride=2), # 13x13 -> 6x6 nn.ReLU(), - nn.MaxPool2d(2), # 11x11 -> 5x5 ) self.fc = nn.Sequential( nn.Flatten(), - nn.Linear(64 * 5 * 5, 128), + nn.Linear(64 * 6 * 6, 128), nn.ReLU(), nn.Linear(128, 10), ) @@ -51,6 +51,7 @@ class CNNClassifier: print(f"\nTraining CNN (epochs={self.epochs})") for epoch in range(1, self.epochs + 1): + t0 = time.time() model.train() total_loss = 0 for xb, yb in loader: @@ -66,7 +67,8 @@ class CNNClassifier: val_pred = model(X_val.to(device)).argmax(dim=1).cpu() val_accuracy = (val_pred == y_val).float().mean().item() - print(f" epoch {epoch:2d}/{self.epochs} loss={total_loss / len(loader):.3f} val_accuracy={val_accuracy:.3f}") + elapsed = time.time() - t0 + print(f" epoch {epoch:2d}/{self.epochs} loss={total_loss / len(loader):.3f} val_accuracy={val_accuracy:.3f} {elapsed:.1f}s") print() self._model = model diff --git a/models/mlp.py b/models/mlp.py index 2bbfc36..4d5a91e 100644 --- a/models/mlp.py +++ b/models/mlp.py @@ -1,3 +1,5 @@ +import time + import torch import torch.nn as nn import torch.optim as optim @@ -8,16 +10,16 @@ class MLP(nn.Module): def __init__(self, hidden_sizes=(128, 64)): super().__init__() layers = [] - in_size = 784 - for h in hidden_sizes: - layers.append(nn.Linear(in_size, h)) + input_size = 784 + for hidden_size in hidden_sizes: + layers.append(nn.Linear(input_size, hidden_size)) layers.append(nn.ReLU()) - in_size = h - layers.append(nn.Linear(in_size, 10)) + input_size = hidden_size + layers.append(nn.Linear(input_size, 10)) self.net = nn.Sequential(*layers) - def forward(self, x): - return self.net(x) + def forward(self, pixels): + return self.net(pixels) class MLPClassifier: @@ -26,53 +28,56 @@ class MLPClassifier: self.epochs = epochs def fit(self, X, y): - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - self._device = device + self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self._model = MLP(hidden_sizes=self.hidden_sizes).to(self._device) - X_tr = torch.tensor(X, dtype=torch.float32) - y_tr = torch.tensor(y, dtype=torch.long) + images = torch.tensor(X, dtype=torch.float32) + labels = torch.tensor(y, dtype=torch.long) + train_images, train_labels, val_images, val_labels = self._split(images, labels) - # Hold out 10% of the training data to track progress each epoch - n_val = len(X_tr) // 10 - X_val, X_tr = X_tr[:n_val], X_tr[n_val:] - y_val, y_tr = y_tr[:n_val], y_tr[n_val:] - - loader = DataLoader(TensorDataset(X_tr, y_tr), batch_size=64, shuffle=True) - - model = MLP(hidden_sizes=self.hidden_sizes).to(device) - optimizer = optim.Adam(model.parameters(), lr=1e-3) + batches = DataLoader(TensorDataset(train_images, train_labels), batch_size=64, shuffle=True) + optimizer = optim.Adam(self._model.parameters(), lr=1e-3) loss_fn = nn.CrossEntropyLoss() print(f"\nTraining MLP (hidden_sizes={self.hidden_sizes}, epochs={self.epochs})") for epoch in range(1, self.epochs + 1): - model.train() - total_loss = 0 - for xb, yb in loader: - xb, yb = xb.to(device), yb.to(device) - optimizer.zero_grad() - loss = loss_fn(model(xb), yb) - loss.backward() - optimizer.step() - total_loss += loss.item() - - model.eval() - with torch.no_grad(): - val_pred = model(X_val.to(device)).argmax(dim=1).cpu() - val_accuracy = (val_pred == y_val).float().mean().item() - - print(f" epoch {epoch:2d}/{self.epochs} loss={total_loss / len(loader):.3f} val_accuracy={val_accuracy:.3f}") + t0 = time.time() + avg_loss = self._train_one_epoch(batches, optimizer, loss_fn) + val_accuracy = self._accuracy(val_images, val_labels) + elapsed = time.time() - t0 + print(f" epoch {epoch:2d}/{self.epochs} loss={avg_loss:.3f} val_accuracy={val_accuracy:.3f} {elapsed:.1f}s") print() - - self._model = model return self - def predict_proba(self, X): - X_te = torch.tensor(X, dtype=torch.float32) + def _split(self, images, labels): + n_val = len(images) // 10 + return images[n_val:], labels[n_val:], images[:n_val], labels[:n_val] + + def _train_one_epoch(self, batches, optimizer, loss_fn): + self._model.train() + total_loss = 0 + for image_batch, label_batch in batches: + image_batch = image_batch.to(self._device) + label_batch = label_batch.to(self._device) + optimizer.zero_grad() + loss = loss_fn(self._model(image_batch), label_batch) + loss.backward() + optimizer.step() + total_loss += loss.item() + return total_loss / len(batches) + + def _accuracy(self, images, labels): self._model.eval() with torch.no_grad(): - logits = self._model(X_te.to(self._device)) - probabilities = torch.softmax(logits, dim=1).cpu().numpy() - return probabilities + predictions = self._model(images.to(self._device)).argmax(dim=1).cpu() + return (predictions == labels).float().mean().item() + + def predict_proba(self, X): + images = torch.tensor(X, dtype=torch.float32) + self._model.eval() + with torch.no_grad(): + logits = self._model(images.to(self._device)) + return torch.softmax(logits, dim=1).cpu().numpy() def predict(self, X): return self.predict_proba(X).argmax(axis=1) diff --git a/questions.md b/questions.md index 76388a8..9693f4b 100644 --- a/questions.md +++ b/questions.md @@ -106,13 +106,9 @@ Output layer: _____ neurons (one per digit) ``` Input: ___x___x___ (height × width × channels) ↓ -Conv layer 1: ___ filters, ___x___ kernel → output: ___x___x___ +Conv layer 1: ___ filters, ___x___ kernel, stride ___ → output: ___x___x___ ↓ -Pooling: ___x___ max pool → output: ___x___x___ -↓ -Conv layer 2: ___ filters, ___x___ kernel → output: ___x___x___ -↓ -Pooling: ___x___ max pool → output: ___x___x___ +Conv layer 2: ___ filters, ___x___ kernel, stride ___ → output: ___x___x___ ↓ Flatten: _____ values ↓