From 255c189d2fbffe537f213c647fd7e421d30ebce2 Mon Sep 17 00:00:00 2001
From: Chris Proctor <chrisp@buffalo.edu>
Date: Mon, 22 Jun 2026 16:08:23 -0400
Subject: [PATCH] Add --full flag, named model saves, confusion matrix; simplify CNN and MLP

---
 .commit_template   |  9 +++++
 cli/data.py        |  4 +-
 cli/main.py        | 22 +++++++----
 cli/output.py      |  7 ++++
 cli/persistence.py | 16 ++++++--
 cli/webcam.py      |  8 +++-
 models/cnn.py      | 14 ++++---
 models/mlp.py      | 91 ++++++++++++++++++++++++----------------------
 questions.md       |  8 +---
 9 files changed, 111 insertions(+), 68 deletions(-)
 create mode 100644 .commit_template

diff --git a/.commit_template b/.commit_template
new file mode 100644
index 0000000..ae43eb1
--- /dev/null
+++ b/.commit_template
@@ -0,0 +1,9 @@
+
+
+# -----------------------------------------------------------------
+# Write your commit message above this line.
+#
+# The first line should be a quick description of what you changed.
+# Then leave a blank line.
+# Then write a few sentences describing an idea or a question you
+# have been thinking about.
diff --git a/cli/data.py b/cli/data.py
index 4112556..3bbf294 100644
--- a/cli/data.py
+++ b/cli/data.py
@@ -3,7 +3,7 @@ from sklearn.datasets import fetch_openml
 from sklearn.model_selection import train_test_split
 
 
-def load_mnist(n_train=10000, n_test=2000):
+def load_mnist(n_train=10000, n_test=2000, full=False):
     """Load MNIST from sklearn (downloads on first run).
 
     For speed, uses a subset of the data by default. Set n_train=60000
@@ -18,6 +18,8 @@ def load_mnist(n_train=10000, n_test=2000):
     X = mnist.data.astype(np.float32) / 255.0
     y = mnist.target.astype(int)
 
+    if full:
+        n_train, n_test = 60000, 10000
     return train_test_split(
         X, y, train_size=n_train, test_size=n_test, random_state=42, stratify=y
     )
diff --git a/cli/main.py b/cli/main.py
index b3f6c9f..6630565 100644
--- a/cli/main.py
+++ b/cli/main.py
@@ -10,9 +10,10 @@ Usage:
     digits models.mlp.MLPClassifier -a
     digits models.cnn.CNNClassifier --epochs 3
     digits models.cnn.CNNClassifier -a 5
-    digits models.cnn.CNNClassifier --save weights/cnn
-    digits weights/cnn
-    digits weights/cnn --run
+    digits models.cnn.CNNClassifier --save cnn
+    digits cnn
+    digits cnn --run
+    digits models.cnn.CNNClassifier --full
 """
 
 import argparse
@@ -77,8 +78,13 @@ def main():
     )
     parser.add_argument(
         "--save",
-        metavar="DIR",
-        help="After training, save the model's configuration and weights to DIR",
+        metavar="NAME",
+        help="After training, save the model to weights/NAME (e.g. --save cnn)",
+    )
+    parser.add_argument(
+        "--full",
+        action="store_true",
+        help="Train on the full MNIST dataset (60,000 examples) instead of the default 10,000-example subset",
     )
     parser.add_argument(
         "--run",
@@ -91,7 +97,7 @@ def main():
         parser.print_help()
         return
 
-    X_train, X_test, y_train, y_test = load_mnist()
+    X_train, X_test, y_train, y_test = load_mnist(full=args.full)
 
     if args.explore is not None:
         out.explore(X_train, y_train, args.explore)
@@ -102,7 +108,7 @@ def main():
 
     if is_saved_model(args.classifier):
         clf = load_model(args.classifier)
-        print(f"Loaded saved model from {args.classifier}\n")
+        print(f"Loaded saved model: {args.classifier}\n")
     else:
         clf = load_classifier(
             args.classifier,
@@ -112,7 +118,7 @@ def main():
         clf.fit(X_train, y_train)
         if args.save:
             save_model(clf, args.save)
-            print(f"Saved model to {args.save}\n")
+            print(f"Saved model: {args.save}\n")
 
     y_pred = clf.predict(X_test)
 
diff --git a/cli/output.py b/cli/output.py
index 717ac7a..056d82a 100644
--- a/cli/output.py
+++ b/cli/output.py
@@ -54,6 +54,13 @@ def evaluation(y_true, y_pred, clf_name):
         print(f"  {digit}: {acc:.3f}  {bar}")
     print()
 
+    print("Confusion matrix (row=actual, col=predicted):")
+    header = "        " + "".join(f"{d:5d}" for d in range(10))
+    print(header)
+    for actual, row in enumerate(cm):
+        print(f"  {actual:3d}   " + "".join(f"{v:5d}" for v in row))
+    print()
+
 
 def error_analysis(X, y_true, y_pred, n):
     errors = [
diff --git a/cli/persistence.py b/cli/persistence.py
index 3d9c5b9..417eafa 100644
--- a/cli/persistence.py
+++ b/cli/persistence.py
@@ -3,16 +3,26 @@ import os
 import joblib
 
 MODEL_FILE = "model.joblib"
+WEIGHTS_DIR = "weights"
+
+
+def _resolve(name):
+    if name.startswith((WEIGHTS_DIR + os.sep, WEIGHTS_DIR + "/")):
+        return name  # already a path under weights/, e.g. "weights/cnn"
+    return os.path.join(WEIGHTS_DIR, name)  # bare name, e.g. "cnn" -> weights/cnn
 
 
 def is_saved_model(path):
-    return os.path.isdir(path) and os.path.exists(os.path.join(path, MODEL_FILE))
+    directory = _resolve(path)  # accepts a bare name ("cnn") or "weights/cnn"
+    return os.path.isdir(directory) and os.path.exists(os.path.join(directory, MODEL_FILE))
 
 
-def save_model(clf, directory):
+def save_model(clf, name):
+    directory = _resolve(name)  # a bare NAME is stored under weights/NAME
     os.makedirs(directory, exist_ok=True)
     joblib.dump(clf, os.path.join(directory, MODEL_FILE))
 
 
-def load_model(directory):
+def load_model(path):
+    directory = _resolve(path)  # NOTE(review): joblib.load unpickles; only load model files you trust
     return joblib.load(os.path.join(directory, MODEL_FILE))
diff --git a/cli/webcam.py b/cli/webcam.py
index 301484b..f1d4a36 100644
--- a/cli/webcam.py
+++ b/cli/webcam.py
@@ -32,6 +32,12 @@ def run(clf):
         print("Could not open the webcam.")
         return
 
+    capture.set(cv2.CAP_PROP_BUFFERSIZE, 1)
+
+    # Discard the first several frames while the camera warms up
+    for _ in range(10):
+        capture.read()
+
     print("Hold a handwritten digit up to the camera, inside the box.")
     print("Press 'q' (with the video window focused) to quit.\n")
 
@@ -56,7 +62,7 @@ def run(clf):
             cv2.putText(frame, label, (left, top - 12), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 200, 0), 2)
             cv2.imshow(WINDOW_TITLE, frame)
 
-            if cv2.waitKey(1) & 0xFF == ord("q"):
+            if cv2.waitKey(30) & 0xFF == ord("q"):
                 break
     except KeyboardInterrupt:
         pass
diff --git a/models/cnn.py b/models/cnn.py
index db8f75a..60ff530 100644
--- a/models/cnn.py
+++ b/models/cnn.py
@@ -1,3 +1,5 @@
+import time
+
 import torch
 import torch.nn as nn
 import torch.optim as optim
@@ -8,16 +10,14 @@ class CNN(nn.Module):
     def __init__(self):
         super().__init__()
         self.conv = nn.Sequential(
-            nn.Conv2d(1, 32, kernel_size=3),   # 28x28 -> 26x26
+            nn.Conv2d(1, 32, kernel_size=3, stride=2),   # 28x28 -> 13x13
             nn.ReLU(),
-            nn.MaxPool2d(2),                   # 26x26 -> 13x13
-            nn.Conv2d(32, 64, kernel_size=3),  # 13x13 -> 11x11
+            nn.Conv2d(32, 64, kernel_size=3, stride=2),  # 13x13 -> 6x6
             nn.ReLU(),
-            nn.MaxPool2d(2),                   # 11x11 -> 5x5
         )
         self.fc = nn.Sequential(
             nn.Flatten(),
-            nn.Linear(64 * 5 * 5, 128),
+            nn.Linear(64 * 6 * 6, 128),
             nn.ReLU(),
             nn.Linear(128, 10),
         )
@@ -51,6 +51,7 @@ class CNNClassifier:
 
         print(f"\nTraining CNN (epochs={self.epochs})")
         for epoch in range(1, self.epochs + 1):
+            t0 = time.time()
             model.train()
             total_loss = 0
             for xb, yb in loader:
@@ -66,7 +67,8 @@ class CNNClassifier:
                 val_pred = model(X_val.to(device)).argmax(dim=1).cpu()
                 val_accuracy = (val_pred == y_val).float().mean().item()
 
-            print(f"  epoch {epoch:2d}/{self.epochs}  loss={total_loss / len(loader):.3f}  val_accuracy={val_accuracy:.3f}")
+            elapsed = time.time() - t0
+            print(f"  epoch {epoch:2d}/{self.epochs}  loss={total_loss / len(loader):.3f}  val_accuracy={val_accuracy:.3f}  {elapsed:.1f}s")
         print()
 
         self._model = model
diff --git a/models/mlp.py b/models/mlp.py
index 2bbfc36..4d5a91e 100644
--- a/models/mlp.py
+++ b/models/mlp.py
@@ -1,3 +1,5 @@
+import time
+
 import torch
 import torch.nn as nn
 import torch.optim as optim
@@ -8,16 +10,16 @@ class MLP(nn.Module):
     def __init__(self, hidden_sizes=(128, 64)):
         super().__init__()
         layers = []
-        in_size = 784
-        for h in hidden_sizes:
-            layers.append(nn.Linear(in_size, h))
+        input_size = 784  # presumably a flattened 28x28 image (28 * 28 = 784)
+        for hidden_size in hidden_sizes:
+            layers.append(nn.Linear(input_size, hidden_size))
             layers.append(nn.ReLU())
-            in_size = h
-        layers.append(nn.Linear(in_size, 10))
+            input_size = hidden_size  # the next layer consumes this layer's output
+        layers.append(nn.Linear(input_size, 10))  # 10 output scores, one per digit class
         self.net = nn.Sequential(*layers)
 
-    def forward(self, x):
-        return self.net(x)
+    def forward(self, pixels):
+        return self.net(pixels)  # raw class scores (logits); predict_proba applies softmax to them
 
 
 class MLPClassifier:
@@ -26,53 +28,56 @@ class MLPClassifier:
         self.epochs = epochs
 
     def fit(self, X, y):
-        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self._device = device
+        self._device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self._model = MLP(hidden_sizes=self.hidden_sizes).to(self._device)  # fresh, untrained network on every call
 
-        X_tr = torch.tensor(X, dtype=torch.float32)
-        y_tr = torch.tensor(y, dtype=torch.long)
+        images = torch.tensor(X, dtype=torch.float32)
+        labels = torch.tensor(y, dtype=torch.long)
+        train_images, train_labels, val_images, val_labels = self._split(images, labels)  # 10% held out for validation
 
-        # Hold out 10% of the training data to track progress each epoch
-        n_val = len(X_tr) // 10
-        X_val, X_tr = X_tr[:n_val], X_tr[n_val:]
-        y_val, y_tr = y_tr[:n_val], y_tr[n_val:]
-
-        loader = DataLoader(TensorDataset(X_tr, y_tr), batch_size=64, shuffle=True)
-
-        model = MLP(hidden_sizes=self.hidden_sizes).to(device)
-        optimizer = optim.Adam(model.parameters(), lr=1e-3)
+        batches = DataLoader(TensorDataset(train_images, train_labels), batch_size=64, shuffle=True)
+        optimizer = optim.Adam(self._model.parameters(), lr=1e-3)
         loss_fn = nn.CrossEntropyLoss()
 
         print(f"\nTraining MLP (hidden_sizes={self.hidden_sizes}, epochs={self.epochs})")
         for epoch in range(1, self.epochs + 1):
-            model.train()
-            total_loss = 0
-            for xb, yb in loader:
-                xb, yb = xb.to(device), yb.to(device)
-                optimizer.zero_grad()
-                loss = loss_fn(model(xb), yb)
-                loss.backward()
-                optimizer.step()
-                total_loss += loss.item()
-
-            model.eval()
-            with torch.no_grad():
-                val_pred = model(X_val.to(device)).argmax(dim=1).cpu()
-                val_accuracy = (val_pred == y_val).float().mean().item()
-
-            print(f"  epoch {epoch:2d}/{self.epochs}  loss={total_loss / len(loader):.3f}  val_accuracy={val_accuracy:.3f}")
+            t0 = time.time()
+            avg_loss = self._train_one_epoch(batches, optimizer, loss_fn)
+            val_accuracy = self._accuracy(val_images, val_labels)
+            elapsed = time.time() - t0  # wall-clock seconds for this epoch (training + validation)
+            print(f"  epoch {epoch:2d}/{self.epochs}  loss={avg_loss:.3f}  val_accuracy={val_accuracy:.3f}  {elapsed:.1f}s")
         print()
-
-        self._model = model
         return self
 
-    def predict_proba(self, X):
-        X_te = torch.tensor(X, dtype=torch.float32)
+    def _split(self, images, labels):
+        n_val = len(images) // 10  # hold out the first 10% of the data to track progress each epoch
+        return images[n_val:], labels[n_val:], images[:n_val], labels[:n_val]  # train pair first, then validation pair
+
+    def _train_one_epoch(self, batches, optimizer, loss_fn):
+        self._model.train()
+        total_loss = 0
+        for image_batch, label_batch in batches:
+            image_batch = image_batch.to(self._device)
+            label_batch = label_batch.to(self._device)
+            optimizer.zero_grad()  # clear gradients left over from the previous batch
+            loss = loss_fn(self._model(image_batch), label_batch)
+            loss.backward()
+            optimizer.step()
+            total_loss += loss.item()
+        return total_loss / len(batches)  # mean of the per-batch losses; divides by zero if there are no batches
+
+    def _accuracy(self, images, labels):
         self._model.eval()
         with torch.no_grad():
-            logits = self._model(X_te.to(self._device))
-            probabilities = torch.softmax(logits, dim=1).cpu().numpy()
-        return probabilities
+            predictions = self._model(images.to(self._device)).argmax(dim=1).cpu()  # highest-scoring class per row
+            return (predictions == labels).float().mean().item()  # fraction of matches, as a Python float
+
+    def predict_proba(self, X):
+        images = torch.tensor(X, dtype=torch.float32)
+        self._model.eval()  # self._model and self._device are assigned in fit()
+        with torch.no_grad():
+            logits = self._model(images.to(self._device))
+            return torch.softmax(logits, dim=1).cpu().numpy()  # NumPy array; softmax over dim 1 makes each row sum to 1
 
     def predict(self, X):
         return self.predict_proba(X).argmax(axis=1)
diff --git a/questions.md b/questions.md
index 76388a8..9693f4b 100644
--- a/questions.md
+++ b/questions.md
@@ -106,13 +106,9 @@ Output layer:   _____ neurons (one per digit)
 ```
 Input: ___x___x___ (height × width × channels)
 ↓
-Conv layer 1: ___ filters, ___x___ kernel → output: ___x___x___
+Conv layer 1: ___ filters, ___x___ kernel, stride ___ → output: ___x___x___
 ↓
-Pooling: ___x___ max pool → output: ___x___x___
-↓
-Conv layer 2: ___ filters, ___x___ kernel → output: ___x___x___
-↓
-Pooling: ___x___ max pool → output: ___x___x___
+Conv layer 2: ___ filters, ___x___ kernel, stride ___ → output: ___x___x___
 ↓
 Flatten: _____ values
 ↓