Files
lab_classification_neural/cli/webcam.py
Chris Proctor 49c4e43f45 Revisions
2026-06-08 15:15:52 -04:00

67 lines
2.1 KiB
Python

import cv2
import numpy as np
# Title of the preview window; passed to cv2.imshow() inside run().
WINDOW_TITLE = "Hold up a digit -- press q to quit"
def preprocess(region):
    """Convert a captured BGR square into a flat, MNIST-style feature vector.

    The region is reduced to a single grayscale channel, resized to 28x28,
    brightness-inverted and rescaled to [0, 1] before being flattened to
    784 values.  The inversion is there because MNIST digits are light
    strokes on a dark background, whereas ink on paper is the opposite.

    Args:
        region: Colour image in OpenCV's BGR channel order (presumably
            8-bit, since the inversion subtracts from 255).

    Returns:
        A one-dimensional float32 array of 784 values in [0, 1], the same
        range `load_mnist` uses.
    """
    shrunk = cv2.resize(cv2.cvtColor(region, cv2.COLOR_BGR2GRAY), (28, 28))
    # Swap dark-on-light for light-on-dark to match the training data.
    negative = 255 - shrunk
    scaled = negative.astype(np.float32) / 255.0
    return scaled.flatten()
def central_square(frame):
    """Find the largest square centred within ``frame``.

    Args:
        frame: Any object whose ``shape`` starts with (height, width).

    Returns:
        A ``(top, left, size)`` tuple: the row and column of the square's
        top-left corner and the length of its side, all in pixels.
    """
    rows, cols = frame.shape[:2]
    side = rows if rows < cols else cols
    # Split the leftover space evenly on both sides (rounding down).
    return (rows - side) // 2, (cols - side) // 2, side
def run(clf):
    """Classify digits held up to the webcam until the user quits.

    Opens camera 0 and, for every frame, classifies the central square
    with ``clf``.  A mirrored preview is shown with the square outlined and
    the prediction drawn next to it; the prediction is also echoed to the
    terminal on a single line that is rewritten in place.

    The loop ends when a frame cannot be read, when 'q' is pressed while
    the video window has focus, or on Ctrl-C.  The camera and any OpenCV
    windows are released in all of those cases.

    Args:
        clf: A fitted classifier exposing ``predict_proba``.  It is called
            with a one-element batch holding the 784 values produced by
            ``preprocess``.  The index of the highest probability is
            reported as the digit (this presumes the classes are ordered
            0-9 -- confirm against the classifier in use).
    """
    capture = cv2.VideoCapture(0)
    if not capture.isOpened():
        print("Could not open the webcam.")
        return
    print("Hold a handwritten digit up to the camera, inside the box.")
    print("Press 'q' (with the video window focused) to quit.\n")
    try:
        while True:
            found, frame = capture.read()
            if not found:
                break
            top, left, size = central_square(frame)
            # Classify the pixels as the camera saw them.  Mirroring before
            # classification would hand the model reversed digits (this
            # assumes the camera delivers an un-mirrored image, which is
            # the usual case for raw capture).
            region = frame[top:top + size, left:left + size]
            pixels = preprocess(region)
            probabilities = clf.predict_proba([pixels])[0]
            digit = probabilities.argmax()
            confidence = probabilities[digit]
            print(f"\rpredicted digit: {digit} confidence: {confidence:.2f} ", end="", flush=True)
            label = f"{digit} ({confidence:.0%})"
            # Mirror only the preview so moving the paper feels natural.
            # The square is centred, so the same box applies after the flip.
            preview = cv2.flip(frame, 1)
            cv2.rectangle(preview, (left, top), (left + size, top + size), (0, 200, 0), 2)
            # putText positions the text by its bottom-left corner.  On a
            # landscape frame the square touches the top edge (top == 0),
            # so a label above the box would fall outside the image; draw
            # it just inside the box in that case.
            label_x, label_y = left, top - 12
            if label_y < 30:
                label_x, label_y = left + 8, top + 36
            cv2.putText(preview, label, (label_x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 200, 0), 2)
            cv2.imshow(WINDOW_TITLE, preview)
            # waitKey also services the GUI event loop; mask to the low
            # byte before comparing with the key code for 'q'.
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    except KeyboardInterrupt:
        # Ctrl-C is an expected way to stop; fall through to the cleanup.
        pass
    finally:
        capture.release()
        cv2.destroyAllWindows()
        # Finish the in-place status line with a newline.
        print()