import cv2
import numpy as np

WINDOW_TITLE = "Hold up a digit -- press q to quit"

# Side length, in pixels, of the image handed to the classifier.
_DIGIT_SIDE = 28

# Drawing style shared by the guide box and its label.
_OVERLAY_COLOR = (0, 200, 0)  # OpenCV colour order is BGR
_OVERLAY_THICKNESS = 2
_LABEL_FONT = cv2.FONT_HERSHEY_SIMPLEX
_LABEL_SCALE = 1
_LABEL_MARGIN = 12


def preprocess(region):
    """Turn a captured square region into a 784-value array like MNIST's.

    MNIST digits are white strokes on a black background, so after
    converting to grayscale and shrinking to 28x28, we invert the
    brightness (ink-on-paper is normally dark-on-light) and scale pixel
    values down to the [0, 1] range `load_mnist` uses.

    Args:
        region: BGR image array (as produced by OpenCV) containing the digit.

    Returns:
        A flat ``float32`` NumPy array of 28 * 28 = 784 values in [0, 1].
    """
    gray = cv2.cvtColor(region, cv2.COLOR_BGR2GRAY)

    # Bilinear sampling (cv2.resize's default) only looks at a 2x2
    # neighbourhood per output pixel, so when a full camera frame is shrunk
    # to 28x28 a thin pen stroke can fall between samples and vanish.
    # INTER_AREA averages every source pixel under each output pixel instead.
    # It is only appropriate for shrinking, so keep bilinear when enlarging.
    shrinking = min(gray.shape[:2]) >= _DIGIT_SIDE
    interpolation = cv2.INTER_AREA if shrinking else cv2.INTER_LINEAR
    small = cv2.resize(
        gray, (_DIGIT_SIDE, _DIGIT_SIDE), interpolation=interpolation
    )

    inverted = 255 - small
    return (inverted.astype(np.float32) / 255.0).flatten()


def central_square(frame):
    """Locate the largest centred square that fits inside *frame*.

    Args:
        frame: Image array whose first two dimensions are (height, width).

    Returns:
        A ``(top, left, size)`` tuple: the row and column of the square's
        top-left corner and the length of its side, all in pixels.
    """
    height, width = frame.shape[:2]
    size = min(height, width)
    top = (height - size) // 2
    left = (width - size) // 2
    return top, left, size


def _label_origin(label, top, left):
    """Return the (x, y) baseline origin at which *label* is fully visible.

    The label goes just above the guide box when there is room. When the
    box touches the top of the frame (always the case for landscape
    frames, where ``top == 0``) it is moved just inside the box instead of
    being drawn off-screen.
    """
    (_, text_height), _ = cv2.getTextSize(
        label, _LABEL_FONT, _LABEL_SCALE, _OVERLAY_THICKNESS
    )
    if top - _LABEL_MARGIN - text_height >= 0:
        return left, top - _LABEL_MARGIN
    return left + _LABEL_MARGIN, top + _LABEL_MARGIN + text_height


def run(clf):
    """Classify digits shown to the default webcam until the user quits.

    Each frame's central square is preprocessed and passed to *clf*; the
    most probable class and its probability are printed on one
    continuously rewritten console line and drawn on a mirrored preview
    window. The loop ends when a frame cannot be read, when ``q`` is
    pressed in the preview window, or on Ctrl-C.

    Args:
        clf: Object with a ``predict_proba`` method that accepts a list
            holding one 784-value array and returns, for that sample, an
            array of per-class probabilities. The index of the largest
            probability is reported as the digit.
    """
    capture = cv2.VideoCapture(0)
    if not capture.isOpened():
        print("Could not open the webcam.")
        return

    print("Hold a handwritten digit up to the camera, inside the box.")
    print("Press 'q' (with the video window focused) to quit.\n")

    try:
        while True:
            found, frame = capture.read()
            if not found:
                break

            # Classify the frame as captured. Mirroring is only for the
            # preview: a horizontally flipped digit is a different shape
            # from anything the classifier was trained on.
            # NOTE(review): assumes the camera delivers unmirrored frames.
            top, left, size = central_square(frame)
            region = frame[top:top + size, left:left + size]
            pixels = preprocess(region)

            probabilities = clf.predict_proba([pixels])[0]
            digit = probabilities.argmax()
            confidence = probabilities[digit]

            print(f"\rpredicted digit: {digit} confidence: {confidence:.2f} ",
                  end="", flush=True)

            # Mirror the preview so moving the paper feels natural, and
            # mirror the box's column so it outlines exactly the pixels
            # that were classified.
            preview = cv2.flip(frame, 1)
            box_left = preview.shape[1] - left - size

            label = f"{digit} ({confidence:.0%})"
            cv2.rectangle(preview, (box_left, top),
                          (box_left + size, top + size),
                          _OVERLAY_COLOR, _OVERLAY_THICKNESS)
            cv2.putText(preview, label, _label_origin(label, top, box_left),
                        _LABEL_FONT, _LABEL_SCALE,
                        _OVERLAY_COLOR, _OVERLAY_THICKNESS)
            cv2.imshow(WINDOW_TITLE, preview)

            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    except KeyboardInterrupt:
        # Ctrl-C is a normal way to stop; fall through to the cleanup.
        pass
    finally:
        capture.release()
        cv2.destroyAllWindows()
        # Finish the console line that was being rewritten with "\r".
        print()