from sklearn.exceptions import NotFittedError
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline


class FeatureExtractor:
    """Pipeline step that maps each image to a dict of hand-designed features.

    Provides ``fit`` and ``transform`` so it can be placed in a scikit-learn
    ``Pipeline`` ahead of a ``DictVectorizer``.
    """

    def fit(self, X, y=None):
        """Return ``self`` unchanged; this step keeps no fitted state."""
        return self

    def transform(self, X):
        """Return a list holding one feature dict per element of ``X``."""
        return [self.extract_features(pixels) for pixels in X]

    def extract_features(self, pixels):
        """Extract hand-designed features from a 784-pixel image.

        Add at least two features of your own. Each feature should be a
        number computed from the pixel array.

        Arguments:
            pixels: numpy array of 784 float values in [0, 1]

        Returns:
            dict: feature name -> numerical value
        """
        # View the flat pixel vector as a 28x28 grid so that regions of the
        # image can be selected by row/column slices.
        img = pixels.reshape(28, 28)
        return {
            "mean_brightness": float(pixels.mean()),
            # Rows 0-13 are the top half, rows 14-27 the bottom half.
            "top_half_brightness": float(img[:14, :].mean()),
            "bottom_half_brightness": float(img[14:, :].mean()),
            # ---- Add your features here ----
            # "left_half_brightness": float(img[:, :14].mean()),
            # "right_half_brightness": float(img[:, 14:].mean()),
            # "num_bright_pixels": float((pixels > 0.5).sum()),
        }


class HandPickedClassifier:
    """Classifier that runs logistic regression on hand-picked image features.

    ``fit`` builds and trains a pipeline of ``FeatureExtractor`` ->
    ``DictVectorizer`` -> ``LogisticRegression``; ``predict`` delegates to it.
    """

    def fit(self, X, y):
        """Build a fresh pipeline, train it on ``(X, y)`` and return ``self``.

        Arguments:
            X: iterable of images; each element is passed to
                ``FeatureExtractor.extract_features``.
            y: target labels, forwarded to the pipeline's ``fit``.
        """
        self._pipeline = Pipeline([
            ("features", FeatureExtractor()),
            ("vectorizer", DictVectorizer()),
            ("classifier", LogisticRegression(max_iter=1000)),
        ])
        self._pipeline.fit(X, y)
        return self

    def predict(self, X):
        """Return the trained pipeline's predictions for ``X``.

        Raises:
            NotFittedError: if ``fit`` has not been called on this instance.
                ``NotFittedError`` subclasses both ``ValueError`` and
                ``AttributeError``, so callers that caught the
                ``AttributeError`` raised previously keep working.
        """
        try:
            pipeline = self._pipeline
        except AttributeError:
            # Report the real problem instead of a missing private attribute.
            raise NotFittedError(
                "This HandPickedClassifier instance is not fitted yet. "
                "Call 'fit' with appropriate arguments before using this "
                "estimator."
            ) from None
        return pipeline.predict(X)