import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.exceptions import NotFittedError
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline


class FeatureExtractor(BaseEstimator, TransformerMixin):
    """Turn flattened grayscale images into dictionaries of named features.

    Each sample is reshaped to ``image_shape`` and summarised by two
    brightness statistics. The transformer is stateless: ``fit`` learns
    nothing.

    Inheriting from ``BaseEstimator`` / ``TransformerMixin`` provides
    ``get_params`` / ``set_params`` / ``fit_transform`` so the step can be
    cloned and tuned like any other scikit-learn transformer.

    Args:
        image_shape: ``(rows, cols)`` of one image. Defaults to ``(28, 28)``.
    """

    def __init__(self, image_shape=(28, 28)):
        # Stored unmodified, as scikit-learn's get_params/clone contract expects.
        self.image_shape = image_shape

    def fit(self, X, y=None):
        """Do nothing and return ``self`` (no state is learned)."""
        return self

    def transform(self, X):
        """Return one feature dict per sample in ``X``.

        Args:
            X: Iterable of flattened images, each holding ``rows * cols``
                values. Objects exposing ``to_numpy()`` are converted first.

        Returns:
            A list of dicts as produced by :meth:`extract_features`.
        """
        # Iterating a pandas DataFrame yields column labels, not rows, so
        # convert array-like containers to a NumPy array before looping.
        if hasattr(X, "to_numpy"):
            X = X.to_numpy()
        return [self.extract_features(pixels) for pixels in X]

    def extract_features(self, pixels):
        """Compute the brightness features of a single flattened image.

        Args:
            pixels: Array-like with ``rows * cols`` values.

        Returns:
            Dict with ``"mean_brightness"`` (mean over all pixels) and
            ``"top_half_brightness"`` (mean over the first ``rows // 2`` rows).

        Raises:
            ValueError: If ``pixels`` cannot be reshaped to ``image_shape``.
        """
        # asarray lets plain lists/tuples through; it is a no-op for ndarrays.
        pixels = np.asarray(pixels)
        rows, cols = self.image_shape
        img = pixels.reshape(rows, cols)
        return {
            "mean_brightness": float(pixels.mean()),
            "top_half_brightness": float(img[: rows // 2, :].mean()),
        }


class FeatureClassifier:
    """Logistic-regression classifier on top of hand-crafted image features.

    ``fit`` builds and trains a pipeline of ``FeatureExtractor`` ->
    ``DictVectorizer`` -> ``LogisticRegression``; ``predict`` and
    ``predict_proba`` delegate to that fitted pipeline.
    """

    def fit(self, X, y):
        """Build and train the pipeline on ``X`` / ``y``; return ``self``."""
        pipeline = Pipeline([
            ("features", FeatureExtractor()),
            ("vectorizer", DictVectorizer()),
            ("classifier", LogisticRegression(max_iter=1000)),
        ])
        pipeline.fit(X, y)
        # Assign only after a successful fit so a failed fit never leaves a
        # half-initialised pipeline behind.
        self._pipeline = pipeline
        return self

    def predict(self, X):
        """Return the predicted class label for each sample in ``X``.

        Raises:
            NotFittedError: If ``fit`` has not been called.
        """
        return self._fitted_pipeline().predict(X)

    def predict_proba(self, X):
        """Return the class probabilities for each sample in ``X``.

        Raises:
            NotFittedError: If ``fit`` has not been called.
        """
        return self._fitted_pipeline().predict_proba(X)

    def _fitted_pipeline(self):
        """Return the trained pipeline or raise ``NotFittedError``."""
        pipeline = getattr(self, "_pipeline", None)
        if pipeline is None:
            # NotFittedError subclasses both ValueError and AttributeError, so
            # callers that caught the old AttributeError keep working.
            raise NotFittedError(
                "This FeatureClassifier instance is not fitted yet. "
                "Call 'fit' before using this estimator."
            )
        return pipeline