Simplify cleaning transformers and shorten module names

Move the cleaning transformers into classifiers/cleaning.py (dropping the
separate cleaning package) and implement them as plain classes rather
than BaseEstimator/TransformerMixin subclasses, since scikit-learn's
Pipeline only requires that each step provide fit/transform methods
(duck typing). Also rename feature_classifier.py and bag_of_words.py to
features.py and bow.py for brevity.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Chris Proctor
2026-06-08 10:02:41 -04:00
parent 5f6f171369
commit bbe8054910
6 changed files with 6 additions and 10 deletions

View File

@@ -5,7 +5,7 @@ from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from cleaning.transformers import LowercaseTransformer, PunctuationRemover
from classifiers.cleaning import LowercaseTransformer, PunctuationRemover
class FeatureExtractor:

View File

@@ -1,7 +1,6 @@
import re
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
STOPWORDS = {
@@ -14,18 +13,16 @@ STOPWORDS = {
}
class LowercaseTransformer(BaseEstimator, TransformerMixin):
class LowercaseTransformer:
def fit(self, X, y=None):
self.fitted_ = True
return self
def transform(self, X):
return np.array([msg.lower() for msg in X])
class StopwordRemover(BaseEstimator, TransformerMixin):
class StopwordRemover:
def fit(self, X, y=None):
self.fitted_ = True
return self
def transform(self, X):
@@ -36,9 +33,8 @@ class StopwordRemover(BaseEstimator, TransformerMixin):
return " ".join(w for w in words if w.lower() not in STOPWORDS)
class PunctuationRemover(BaseEstimator, TransformerMixin):
class PunctuationRemover:
def fit(self, X, y=None):
self.fitted_ = True
return self
def transform(self, X):

View File

View File

@@ -3,7 +3,7 @@
Usage:
spam -e
spam classifiers.manual.ManualClassifier
spam classifiers.feature_classifier.FeatureClassifier
spam classifiers.features.FeatureClassifier
spam classifiers.manual.ManualClassifier -t 0.2
spam classifiers.manual.ManualClassifier -a
spam classifiers.manual.ManualClassifier -a 5

View File

@@ -16,4 +16,4 @@ requires = ["hatchling"]
build-backend = "hatchling.build"
[tool.hatch.build.targets.wheel]
packages = ["cli", "classifiers", "cleaning"]
packages = ["cli", "classifiers"]