Simplify cleaning transformers and shorten module names
Move cleaning transformers into classifiers/cleaning.py (dropping the separate cleaning package) and implement them as plain classes rather than BaseEstimator/TransformerMixin subclasses, since Pipeline only needs fit/transform via duck typing.

Also rename feature_classifier.py and bag_of_words.py to features.py and bow.py for brevity.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -5,7 +5,7 @@ from sklearn.feature_extraction import DictVectorizer
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
from sklearn.pipeline import Pipeline
|
||||
|
||||
from cleaning.transformers import LowercaseTransformer, PunctuationRemover
|
||||
from classifiers.cleaning import LowercaseTransformer, PunctuationRemover
|
||||
|
||||
|
||||
class FeatureExtractor:
|
||||
@@ -1,7 +1,6 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
from sklearn.base import BaseEstimator, TransformerMixin
|
||||
|
||||
|
||||
STOPWORDS = {
|
||||
@@ -14,18 +13,16 @@ STOPWORDS = {
|
||||
}
|
||||
|
||||
|
||||
class LowercaseTransformer(BaseEstimator, TransformerMixin):
|
||||
class LowercaseTransformer:
|
||||
def fit(self, X, y=None):
|
||||
self.fitted_ = True
|
||||
return self
|
||||
|
||||
def transform(self, X):
|
||||
return np.array([msg.lower() for msg in X])
|
||||
|
||||
|
||||
class StopwordRemover(BaseEstimator, TransformerMixin):
|
||||
class StopwordRemover:
|
||||
def fit(self, X, y=None):
|
||||
self.fitted_ = True
|
||||
return self
|
||||
|
||||
def transform(self, X):
|
||||
@@ -36,9 +33,8 @@ class StopwordRemover(BaseEstimator, TransformerMixin):
|
||||
return " ".join(w for w in words if w.lower() not in STOPWORDS)
|
||||
|
||||
|
||||
class PunctuationRemover(BaseEstimator, TransformerMixin):
|
||||
class PunctuationRemover:
|
||||
def fit(self, X, y=None):
|
||||
self.fitted_ = True
|
||||
return self
|
||||
|
||||
def transform(self, X):
|
||||
@@ -3,7 +3,7 @@
|
||||
Usage:
|
||||
spam -e
|
||||
spam classifiers.manual.ManualClassifier
|
||||
spam classifiers.feature_classifier.FeatureClassifier
|
||||
spam classifiers.features.FeatureClassifier
|
||||
spam classifiers.manual.ManualClassifier -t 0.2
|
||||
spam classifiers.manual.ManualClassifier -a
|
||||
spam classifiers.manual.ManualClassifier -a 5
|
||||
|
||||
@@ -16,4 +16,4 @@ requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[tool.hatch.build.targets.wheel]
|
||||
packages = ["cli", "classifiers", "cleaning"]
|
||||
packages = ["cli", "classifiers"]
|
||||
|
||||
Reference in New Issue
Block a user