Initial commit

This commit is contained in:
Chris Proctor
2026-06-06 21:36:59 -04:00
commit aaf5b17ad8
14 changed files with 811 additions and 0 deletions

32
cli/data.py Normal file
View File

@@ -0,0 +1,32 @@
import csv
import io
import os
import urllib.request
import zipfile

import pandas as pd
from sklearn.datasets import get_data_home
# Download location of the zipped SMS Spam Collection dataset (UCI archive host).
# The literal is split across two lines only to keep line length down.
URL = (
"https://archive.ics.uci.edu/ml/machine-learning-databases/"
"00228/smsspamcollection.zip"
)
def load_spam():
    """Load the SMS Spam Collection as a DataFrame, downloading it on first use.

    The dataset is cached at ``<data_home>/spam/SMSSpamCollection``, where
    ``<data_home>`` is whatever ``sklearn.datasets.get_data_home()`` returns.
    If nothing exists at that path yet, the file is fetched with ``_fetch``.

    Returns:
        A ``pandas.DataFrame`` with two string columns, ``label`` and
        ``message``, parsed from the tab-separated file.
    """
    path = os.path.join(get_data_home(), "spam", "SMSSpamCollection")
    if not os.path.exists(path):
        _fetch(path)
    # The file is raw tab-separated text, not quoted CSV. With the default
    # quoting rules a message that starts with '"' is treated as a quoted
    # field: its quotes are stripped and an unbalanced quote swallows the
    # following lines, silently merging rows. QUOTE_NONE keeps every line
    # as exactly one record with its text untouched.
    return pd.read_csv(
        path,
        sep="\t",
        header=None,
        names=["label", "message"],
        quoting=csv.QUOTE_NONE,
    )
def _fetch(dest, timeout=60):
    """Download the SMS Spam Collection archive and extract the data file to *dest*.

    The zip archive at ``URL`` is read fully into memory, the
    ``SMSSpamCollection`` member is extracted, and its bytes are written to
    *dest*. The write goes to a temporary sibling file first and is then
    renamed into place, so an interrupted run never leaves a truncated file
    at *dest* that a later existence check would mistake for a valid cache.

    Args:
        dest: Path of the file to create. Missing parent directories are
            created.
        timeout: Seconds to wait on blocking network operations before
            giving up, instead of hanging indefinitely.

    Raises:
        urllib.error.URLError: If the archive cannot be downloaded.
        zipfile.BadZipFile: If the downloaded bytes are not a valid zip file.
        KeyError: If the archive has no ``SMSSpamCollection`` member.
        OSError: If the destination cannot be written.
    """
    dest_dir = os.path.dirname(dest)
    # os.makedirs("") raises, so skip it when dest is a bare filename.
    if dest_dir:
        os.makedirs(dest_dir, exist_ok=True)

    print("Downloading SMS Spam Collection...")
    with urllib.request.urlopen(URL, timeout=timeout) as response:
        data = response.read()

    with zipfile.ZipFile(io.BytesIO(data)) as zf:
        content = zf.read("SMSSpamCollection")

    # Include the PID so concurrent downloaders do not share a temp file.
    tmp_path = f"{dest}.{os.getpid()}.part"
    try:
        with open(tmp_path, "wb") as f:
            f.write(content)
        # Atomic on the same filesystem: dest is either absent or complete.
        os.replace(tmp_path, dest)
    except BaseException:
        # Best-effort cleanup of the partial file; re-raise the real error.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise