Initial commit
This commit is contained in:
32
cli/data.py
Normal file
32
cli/data.py
Normal file
@@ -0,0 +1,32 @@
|
||||
import io
|
||||
import os
|
||||
import zipfile
|
||||
import urllib.request
|
||||
|
||||
import pandas as pd
|
||||
from sklearn.datasets import get_data_home
|
||||
|
||||
|
||||
# Download location of the zipped SMS Spam Collection archive (used by `_fetch`).
URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/00228/smsspamcollection.zip"
|
||||
|
||||
|
||||
def load_spam():
    """Load the SMS Spam Collection into a DataFrame.

    The data file is looked up at ``<data_home>/spam/SMSSpamCollection``,
    where ``<data_home>`` is the directory returned by
    ``sklearn.datasets.get_data_home()``. If the file is not there yet it is
    downloaded first via ``_fetch``.

    Returns:
        A ``pandas.DataFrame`` with two columns, ``label`` and ``message``.
    """
    import csv  # local import: only needed for the quoting constant below

    path = os.path.join(get_data_home(), "spam", "SMSSpamCollection")
    if not os.path.exists(path):
        _fetch(path)
    # The file is plain tab-separated text, not quoted CSV. With pandas'
    # default quoting, a stray double quote inside a message would be read as
    # the start of a quoted field and could swallow the following lines, so
    # quote handling is disabled and every '"' is kept as literal text.
    return pd.read_csv(
        path,
        sep="\t",
        header=None,
        names=["label", "message"],
        quoting=csv.QUOTE_NONE,
    )
|
||||
|
||||
|
||||
def _fetch(dest, timeout=60):
    """Download the SMS Spam Collection archive and extract it to *dest*.

    The zip archive at ``URL`` is read fully into memory, its
    ``SMSSpamCollection`` member is extracted, and the bytes are written to
    *dest*.

    Args:
        dest: Path of the file to create. Missing parent directories are
            created.
        timeout: Seconds to wait on blocking network operations before the
            download is abandoned.

    Raises:
        urllib.error.URLError: If the archive cannot be downloaded.
        zipfile.BadZipFile: If the downloaded data is not a valid zip file.
        KeyError: If the archive has no ``SMSSpamCollection`` member.
        OSError: If the destination cannot be written.
    """
    parent = os.path.dirname(dest)
    if parent:
        # A bare filename has an empty dirname, and os.makedirs("") raises.
        os.makedirs(parent, exist_ok=True)

    print("Downloading SMS Spam Collection...")
    # Without a timeout a stalled connection would block forever.
    with urllib.request.urlopen(URL, timeout=timeout) as response:
        archive = response.read()

    with zipfile.ZipFile(io.BytesIO(archive)) as zf:
        content = zf.read("SMSSpamCollection")

    # Write to a sibling temp file and rename it into place, so an interrupted
    # write never leaves a truncated file at *dest* that later calls would
    # mistake for a complete cached copy.
    partial = dest + ".part"
    try:
        with open(partial, "wb") as out:
            out.write(content)
        os.replace(partial, dest)
    finally:
        # After a successful rename the temp file is gone; this only cleans
        # up leftovers from a failed write.
        if os.path.exists(partial):
            os.remove(partial)
|
||||
Reference in New Issue
Block a user