From 60fb58f08fd3aceb909ffa4722a7ee00bca8478a Mon Sep 17 00:00:00 2001 From: Chris Proctor Date: Fri, 15 Apr 2022 12:10:47 -0400 Subject: [PATCH] Updates to lab and added codebook --- clean_brfss_2020.py | 37 ------------- lab_04.ipynb | 131 ++++++++++++++++++++++++++++++++++++++------ 2 files changed, 114 insertions(+), 54 deletions(-) delete mode 100644 clean_brfss_2020.py diff --git a/clean_brfss_2020.py b/clean_brfss_2020.py deleted file mode 100644 index 3342582..0000000 --- a/clean_brfss_2020.py +++ /dev/null @@ -1,37 +0,0 @@ -# Clean BRFSS 2020 -# ---------------- -# -# This module documents the process used to clean and simplify the -# BRFSS data set used in this lab. Students don't need to interact with this. -# Read more about BRFSS at https://www.cdc.gov/brfss/annual_data/annual_2020.html - -# First, download and unzip https://www.cdc.gov/brfss/annual_data/2020/files/LLCP2020XPT.zip -# You should now have a file called LLCP2020.XPT - -import pandas as pd -df = pd.read_sas("LLCP2020.XPT") -df = df[odf.DISPCODE == 1100] -df["sex"] = df["SEXVAR"].map({1: "male", 2: "female"}) -df = df[df.GENHLTH <= 5] -df["health"] = df.GENHLTH.map({1:5, 2:4, 3:3, 4:2, 5:1}) -df = df[df.MEDCOST <= 2] -df["no_doctor"] = df.MEDCOST.map({1: True, 2: False}) -df = df[df.EXERANY2 <= 2] -df["exercise"] = df.EXERANY2.map({1: True, 2: False}) -df = df[df.SLEPTIM1 < 25] -df["sleep"] = df.SLEPTIM1.astype(int) -df = df[df.INCOME2 < 9] -df["income"] = df.INCOME2.astype(int) -df = df[~df.WTKG3.isna()] -df["weight"] = df.WTKG3 / 100 -df = df[~df.HTM4.isna()] -df["height"] = df.HTM4 / 100 -df = df[(df.SOFEMALE.isin([1, 2, 3, 4, 7, 9])) | (df.SOMALE.isin([1, 2, 3, 4, 7, 9]))] -df["sexual_orientation"] = df.SOFEMALE -df["sexual_orientation"].fillna(df.SOMALE, inplace=True) -df["sexual_orientation"] = df["sexual_orientation"].map({1: "homosexual", 2: "heterosexual", 3: "bisexual", 4: "other", 7: "other", 9: "other"}) -df = df[df._EDUCAG.isin([1, 2, 3, 4])] -df["education"] = 
df._EDUCAG.map({1: "none_completed", 2: "high_school", 3: "some_college", 4: "college"}) -df["age"] = df._AGE_G.map({1: 18, 2: 25, 3: 35, 4: 45, 5: 55, 6: 65}) -df = df[["age", "sex", "income", "education", "sexual_orientation", "height", "weight", "health", "no_doctor", "exercise", "sleep"]] -df.to_csv("brfss_2020.csv", index=False) diff --git a/lab_04.ipynb b/lab_04.ipynb index 3aa6385..cd1f5b8 100644 --- a/lab_04.ipynb +++ b/lab_04.ipynb @@ -1995,7 +1995,7 @@ "id": "931d602b-ddf4-4c8b-80e0-f886267cce76", "metadata": {}, "source": [ - "### 1.5. Plotting\n", + "### 1.5. Plotting \n", "\n", "Pandas has excellent built-in plotting capabilities, but \n", "we are going to use the [seaborn](https://seaborn.pydata.org/) library because it's a bit \n", @@ -2092,7 +2092,7 @@ "id": "2aac9186-86c0-41db-a1c4-8719bb78b46b", "metadata": {}, "source": [ - "**When you want to compare distributions across categories**, a [barplot](https://seaborn.pydata.org/generated/seaborn.barplot.html) is a good choice. Choose one numeric column and one categorical column. \n", + "**When you want to compare the distribution of a numeric variable across categories**, a [barplot](https://seaborn.pydata.org/generated/seaborn.barplot.html) is a good choice. Choose one numeric column and one categorical column. \n", "\n", "Let's see pokémon hit points by legendary/non-legendary. `ci=\"sd\"` shows the standard deviation for each category. " ] @@ -2133,7 +2133,7 @@ "id": "4f75e1fa-a5d7-4d2c-a458-8190a7cd700e", "metadata": {}, "source": [ - "Here, we use a barplot to show average hit points by type." + "Here, we use a barplot to show average hit points by type. `ci=None` removes the standard deviation bars, because they clutter up the plot with too much detail. 
" ] }, { @@ -2167,6 +2167,59 @@ "sns.barplot(data=pokemon, x=\"hp\", y=\"type\", ci=None, palette=\"muted\")" ] }, + { + "cell_type": "markdown", + "id": "213d6139-203f-4d81-a4b1-6f98cb184662", + "metadata": {}, + "source": [ + "**When you want to show how many observations are the intersection of multiple categories,** a [countplot](https://seaborn.pydata.org/generated/seaborn.countplot.html) is a good choice. \n", + "\n", + "To demonstrate this, let's convert the numeric variable `speed` into a categorical variable, `speed_category`, using the built-in function [cut](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.cut.html). " + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "3c8e9f47-9aea-4bf0-a628-7aa1a66a8eee", + "metadata": {}, + "outputs": [], + "source": [ + "bins = [0, 50, 100, 200]\n", + "labels = [\"slow\", \"medium\", \"fast\"]\n", + "pokemon[\"speed_category\"] = pd.cut(pokemon.speed, bins=bins, labels=labels)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "22f78bec-3d18-4133-ba9f-6595d7181ded", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYcAAAEJCAYAAAB/pOvWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAqy0lEQVR4nO3deWCM997//+ckQ0jDbZuEqua+a6lqbxRFqpJWj9BG7M6JJeiitKKVr9oi+OGQ0JSborRHtaVquyPV0GgPpUcTy0lbSy1dRFpBNsQS2Wau3x/uM6fpWJIymURej79mrvU9c5HXXJ/PdX0uk2EYBiIiIr/h5uoCRESk/FE4iIiIA4WDiIg4UDiIiIgDhYOIiDhQOIiIiAOFg4iIODC7uoA75fz5K9hsumVDRKQk3NxM1K59zw3n3zXhYLMZCgcRkTtEzUoiIuLA6eEwd+5cJk2aBMDRo0fp168f3bp1Y8qUKRQVFQFw+vRpBg8eTPfu3Xn55Ze5cuWKs8sSEZGbMDlzbKWkpCTCw8N58skniY6OpkePHvz1r3+ldevWRERE8MgjjzBo0CBGjhxJz549CQoKYsmSJeTm5jJ+/PhS7Ss7+7KalUSkGMMwOH8+k4KCPKBy/n1wdzfj5VWL6tWL9y+4uZmoW9frhus5rc/hwoULLFiwgFGjRnHs2DHS0tLIy8ujdevWAPTt25dFixYxYMAA9u/fz5IlS+zThwwZUupwEBH5vcuXczCZTPj43IfJVPla0Q3DoLCwgAsXMgEcAuJmnPZtTZs2jfDwcGrWrAlARkYGFovFPt9isZCens758+fx8vLCbDYXmy4icruuXr1MjRq1KmUwAJhMJqpW9aBWLQuXL18o1bpOOXPYsGEDDRo0wM/Pj9jYWOBagv2eyWS64fTSutnpkYhUThkZBh4eVf/Q35S7ibt7NS5csGGx1CjxOk4Jh61bt5KZmUmvXr3IyckhNzcXk8lEVlaWfZnMzEy8vb2pU6cOly9fxmq14u7ubp9eWupzEJHfs9lsWK0GlbW/4bdsNhuZmZfs713S57By5Ur769jYWPbt20dUVBQ9evQgOTmZtm3bEhcXh7+/P1WqVKFdu3Zs3bqV4OBg+3SR66lRsxrVPKq4uoxSycsv5NLFPFeXIdfxzTf/ZMGCeaxatd5lNXTt2pkPP1xHgwb3uqyG6ynTm+BiYmKIjIzkypUrtGjRgqFDhwIwffp0Jk2axNtvv02DBg2YP39+WZYlFUg1jyoMmvCRq8solTXzBnMJhYNULE4Ph759+9K3b18AmjdvzsaNGx2WadiwIatWrXJ2KSIi11VYWMjbby/iu+++wWq10azZg4wd+zr33OPFkSOHefPNuRQVFXLvvfeRnn6GsLBw2rRpx+7dX/HBBysoKiqkWrVqjB49lkceacmKFcs5e/YM2dlZnD17hlq1ajNzZhT16lk4cOBbFix4A5MJmjd/GJvNBlxr9lm0aD7ff3+Iq1dzMQyDiRMjadmyNbNn/39cvJhDWloafn6d+PTTON55533uv98XgLFjX6Ffvz/TufOTd+w7qZxd+CIiv7F69fu4u5tZsWI1H3zwMfXqWXj77cUUFRURGTmRF18cxQcfrGXAgBB+/PEHAH799RfeeWcJMTELWblyDePHT2HKlPFcvXoVgAMHvmXWrGjWrPlfatSowSefxFJYWMjUqZMICxvLypVraNOmLfn5+QAcOXKYrKxMli9fyerVG+jevQerV39grzEvL5/Vq9czevRrPPNMD+Lj4wBISzvFL7+k8vjjne/od3LXjK0kIvJHJSb+g0uXLrN//14AiooKqVWrNidO/ASAn18nANq0accDDzQGYP/+vWRnZ/Haa6/Yt2MyuXHq1K8APPpoW+6551qHb7Nmzbl4MYeff/4Js9lMu3btAejatTtvvBEFwCOPtOSll2ryySexpKWd4ttvk/H09LRvu2XLVvbXffr0JyzsJV56aTSffBJLcHBv3N3d7+h
3onAQkUrParXx2mvj7CGQm5tLQUEB2dlZDpfbu7lda3Cx2ay0bduemTOj7PPS089Sr56Fr776Eg8Pj2LrGYZx3cv3//VHPTFxNwsXxhASMoTOnQPw9f1Ptm3bal+uevV/B8X99/vSuHET/vGPnXzxRQLvvvsBd5qalUSk0uvQwY/Y2PUUFhZis9mYO/evLF++GF/f/6RKlSrs2ZMIXGv6OXHiZ0wmE23aPMa+fXtITT0JQFLSboYNG0hBQcEN99O4cRMMwyApaTcAu3fv4tKli8C1M5FOnTrTp09/mjdvwT/+sdPeH3E9ffsOYOnSRbRo8TD16lluuNwfpTMHEan0hg9/gcWLF/Lcc4Ox2aw0bdqMsLCxmM1mZs+exxtvRLF8+WIaNfKlTp26VKtWjQceaMyECVOYPj0CwzBwd3dn7tz5VK9e/Yb7MZvNREXF/N/2ltK0aTNq164DQO/e/ZgxYwrDhoXg5uZGq1Zt2LVrxw0D4vHHOzN37l/p3bufU74Tpw68V5Z0E1zlYLHUqJCXsv725iMpO2fPplK/vu9tbWPJkoUMHDiEOnXqkp5+luHDB7F+/SfUqFHyu42d4dChA8ydO5tVq9aV6A7w338XLht4T0TkblC/fn1ee+1lzGYzhgGTJkW6PBj++tfpfPttMpGRM5w2NIjCQUTkJvr1+wv9+v3F1WUUExk5w+n7UIe0iIg4UDiIiIgDhYOIiDhQOIiIiAOFg4iIONDVSiJSqTjrmSAleW7HmTOnGTiwL//5nw8Umz537nx8fOo7LL9ixXIAXnhh5J0rtIQUDiJSqTjrmSAlfW5HvXoW3n9/zR3f/52mcBARcbETJ35iwYI3uHr1KufPnyMkZAgDBoTY5xcVFREVNYMTJ34GoE+fAfTs2Ydz57J54405pKen4+bmxsiRo3nssQ53pCaFg4hIGcrKymT48EH294GB3cnMzGTYsBdo1649aWmnGD58ULFwOHToABcvXmTlyjXk5Fxg8eL/oWfPPixcGENQUE+eeCKArKwsXnnlBd5/fw2envfcdp0KBxGRMnS9ZiWr1crevUmsWrWSn376katXc4vNf+CBxvzySyr/7/+F0bFjJ15+eQwA//znPlJTU/nb3671TRQVFZGWdoqmTR+87TqdGg4LFy5k27ZtmEwm+vfvz3PPPcfkyZNJTk62j1wYFhZG165dSUxMJCoqivz8fJ555hnCw8OdWZqISLkxbdokatSoSadOnXn66UC2b/+82Pz/+I9arFq1nv3795KU9DXPPz+EVavWY7XaWLTobWrW/A/g2lnJv0Z5vV1OC4d9+/axZ88eNm/eTFFREc8++ywBAQEcPnyY1atX4+3tbV82Ly+PiIgIVq1aRYMGDRg5ciS7du0iICDAWeWJiJQb+/fvY82ajdSrZ2Hr1k+Ba2cT/7J79y4SErYya1Y0HTr4kZy8j4yMdNq2bUds7AaGD3+RlJQThIWNYMOGzeW7Wal9+/Z8+OGHmM1m0tPTsVqteHh4cPr0aaZOncrp06fp2rUrYWFhHDx4EF9fXxo1agRAcHAwCQkJCgcRuePy8gtZM2+wU7b7Rz3//AhefvlFatTwolEjXxo0uJczZ07b53fs2Ikvv9xOaOifqVq1KgEBXWjcuAnh4ROYN282w4aFYBgGkZEz70gwgJOblapUqcKiRYt477336N69O1arlY4dOzJz5kw8PT0ZOXIkGzduxNPTE4vl308y8vb2Jj09vVT7utm45CKuZrG4dojnyiojww2zufi9vldzC7iae+Ontd2O3+/r9xo1uo+4uC0O04cMGcqQIUMdpo8c+bL99YwZf3WYX7++D/PnLypRbW5ubqX6d+j0DulXX32VESNGMGrUKJKSkliyZIl9XmhoKHFxcXTv3t1hvdKOUa6H/VQOFfWPrB724xo2m42iohs/arMysdlsxf4d3uphP04bPuPnn3/m6NGjAFSvXp3AwEC2bt3Ktm3b7MsYhoHZbMbHx4e
srCz79IyMjGJ9EiIiUracFg6nTp0iMjKSgoICCgoK2L59O4899hhz5swhJyeHwsJC1q1bR9euXWnVqhUpKSmkpqZitVqJj4/H39/fWaWJiMgtOK1ZKSAggAMHDtC7d2/c3d0JDAwkLCyM2rVrM3DgQIqKiggMDKRHjx4AREdHM2bMGPLz8wkICLhuU5OIiJQNk2EYd0VDvfocKgeLpYZTxsVxpjXzBqvPwUXOnk2lfn1fV5dRLvz+u3BZn4OIiFRcGj5DRCqV2v9RFXNVjzu+3aKCfM7n3PwS2TffnMuhQwcoKirk1Klf7UN3DxgQQlBQzzte0+1QOIhIpWKu6kHyvBfv+HbbTvgbcPNwGDduInDtuQ5jxows10N3q1lJRMTF+vcPZtq0yQwc2JcjRw7Tv3+wfd6KFcvtD/3ZsyeRESOG8txzg4iIGE9OzgWn1aRwEBEpBzp2fJyPP4694cB558+fZ9myxbz55mJWrlxD+/Ydefvtt5xWj5qVRETKgRYtHrnp/CNHDpOefpZXXx0FgM1mtY/G6gwKBxGRcsDD41onuclk4rd3GBQVFWE2m7HZrLRs2Yq5cxcAkJ+fT25u7nW3dSeoWUlEpBzx8qrBpUuXOH/+PAUFBezdmwRcO7P4/vtD/PJLKgDvv/83li5d6LQ6dOYgIlKOeHl5MWhQKCNGDMXb24cWLR4GoG7dekyaNI1p0yZjs1mxWHyYNm2m0+rQHdJSoegOaSmN690h7cr7HFyptHdI68xBRCqVa3/Ay+8f8fJCfQ4iIuJA4SAiIg4UDiJyV7tLulVvi2HYgNI9XVPhICJ3LbO5KleuXKy0AWEYBkVFhVy4kEXVqtVKta46pEXkrlW7toXz5zO5fPmCq0txGTc3d6pX98LLq3R3UyscROSu5e5upl69Bq4uo0JyarPSwoULefbZZwkKCmLlypUAJCYmEhwcTGBgIAsWLLAve/ToUfr160e3bt2YMmUKRUVFzixNRERuwmnhsG/fPvbs2cPmzZv53//9X1atWsWxY8eIiIhg6dKlbN26lcOHD7Nr1y4Axo8fz9SpU9m2bRuGYbB+/XpnlSYiIrfgtHBo3749H374IWazmezsbKxWKxcvXsTX15dGjRphNpsJDg4mISGBtLQ08vLyaN26NQB9+/YlISHBWaWJiMgtOLXPoUqVKixatIj33nuP7t27k5GRgcVisc/39vYmPT3dYbrFYiE9Pb1U+7rZbeAirmax1HB1CSKl4vQO6VdffZURI0YwatQoTp486TD/98PT/nZ6aWhspcqhov6R1dhKUt7camwlpzUr/fzzzxw9ehSA6tWrExgYyN69e8nKyrIvk5GRgbe3Nz4+PsWmZ2Zm4u3t7azSRETkFpwWDqdOnSIyMpKCggIKCgrYvn07ISEhpKSkkJqaitVqJT4+Hn9/fxo2bIiHhwfJyckAxMXF4e/v76zSRETkFpzWrBQQEMCBAwfo3bs37u7uBAYGEhQURJ06dRgzZgz5+fkEBATQvXt3AGJiYoiMjOTKlSu0aNGCoUOHOqs0ERG5BT3PQSoUPc9B5M5wWZ+DiIhUXAoHERFxoHAQEREHCgcREXGgcBAREQcKBxERcaBwEBERBwoHERFxoHAQEREHCgcREXGgcBAREQcKBxERcaBwEBERBwoHERFxoHAQEREHCgcREXHgtCfBlVc1alajmkcVV5dRKnn5hVy6mOfqMkSkEnFqOCxevJjPPvsMuPbY0AkTJjB58mSSk5OpXr06AGFhYXTt2pXExESioqLIz8/nmWeeITw83Ck1VfOoUiGfJHYJhYOIlB2nhUNiYiK7d+9m06ZNmEwmXnzxRb744gsOHz7M6tWr8fb2ti+bl5dHREQEq1atokGDBowcOZJdu3YREBDgrPJEROQmnNbnYLFYmDRpElWrVqVKlSo0btyY06dPc/r0aaZOnUpwcDCLFi3CZrNx8OBBfH19adSoEWazmeDgYBISEpxVmoiI3ILTzhy
aNm1qf33y5Em2bt3KmjVr2LdvHzNnzsTT05ORI0eyceNGPD09sVgs9uW9vb1JT093VmkiInILTu+Q/vHHHxk5ciQTJ07kgQceYMmSJfZ5oaGhxMXF0b17d4f1TCZTqfZTt67XbddanlksNVxdgtwGHT+paJwaDsnJybz66qtEREQQFBTE8ePHOXnyJN26dQPAMAzMZjM+Pj5kZWXZ18vIyCjWJ1ES2dmXsdmMWy5XUf+TZmZecnUJ5YKOn8id4eZmuumPaqf1OZw5c4bRo0cTExNDUFAQcC0M5syZQ05ODoWFhaxbt46uXbvSqlUrUlJSSE1NxWq1Eh8fj7+/v7NKExGRW3DamcOKFSvIz88nOjraPi0kJISXXnqJgQMHUlRURGBgID169AAgOjqaMWPGkJ+fT0BAwHWbmkREpGw4LRwiIyOJjIy87rzBgwc7TPPz82Pz5s3OKkdEREpBw2eIiIgDhYOIiDhQOIiIiAOFg4iIOChROFzvbuWffvrpjhcjIiLlw03D4cKFC1y4cIERI0aQk5Njf5+VlcUrr7xSVjWKiEgZu+mlrOPGjePrr78GoEOHDv9eyWzmT3/6k3MrExERl7lpOKxYsQKAyZMnExUVVSYFiYiI65XoJrioqCjS0tLIycnBMP49ftHDDz/stMJERMR1ShQOMTExrFq1irp169qnmUwmtm/f7rTCRETEdUoUDlu3buXzzz/Hx8fH2fWIiEg5UKJLWRs0aKBgEBGpREp05uDn58e8efN4+umnqVatmn26+hxERO5OJQqH2NhYgGLPdVafg4jI3atE4bBjxw5n1yEiIuVIicJh5cqV153+3HPP3dFiRESkfChROPzwww/21wUFBSQnJxe7Y1pERO4uJb4J7rfOnTvHhAkTbrne4sWL+eyzzwAICAhgwoQJJCYmEhUVRX5+Ps888wzh4eEAHD16lMjISC5fvky7du2YMWMGZrPTHlQnIiI38YeG7K5Tpw5paWk3XSYxMZHdu3ezadMm4uLi+P7774mPjyciIoKlS5eydetWDh8+zK5duwAYP348U6dOZdu2bRiGwfr16/9IaSIicgeUus/BMAwOHz5c7G7p67FYLEyaNImqVasC0LhxY06ePImvry+NGjUCIDg4mISEBJo0aUJeXh6tW7cGoG/fvixatIhBgwb9kc8kIiK3qdR9DnDtprhbNSs1bdrU/vrkyZNs3bqV0NBQLBaLfbq3tzfp6elkZGQUm26xWK77DAkRESkbpepzSEtLo6ioCF9f3xLv4Mcff2TkyJFMnDgRs9lMSkpKsfkmk6nYYH6/nV4adet6lWr5isZiqeHqEuQ26PhJRVOicEhNTeWVV14hIyMDm81G7dq1Wb58OY0bN77pesnJybz66qtEREQQFBTEvn37yMrKss/PyMjA29sbHx+fYtMzMzPx9vYu1QfJzr6MzeYYMr9XUf+TZmZecnUJ5YKOn8id4eZmuumP6hJ1SM+cOZMXX3yR/fv3k5yczMsvv8yMGTNuus6ZM2cYPXo0MTExBAUFAdCqVStSUlJITU3FarUSHx+Pv78/DRs2xMPDg+TkZADi4uLw9/cv6WcUEZE7rERnDtnZ2fTp08f+vl+/frz//vs3XWfFihXk5+cTHR1tnxYSEkJ0dDRjxowhPz+fgIAAunfvDlwbFjwyMpIrV67QokULhg4d+gc+joiI3AklCger1cqFCxeoVasWcO0+h1uJjIwkMjLyuvM2b97sMK158+Zs3LixJOWIiIiTlSgchgwZwl/+8heeeeYZAD777DOGDRvm1MJERMR1StTnEBAQAEBhYSEnTpwgPT2drl27OrUwERFxnRKdOUyaNInBgwczdOhQ8vPz+fjjj4mIiODdd991dn0iIuICJTpzOH/+vL2D2MPDg+HDh5OZmenUwkRExHVKFA5Wq7XYHctZWVnXvXFNRETuDiVqVho+fDi9e/emc+fOmEwmEhMTSzQqq4iIVEwlCof+/fvzyCO
PsGfPHtzd3XnhhRdo1qyZs2sTEREXKfEDE5o3b07z5s2dWYuIiJQTf+h5DiIicndTOIiIiAOFg4iIOFA4iIiIA4WDiIg4UDiIiIgDhYOIiDhQOIiIiAOFg4iIOHB6OFy+fJkePXpw6tQpACZPnkxgYCC9evWiV69efPHFFwAkJiYSHBxMYGAgCxYscHZZIiJyEyUePuOPOHDgAJGRkZw8edI+7fDhw6xevRpvb2/7tLy8PCIiIli1ahUNGjRg5MiR7Nq1y/6QIRERKVtOPXNYv34906dPtwdBbm4up0+fZurUqQQHB7No0SJsNhsHDx7E19eXRo0aYTabCQ4OJiEhwZmliYjITTj1zGH27NnF3mdnZ9OxY0dmzpyJp6cnI0eOZOPGjXh6emKxWOzLeXt7F3t+hIiIlC2nhsPvNWrUiCVLltjfh4aGEhcXR/fu3R2WNZlMpdp23bpet11feWax1HB1CXIbdPykoinTcDh+/DgnT56kW7duABiGgdlsxsfHh6ysLPtyGRkZxfokSiI7+zI2262fTldR/5NmZl5ydQnlgo6fyJ3h5ma66Y/qMr2U1TAM5syZQ05ODoWFhaxbt46uXbvSqlUrUlJSSE1NxWq1Eh8fj7+/f1mWJiIiv1GmZw7NmzfnpZdeYuDAgRQVFREYGEiPHj0AiI6OZsyYMeTn5xMQEHDdpiYRESkbZRIOO3bssL8ePHgwgwcPdljGz8+PzZs3l0U5IiJyC7pDWkREHCgcRETEgcJBREQcKBxERMSBwkFERBwoHERExIHCQUREHCgcRETEgcJBREQcKBxERMSBwkFERBwoHERExIHCQUREHCgcRETEgcJBREQcKBxERMSBwkFERBw4PRwuX75Mjx49OHXqFACJiYkEBwcTGBjIggUL7MsdPXqUfv360a1bN6ZMmUJRUZGzSxMRkRtwajgcOHCAgQMHcvLkSQDy8vKIiIhg6dKlbN26lcOHD7Nr1y4Axo8fz9SpU9m2bRuGYbB+/XpnliYiIjfh1HBYv34906dPx9vbG4CDBw/i6+tLo0aNMJvNBAcHk5CQQFpaGnl5ebRu3RqAvn37kpCQ4MzSRETkJszO3Pjs2bOLvc/IyMBisdjfe3t7k56e7jDdYrGQnp7uzNJEROQmnBoOv2cYhsM0k8l0w+mlUbeu1x+uqyKwWGq4ugS5DTp+UtGUaTj4+PiQlZVlf5+RkYG3t7fD9MzMTHtTVEllZ1/GZnMMmd+rqP9JMzMvubqEckHHT+TOcHMz3fRHdZleytqqVStSUlJITU3FarUSHx+Pv78/DRs2xMPDg+TkZADi4uLw9/cvy9JEROQ3yvTMwcPDg+joaMaMGUN+fj4BAQF0794dgJiYGCIjI7ly5QotWrRg6NChZVmaiIj8RpmEw44dO+yv/fz82Lx5s8MyzZs3Z+PGjWVRjoiI3ILukBYREQcKBxERcaBwEBERBwoHERFxoHAQEREHZXopq4hUbjVqVqOaRxVXl1FiefmFXLqY5+oyXELhICJlpppHFQZN+MjVZZTYmnmDuUTlDAc1K4mIiAOFg4iIOFA4iIiIA4WDiIg4UDiIiIgDhYOIiDhQOIiIiAOFg4iIOFA4iIiIA4WDiIg4cMnwGUOHDiU7Oxuz+druZ86cyS+//MLbb79NYWEhw4cPZ/Dgwa4oTUREcEE4GIbBiRMn2Llzpz0c0tPTCQ8PJzY2lqpVqxISEkKHDh1o0qRJWZcnIiK4IBxOnDiByWRixIgRZGdn8+c//5l77rmHjh07UqtWLQC6detGQkICYWFhZV2eiIjggj6Hixcv4ufnx5IlS3j//fdZu3Ytp0+fxmKx2Jfx9vYmPT29rEsTEZH/U+ZnDo8++iiPPvooAJ6envTv35+oqChGjRpVbDmTyVSq7dat63XHaiyPLJYari5BboOOX8VVWY9dmYfDP//5TwoLC/Hz8wOu9UE0bNiQrKws+zIZGRl4e3uXarvZ2Zex2YxbLldRD3R
m5iVXl1Au6PhVbBXx+N2tx87NzXTTH9Vl3qx06dIl5s2bR35+PpcvX2bTpk288cYbJCUlce7cOa5evcrnn3+Ov79/WZcmIiL/p8zPHJ566ikOHDhA7969sdlsDBo0iLZt2xIeHs7QoUMpLCykf//+tGzZsqxLExGR/+OS+xzGjh3L2LFji00LDg4mODjYFeWIiMjv6A5pERFxoHAQEREHCgcREXHgkj4HkcrEVlRYoS7hLCrI53xOgavLEBdTOIg4mZu5CsnzXnR1GSXWdsLfAIVDZadmJRERcaBwEBERBwoHERFxoD4HEZEbqGgXE8Cdu6BA4SAicgMV7WICuHMXFKhZSUREHCgcRETEgcJBREQcKBxERMSBwkFERBwoHERExIHCQUREHCgcRETEQbkKh08//ZRnn32Wrl278tFHH7m6HBGRSqvc3CGdnp7OggULiI2NpWrVqoSEhNChQweaNGni6tJcrjLfwi8irlFuwiExMZGOHTtSq1YtALp160ZCQgJhYWElWt/NzVTifdWrfc8fKdFl3MxVOLRsoqvLKJX/HjUXN7dCp2y7oh0/gKo167q6hFIpzf+n0qpox6+iHTso2fG71TImwzCMO1XQ7Vi+fDm5ubmEh4cDsGHDBg4ePMisWbNcXJmISOVTbvocrpdRJpPzfr2IiMiNlZtw8PHxISsry/4+IyMDb29vF1YkIlJ5lZtwePzxx0lKSuLcuXNcvXqVzz//HH9/f1eXJSJSKZWbDmkfHx/Cw8MZOnQohYWF9O/fn5YtW7q6LBGRSqncdEiLiEj5UW6alUREpPxQOIiIiAOFg4iIOFA4iIiIA4VDOdKlSxdOnTrl6jLkDtq7dy+hoaEATJkyhUOHDrm4IvmtyZMn061bN+Lj40u8zo4dO1i5cqUTqyofys2lrCJ3u9mzZ7u6BPmdTZs2cfDgQapWrVridb7//nsnVlR+KBxc5OzZs7z++uvk5ubi5uZGZGSkfZ7NZmPOnDkkJSVhMpno2bMnL730EsHBwfzP//wPjRs3Zty4cXh5eTFjxgy+++47lixZwrvvvuvCT3R32bt3L8uWLcMwDH755Re6detGjRo1+Pvf/w7AO++8w5EjR1i0aBFFRUXcd999zJo1i9q1a7N7926ioqLw8PDgv/7rv+zbDA0NtQ8kuXjxYlatWgXApEmTaN++Pe3bt2f06NE0atSIH374gUceeYT27duzadMmcnJyWLJkCY0bNy77L+MuNWrUKAzDYMCAAbRu3ZqjR4+Sk5ND7dq1eeutt6hVqxYRERH8+OOPAAwaNIg2bdqwdu1aAO6991769evnyo/gVGpWcpGNGzfy5JNPEhsby/jx40lOTrbP+/jjjzlz5gybN29mw4YNfP755+zcuZOAgACSkpIA+OGHH/jmm28A+Oqrr3jyySdd8THuagcOHCAqKootW7awdu1a6tSpQ2xsLA8++CBr167lzTffZMWKFcTFxfHEE08QExNDQUEBkyZNYtGiRcTGxlKtWrVS7fP48eO88sorJCQkcOjQIdLS0li3bh09evRg3bp1TvqkldOyZcsAWLRoEefOnWPt2rVs27aN+++/n08//ZRvv/2WnJwc4uLiWLlyJd988w1NmjQhJCSEkJCQuzoYQOHgMn5+frz33nuMGzeO9PR0hgwZYp+3d+9e+vTpg7u7O9WrVyc4OJikpCSefPJJkpKS+Omnn2jSpAlubm5kZ2fz1Vdf8dRTT7nw09ydmjVrRoMGDahevTq1a9fGz88PuPaLcceOHZw5c4ahQ4fSq1cvPvroI1JTUzl+/Dje3t72X/h9+vQp1T7r1atHixYtcHNzo379+sX2efHixTv7AQUAX19fJk6cyIYNG4iOjua7774jNzeXpk2bkpKSwgsvvMDmzZt5/fXXXV1qmVKzkou0bduWLVu2sHPnTrZu3cqmTZvs82w2W7FlDcPAarXy6KOPMmHCBBITE2nfvj1169YlISGBwsJC7r333rL
+CHe9KlWqFHvv7u5uf22z2WjTpo3912d+fj5Xrlzh9OnTxY7fb9f5F5PJVGwU4sLCfz/34vdt39dbX+6sw4cPM27cOIYPH063bt1wc3PDMAxq167Nli1b+Prrr9m1axd9+vRhy5Ytri63zOjMwUXmzZvHJ598Qp8+fZg2bRpHjhyxz+vYsSNxcXFYrVauXr3Kp59+SocOHXB3d6dVq1asWrWK9u3b07FjR5YtW0ZAQIALP0nl1LJlS7777jtSUlIAWLp0KfPmzePBBx8kOzubY8eOAVz3j0nt2rX59ddfyc/P58KFC8WaFKXs7d+/n/bt2zNw4ECaNGnC119/jdVqZfv27bz++us8+eSTREZG4unpyZkzZ3B3d6eoqMjVZTudzhxcJDQ0lHHjxrFp0ybc3d2ZPn06MTExAPzlL3/h5MmT9OrVi8LCQnr27EnXrl0BCAgIYP/+/TRu3BiLxUJ2drb6G1zAYrEwZ84cxo4di81mw8fHhzfeeIMqVaowf/58xo8fj9lspkWLFg7rNm3alICAAIKCgmjYsCFt27Z1wSeQf3n22WcJCwsjODiYKlWq8OCDD3Lq1ClGjx7Ntm3bCAoKwsPDg8DAQB588EEuXrzIxIkTqVevnv0y5buRBt4TEREHalYSEREHCgcREXGgcBAREQcKBxERcaBwEBERBwoHkRI6dOgQXbp0ccq2n3/+ec6dO+eUbYv8EQoHkXLg66+/dnUJIsXoJjipkK5cucLkyZNJTU3Fzc2Nhx9+mKCgIGJiYvDx8eHXX3+lWrVqREdH07hxYwoKCoiJiWH//v1YrVZatGhBZGQkXl5epKenM3PmTM6cOUNhYSFBQUGMGjUKgDVr1vDBBx/g5eVFs2bNSlRbSkoK06ZN49y5c7i5ufHyyy/z7LPP8uWXX7J8+XIKCgo4d+4cvXv3ZuzYsUyePBmAYcOG8c477+Dm5nbDemJjY3nnnXeoVq0aHTt25MMPP+TIkSMUFhYSHR1NUlIS7u7utGzZksmTJ+Pl5UWXLl1o2bIlx48fp2fPnqxdu5Yvv/wSNzc3rl69SpcuXYiPj6du3brOOVhSMRkiFdCmTZuM559/3jAMwygqKjKmTJlirF+/3mjevLmxf/9+wzAMY82aNUafPn0MwzCMt956y4iOjjZsNpthGIbx5ptvGtOnTzcMwzBCQ0ON7du3G4ZhGHl5eUZoaKixZcsW48iRI4afn5+RkZFhGIZhTJ061XjqqaduWVvv3r2N1atXG4ZhGKdPnzaefvpp4+LFi8aQIUOMlJQUwzAM4+zZs8ZDDz1kZGdnG4ZhGM2aNbO/vlE9P/74o+Hn52ecOXPG/pmaNWtmGIZhLFy40AgLCzMKCgoMq9VqTJo0yZg6daphGIbx1FNPGYsXL7bX17NnT2Pnzp2GYRjGhg0bjPDw8JJ/8VJp6MxBKqS2bduyYMECQkNDefzxxxk2bBjnzp2jefPmtGvXDoB+/foxc+ZMzp8/z86dO7l06RKJiYnAtcHu6tatS25uLvv37ycnJ4eFCxcCkJuby7Fjxzh79iydOnXCYrEA14Y12b17903runDhAseOHWPAgAEANGjQwP4MiGXLlrFz507i4+P5+eefMQyDq1evFlv/ZvVkZGTQqVMn6tevD8CQIUN46623gGvDtoeHh9sHCwwNDWX06NH27f7rOwEYPHgw69evJyAggHXr1jFhwoTSfv1SCSgcpEJq1KgRX3zxBXv37mXPnj0899xzREZGOoxiahgG7u7u2Gw2IiIi7IMUXrlyhfz8fGw2G4ZhsHbtWqpXrw7AuXPn8PDwYP369cVGTy3JCKlm87X/UiaTyT7txIkT1K9fnz59+vCnP/2Jdu3a0a9fP/7+978X2z5w03piY2NvWM/vR/K12WzFRnv19PS0vw4ODmb+/Pns2bOH3NxcHnvssVt+Lql81CEtFdKaNWuYPHkyTzzxBOPHj+eJJ57go48+4tixY/YRUdetW0ebNm2oWbO
mfX5BQQE2m42pU6cyf/58vLy8aN26tf2ZwBcvXmTgwIFs376dxx9/nK+//pqzZ88CFBtW/Ua8vLx4+OGHiYuLA+DMmTMMHDiQH374gcuXLzN27Fi6dOnCvn377LUA9pE+b1bPE088QVJSEunp6QBs2LDBvt/OnTuzdu1aCgsLsdlsfPTRR3Tq1Om6NVavXp2ePXsSERFBSEjIH/j2pTLQwHtSIeXm5hIREcHx48epXr069957L7169WL27Nk0b96ctLQ06tSpw+zZs7nvvvvIy8tj7ty57Nu3D6vVykMPPcSsWbPw8vLi1KlTzJo1i9OnT1NQUECPHj0YM2YMcK0DePny5dxzzz20bNmSr776ih07dty0ttTUVGbMmEFWVhYmk4kxY8bQpUsXIiMj2bt3LzVr1uT+++/np59+YtKkSXTu3JnXXnuN77//nqVLl+Lp6XnDeuLj41m2bBlVq1bloYceYsuWLXz33XfFPl9RUREtW7Zk6tSp1KxZky5durBw4UL++7//217jsWPHCAkJ4R//+Ac1atRw3oGSCkvhIHeNvXv3MmvWLOLj411dilP8+uuvfPLJJ7zyyiu4ubnx+eef8+677xY7gygJwzB49913SUtLY8aMGU6qVio69TmIlNLmzZtZsWLFdecFBwfz4osvOmW/9evXJyMjg+DgYNzd3alRowZz5swp9Xaefvpp6tSpw9tvv+2EKuVuoTMHERFxoA5pERFxoHAQEREHCgcREXGgcBAREQcKBxERcaBwEBERB/8/PIZiHnma5j0AAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.countplot(data=pokemon, x=\"speed_category\", hue=\"legendary\")" + ] + }, { "cell_type": "markdown", "id": "fd508c13-9900-4be1-958f-4f9e9e9b633a", @@ -2179,7 +2232,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 33, "id": "444d9832-bd57-4238-9ea4-5ee898847170", "metadata": {}, "outputs": [ @@ -2189,7 +2242,7 @@ "" ] }, - "execution_count": 31, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" }, @@ -2218,7 +2271,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 34, "id": "86f9747b-00a3-407f-9b73-0bce40bac50d", "metadata": {}, "outputs": [ @@ -2228,7 +2281,7 @@ "" ] }, - "execution_count": 32, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" }, @@ -2257,17 +2310,17 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 35, "id": "7385237c-6a5c-4041-af46-559d6d84d1fa", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 33, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" }, @@ -2299,7 +2352,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 36, "id": "3b268a30-42ff-4ab8-b2cd-c58a76121f9c", "metadata": {}, "outputs": [], @@ -2317,7 +2370,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 37, "id": "ee30c851-14b1-4901-9182-4304d54d53a6", "metadata": {}, "outputs": [], @@ -2335,7 +2388,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 38, "id": "13eeecd8-2518-4ed9-aac5-727a96b5bf80", "metadata": {}, "outputs": [], @@ -2353,7 +2406,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 39, "id": "4ee2eb69-2f9a-42e7-b5d3-9499631bfd06", "metadata": {}, "outputs": [], @@ -2361,17 +2414,35 @@ "# Your code here" ] }, + { + "cell_type": "markdown", + "id": "84b1e240-4f75-4c86-8c1f-1026aa223717", + 
"metadata": {}, + "source": [ + "**1.5.4.** Create a plot showing the number of people at each income level, for each education level. " + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "d7e02da8-beab-40e7-95d0-74a5c2bc838e", + "metadata": {}, + "outputs": [], + "source": [ + "# Your code here" + ] + }, { "cell_type": "markdown", "id": "ac717580-4157-402c-9262-b2b50dfe606f", "metadata": {}, "source": [ - "**1.5.4.** Plot side-by-side scatter plots showing the relationship between height and weight for males and females. " + "**1.5.5.** Plot side-by-side scatter plots showing the relationship between height and weight for males and females. (There are so many overlapping dots that the plot will be more informative if you lower the opacity of each dot. Try using `alpha=0.1` and `edgecolor=None`.)" ] }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 41, "id": "b00dd7d6-226b-469c-86d8-b71b328aa576", "metadata": {}, "outputs": [], @@ -2386,8 +2457,34 @@ "source": [ "## 2. Crafting a data argument\n", "\n", - "Everything up to here are just tools, worthless without a clear research question and a convincing argument. Frame a question that interests you which might be answerable using the `people` dataset. Then do your best to find the answer in the space below." + "Everything up to here are just tools, worthless without a clear research question and a convincing argument. Choose a research question that interests you which might be answerable using the `people` dataset. Then do your best to find the answer in the space below. This answer should include data analysis (code cells) as well as written argument (text cells) explaining what the data means and why you believe it answers your question. \n", + "\n", + "Examples of research questions might include:\n", + "\n", + "- Do older people tend to have higher incomes?\n", + "- Do people who sleep at least 6 hours a night tend to report better health? 
\n", + "\n", + "**A note of caution:** this lab has given you tools for exploring associations--patterns that tend to co-occur. These tools *do not* equip you to argue that one variable causes another to change. For example: Plot 1.5.5 showed that people who are taller also tend to be heavier, with a lot of individual variation. But are people heavier *because* they are taller? Are they taller because they are heavier? Or maybe neither variable causes the other--perhaps they're both caused by something else. If you want to be able to answer questions like these, take a course on statistics." ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "6f934273-b829-4bc2-a7f4-a27a3fc44a99", + "metadata": {}, + "outputs": [], + "source": [ + "# Your code here. Feel free to add new text cells and code cells as necessary." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b4b852b-402c-45d4-b3bb-840e47b249ed", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {