Updates to lab and added codebook
This commit is contained in:
parent
1c08207a70
commit
60fb58f08f
|
@ -1,37 +0,0 @@
|
|||
# Clean BRFSS 2020
|
||||
# ----------------
|
||||
#
|
||||
# This module documents the process used to clean and simplify the
|
||||
# BRFSS data set used in this lab. Students don't need to interact with this.
|
||||
# Read more about BRFSS at https://www.cdc.gov/brfss/annual_data/annual_2020.html
|
||||
|
||||
# First, download and unzip https://www.cdc.gov/brfss/annual_data/2020/files/LLCP2020XPT.zip
|
||||
# You should now have a file called LLCP2020.XPT
|
||||
|
||||
import pandas as pd
|
||||
df = pd.read_sas("LLCP2020.XPT")
|
||||
df = df[odf.DISPCODE == 1100]
|
||||
df["sex"] = df["SEXVAR"].map({1: "male", 2: "female"})
|
||||
df = df[df.GENHLTH <= 5]
|
||||
df["health"] = df.GENHLTH.map({1:5, 2:4, 3:3, 4:2, 5:1})
|
||||
df = df[df.MEDCOST <= 2]
|
||||
df["no_doctor"] = df.MEDCOST.map({1: True, 2: False})
|
||||
df = df[df.EXERANY2 <= 2]
|
||||
df["exercise"] = df.EXERANY2.map({1: True, 2: False})
|
||||
df = df[df.SLEPTIM1 < 25]
|
||||
df["sleep"] = df.SLEPTIM1.astype(int)
|
||||
df = df[df.INCOME2 < 9]
|
||||
df["income"] = df.INCOME2.astype(int)
|
||||
df = df[~df.WTKG3.isna()]
|
||||
df["weight"] = df.WTKG3 / 100
|
||||
df = df[~df.HTM4.isna()]
|
||||
df["height"] = df.HTM4 / 100
|
||||
df = df[(df.SOFEMALE.isin([1, 2, 3, 4, 7, 9])) | (df.SOMALE.isin([1, 2, 3, 4, 7, 9]))]
|
||||
df["sexual_orientation"] = df.SOFEMALE
|
||||
df["sexual_orientation"].fillna(df.SOMALE, inplace=True)
|
||||
df["sexual_orientation"] = df["sexual_orientation"].map({1: "homosexual", 2: "heterosexual", 3: "bisexual", 4: "other", 7: "other", 9: "other"})
|
||||
df = df[df._EDUCAG.isin([1, 2, 3, 4])]
|
||||
df["education"] = df._EDUCAG.map({1: "none_completed", 2: "high_school", 3: "some_college", 4: "college"})
|
||||
df["age"] = df._AGE_G.map({1: 18, 2: 25, 3: 35, 4: 45, 5: 55, 6: 65})
|
||||
df = df[["age", "sex", "income", "education", "sexual_orientation", "height", "weight", "health", "no_doctor", "exercise", "sleep"]]
|
||||
df.to_csv("brfss_2020.csv", index=False)
|
131
lab_04.ipynb
131
lab_04.ipynb
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue