From ca517d7d43fa07bbfa876fab3a1f4622d4546ce5 Mon Sep 17 00:00:00 2001 From: tsmith37 Date: Tue, 4 Nov 2025 20:22:03 -0500 Subject: [PATCH] I cleaned up my data by adding a column and removing one. I learned how to change a data set by using Google Sheets --- .ipynb_checkpoints/argument-checkpoint.ipynb | 69 ++++++++++---------- argument.ipynb | 69 ++++++++++---------- data/Bad_Driver - bad-drivers.csv | 52 +++++++++++++++ 3 files changed, 122 insertions(+), 68 deletions(-) create mode 100644 data/Bad_Driver - bad-drivers.csv diff --git a/.ipynb_checkpoints/argument-checkpoint.ipynb b/.ipynb_checkpoints/argument-checkpoint.ipynb index 20be531..ecde3e4 100644 --- a/.ipynb_checkpoints/argument-checkpoint.ipynb +++ b/.ipynb_checkpoints/argument-checkpoint.ipynb @@ -5,7 +5,7 @@ "id": "worldwide-blood", "metadata": {}, "source": [ - "# Introduction" + "# A Data Science Investigation About Fatal Car Crashes in America " ] }, { @@ -47,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 7, "id": "technical-evans", "metadata": {}, "outputs": [], @@ -59,14 +59,14 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 9, "id": "overhead-sigma", "metadata": {}, "outputs": [], "source": [ "### 💻 FILL IN YOUR DATASET FILE NAME BELOW 💻 ###\n", "\n", - "file_name = \"bad-drivers.csv\"\n", + "file_name = \"Bad_Driver - bad-drivers.csv\"\n", "dataset_path = \"data/\" + file_name\n", "\n", "df = pd.read_csv(dataset_path)" @@ -74,7 +74,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "id": "heated-blade", "metadata": {}, "outputs": [ @@ -106,7 +106,7 @@ " Percentage Of Drivers Involved In Fatal Collisions Who Were Not Distracted\n", " Percentage Of Drivers Involved In Fatal Collisions Who Had Not Been Involved In Any Previous Accidents\n", " Car Insurance Premiums ($)\n", - " Losses incurred by insurance companies for collisions per insured driver ($)\n", + " Region\n", " \n", " \n", " \n", @@ -119,7 +119,7 
@@ " 96\n", " 80\n", " 784.55\n", - " 145.08\n", + " Southeast\n", " \n", " \n", " 1\n", @@ -130,7 +130,7 @@ " 90\n", " 94\n", " 1053.48\n", - " 133.93\n", + " West\n", " \n", " \n", " 2\n", @@ -141,7 +141,7 @@ " 84\n", " 96\n", " 899.47\n", - " 110.35\n", + " Southeast\n", " \n", " \n", " 3\n", @@ -152,7 +152,7 @@ " 94\n", " 95\n", " 827.34\n", - " 142.39\n", + " Southeast\n", " \n", " \n", " 4\n", @@ -163,7 +163,7 @@ " 91\n", " 89\n", " 878.41\n", - " 165.63\n", + " West\n", " \n", " \n", "\n", @@ -212,22 +212,15 @@ "3 95 \n", "4 89 \n", "\n", - " Car Insurance Premiums ($) \\\n", - "0 784.55 \n", - "1 1053.48 \n", - "2 899.47 \n", - "3 827.34 \n", - "4 878.41 \n", - "\n", - " Losses incurred by insurance companies for collisions per insured driver ($) \n", - "0 145.08 \n", - "1 133.93 \n", - "2 110.35 \n", - "3 142.39 \n", - "4 165.63 " + " Car Insurance Premiums ($) Region \n", + "0 784.55 Southeast \n", + "1 1053.48 West \n", + "2 899.47 Southeast \n", + "3 827.34 Southeast \n", + "4 878.41 West " ] }, - "execution_count": 8, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -245,23 +238,31 @@ "\n", "*✏️ Write 2-3 sentences describing this dataset. Be sure to include where the data comes from and what it contains.*\n", "\n", + "### When is this data set from?\n", + "\n", "I got the data set from FiveThirtyEight. It was used for an article called\n", "\"Dear Mona, Which state has the worst drivers?\" in October 2014. The person who wrote the article is Mona Chalabi, they are a data editor at the Guardian US, \n", "a columnist at New York Margazine, and a lead news writer for FiveThirtyEight.\n", "\n", "The date is about fatal collisions in each state. 
There are 7 rows, some of the rows\n", "are about \"Percentage Of Drivers Involved In Fatal Collisions Who Were Alcohol-Impaired\" and \"Percentage Of Drivers Involved In Fatal Collisions Who Were Not Distracted\"\n", + "\n", + "### How was this data set cleaned?\n", + "\n", + "I did not need to do much cleaning of the data myself, but I did add a column called \"Region\" to separate the states into 5 different regions: Northwest, Midwest, Southeast, West, and Northeast. I also excluded the data on \"Losses incurred by insurance companies for collisions per insured driver ($)\" because insurance companies are well known for finding ways to get out of paying customers for collisions, so it is not an accurate representation of fatal car crashes. \n", + "\n", + "## What specific research questions will you investigate?\n", + "\n", + "1. Is drinking and driving the biggest cause of fatal collisions?\n", + "\n", + "2. What state are you most likely to get into a fatal collision for their first accident?\n", + "\n", + "3. What state is the most unlucky state for fatal collisions?\n", + "\n", + "4. Is there a connection between the speed and the roads that are causing fatal collisions, that would make the Car Insurance Premiums more expensive?\n", "\n" ] }, - { - "cell_type": "markdown", - "id": "6ba44c9c-60d1-46a4-8257-b4e8eeea348d", - "metadata": {}, - "source": [ - "I will recategorise the data so that all of the states data will be separated into the five regions of the United States" - ] - }, { "cell_type": "code", "execution_count": 40, @@ -375,7 +376,7 @@ "id": "recognized-positive", "metadata": {}, "source": [ - "## First Research Question: Is drinking and driving the biggest cause of fatal collisions?\\" + "## First Research Question: Is drinking and driving the biggest cause of fatal collisions?" 
] }, { diff --git a/argument.ipynb b/argument.ipynb index 20be531..ecde3e4 100644 --- a/argument.ipynb +++ b/argument.ipynb @@ -5,7 +5,7 @@ "id": "worldwide-blood", "metadata": {}, "source": [ - "# Introduction" + "# A Data Science Investigation About Fatal Car Crashes in America " ] }, { @@ -47,7 +47,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 7, "id": "technical-evans", "metadata": {}, "outputs": [], @@ -59,14 +59,14 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 9, "id": "overhead-sigma", "metadata": {}, "outputs": [], "source": [ "### 💻 FILL IN YOUR DATASET FILE NAME BELOW 💻 ###\n", "\n", - "file_name = \"bad-drivers.csv\"\n", + "file_name = \"Bad_Driver - bad-drivers.csv\"\n", "dataset_path = \"data/\" + file_name\n", "\n", "df = pd.read_csv(dataset_path)" @@ -74,7 +74,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "id": "heated-blade", "metadata": {}, "outputs": [ @@ -106,7 +106,7 @@ " Percentage Of Drivers Involved In Fatal Collisions Who Were Not Distracted\n", " Percentage Of Drivers Involved In Fatal Collisions Who Had Not Been Involved In Any Previous Accidents\n", " Car Insurance Premiums ($)\n", - " Losses incurred by insurance companies for collisions per insured driver ($)\n", + " Region\n", " \n", " \n", " \n", @@ -119,7 +119,7 @@ " 96\n", " 80\n", " 784.55\n", - " 145.08\n", + " Southeast\n", " \n", " \n", " 1\n", @@ -130,7 +130,7 @@ " 90\n", " 94\n", " 1053.48\n", - " 133.93\n", + " West\n", " \n", " \n", " 2\n", @@ -141,7 +141,7 @@ " 84\n", " 96\n", " 899.47\n", - " 110.35\n", + " Southeast\n", " \n", " \n", " 3\n", @@ -152,7 +152,7 @@ " 94\n", " 95\n", " 827.34\n", - " 142.39\n", + " Southeast\n", " \n", " \n", " 4\n", @@ -163,7 +163,7 @@ " 91\n", " 89\n", " 878.41\n", - " 165.63\n", + " West\n", " \n", " \n", "\n", @@ -212,22 +212,15 @@ "3 95 \n", "4 89 \n", "\n", - " Car Insurance Premiums ($) \\\n", - "0 784.55 \n", - "1 1053.48 \n", - "2 899.47 \n", - 
"3 827.34 \n", - "4 878.41 \n", - "\n", - " Losses incurred by insurance companies for collisions per insured driver ($) \n", - "0 145.08 \n", - "1 133.93 \n", - "2 110.35 \n", - "3 142.39 \n", - "4 165.63 " + " Car Insurance Premiums ($) Region \n", + "0 784.55 Southeast \n", + "1 1053.48 West \n", + "2 899.47 Southeast \n", + "3 827.34 Southeast \n", + "4 878.41 West " ] }, - "execution_count": 8, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -245,23 +238,31 @@ "\n", "*✏️ Write 2-3 sentences describing this dataset. Be sure to include where the data comes from and what it contains.*\n", "\n", + "### When is this data set from?\n", + "\n", "I got the data set from FiveThirtyEight. It was used for an article called\n", "\"Dear Mona, Which state has the worst drivers?\" in October 2014. The person who wrote the article is Mona Chalabi, they are a data editor at the Guardian US, \n", "a columnist at New York Margazine, and a lead news writer for FiveThirtyEight.\n", "\n", "The date is about fatal collisions in each state. There are 7 rows, some of the rows\n", "are about \"Percentage Of Drivers Involved In Fatal Collisions Who Were Alcohol-Impaired\" and \"Percentage Of Drivers Involved In Fatal Collisions Who Were Not Distracted\"\n", + "\n", + "### How was this data set cleaned?\n", + "\n", + "I did not need to do much cleaning of the data myself, but I did add a column called \"Region\" to separate the states into 5 different regions: Northwest, Midwest, Southeast, West, and Northeast. I also excluded the data on \"Losses incurred by insurance companies for collisions per insured driver ($)\" because insurance companies are well known for finding ways to get out of paying customers for collisions, so it is not an accurate representation of fatal car crashes. \n", + "\n", + "## What specific research questions will you investigate?\n", + "\n", + "1. Is drinking and driving the biggest cause of fatal collisions?\n", + "\n", + "2. 
What state are you most likely to get into a fatal collision for their first accident?\n", + "\n", + "3. What state is the most unlucky state for fatal collisions?\n", + "\n", + "4. Is there a connection between the speed and the roads that are causing fatal collisions, that would make the Car Insurance Premiums more expensive?\n", "\n" ] }, - { - "cell_type": "markdown", - "id": "6ba44c9c-60d1-46a4-8257-b4e8eeea348d", - "metadata": {}, - "source": [ - "I will recategorise the data so that all of the states data will be separated into the five regions of the United States" - ] - }, { "cell_type": "code", "execution_count": 40, @@ -375,7 +376,7 @@ "id": "recognized-positive", "metadata": {}, "source": [ - "## First Research Question: Is drinking and driving the biggest cause of fatal collisions?\\" + "## First Research Question: Is drinking and driving the biggest cause of fatal collisions?" ] }, { diff --git a/data/Bad_Driver - bad-drivers.csv b/data/Bad_Driver - bad-drivers.csv new file mode 100644 index 0000000..066db1b --- /dev/null +++ b/data/Bad_Driver - bad-drivers.csv @@ -0,0 +1,52 @@ +State,Number of drivers involved in fatal collisions per billion miles,Percentage Of Drivers Involved In Fatal Collisions Who Were Speeding,Percentage Of Drivers Involved In Fatal Collisions Who Were Alcohol-Impaired,Percentage Of Drivers Involved In Fatal Collisions Who Were Not Distracted,Percentage Of Drivers Involved In Fatal Collisions Who Had Not Been Involved In Any Previous Accidents,Car Insurance Premiums ($),Region +Alabama,18.8,39,30,96,80,784.55,Southeast +Alaska,18.1,41,25,90,94,1053.48,West +Arizona,18.6,35,28,84,96,899.47,Southeast +Arkansas,22.4,18,26,94,95,827.34,Southeast +California,12,35,28,91,89,878.41,West +Colorado,13.6,37,28,79,95,835.5,West +Connecticut,10.8,46,36,87,82,1068.73,Northwest +Delaware,16.2,38,30,87,99,1137.87,Northwest +District of Columbia,5.9,34,27,100,100,1273.89,Northwest +Florida,17.9,21,29,92,94,1160.13,Southeast 
+Georgia,15.6,19,25,95,93,913.15,Southeast +Hawaii,17.5,54,41,82,87,861.18,West +Idaho,15.3,36,29,85,98,641.96,West +Illinois,12.8,36,34,94,96,803.11,Midwest +Indiana,14.5,25,29,95,95,710.46,Midwest +Iowa,15.7,17,25,97,87,649.06,Midwest +Kansas,17.8,27,24,77,85,780.45,Midwest +Kentucky,21.4,19,23,78,76,872.51,Southeast +Louisiana,20.5,35,33,73,98,1281.55,Southeast +Maine,15.1,38,30,87,84,661.88,Northeast +Maryland,12.5,34,32,71,99,1048.78,Northeast +Massachusetts,8.2,23,35,87,80,1011.14,Northeast +Michigan,14.1,24,28,95,77,1110.61,Midwest +Minnesota,9.6,23,29,88,88,777.18,Midwest +Mississippi,17.6,15,31,10,100,896.07,Southeast +Missouri,16.1,43,34,92,84,790.32,Midwest +Montana,21.4,39,44,84,85,816.21,West +Nebraska,14.9,13,35,93,90,732.28,Midwest +Nevada,14.7,37,32,95,99,1029.87,West +New Hampshire,11.6,35,30,87,83,746.54,Northeast +New Jersey,11.2,16,28,86,78,1301.52,Northeast +New Mexico,18.4,19,27,67,98,869.85,Southeast +New York,12.3,32,29,88,80,1234.31,Northeast +North Carolina,16.8,39,31,94,81,708.24,Southeast +North Dakota,23.9,23,42,99,86,688.75,Midwest +Ohio,14.1,28,34,99,82,697.73,Midwest +Oklahoma,19.9,32,29,92,94,881.51,Southeast +Oregon,12.8,33,26,67,90,804.71,West +Pennsylvania,18.2,50,31,96,88,905.99,Northeast +Rhode Island,11.1,34,38,92,79,1148.99,Northeast +South Carolina,23.9,38,41,96,81,858.97,Southeast +South Dakota,19.4,31,33,98,86,669.31,Midwest +Tennessee,19.5,21,29,82,81,767.91,Southeast +Texas,19.4,40,38,91,87,1004.75,Southeast +Utah,11.3,43,16,88,96,809.38,West +Vermont,13.6,30,30,96,95,716.2,Northeast +Virginia,12.7,19,27,87,88,768.95,Southeast +Washington,10.6,42,33,82,86,890.03,West +West Virginia,23.8,34,28,97,87,992.61,Southeast +Wisconsin,13.8,36,33,39,84,670.31,Midwest +Wyoming,17.4,42,32,81,90,791.14,West \ No newline at end of file