{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Cleaning up our vehicle weight data\n", "\n", "Before we can analyze our data, we'll need to combine vehicle weights with makes and models, as well as clean up the results a bit." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "<p class=\"reading-options\">\n <a class=\"btn\" href=\"/car-crashes-weight-regression/02-create-make-model-weights-csv\">\n <i class=\"fa fa-sm fa-book\"></i>\n Read online\n </a>\n <a class=\"btn\" href=\"/car-crashes-weight-regression/notebooks/02 - Create make model weights csv.ipynb\">\n <i class=\"fa fa-sm fa-download\"></i>\n Download notebook\n </a>\n <a class=\"btn\" href=\"https://colab.research.google.com/github/littlecolumns/ds4j-notebooks/blob/master/car-crashes-weight-regression/notebooks/02 - Create make model weights csv.ipynb\" target=\"_new\">\n <i class=\"fa fa-sm fa-laptop\"></i>\n Interactive version\n </a>\n</p>" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "import glob\n", "import pandas as pd" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Read in all of the vehicle weight csv files" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['vehicle-data/MERCURY.csv',\n", " 'vehicle-data/LINCOLN.csv',\n", " 'vehicle-data/DELOREAN.csv',\n", " 'vehicle-data/LAMBORGHINI.csv',\n", " 'vehicle-data/SUZUKI.csv']" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "filenames = glob.glob(\"vehicle-data/*.csv\")\n", "filenames[:5]" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "60" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(filenames)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Anti-Brake System</th>\n", " <th>Curb Weight-automatic</th>\n", " <th>Curb Weight-manual</th>\n", " <th>Engine Type</th>\n", " <th>Front Brake Type</th>\n", " <th>Fuel Economy-city</th>\n", " <th>Fuel Economy-highway</th>\n", " <th>Ground Clearance</th>\n", " <th>Make</th>\n", " <th>Model</th>\n", " <th>Model Year</th>\n", " <th>Rear Brake Type</th>\n", " <th>Tires</th>\n", " <th>Transmission-long</th>\n", " <th>Wheelbase</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>Non-ABS</td>\n", " <td>2500 lbs</td>\n", " <td>2423 lbs</td>\n", " <td>1.6L L4 DOHC 16V</td>\n", " <td>Disc</td>\n", " <td>23 - 25 miles/gallon</td>\n", " <td>26 - 31 miles/gallon</td>\n", " <td>NaN</td>\n", " <td>Mercury</td>\n", " <td>Capri</td>\n", " <td>1994</td>\n", " <td>Disc</td>\n", " <td>185/60R14</td>\n", " <td>4-Speed Automatic | 5-Speed Manual</td>\n", " <td>94.70 in.</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>Non-ABS</td>\n", " <td>2500 lbs</td>\n", " <td>2423 lbs</td>\n", " <td>1.6L L4 DOHC 16V</td>\n", " <td>Disc</td>\n", " <td>23 - 25 miles/gallon</td>\n", " <td>26 - 31 miles/gallon</td>\n", " <td>NaN</td>\n", " <td>Mercury</td>\n", " <td>Capri</td>\n", " <td>1994</td>\n", " <td>Disc</td>\n", " <td>185/60R14</td>\n", " <td>4-Speed Automatic | 5-Speed Manual</td>\n", " <td>94.70 in.</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " Anti-Brake System Curb Weight-automatic Curb Weight-manual \\\n", "0 Non-ABS 2500 lbs 2423 lbs \n", "1 Non-ABS 2500 lbs 2423 lbs \n", "\n", " Engine Type Front Brake Type Fuel Economy-city \\\n", "0 1.6L L4 DOHC 16V Disc 23 - 25 miles/gallon \n", "1 1.6L L4 DOHC 16V Disc 23 - 25 miles/gallon \n", "\n", " Fuel Economy-highway Ground Clearance Make Model Model Year \\\n", "0 26 - 31 miles/gallon NaN Mercury Capri 1994 \n", "1 26 - 31 miles/gallon NaN Mercury Capri 1994 \n", "\n", " Rear Brake Type Tires Transmission-long Wheelbase \n", "0 Disc 185/60R14 4-Speed Automatic | 5-Speed Manual 94.70 in. \n", "1 Disc 185/60R14 4-Speed Automatic | 5-Speed Manual 94.70 in. " ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.concat([pd.read_csv(filename) for filename in filenames], sort=False, ignore_index=True)\n", "df.head(2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Cut it to be just the columns we're interested in" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Curb Weight-automatic</th>\n", " <th>Curb Weight-manual</th>\n", " <th>Make</th>\n", " <th>Model</th>\n", " <th>Model Year</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>2500 lbs</td>\n", " <td>2423 lbs</td>\n", " <td>Mercury</td>\n", " <td>Capri</td>\n", " <td>1994</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>2500 lbs</td>\n", " <td>2423 lbs</td>\n", " <td>Mercury</td>\n", " <td>Capri</td>\n", " <td>1994</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>2492 lbs</td>\n", " <td>2404 lbs</td>\n", " <td>Mercury</td>\n", " <td>Capri</td>\n", " <td>1993</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>2492 lbs</td>\n", " <td>2404 lbs</td>\n", " <td>Mercury</td>\n", " <td>Capri</td>\n", " <td>1993</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>2492 lbs</td>\n", " <td>2404 lbs</td>\n", " <td>Mercury</td>\n", " <td>Capri</td>\n", " <td>1992</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " Curb Weight-automatic Curb Weight-manual Make Model Model Year\n", "0 2500 lbs 2423 lbs Mercury Capri 1994\n", "1 2500 lbs 2423 lbs Mercury Capri 1994\n", "2 2492 lbs 2404 lbs Mercury Capri 1993\n", "3 2492 lbs 2404 lbs Mercury Capri 1993\n", "4 2492 lbs 2404 lbs Mercury Capri 1992" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = df[['Curb Weight-automatic', 'Curb Weight-manual', 'Make', 'Model', 'Model Year']].copy()\n", "df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Remove 'lbs' from the weight columns, then average the two" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Curb Weight-automatic</th>\n", " <th>Curb Weight-manual</th>\n", " <th>Make</th>\n", " <th>Model</th>\n", " <th>Model Year</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>2500</td>\n", " <td>2423</td>\n", " <td>Mercury</td>\n", " <td>Capri</td>\n", " <td>1994</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>2500</td>\n", " <td>2423</td>\n", " <td>Mercury</td>\n", " <td>Capri</td>\n", " <td>1994</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " Curb Weight-automatic Curb Weight-manual Make Model Model Year\n", "0 2500 2423 Mercury Capri 1994\n", "1 2500 2423 Mercury Capri 1994" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['Curb Weight-automatic'] = df['Curb Weight-automatic'].str.replace(' lbs', '')\n", "df['Curb Weight-manual'] = df['Curb Weight-manual'].str.replace(' lbs', '')\n", "df.head(2)" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Curb Weight-automatic</th>\n", " <th>Curb Weight-manual</th>\n", " <th>Make</th>\n", " <th>Model</th>\n", " <th>Model Year</th>\n", " <th>weight</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>2500</td>\n", " <td>2423</td>\n", " <td>Mercury</td>\n", " <td>Capri</td>\n", " <td>1994</td>\n", " <td>2500.0</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>2500</td>\n", " <td>2423</td>\n", " <td>Mercury</td>\n", " <td>Capri</td>\n", " <td>1994</td>\n", " <td>2500.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " Curb Weight-automatic Curb Weight-manual Make Model Model Year weight\n", "0 2500 2423 Mercury Capri 1994 2500.0\n", "1 2500 2423 Mercury Capri 1994 2500.0" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Average the automatic and manual weights\n", "df['weight'] = df[['Curb Weight-automatic','Curb Weight-automatic']].astype(float).mean(axis=1)\n", "df.head(2)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Make</th>\n", " <th>Model</th>\n", " <th>Model Year</th>\n", " <th>weight</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>Mercury</td>\n", " <td>Capri</td>\n", " <td>1994</td>\n", " <td>2500.0</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>Mercury</td>\n", " <td>Capri</td>\n", " <td>1994</td>\n", " <td>2500.0</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>Mercury</td>\n", " <td>Capri</td>\n", " <td>1993</td>\n", " <td>2492.0</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>Mercury</td>\n", " <td>Capri</td>\n", " <td>1993</td>\n", " <td>2492.0</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>Mercury</td>\n", " <td>Capri</td>\n", " <td>1992</td>\n", " <td>2492.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " Make Model Model Year weight\n", "0 Mercury Capri 1994 2500.0\n", "1 Mercury Capri 1994 2500.0\n", "2 Mercury Capri 1993 2492.0\n", "3 Mercury Capri 1993 2492.0\n", "4 Mercury Capri 1992 2492.0" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.drop(columns=['Curb Weight-automatic', 'Curb Weight-manual'], inplace=True)\n", "df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Maybe there are multiples per make/model/year, so we'll average those, too" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Make</th>\n", " <th>Model</th>\n", " <th>Model Year</th>\n", " <th>weight</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>Acura</td>\n", " <td>CL</td>\n", " <td>1997</td>\n", " <td>3009</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>Acura</td>\n", " <td>CL</td>\n", " <td>1998</td>\n", " <td>3215</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>Acura</td>\n", " <td>CL</td>\n", " <td>1999</td>\n", " <td>3285</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>Acura</td>\n", " <td>CL</td>\n", " <td>2001</td>\n", " <td>3470</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>Acura</td>\n", " <td>CL</td>\n", " <td>2002</td>\n", " <td>3470</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " Make Model Model Year weight\n", "0 Acura CL 1997 3009\n", "1 Acura CL 1998 3215\n", "2 Acura CL 1999 3285\n", "3 Acura CL 2001 3470\n", "4 Acura CL 2002 3470" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Now let's group by make, model and year and get the average weight\n", "weights = df.groupby(['Make', 'Model', 'Model Year']).weight.mean().to_frame().reset_index()\n", "\n", "# Remove anything that doesn't have a weight\n", "weights.dropna(subset=['weight'], inplace=True)\n", "\n", "# Make the weights integers because we don't need that specificity\n", "weights.weight = weights.weight.astype(int)\n", "\n", "# What do we have?\n", "weights.head()" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(4323, 4)" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "weights.shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Standardization\n", "\n", "We'll need this to match content later\n", "\n", "* Rename Model Year column\n", "* Capitalize Make/Model" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "weights.rename(columns={'Model Year': 'ModelYear'}, inplace=True)\n", "weights.ModelYear = weights.ModelYear.astype(str)\n", "weights.Make = weights.Make.str.upper()\n", "weights.Model = weights.Model.str.upper()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Interpolate missing values\n", "\n", "Sometimes we have 2005 and 2007 but not 2006. We'll interpolate those." ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead tr th {\n", " text-align: left;\n", " }\n", "\n", " .dataframe thead tr:last-of-type th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr>\n", " <th>Make</th>\n", " <th colspan=\"10\" halign=\"left\">ACURA</th>\n", " <th>...</th>\n", " <th colspan=\"10\" halign=\"left\">VOLVO</th>\n", " </tr>\n", " <tr>\n", " <th>Model</th>\n", " <th>CL</th>\n", " <th>ILX</th>\n", " <th>INTEGRA</th>\n", " <th>LEGEND</th>\n", " <th>MDX</th>\n", " <th>NSX</th>\n", " <th>RDX</th>\n", " <th>RL</th>\n", " <th>RLX</th>\n", " <th>SLX</th>\n", " <th>...</th>\n", " <th>S70</th>\n", " <th>S80</th>\n", " <th>S90</th>\n", " <th>V40</th>\n", " <th>V50</th>\n", " <th>V60</th>\n", " <th>V70</th>\n", " <th>XC60</th>\n", " <th>XC70</th>\n", " <th>XC90</th>\n", " </tr>\n", " <tr>\n", " <th>ModelYear</th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>1988</th>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1989</th>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1990</th>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1991</th>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1992</th>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>2615.0</td>\n", " <td>3486.0</td>\n", " <td>NaN</td>\n", " <td>3098.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1993</th>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>2615.0</td>\n", " <td>3446.0</td>\n", " <td>NaN</td>\n", " <td>3097.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1994</th>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>2670.0</td>\n", " <td>3560.0</td>\n", " <td>NaN</td>\n", " <td>3109.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1995</th>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>2670.0</td>\n", " <td>3560.0</td>\n", " <td>NaN</td>\n", " <td>3208.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1996</th>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>2670.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>3208.0</td>\n", " <td>NaN</td>\n", " <td>3660.0</td>\n", " <td>NaN</td>\n", " <td>4315.0</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1997</th>\n", " <td>3009.0</td>\n", " <td>NaN</td>\n", " <td>2703.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>3069.0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>10 rows \u00d7 649 columns</p>\n", "</div>" ], "text/plain": [ "Make ACURA \\\n", "Model CL ILX INTEGRA LEGEND MDX NSX RDX RL RLX SLX \n", "ModelYear \n", "1988 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", "1989 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", "1990 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", "1991 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", "1992 NaN NaN 2615.0 3486.0 NaN 3098.0 NaN NaN NaN NaN \n", "1993 NaN NaN 2615.0 3446.0 NaN 3097.0 NaN NaN NaN NaN \n", "1994 NaN NaN 2670.0 3560.0 NaN 3109.0 NaN NaN NaN NaN \n", "1995 NaN NaN 2670.0 3560.0 NaN 3208.0 NaN NaN NaN NaN \n", "1996 NaN NaN 2670.0 NaN NaN 3208.0 NaN 3660.0 NaN 4315.0 \n", "1997 3009.0 NaN 2703.0 NaN NaN 3069.0 NaN NaN NaN NaN \n", "\n", "Make ... VOLVO \n", "Model ... S70 S80 S90 V40 V50 V60 V70 XC60 XC70 XC90 \n", "ModelYear ... \n", "1988 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", "1989 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", "1990 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", "1991 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", "1992 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", "1993 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", "1994 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", "1995 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", "1996 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", "1997 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN \n", "\n", "[10 rows x 649 columns]" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Pivot so each type of car is a column\n", "weights_pivot = weights.pivot_table(values=\"weight\", index='ModelYear', columns=['Make', 'Model'])\n", "weights_pivot.head(10)" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead tr th {\n", " text-align: left;\n", " }\n", "\n", " .dataframe thead tr:last-of-type th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr>\n", " <th>Make</th>\n", " <th colspan=\"10\" halign=\"left\">ACURA</th>\n", " <th>...</th>\n", " <th colspan=\"10\" halign=\"left\">VOLVO</th>\n", " </tr>\n", " <tr>\n", " <th>Model</th>\n", " <th>CL</th>\n", " <th>ILX</th>\n", " <th>INTEGRA</th>\n", " <th>LEGEND</th>\n", " <th>MDX</th>\n", " <th>NSX</th>\n", " <th>RDX</th>\n", " <th>RL</th>\n", " <th>RLX</th>\n", " <th>SLX</th>\n", " <th>...</th>\n", " <th>S70</th>\n", " <th>S80</th>\n", " <th>S90</th>\n", " <th>V40</th>\n", " <th>V50</th>\n", " <th>V60</th>\n", " <th>V70</th>\n", " <th>XC60</th>\n", " <th>XC70</th>\n", " <th>XC90</th>\n", " </tr>\n", " <tr>\n", " <th>ModelYear</th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>1988</th>\n", " <td>3009.0</td>\n", " <td>2910.0</td>\n", " <td>2615.0</td>\n", " <td>3486.0</td>\n", " <td>4323.0</td>\n", " <td>3098.0</td>\n", " <td>3968.0</td>\n", " <td>3660.0</td>\n", " <td>3933.0</td>\n", " <td>4315.0</td>\n", " <td>...</td>\n", " <td>3333.0</td>\n", " <td>3618.0</td>\n", " <td>4012.0</td>\n", " <td>2998.0</td>\n", " <td>3570.0</td>\n", " <td>3527.0</td>\n", " <td>3433.0</td>\n", " <td>4387.0</td>\n", " <td>3699.0</td>\n", " <td>4493.0</td>\n", " </tr>\n", " <tr>\n", " <th>1989</th>\n", " <td>3009.0</td>\n", " <td>2910.0</td>\n", " <td>2615.0</td>\n", " <td>3486.0</td>\n", " <td>4323.0</td>\n", " <td>3098.0</td>\n", " <td>3968.0</td>\n", " <td>3660.0</td>\n", " <td>3933.0</td>\n", " <td>4315.0</td>\n", " <td>...</td>\n", " <td>3333.0</td>\n", " <td>3618.0</td>\n", " <td>4012.0</td>\n", " <td>2998.0</td>\n", " <td>3570.0</td>\n", " <td>3527.0</td>\n", " <td>3433.0</td>\n", " <td>4387.0</td>\n", " <td>3699.0</td>\n", " <td>4493.0</td>\n", " </tr>\n", " <tr>\n", " <th>1990</th>\n", " <td>3009.0</td>\n", " <td>2910.0</td>\n", " <td>2615.0</td>\n", " <td>3486.0</td>\n", " <td>4323.0</td>\n", " <td>3098.0</td>\n", " <td>3968.0</td>\n", " <td>3660.0</td>\n", " <td>3933.0</td>\n", " <td>4315.0</td>\n", " <td>...</td>\n", " <td>3333.0</td>\n", " <td>3618.0</td>\n", " <td>4012.0</td>\n", " <td>2998.0</td>\n", " <td>3570.0</td>\n", " <td>3527.0</td>\n", " <td>3433.0</td>\n", " <td>4387.0</td>\n", " <td>3699.0</td>\n", " <td>4493.0</td>\n", " </tr>\n", " <tr>\n", " <th>1991</th>\n", " <td>3009.0</td>\n", " <td>2910.0</td>\n", " <td>2615.0</td>\n", " <td>3486.0</td>\n", " <td>4323.0</td>\n", " <td>3098.0</td>\n", " <td>3968.0</td>\n", " <td>3660.0</td>\n", " <td>3933.0</td>\n", " <td>4315.0</td>\n", " <td>...</td>\n", " <td>3333.0</td>\n", " <td>3618.0</td>\n", " <td>4012.0</td>\n", " <td>2998.0</td>\n", " <td>3570.0</td>\n", " <td>3527.0</td>\n", " <td>3433.0</td>\n", " <td>4387.0</td>\n", " <td>3699.0</td>\n", " <td>4493.0</td>\n", " </tr>\n", " <tr>\n", " <th>1992</th>\n", " <td>3009.0</td>\n", " <td>2910.0</td>\n", " <td>2615.0</td>\n", " <td>3486.0</td>\n", " <td>4323.0</td>\n", " <td>3098.0</td>\n", " <td>3968.0</td>\n", " <td>3660.0</td>\n", " <td>3933.0</td>\n", " <td>4315.0</td>\n", " <td>...</td>\n", " <td>3333.0</td>\n", " <td>3618.0</td>\n", " <td>4012.0</td>\n", " <td>2998.0</td>\n", " <td>3570.0</td>\n", " <td>3527.0</td>\n", " <td>3433.0</td>\n", " <td>4387.0</td>\n", " <td>3699.0</td>\n", " <td>4493.0</td>\n", " </tr>\n", " <tr>\n", " <th>1993</th>\n", " <td>3009.0</td>\n", " <td>2910.0</td>\n", " <td>2615.0</td>\n", " <td>3446.0</td>\n", " <td>4323.0</td>\n", " <td>3097.0</td>\n", " <td>3968.0</td>\n", " <td>3660.0</td>\n", " <td>3933.0</td>\n", " <td>4315.0</td>\n", " <td>...</td>\n", " <td>3333.0</td>\n", " <td>3618.0</td>\n", " <td>4012.0</td>\n", " <td>2998.0</td>\n", " <td>3570.0</td>\n", " <td>3527.0</td>\n", " <td>3433.0</td>\n", " <td>4387.0</td>\n", " <td>3699.0</td>\n", " <td>4493.0</td>\n", " </tr>\n", " <tr>\n", " <th>1994</th>\n", " <td>3009.0</td>\n", " <td>2910.0</td>\n", " <td>2670.0</td>\n", " <td>3560.0</td>\n", " <td>4323.0</td>\n", " <td>3109.0</td>\n", " <td>3968.0</td>\n", " <td>3660.0</td>\n", " <td>3933.0</td>\n", " <td>4315.0</td>\n", " <td>...</td>\n", " <td>3333.0</td>\n", " <td>3618.0</td>\n", " <td>4012.0</td>\n", " <td>2998.0</td>\n", " <td>3570.0</td>\n", " <td>3527.0</td>\n", " <td>3433.0</td>\n", " <td>4387.0</td>\n", " <td>3699.0</td>\n", " <td>4493.0</td>\n", " </tr>\n", " <tr>\n", " <th>1995</th>\n", " <td>3009.0</td>\n", " <td>2910.0</td>\n", " <td>2670.0</td>\n", " <td>3560.0</td>\n", " <td>4323.0</td>\n", " <td>3208.0</td>\n", " <td>3968.0</td>\n", " <td>3660.0</td>\n", " <td>3933.0</td>\n", " <td>4315.0</td>\n", " <td>...</td>\n", " <td>3333.0</td>\n", " <td>3618.0</td>\n", " <td>4012.0</td>\n", " <td>2998.0</td>\n", " <td>3570.0</td>\n", " <td>3527.0</td>\n", " <td>3433.0</td>\n", " <td>4387.0</td>\n", " <td>3699.0</td>\n", " <td>4493.0</td>\n", " </tr>\n", " <tr>\n", " <th>1996</th>\n", " <td>3009.0</td>\n", " <td>2910.0</td>\n", " <td>2670.0</td>\n", " <td>3560.0</td>\n", " <td>4323.0</td>\n", " <td>3208.0</td>\n", " <td>3968.0</td>\n", " <td>3660.0</td>\n", " <td>3933.0</td>\n", " <td>4315.0</td>\n", " <td>...</td>\n", " <td>3333.0</td>\n", " <td>3618.0</td>\n", " <td>4012.0</td>\n", " <td>2998.0</td>\n", " <td>3570.0</td>\n", " <td>3527.0</td>\n", " <td>3433.0</td>\n", " <td>4387.0</td>\n", " <td>3699.0</td>\n", " <td>4493.0</td>\n", " </tr>\n", " <tr>\n", " <th>1997</th>\n", " <td>3009.0</td>\n", " <td>2910.0</td>\n", " <td>2703.0</td>\n", " <td>3560.0</td>\n", " <td>4323.0</td>\n", " <td>3069.0</td>\n", " <td>3968.0</td>\n", " <td>3609.0</td>\n", " <td>3933.0</td>\n", " <td>4465.0</td>\n", " <td>...</td>\n", " <td>3333.0</td>\n", " <td>3618.0</td>\n", " <td>4012.0</td>\n", " <td>2998.0</td>\n", " <td>3570.0</td>\n", " <td>3527.0</td>\n", " <td>3433.0</td>\n", " <td>4387.0</td>\n", " <td>3699.0</td>\n", " <td>4493.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>10 rows \u00d7 649 columns</p>\n", "</div>" ], "text/plain": [ "Make ACURA \\\n", "Model CL ILX INTEGRA LEGEND MDX NSX RDX RL \n", "ModelYear \n", "1988 3009.0 2910.0 2615.0 3486.0 4323.0 3098.0 3968.0 3660.0 \n", "1989 3009.0 2910.0 2615.0 3486.0 4323.0 3098.0 3968.0 3660.0 \n", "1990 3009.0 2910.0 2615.0 3486.0 4323.0 3098.0 3968.0 3660.0 \n", "1991 3009.0 2910.0 2615.0 3486.0 4323.0 3098.0 3968.0 3660.0 \n", "1992 3009.0 2910.0 2615.0 3486.0 4323.0 3098.0 3968.0 3660.0 \n", "1993 3009.0 2910.0 2615.0 3446.0 4323.0 3097.0 3968.0 3660.0 \n", "1994 3009.0 2910.0 2670.0 3560.0 4323.0 3109.0 3968.0 3660.0 \n", "1995 3009.0 2910.0 2670.0 3560.0 4323.0 3208.0 3968.0 3660.0 \n", "1996 3009.0 2910.0 2670.0 3560.0 4323.0 3208.0 3968.0 3660.0 \n", "1997 3009.0 2910.0 2703.0 3560.0 4323.0 3069.0 3968.0 3609.0 \n", "\n", "Make ... VOLVO \\\n", "Model RLX SLX ... S70 S80 S90 V40 V50 \n", "ModelYear ... \n", "1988 3933.0 4315.0 ... 3333.0 3618.0 4012.0 2998.0 3570.0 \n", "1989 3933.0 4315.0 ... 3333.0 3618.0 4012.0 2998.0 3570.0 \n", "1990 3933.0 4315.0 ... 3333.0 3618.0 4012.0 2998.0 3570.0 \n", "1991 3933.0 4315.0 ... 3333.0 3618.0 4012.0 2998.0 3570.0 \n", "1992 3933.0 4315.0 ... 3333.0 3618.0 4012.0 2998.0 3570.0 \n", "1993 3933.0 4315.0 ... 3333.0 3618.0 4012.0 2998.0 3570.0 \n", "1994 3933.0 4315.0 ... 3333.0 3618.0 4012.0 2998.0 3570.0 \n", "1995 3933.0 4315.0 ... 3333.0 3618.0 4012.0 2998.0 3570.0 \n", "1996 3933.0 4315.0 ... 3333.0 3618.0 4012.0 2998.0 3570.0 \n", "1997 3933.0 4465.0 ... 3333.0 3618.0 4012.0 2998.0 3570.0 \n", "\n", "Make \n", "Model V60 V70 XC60 XC70 XC90 \n", "ModelYear \n", "1988 3527.0 3433.0 4387.0 3699.0 4493.0 \n", "1989 3527.0 3433.0 4387.0 3699.0 4493.0 \n", "1990 3527.0 3433.0 4387.0 3699.0 4493.0 \n", "1991 3527.0 3433.0 4387.0 3699.0 4493.0 \n", "1992 3527.0 3433.0 4387.0 3699.0 4493.0 \n", "1993 3527.0 3433.0 4387.0 3699.0 4493.0 \n", "1994 3527.0 3433.0 4387.0 3699.0 4493.0 \n", "1995 3527.0 3433.0 4387.0 3699.0 4493.0 \n", "1996 3527.0 3433.0 4387.0 3699.0 4493.0 \n", "1997 3527.0 3433.0 4387.0 3699.0 4493.0 \n", "\n", "[10 rows x 649 columns]" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Interpolate missing values in those columns\n", "weights_pivot = weights_pivot.interpolate(limit_direction='both')\n", "weights_pivot.head(10)" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Make</th>\n", " <th>Model</th>\n", " <th>ModelYear</th>\n", " <th>weight</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>ACURA</td>\n", " <td>CL</td>\n", " <td>1988</td>\n", " <td>3009</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>ACURA</td>\n", " <td>CL</td>\n", " <td>1989</td>\n", " <td>3009</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>ACURA</td>\n", " <td>CL</td>\n", " <td>1990</td>\n", " <td>3009</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>ACURA</td>\n", " <td>CL</td>\n", " <td>1991</td>\n", " <td>3009</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>ACURA</td>\n", " <td>CL</td>\n", " <td>1992</td>\n", " <td>3009</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>ACURA</td>\n", " <td>CL</td>\n", " <td>1993</td>\n", " <td>3009</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>ACURA</td>\n", " <td>CL</td>\n", " <td>1994</td>\n", " <td>3009</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>ACURA</td>\n", " <td>CL</td>\n", " <td>1995</td>\n", " <td>3009</td>\n", " </tr>\n", " <tr>\n", " <th>8</th>\n", " <td>ACURA</td>\n", " <td>CL</td>\n", " <td>1996</td>\n", " <td>3009</td>\n", " </tr>\n", " <tr>\n", " <th>9</th>\n", " <td>ACURA</td>\n", " <td>CL</td>\n", " <td>1997</td>\n", " <td>3009</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " Make Model ModelYear weight\n", "0 ACURA CL 1988 3009\n", "1 ACURA CL 1989 3009\n", "2 ACURA CL 1990 3009\n", "3 ACURA CL 1991 3009\n", "4 ACURA CL 1992 3009\n", "5 ACURA CL 1993 3009\n", "6 ACURA CL 1994 3009\n", "7 ACURA CL 1995 3009\n", "8 ACURA CL 1996 3009\n", "9 ACURA CL 1997 3009" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Convert it back into the normal dataframe\n", "filled_in = weights_pivot.T.stack(dropna=False).reset_index()\n", "filled_in.rename(columns={0: 'weight'}, inplace=True)\n", "filled_in.dropna(inplace=True)\n", "filled_in.weight = filled_in.weight.astype(int)\n", "filled_in.head(10)" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(19470, 4)" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "filled_in.shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Add in manual work " ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Make</th>\n", " <th>Model</th>\n", " <th>ModelYear</th>\n", " <th>weight</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>ACURA</td>\n", " <td>CL</td>\n", " <td>1988</td>\n", " <td>3009.0</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>ACURA</td>\n", " <td>CL</td>\n", " <td>1989</td>\n", " <td>3009.0</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>ACURA</td>\n", " <td>CL</td>\n", " <td>1990</td>\n", " <td>3009.0</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>ACURA</td>\n", " <td>CL</td>\n", " <td>1991</td>\n", " <td>3009.0</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>ACURA</td>\n", " <td>CL</td>\n", " <td>1992</td>\n", " <td>3009.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " Make Model ModelYear weight\n", "0 ACURA CL 1988 3009.0\n", "1 ACURA CL 1989 3009.0\n", "2 ACURA CL 1990 3009.0\n", "3 ACURA CL 1991 3009.0\n", "4 ACURA CL 1992 3009.0" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "manual_weights = pd.read_csv(\"weights_needed_completed.csv\", na_values='x')\n", "manual_filled = manual_weights.set_index(['make', 'model']).dropna(how='all').T.interpolate(limit_direction='both')\n", "manual_realigned = manual_filled.T.stack(dropna=False).reset_index().rename(columns={\n", " 'make': 'Make',\n", " 'model': 'Model',\n", " 'level_2': 'ModelYear',\n", " 0: 'weight'\n", "})\n", "filled_in = pd.concat([filled_in, manual_realigned], sort=False, ignore_index=True)\n", "filled_in.head()" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(19877, 4)" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "filled_in.shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Save the output" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [], "source": [ "# Let's just save it right to the data folder\n", "filled_in.to_csv(\"data/weights.csv\", index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead tr th {\n", " text-align: left;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr>\n", " <th>make</th>\n", " <th>CHEVROLET</th>\n", " <th>DODGE</th>\n", " <th>LEXUS</th>\n", " <th>HYUNDAI</th>\n", " <th>LEXUS</th>\n", " <th>CHRYSLER</th>\n", " <th>TOYOTA</th>\n", " <th>FORD</th>\n", " <th>GMC</th>\n", " <th>LINCOLN</th>\n", " <th>CHEVROLET</th>\n", " </tr>\n", " <tr>\n", " <th>model</th>\n", " <th>SILVERADO</th>\n", " <th>RAM</th>\n", " <th>ES</th>\n", " <th>SANTA FE</th>\n", " <th>RX</th>\n", " <th>TOWN AND COUNTRY</th>\n", " <th>4-RUNNER</th>\n", " <th>E-350</th>\n", " <th>SIERRA</th>\n", " <th>TOWN CAR</th>\n", " <th>GMT-400</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>1983</th>\n", " <td>4200.0</td>\n", " <td>4800.0</td>\n", " <td>3300.0</td>\n", " <td>3600.0</td>\n", " <td>3700.0</td>\n", " <td>4200.0</td>\n", " <td>3800.0</td>\n", " <td>5000.0</td>\n", " <td>4200.0</td>\n", " <td>4000.0</td>\n", " <td>4300.0</td>\n", " </tr>\n", " <tr>\n", " <th>1984</th>\n", " <td>4200.0</td>\n", " <td>4800.0</td>\n", " <td>3300.0</td>\n", " <td>3600.0</td>\n", " <td>3700.0</td>\n", " <td>4200.0</td>\n", " <td>3800.0</td>\n", " <td>5000.0</td>\n", " <td>4200.0</td>\n", " <td>4000.0</td>\n", " <td>4300.0</td>\n", " </tr>\n", " <tr>\n", " <th>1985</th>\n", " <td>4200.0</td>\n", " <td>4800.0</td>\n", " <td>3300.0</td>\n", " <td>3600.0</td>\n", " <td>3700.0</td>\n", " <td>4200.0</td>\n", " <td>3800.0</td>\n", " <td>5000.0</td>\n", " <td>4200.0</td>\n", " <td>4000.0</td>\n", " <td>4300.0</td>\n", " </tr>\n", " <tr>\n", " <th>1986</th>\n", " <td>4200.0</td>\n", " <td>4800.0</td>\n", " <td>3300.0</td>\n", " <td>3600.0</td>\n", " <td>3700.0</td>\n", " <td>4200.0</td>\n", " <td>3800.0</td>\n", " <td>5000.0</td>\n", " <td>4200.0</td>\n", " <td>4000.0</td>\n", " <td>4300.0</td>\n", " </tr>\n", " <tr>\n", " <th>1987</th>\n", " <td>4200.0</td>\n", " <td>4800.0</td>\n", " <td>3300.0</td>\n", " <td>3600.0</td>\n", " <td>3700.0</td>\n", " <td>4200.0</td>\n", " <td>3800.0</td>\n", " <td>5000.0</td>\n", " <td>4200.0</td>\n", " <td>4000.0</td>\n", " <td>4300.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ "make CHEVROLET DODGE LEXUS HYUNDAI LEXUS CHRYSLER TOYOTA \\\n", "model SILVERADO RAM ES SANTA FE RX TOWN AND COUNTRY 4-RUNNER \n", "1983 4200.0 4800.0 3300.0 3600.0 3700.0 4200.0 3800.0 \n", "1984 4200.0 4800.0 3300.0 3600.0 3700.0 4200.0 3800.0 \n", "1985 4200.0 4800.0 3300.0 3600.0 3700.0 4200.0 3800.0 \n", "1986 4200.0 4800.0 3300.0 3600.0 3700.0 4200.0 3800.0 \n", "1987 4200.0 4800.0 3300.0 3600.0 3700.0 4200.0 3800.0 \n", "\n", "make FORD GMC LINCOLN CHEVROLET \n", "model E-350 SIERRA TOWN CAR GMT-400 \n", "1983 5000.0 4200.0 4000.0 4300.0 \n", "1984 5000.0 4200.0 4000.0 4300.0 \n", "1985 5000.0 4200.0 4000.0 4300.0 \n", "1986 5000.0 4200.0 4000.0 4300.0 \n", "1987 5000.0 4200.0 4000.0 4300.0 " ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Make</th>\n", " <th>Model</th>\n", " <th>ModelYear</th>\n", " <th>weight</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>CHEVROLET</td>\n", " <td>SILVERADO</td>\n", " <td>1983</td>\n", " <td>4200.0</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>CHEVROLET</td>\n", " <td>SILVERADO</td>\n", " <td>1984</td>\n", " <td>4200.0</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>CHEVROLET</td>\n", " <td>SILVERADO</td>\n", " <td>1985</td>\n", " <td>4200.0</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>CHEVROLET</td>\n", " <td>SILVERADO</td>\n", " <td>1986</td>\n", " <td>4200.0</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>CHEVROLET</td>\n", " <td>SILVERADO</td>\n", " <td>1987</td>\n", " <td>4200.0</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>CHEVROLET</td>\n", " <td>SILVERADO</td>\n", " <td>1988</td>\n", " <td>4200.0</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>CHEVROLET</td>\n", " <td>SILVERADO</td>\n", " <td>1989</td>\n", " <td>4200.0</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>CHEVROLET</td>\n", " <td>SILVERADO</td>\n", " <td>1990</td>\n", " <td>4200.0</td>\n", " </tr>\n", " <tr>\n", " <th>8</th>\n", " <td>CHEVROLET</td>\n", " <td>SILVERADO</td>\n", " <td>1991</td>\n", " <td>4200.0</td>\n", " </tr>\n", " <tr>\n", " <th>9</th>\n", " <td>CHEVROLET</td>\n", " <td>SILVERADO</td>\n", " <td>1992</td>\n", " <td>4200.0</td>\n", " </tr>\n", " <tr>\n", " <th>10</th>\n", " <td>CHEVROLET</td>\n", " <td>SILVERADO</td>\n", " <td>1993</td>\n", " <td>4200.0</td>\n", " </tr>\n", " <tr>\n", " <th>11</th>\n", " <td>CHEVROLET</td>\n", " <td>SILVERADO</td>\n", " <td>1994</td>\n", " <td>4200.0</td>\n", " </tr>\n", " <tr>\n", " <th>12</th>\n", " <td>CHEVROLET</td>\n", " <td>SILVERADO</td>\n", " <td>1995</td>\n", " <td>4200.0</td>\n", " </tr>\n", " <tr>\n", " <th>13</th>\n", " <td>CHEVROLET</td>\n", " <td>SILVERADO</td>\n", " <td>1996</td>\n", " <td>4200.0</td>\n", " </tr>\n", " <tr>\n", " <th>14</th>\n", " <td>CHEVROLET</td>\n", " <td>SILVERADO</td>\n", " <td>1997</td>\n", " <td>4200.0</td>\n", " </tr>\n", " <tr>\n", " <th>15</th>\n", " <td>CHEVROLET</td>\n", " <td>SILVERADO</td>\n", " <td>1998</td>\n", " <td>4200.0</td>\n", " </tr>\n", " <tr>\n", " <th>16</th>\n", " <td>CHEVROLET</td>\n", " <td>SILVERADO</td>\n", " <td>1999</td>\n", " <td>4200.0</td>\n", " </tr>\n", " <tr>\n", " <th>17</th>\n", " <td>CHEVROLET</td>\n", " <td>SILVERADO</td>\n", " <td>2000</td>\n", " <td>4280.0</td>\n", " </tr>\n", " <tr>\n", " <th>18</th>\n", " <td>CHEVROLET</td>\n", " <td>SILVERADO</td>\n", " <td>2001</td>\n", " <td>4360.0</td>\n", " </tr>\n", " <tr>\n", " <th>19</th>\n", " <td>CHEVROLET</td>\n", " <td>SILVERADO</td>\n", " <td>2002</td>\n", " <td>4440.0</td>\n", " </tr>\n", " <tr>\n", " <th>20</th>\n", " <td>CHEVROLET</td>\n", " <td>SILVERADO</td>\n", " <td>2003</td>\n", " <td>4520.0</td>\n", " </tr>\n", " <tr>\n", " <th>21</th>\n", " <td>CHEVROLET</td>\n", " <td>SILVERADO</td>\n", " <td>2004</td>\n", " <td>4600.0</td>\n", " </tr>\n", " <tr>\n", " <th>22</th>\n", " <td>CHEVROLET</td>\n", " <td>SILVERADO</td>\n", " <td>2005</td>\n", " <td>4700.0</td>\n", " </tr>\n", " <tr>\n", " <th>23</th>\n", " <td>CHEVROLET</td>\n", " <td>SILVERADO</td>\n", " <td>2006</td>\n", " <td>4800.0</td>\n", " </tr>\n", " <tr>\n", " <th>24</th>\n", " <td>CHEVROLET</td>\n", " <td>SILVERADO</td>\n", " <td>2007</td>\n", " <td>4900.0</td>\n", " </tr>\n", " <tr>\n", " <th>25</th>\n", " <td>CHEVROLET</td>\n", " <td>SILVERADO</td>\n", " <td>2008</td>\n", " <td>5000.0</td>\n", " </tr>\n", " <tr>\n", " <th>26</th>\n", " <td>CHEVROLET</td>\n", " <td>SILVERADO</td>\n", " <td>2009</td>\n", " <td>5000.0</td>\n", " </tr>\n", " <tr>\n", " <th>27</th>\n", " <td>CHEVROLET</td>\n", " <td>SILVERADO</td>\n", " <td>2010</td>\n", " <td>5000.0</td>\n", " </tr>\n", " <tr>\n", " <th>28</th>\n", " <td>CHEVROLET</td>\n", " <td>SILVERADO</td>\n", " <td>2011</td>\n", " <td>5000.0</td>\n", " </tr>\n", " <tr>\n", " <th>29</th>\n", " <td>CHEVROLET</td>\n", " <td>SILVERADO</td>\n", " <td>2012</td>\n", " <td>5000.0</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>377</th>\n", " <td>CHEVROLET</td>\n", " <td>GMT-400</td>\n", " <td>1990</td>\n", " <td>4340.0</td>\n", " </tr>\n", " <tr>\n", " <th>378</th>\n", " <td>CHEVROLET</td>\n", " <td>GMT-400</td>\n", " <td>1991</td>\n", " <td>4360.0</td>\n", " </tr>\n", " <tr>\n", " <th>379</th>\n", " <td>CHEVROLET</td>\n", " <td>GMT-400</td>\n", " <td>1992</td>\n", " <td>4380.0</td>\n", " </tr>\n", " <tr>\n", " <th>380</th>\n", " <td>CHEVROLET</td>\n", " <td>GMT-400</td>\n", " <td>1993</td>\n", " <td>4400.0</td>\n", " </tr>\n", " <tr>\n", " <th>381</th>\n", " <td>CHEVROLET</td>\n", " <td>GMT-400</td>\n", " <td>1994</td>\n", " <td>4420.0</td>\n", " </tr>\n", " <tr>\n", " <th>382</th>\n", " <td>CHEVROLET</td>\n", " <td>GMT-400</td>\n", " <td>1995</td>\n", " <td>4440.0</td>\n", " </tr>\n", " <tr>\n", " <th>383</th>\n", " <td>CHEVROLET</td>\n", " <td>GMT-400</td>\n", " <td>1996</td>\n", " <td>4460.0</td>\n", " </tr>\n", " <tr>\n", " <th>384</th>\n", " <td>CHEVROLET</td>\n", " <td>GMT-400</td>\n", " <td>1997</td>\n", " <td>4480.0</td>\n", " </tr>\n", " <tr>\n", " <th>385</th>\n", " <td>CHEVROLET</td>\n", " <td>GMT-400</td>\n", " <td>1998</td>\n", " <td>4500.0</td>\n", " </tr>\n", " <tr>\n", " <th>386</th>\n", " <td>CHEVROLET</td>\n", " <td>GMT-400</td>\n", " <td>1999</td>\n", " <td>4500.0</td>\n", " </tr>\n", " <tr>\n", " <th>387</th>\n", " <td>CHEVROLET</td>\n", " <td>GMT-400</td>\n", " <td>2000</td>\n", " <td>4500.0</td>\n", " </tr>\n", " <tr>\n", " <th>388</th>\n", " <td>CHEVROLET</td>\n", " <td>GMT-400</td>\n", " <td>2001</td>\n", " <td>4500.0</td>\n", " </tr>\n", " <tr>\n", " <th>389</th>\n", " <td>CHEVROLET</td>\n", " <td>GMT-400</td>\n", " <td>2002</td>\n", " <td>4500.0</td>\n", " </tr>\n", " <tr>\n", " <th>390</th>\n", " <td>CHEVROLET</td>\n", " <td>GMT-400</td>\n", " <td>2003</td>\n", " <td>4500.0</td>\n", " </tr>\n", " <tr>\n", " <th>391</th>\n", " <td>CHEVROLET</td>\n", " <td>GMT-400</td>\n", " <td>2004</td>\n", " <td>4500.0</td>\n", " </tr>\n", " <tr>\n", " <th>392</th>\n", " <td>CHEVROLET</td>\n", " <td>GMT-400</td>\n", " <td>2005</td>\n", " <td>4500.0</td>\n", " </tr>\n", " <tr>\n", " <th>393</th>\n", " <td>CHEVROLET</td>\n", " <td>GMT-400</td>\n", " <td>2006</td>\n", " <td>4500.0</td>\n", " </tr>\n", " <tr>\n", " <th>394</th>\n", " <td>CHEVROLET</td>\n", " <td>GMT-400</td>\n", " <td>2007</td>\n", " <td>4500.0</td>\n", " </tr>\n", " <tr>\n", " <th>395</th>\n", " <td>CHEVROLET</td>\n", " <td>GMT-400</td>\n", " <td>2008</td>\n", " <td>4500.0</td>\n", " </tr>\n", " <tr>\n", " <th>396</th>\n", " <td>CHEVROLET</td>\n", " <td>GMT-400</td>\n", " <td>2009</td>\n", " <td>4500.0</td>\n", " </tr>\n", " <tr>\n", " <th>397</th>\n", " <td>CHEVROLET</td>\n", " <td>GMT-400</td>\n", " <td>2010</td>\n", " <td>4500.0</td>\n", " </tr>\n", " <tr>\n", " <th>398</th>\n", " <td>CHEVROLET</td>\n", " <td>GMT-400</td>\n", " <td>2011</td>\n", " <td>4500.0</td>\n", " </tr>\n", " <tr>\n", " <th>399</th>\n", " <td>CHEVROLET</td>\n", " <td>GMT-400</td>\n", " <td>2012</td>\n", " <td>4500.0</td>\n", " </tr>\n", " <tr>\n", " <th>400</th>\n", " <td>CHEVROLET</td>\n", " <td>GMT-400</td>\n", " <td>2013</td>\n", " <td>4500.0</td>\n", " </tr>\n", " <tr>\n", " <th>401</th>\n", " <td>CHEVROLET</td>\n", " <td>GMT-400</td>\n", " <td>2014</td>\n", " <td>4500.0</td>\n", " </tr>\n", " <tr>\n", " <th>402</th>\n", " <td>CHEVROLET</td>\n", " <td>GMT-400</td>\n", " <td>2015</td>\n", " <td>4500.0</td>\n", " </tr>\n", " <tr>\n", " <th>403</th>\n", " <td>CHEVROLET</td>\n", " <td>GMT-400</td>\n", " <td>2016</td>\n", " <td>4500.0</td>\n", " </tr>\n", " <tr>\n", " <th>404</th>\n", " <td>CHEVROLET</td>\n", " <td>GMT-400</td>\n", " <td>2017</td>\n", " <td>4500.0</td>\n", " </tr>\n", " <tr>\n", " <th>405</th>\n", " <td>CHEVROLET</td>\n", " <td>GMT-400</td>\n", " <td>2018</td>\n", " <td>4500.0</td>\n", " </tr>\n", " <tr>\n", " <th>406</th>\n", " <td>CHEVROLET</td>\n", " <td>GMT-400</td>\n", " <td>2019</td>\n", " <td>4500.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>407 rows \u00d7 4 columns</p>\n", "</div>" ], "text/plain": [ " Make Model ModelYear weight\n", "0 CHEVROLET SILVERADO 1983 4200.0\n", "1 CHEVROLET SILVERADO 1984 4200.0\n", "2 CHEVROLET SILVERADO 1985 4200.0\n", "3 CHEVROLET SILVERADO 1986 4200.0\n", "4 CHEVROLET SILVERADO 1987 4200.0\n", "5 CHEVROLET SILVERADO 1988 4200.0\n", "6 CHEVROLET SILVERADO 1989 4200.0\n", "7 CHEVROLET SILVERADO 1990 4200.0\n", "8 CHEVROLET SILVERADO 1991 4200.0\n", "9 CHEVROLET SILVERADO 1992 4200.0\n", "10 CHEVROLET SILVERADO 1993 4200.0\n", "11 CHEVROLET SILVERADO 1994 4200.0\n", "12 CHEVROLET SILVERADO 1995 4200.0\n", "13 CHEVROLET SILVERADO 1996 4200.0\n", "14 CHEVROLET SILVERADO 1997 4200.0\n", "15 CHEVROLET SILVERADO 1998 4200.0\n", "16 CHEVROLET SILVERADO 1999 4200.0\n", "17 CHEVROLET SILVERADO 2000 4280.0\n", "18 CHEVROLET SILVERADO 2001 4360.0\n", "19 CHEVROLET SILVERADO 2002 4440.0\n", "20 CHEVROLET SILVERADO 2003 4520.0\n", "21 CHEVROLET SILVERADO 2004 4600.0\n", "22 CHEVROLET SILVERADO 2005 4700.0\n", "23 CHEVROLET SILVERADO 2006 4800.0\n", "24 CHEVROLET SILVERADO 2007 4900.0\n", "25 CHEVROLET SILVERADO 2008 5000.0\n", "26 CHEVROLET SILVERADO 2009 5000.0\n", "27 CHEVROLET SILVERADO 2010 5000.0\n", "28 CHEVROLET SILVERADO 2011 5000.0\n", "29 CHEVROLET SILVERADO 2012 5000.0\n", ".. ... ... ... ...\n", "377 CHEVROLET GMT-400 1990 4340.0\n", "378 CHEVROLET GMT-400 1991 4360.0\n", "379 CHEVROLET GMT-400 1992 4380.0\n", "380 CHEVROLET GMT-400 1993 4400.0\n", "381 CHEVROLET GMT-400 1994 4420.0\n", "382 CHEVROLET GMT-400 1995 4440.0\n", "383 CHEVROLET GMT-400 1996 4460.0\n", "384 CHEVROLET GMT-400 1997 4480.0\n", "385 CHEVROLET GMT-400 1998 4500.0\n", "386 CHEVROLET GMT-400 1999 4500.0\n", "387 CHEVROLET GMT-400 2000 4500.0\n", "388 CHEVROLET GMT-400 2001 4500.0\n", "389 CHEVROLET GMT-400 2002 4500.0\n", "390 CHEVROLET GMT-400 2003 4500.0\n", "391 CHEVROLET GMT-400 2004 4500.0\n", "392 CHEVROLET GMT-400 2005 4500.0\n", "393 CHEVROLET GMT-400 2006 4500.0\n", "394 CHEVROLET GMT-400 2007 4500.0\n", "395 CHEVROLET GMT-400 2008 4500.0\n", "396 CHEVROLET GMT-400 2009 4500.0\n", "397 CHEVROLET GMT-400 2010 4500.0\n", "398 CHEVROLET GMT-400 2011 4500.0\n", "399 CHEVROLET GMT-400 2012 4500.0\n", "400 CHEVROLET GMT-400 2013 4500.0\n", "401 CHEVROLET GMT-400 2014 4500.0\n", "402 CHEVROLET GMT-400 2015 4500.0\n", "403 CHEVROLET GMT-400 2016 4500.0\n", "404 CHEVROLET GMT-400 2017 4500.0\n", "405 CHEVROLET GMT-400 2018 4500.0\n", "406 CHEVROLET GMT-400 2019 4500.0\n", "\n", "[407 rows x 4 columns]" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.8" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }