Exploratory Data Analysis with Sweetviz and Model Prediction Using a Transportation Dataset

Import Necessary Packages

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

Load the training and testing data and generate a Sweetviz EDA report

# Locations of the train/test CSV files (both live under /content/drive/MyDrive).
train_csv_path = "/content/drive/MyDrive/ml_projects/7_transportation_dataset_on_time_late_prediction/train_data.csv"
test_csv_path = "/content/drive/MyDrive/ml_projects/7_transportation_dataset_on_time_late_prediction/test_data.csv"

# Read each split into its own DataFrame.
df_training = pd.read_csv(train_csv_path)
df_testing = pd.read_csv(test_csv_path)

# Exploratory data analysis is done with the sweetviz library.
import sweetviz as sv

# Only the training split is analyzed.
advert_report = sv.analyze(df_training)

# Render the analysis as an HTML report at the path below.
eda_report_path = '/content/drive/MyDrive/ml_projects/7_transportation_dataset_on_time_late_prediction/output.html'
advert_report.show_html(eda_report_path)

Checking the Shape of the Training and Testing Datasets

# Report (rows, columns) for the training split, then the testing split,
# one tuple per line.
print(df_training.shape, df_testing.shape, sep="\n")

output:

(9000, 61)
(1478, 61)

# Stack the training rows on top of the testing rows so both splits can be
# inspected and transformed together.
# ignore_index=True gives the combined frame a fresh 0..n-1 row index. Both
# inputs come straight from read_csv with their default 0-based index, so
# without it the row labels of the testing rows would repeat labels already
# used by the training rows, making label-based selection and index alignment
# on `df` ambiguous. The row order (training first, testing second) is unchanged.
df = pd.concat(objs=[df_training, df_testing], ignore_index=True)



Checking the Shape of the Combined Training and Testing Dataset

# Show the (rows, columns) shape of the combined train+test frame.
# This is a bare expression: its value is only displayed when it is the last
# statement of a notebook cell.
df.shape

output:

(10478, 61)

# Names of the four zip-code columns in the dataset.
zip_codes = [
    "ConsigneeZip",
    "ShipperZip",
    "DestZip",
    "OriginZip",
]
# Preview the first rows of the combined frame (DataFrame.head defaults to 5 rows).
df.head()

Output:


# Print a summary of the combined frame: column names, non-null counts and dtypes.
df.info()

Output:

















# Columns selected for removal because they have many missing values or are
# considered less important.
# NOTE(review): "target" is listed here as well; presumably it is the label
# column and is split off separately. Confirm against the code that uses this list.
features_to_be_dropped = [
    "RAD",
    "ShipperCountry",
    "ConsigneeCountry",
    "ConsigneeCity",
    "DetailCity",
    "DetailCodeDescription",
    "ShipperCity",
    "ActualDeliveryTime",
    "ActualShipTime",
    "AVTime",
    "target",
    "DestCity",
    "OriginCity",
    "Lane",
]

# Names of the date columns.
# NOTE(review): presumably these are parsed as dates further down; confirm
# against the code that consumes this list.
lst_of_dates = [
    "ActualShip",
    "AV",
    "Goal",
    "Goal2",
    "EST_AV",
    "DetailDate",
    "new_EST_AV",
    "Final_EST_AV",
    "CreateDate",
    "ActualDelivery",
    "TargetShip_Early",
    "DetailCreateDate",
    "AV_CD",
]

# Names of the zip-code columns.
zip_features = [
    "ConsigneeZip",
    "ShipperZip",
    "DestZip",
    "OriginZip",
]

# Names of the columns treated as categorical variables.
categorical_variables = [
    "ShipmentType",
    "CarrierMode",
    "OnTimeShip",
    "OnTimeDelivery",
    "Status",
    "ShipperState",
    "DestState",
    "AS_dow",
    "new_EST_AV_dow",
    "Final_EST_AV_dow",
    "av_dow",
    "OriginCtry",
    "OriginState",
    "DestCtry",
    "DetailCode",
    "DetailState",
]

# List the column labels of the combined frame, e.g. to check them against the
# column-name lists defined in this notebook.
df.columns

Output:

Index(['ActualShip', 'KEY_LOAD_TRACKING', 'CreateDate', 'ActualDelivery',
       'Carrier', 'ConsigneeCity', 'ConsigneeCountry', 'ConsigneeZip',
       'DetailCity', 'DetailState', 'DetailCode', 'DetailCodeDescription',
       'DetailCreateDate', 'DetailDate', 'ShipperCity', 'ShipperState',
       'ShipperZip', 'ShipperCountry', 'AV_CD', 'AV', 'PRO', 'DestCity',
       'DestState', 'DestCtry', 'DestZip', 'OriginCity', 'OriginState',
       'OriginCtry', 'OriginZip', 'Lane', 'ShipmentType', 'CarrierMode',</