import folium
import pandas
import numpy as np
import seaborn as sns
import sklearn
import matplotlib.pyplot as plt 
from sklearn.preprocessing import RobustScaler
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier


# Load the census export from the CSV file in the working directory
df = pandas.read_csv("2018_Central_Park_Squirrel_Census_-_Squirrel_Data.csv")

# Bare expression: shows the first 50 rows when the cell is run interactively
df.head(50)

# Columns that are removed before any visualization or modelling below
dropped_columns = [
    'Unique Squirrel ID',
    'Indifferent',
    'Eating',
    'Above Ground Sighter Measurement',
    'Hectare',
    'Hectare Squirrel Number',
    'Combination of Primary and Highlight Color',
    'Color notes',
    'Specific Location',
    'Other Activities',
    'Other Interactions',
    'Lat/Long',
]
df = df.drop(columns = dropped_columns)

# Random 100-row subset used by the map cells; no seed is passed, so the
# selected rows differ from run to run
sample_df = df.sample(n = 100)

sample_df


# Empty base map centred on the fixed coordinates below at zoom level 14;
# the bare expression on the next line displays it when run interactively
map_osm = folium.Map(location = [40.785091, -73.968285], zoom_start = 14)
map_osm


# Distinct age labels present in the sample (missing values become 'nan')
all_ages = np.unique(sample_df["Age"].astype(str))

print(all_ages)

# Readable display name for every raw age label
age_dict = {'?': 'Unknown', 'Adult': 'Adult', 'Juvenile': 'Baby', 'nan': 'Undefined'}

# Marker icon and marker color, keyed by the display name
age_icon = {'Unknown': 'question', 'Adult': 'play', 'Baby': 'circle', 'Undefined': 'times'}
age_color = {'Unknown': 'red', 'Adult': 'blue', 'Baby': 'green', 'Undefined': 'purple'}

# Map that receives one marker per sampled sighting
map_osm_1 = folium.Map(location = [40.782511195076395, -73.9639744005647], zoom_start = 13.5)

# Traverse through the data sample
for idx, row in sample_df.iterrows():

    # Display name of the age and the [Y, X] position of this sighting
    age = age_dict[str(row['Age'])]
    location = [row['Y'], row['X']]

    # Popup text summarising the sighting
    description = ', '.join([
        'Age: ' + str(age),
        'Date: ' + str(row['Date']),
        'Fur Color: ' + str(row['Primary Fur Color']),
        'Location: ' + str(row['Location']),
        'Coordinates: ' + str(location),
    ])

    # Icon shape and color both depend on the age group
    marker_icon = folium.Icon(color = age_color[age], icon = age_icon[age], prefix = 'fa')
    folium.Marker(location = location, popup = description, icon = marker_icon).add_to(map_osm_1)

['Adult' 'Juvenile' 'nan']


map_osm_1


# Distinct primary fur colors present in the sample (missing values become 'nan')
all_fur_color = np.unique(sample_df["Primary Fur Color"].astype(str))

print(all_fur_color)

# Readable display name for every raw fur color label
fur_color_dict = {'Black': 'Black', 'Cinnamon': 'Cinnamon', 'Gray': 'Gray', 'nan': 'Undefined'}

# Marker color, keyed by the display name
fur_map_color = {'Black': 'black', 'Cinnamon': 'beige', 'Gray': 'lightgray', 'Undefined': 'white'}

# Map that receives one marker per sampled sighting
map_osm_2 = folium.Map(location = [40.782511195076395, -73.9639744005647], zoom_start = 13.5)

# Traverse through the data sample
for idx, row in sample_df.iterrows():

    # Display name of the fur color and the [Y, X] position of this sighting
    fur_color = fur_color_dict[str(row['Primary Fur Color'])]
    location = [row['Y'], row['X']]

    # Popup text summarising the sighting
    description = ', '.join([
        'Age: ' + str(row['Age']),
        'Date: ' + str(row['Date']),
        'Fur Color: ' + str(fur_color),
        'Location: ' + str(row['Location']),
        'Coordinates: ' + str(location),
    ])

    # Only the marker color depends on the fur color; no icon name is passed
    marker_icon = folium.Icon(color = fur_map_color[fur_color], prefix = 'fa')
    folium.Marker(location = location, popup = description, icon = marker_icon).add_to(map_osm_2)

['Black' 'Cinnamon' 'Gray' 'nan']


map_osm_2


# Distinct shift labels present in the sample
all_times = np.unique(sample_df["Shift"].astype(str))

print(all_times)

# Readable display name for every raw shift label
time_dict = {'AM': 'Morning', 'PM': 'Night'}

# Marker color and marker icon, keyed by the display name
time_color = {'Morning': 'orange', 'Night': 'cadetblue'}
time_icon = {'Morning': 'cloud', 'Night': 'star'}

# Map that receives one marker per sampled sighting
map_osm_3 = folium.Map(location = [40.782511195076395, -73.9639744005647], zoom_start = 13.5)

# Traverse through the data sample
for idx, row in sample_df.iterrows():

    # Display name of the shift and the [Y, X] position of this sighting
    time = time_dict[str(row['Shift'])]
    location = [row['Y'], row['X']]

    # Popup text summarising the sighting, including the sighting time
    description = ', '.join([
        'Age: ' + str(row['Age']),
        'Date: ' + str(row['Date']),
        'Fur Color: ' + str(row['Primary Fur Color']),
        'Location: ' + str(row['Location']),
        'Coordinates: ' + str(location),
        'Sighting Time: ' + time,
    ])

    # Icon shape and color both depend on the sighting time
    marker_icon = folium.Icon(color = time_color[time], icon = time_icon[time], prefix = 'fas fa')
    folium.Marker(location = location, popup = description, icon = marker_icon).add_to(map_osm_3)

['AM' 'PM']


map_osm_3


# One count plot per categorical column:
# (column, x-axis label, y-axis label, figure title)
count_plots = [
    ("Age", 'Age Groups', 'Age Count',
     'Total Number of Individual Squirrel Ages'),
    ("Primary Fur Color", 'Primary Fur Colors', 'Primary Fur Colors Count',
     'Total Number of Individual Squirrel Primary Fur Colors'),
    ("Shift", 'Sighting Times', 'Sighting Times Count',
     'Total Number of Individual Squirrel Sighting Times'),
]

for column, x_label, y_label, title in count_plots:
    # Same theme and palette for every figure; each one is shown before the next is drawn
    sns.set_theme(style = "darkgrid")
    sns.catplot(x = column, kind = "count", palette = "ch:.25", data = df)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(title)
    plt.show()


# Pairwise correlation of the numeric and boolean columns only.
# df still contains string columns (e.g. 'Shift', 'Age', 'Primary Fur Color');
# selecting the numeric/boolean columns explicitly keeps the call working on
# pandas versions where DataFrame.corr() no longer skips non-numeric columns.
numeric_df = df.select_dtypes(include = ['number', 'bool'])

# Computed once and reused for both the table and the heatmap
corr_matrix = numeric_df.corr()
corr_matrix


sns.heatmap(corr_matrix, cmap = 'Reds')

<AxesSubplot:>


# Sightings per age group, split by whether the squirrel ran from humans
sns.set_theme(style = "darkgrid")
sns.countplot(x = "Age", hue = 'Runs from', data = df)
plt.xlabel('Age Group')
plt.ylabel('Age Count')
plt.title('Total Number of Individual Squirrel Ages with a Running Indicator')
plt.show()


# Row masks for the two age groups and for the 'Runs from' flag
is_adult = df['Age'] == 'Adult'
is_baby = df['Age'] == 'Juvenile'
ran_away = df['Runs from'] == True

# Share of Adult Squirrels that were seen running away from humans
total_adults = int(is_adult.sum())
total_running_adults = int((is_adult & ran_away).sum())
running_adults_percentage = total_running_adults / total_adults

# Share of Baby Squirrels that were seen running away from humans
total_babies = int(is_baby.sum())
total_running_babies = int((is_baby & ran_away).sum())
running_babies_percentage = total_running_babies / total_babies

report = ("Percentage of Adult Squirrels that were seen running away from humans: {:.0%} \n"
          "Percentage of Baby Squirrels that were seen running away from humans: {:.0%} ")
print(report.format(running_adults_percentage, running_babies_percentage))

Percentage of Adult Squirrels that were seen running away from humans: 22% 
Percentage of Baby Squirrels that were seen running away from humans: 25%


# Sightings per age group, split by whether the squirrel approached a human
sns.set_theme(style = "darkgrid")
sns.countplot(x = "Age", hue = 'Approaches', data = df)
plt.xlabel('Age Group')
plt.ylabel('Age Count')
plt.title('Total Number of Individual Squirrel Ages with a Approaches Indicator')
plt.show()


# Row masks for the two age groups and for the 'Approaches' flag
is_adult = df['Age'] == 'Adult'
is_baby = df['Age'] == 'Juvenile'
approached = df['Approaches'] == True

# Share of Adult Squirrels that were seen approaching a human
total_adults = int(is_adult.sum())
total_approach_adults = int((is_adult & approached).sum())
approach_adults_percentage = total_approach_adults / total_adults

# Share of Baby Squirrels that were seen approaching a human
total_babies = int(is_baby.sum())
total_approach_babies = int((is_baby & approached).sum())
approach_babies_percentage = total_approach_babies / total_babies

report = ("Percentage of Adult Squirrels that were seen approaching a human: {:.0%} \n"
          "Percentage of Baby Squirrels that were seen approaching a human: {:.0%} ")
print(report.format(approach_adults_percentage, approach_babies_percentage))

Percentage of Adult Squirrels that were seen approaching a human: 6% 
Percentage of Baby Squirrels that were seen approaching a human: 7%


# Sightings per age group, split by whether the squirrel was foraging
sns.set_theme(style = "darkgrid")
sns.countplot(x = "Age", hue = 'Foraging', data = df)
plt.xlabel('Age Group')
plt.ylabel('Age Count')
plt.title('Total Number of Individual Squirrel Ages with a Foraging Indicator')
plt.show()


# Row masks for the two age groups and for the 'Foraging' flag
is_adult = df['Age'] == 'Adult'
is_baby = df['Age'] == 'Juvenile'
was_foraging = df['Foraging'] == True

# The foraging results get their own names so that they are labelled
# correctly and do not overwrite the approach_* values computed for the
# 'Approaches' column earlier in this file.

# Share of Adult Squirrels that were seen foraging
total_adults = int(is_adult.sum())
total_foraging_adults = int((is_adult & was_foraging).sum())
foraging_adults_percentage = total_foraging_adults / total_adults

# Share of Baby Squirrels that were seen foraging
total_babies = int(is_baby.sum())
total_foraging_babies = int((is_baby & was_foraging).sum())
foraging_babies_percentage = total_foraging_babies / total_babies

print("Percentage of Adult Squirrels that were seen foraging: {:.0%} \nPercentage of Baby Squirrels that were seen foraging: {:.0%} "
      .format(foraging_adults_percentage, foraging_babies_percentage))

Percentage of Adult Squirrels that were seen foraging: 50% 
Percentage of Baby Squirrels that were seen foraging: 36%


# Transform non-numerical labels to numerical labels
new_df = pandas.DataFrame()
qualitative_data = ['Age', 'Primary Fur Color', 'Shift']

# One fitted encoder per column, kept so that the integer codes can be mapped
# back to their labels later (a single shared encoder only remembers the last
# column it was fitted on)
label_encoders = {}

for col in qualitative_data:
    le = preprocessing.LabelEncoder()

    # Cast to str first: missing values become the explicit label 'nan'
    # instead of a float NaN mixed in with strings, which LabelEncoder does
    # not accept on every scikit-learn version. Missing values therefore get
    # their own class code.
    new_df[col] = le.fit_transform(df[col].astype(str))
    label_encoders[col] = le

# Standardize data
# NOTE(review): 'Date' appears to be an MMDDYYYY integer (e.g. 10142018) and
# is scaled here as a plain number - confirm that this is intended.
# NOTE(review): the scaler is fitted on all rows, before the train/test split
# performed later in this file, so the held-out rows influence the scaling
# statistics - consider fitting on the training rows only.
quantitative_data = ['X', 'Y', 'Date']
rc = RobustScaler()

new_df[quantitative_data] = rc.fit_transform(df[quantitative_data])


# Target: the encoded primary fur color; features: every other column of new_df
target_column = 'Primary Fur Color'
y = new_df[target_column]
X = new_df.drop(columns = [target_column])


# Split the data: 30% of the rows are held out for testing, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 17)


def get_model_accuracy(X, y, model):
    """Return the value of ``model.score(X, y)``.

    X and y are passed to the model's ``score`` method unchanged, in that
    order; the result is returned as-is.
    """
    score = model.score(X, y)
    return score

def get_model_rmse(y, y_model):
    """Return the square root of ``mean_squared_error(y, y_model)``.

    NOTE(review): the callers in this file pass label-encoded class ids,
    whose numeric distances are arbitrary - confirm that RMSE is the
    intended metric for them.
    """
    mse = mean_squared_error(y, y_model)
    return np.sqrt(mse)


# Only keep the scores of the best performing model.
# Start below any possible accuracy so that the first candidate is always
# recorded and best_scores can never be left empty.
best_neigh_scores_model_accuracy = -1.0

# [train_score, test_score, train_rmse, test_rmse]
best_scores = []

# Number of neighbors that produced the best test accuracy
best_n_neighbors = None

# Create a range of neighbors (1 to 9 inclusive)
n_neighbors = np.arange(1, 10)

# Choosing Hyper-parameters
# NOTE(review): the winner is picked by its accuracy on the test split, so the
# reported test accuracy is optimistic - consider selecting on a validation
# split or with cross-validation instead.
for n_neighbors_ in n_neighbors:

    # Create a K-Nearest Neighbors Classifier model
    neigh = KNeighborsClassifier(n_neighbors = n_neighbors_)

    # Fit the K-Nearest Neighbors Classifier on the training dataset
    neigh.fit(X_train, y_train)

    # Predict the target for the test and training data
    y_test_neigh = neigh.predict(X_test)
    y_train_neigh = neigh.predict(X_train)

    # Accuracy as reported by the classifier's score method
    score_train_neigh = get_model_accuracy(X_train, y_train, neigh)
    score_test_neigh = get_model_accuracy(X_test, y_test, neigh)

    # Root mean squared error between the true and predicted class codes
    rmse_train_neigh = get_model_rmse(y_train, y_train_neigh)
    rmse_test_neigh = get_model_rmse(y_test, y_test_neigh)

    # Compare for best test scores
    if score_test_neigh > best_neigh_scores_model_accuracy:

        best_neigh_scores_model_accuracy = score_test_neigh
        best_n_neighbors = n_neighbors_
        best_scores = [score_train_neigh, score_test_neigh, rmse_train_neigh, rmse_test_neigh]

# Print Results
# Accuracies are fractions and are shown as percentages; RMSE is not a
# fraction of anything, so it is shown as a plain number.
description = (
    "K-Nearest Neighbors Classifier Results:\nTrain Data Accuracy: {:.2%}"
    "\nTest Data Accuracy: {:.2%}"
    "\nTrain Data RMSE: {:.4f}"
    "\nTest Data RMSE: {:.4f}"
    "\nBest n_neighbors: {}"
)

print(description.format(*best_scores, best_n_neighbors))

K-Nearest Neighbors Classifier Results:
Train Data Accuracy: 83.95%
Test Data Accuracy: 79.86%
Train Data RMSE: 50.97%
Test Data RMSE: 55.37%


# Only keep the scores of the best performing model.
# Start below any possible accuracy so that the first candidate is always
# recorded and best_scores can never be left empty.
best_clf_scores_model_accuracy = -1.0

# [train_score, test_score, train_rmse, test_rmse]
best_scores = []

# Maximum tree depth that produced the best test accuracy
best_max_depth = None

# Create a range of max-depths (1 to 9 inclusive)
max_depth = np.arange(1, 10)

# Choosing Hyper-parameters
# NOTE(review): the winner is picked by its accuracy on the test split, so the
# reported test accuracy is optimistic - consider selecting on a validation
# split or with cross-validation instead.
for max_depth_ in max_depth:

    # Create a Decision Tree Classifier model (fixed seed for repeatable trees)
    clf = DecisionTreeClassifier(max_depth = max_depth_, random_state = 0)

    # Fit the Decision Tree Classifier on the training dataset
    clf.fit(X_train, y_train)

    # Predict the target for the test and training data
    y_test_clf = clf.predict(X_test)
    y_train_clf = clf.predict(X_train)

    # Accuracy as reported by the classifier's score method
    score_train_clf = get_model_accuracy(X_train, y_train, clf)
    score_test_clf = get_model_accuracy(X_test, y_test, clf)

    # Root mean squared error between the true and predicted class codes
    rmse_train_clf = get_model_rmse(y_train, y_train_clf)
    rmse_test_clf = get_model_rmse(y_test, y_test_clf)

    # Compare for best test scores
    if score_test_clf > best_clf_scores_model_accuracy:

        best_clf_scores_model_accuracy = score_test_clf
        best_max_depth = max_depth_
        best_scores = [score_train_clf, score_test_clf, rmse_train_clf, rmse_test_clf]

# Print Results
# Accuracies are fractions and are shown as percentages; RMSE is not a
# fraction of anything, so it is shown as a plain number.
description = (
    "Decision Tree Classifier Results:\nTrain Data Accuracy: {:.2%}"
    "\nTest Data Accuracy: {:.2%}"
    "\nTrain Data RMSE: {:.4f}"
    "\nTest Data RMSE: {:.4f}"
    "\nBest max_depth: {}"
)

print(description.format(*best_scores, best_max_depth))

Decision Tree Classifier Results:
Train Data Accuracy: 82.47%
Test Data Accuracy: 80.46%
Train Data RMSE: 52.96%
Test Data RMSE: 54.00%

	X	Y	Unique Squirrel ID	Hectare	Shift	Date	Hectare Squirrel Number	Age	Primary Fur Color	Highlight Fur Color	...	Kuks	Quaas	Moans	Tail flags	Tail twitches	Approaches	Indifferent	Runs from	Other Interactions	Lat/Long
0	-73.956134	40.794082	37F-PM-1014-03	37F	PM	10142018	3	NaN	NaN	NaN	...	False	False	False	False	False	False	False	False	NaN	POINT (-73.9561344937861 40.7940823884086)
1	-73.968857	40.783783	21B-AM-1019-04	21B	AM	10192018	4	NaN	NaN	NaN	...	False	False	False	False	False	False	False	False	NaN	POINT (-73.9688574691102 40.7837825208444)
2	-73.974281	40.775534	11B-PM-1014-08	11B	PM	10142018	8	NaN	Gray	NaN	...	False	False	False	False	False	False	False	False	NaN	POINT (-73.97428114848522 40.775533619083)
3	-73.959641	40.790313	32E-PM-1017-14	32E	PM	10172018	14	Adult	Gray	NaN	...	False	False	False	False	False	False	False	True	NaN	POINT (-73.9596413903948 40.7903128889029)
4	-73.970268	40.776213	13E-AM-1017-05	13E	AM	10172018	5	Adult	Gray	Cinnamon	...	False	False	False	False	False	False	False	False	NaN	POINT (-73.9702676472613 40.7762126854894)
5	-73.968361	40.772591	11H-AM-1010-03	11H	AM	10102018	3	Adult	Cinnamon	White	...	False	False	False	False	True	False	True	False	NaN	POINT (-73.9683613516225 40.7725908847499)
6	-73.954120	40.793181	36H-AM-1010-02	36H	AM	10102018	2	Adult	Gray	NaN	...	False	False	False	False	False	False	False	False	NaN	POINT (-73.9541201789795 40.7931811701082)
7	-73.958269	40.791737	33F-AM-1008-02	33F	AM	10082018	2	Adult	Gray	NaN	...	False	False	False	False	False	False	True	False	NaN	POINT (-73.9582694312289 40.7917367820255)
8	-73.967429	40.782972	21C-PM-1006-01	21C	PM	10062018	1	Adult	Gray	NaN	...	False	False	False	True	True	False	False	False	NaN	POINT (-73.9674285955293 40.7829723919744)
9	-73.972250	40.774288	11D-AM-1010-03	11D	AM	10102018	3	Adult	Gray	Cinnamon	...	False	False	False	False	False	False	True	False	NaN	POINT (-73.9722500196844 40.7742879599026)
10	-73.969506	40.782351	20B-PM-1013-05	20B	PM	10132018	5	Adult	Gray	White	...	False	False	False	False	False	False	True	False	NaN	POINT (-73.9695063535333 40.7823507678183)
11	-73.964003	40.782031	22F-PM-1014-06	22F	PM	10142018	6	Adult	Gray	NaN	...	False	False	False	False	True	False	True	False	NaN	POINT (-73.9640032826529 40.7820309825448)
12	-73.953217	40.791967	36I-PM-1007-01	36I	PM	10072018	1	Adult	Gray	Cinnamon	...	False	False	False	False	False	False	True	False	NaN	POINT (-73.9532170504865 40.7919669739962)
13	-73.976860	40.770280	5C-PM-1010-09	05C	PM	10102018	9	Adult	Cinnamon	Gray	...	False	False	False	False	False	False	False	False	NaN	POINT (-73.9768603630674 40.7702795904962)
14	-73.970611	40.769812	7H-AM-1006-05	07H	AM	10062018	5	Adult	Gray	Cinnamon	...	False	False	False	False	False	False	True	False	NaN	POINT (-73.9706105896967 40.7698124821507)
15	-73.970378	40.778753	16C-PM-1018-03	16C	PM	10182018	3	Adult	Gray	Cinnamon, White	...	False	False	False	False	False	False	True	False	NaN	POINT (-73.9703781726172 40.7787526130321)
16	-73.970393	40.776503	14E-AM-1008-23	14E	AM	10082018	23	Adult	Gray	NaN	...	False	False	False	False	False	False	True	False	NaN	POINT (-73.9703925210471 40.7765032004992)
17	-73.963818	40.792417	32A-PM-1013-03	32A	PM	10132018	3	Juvenile	Gray	Cinnamon	...	False	False	False	False	False	False	True	False	NaN	POINT (-73.9638179439747 40.7924173263904)
18	-73.958407	40.791381	33F-AM-1008-01	33F	AM	10082018	1	Adult	Gray	NaN	...	False	False	False	False	False	False	True	False	NaN	POINT (-73.9584070974734 40.7913812490557)
19	-73.967113	40.778486	17F-AM-1007-07	17F	AM	10072018	7	Adult	Gray	White	...	False	False	False	True	False	False	True	False	NaN	POINT (-73.9671130680114 40.7784859700171)
20	-73.964987	40.776493	16I-AM-1008-01	16I	AM	10082018	1	Adult	Cinnamon	Gray, White	...	False	False	False	False	True	False	True	False	NaN	POINT (-73.9649866016038 40.7764929694457)
21	-73.967063	40.773499	12I-AM-1013-01	12I	AM	10132018	1	Adult	Cinnamon	White	...	False	False	False	False	True	False	False	True	NaN	POINT (-73.9670628558161 40.77349914209411)
22	-73.958737	40.790852	32F-PM-1008-07	32F	PM	10082018	7	Adult	Gray	NaN	...	False	False	False	False	False	False	True	False	NaN	POINT (-73.958736593111 40.7908524370626)
23	-73.967179	40.786735	25A-AM-1013-04	25A	AM	10132018	4	NaN	Gray	White	...	False	False	False	False	False	False	False	True	NaN	POINT (-73.9671786715256 40.7867352791232)
24	-73.969294	40.776954	15E-AM-1013-01	15E	AM	10132018	1	Adult	Gray	White	...	False	False	False	False	False	False	False	False	NaN	POINT (-73.9692943291611 40.7769536102488)
25	-73.954005	40.795245	39G-PM-1013-06	39G	PM	10132018	6	Adult	Gray	Cinnamon	...	False	False	False	False	False	False	False	False	NaN	POINT (-73.9540054100539 40.7952445999836)
26	-73.957653	40.786866	29I-PM-1007-01	29I	PM	10072018	1	NaN	Gray	Cinnamon	...	False	False	False	False	False	False	False	False	NaN	POINT (-73.9576530157255 40.7868659490397)
27	-73.973803	40.771185	7E-AM-1006-02	07E	AM	10062018	2	Adult	Gray	NaN	...	False	False	False	False	False	False	False	False	NaN	POINT (-73.9738033257473 40.7711846894166)
28	-73.970440	40.779897	17C-PM-1013-05	17C	PM	10132018	5	Adult	Gray	NaN	...	False	False	False	False	False	False	True	False	NaN	POINT (-73.9704402681017 40.77989656212031)
29	-73.975698	40.775390	10A-PM-1014-01	10A	PM	10142018	1	Adult	Gray	Black, Cinnamon, White	...	False	False	False	False	True	False	True	False	NaN	POINT (-73.9756976076707 40.7753897331918)
30	-73.966244	40.789038	28A-PM-1007-05	28A	PM	10072018	5	Adult	Cinnamon	NaN	...	False	False	False	False	True	False	False	True	NaN	POINT (-73.9662438996681 40.7890381809441)
31	-73.964315	40.782082	22F-PM-1014-05	22F	PM	10142018	5	Adult	Gray	Cinnamon	...	False	False	False	False	False	False	False	False	NaN	POINT (-73.9643153920428 40.7820818516957)
32	-73.974207	40.775796	12B-AM-1007-01	12B	AM	10072018	1	Adult	Gray	Cinnamon	...	False	False	False	True	False	False	True	False	NaN	POINT (-73.9742073452913 40.7757957380463)
33	-73.971615	40.781391	18A-PM-1018-01	18A	PM	10182018	1	Adult	Gray	NaN	...	False	False	False	False	False	False	False	False	NaN	POINT (-73.9716147061553 40.7813911036179)
34	-73.956570	40.790256	33H-AM-1019-02	33H	AM	10192018	2	Juvenile	Gray	Cinnamon	...	False	False	False	False	False	False	False	False	NaN	POINT (-73.9565700386162 40.7902561000937)
35	-73.953541	40.792871	36H-PM-1008-02	36H	PM	10082018	2	Adult	Gray	NaN	...	False	False	False	False	False	False	True	False	NaN	POINT (-73.9535410657077 40.79287117600611)
36	-73.963916	40.789177	29C-AM-1007-06	29C	AM	10072018	6	Adult	Gray	Cinnamon	...	False	False	False	False	False	False	False	True	NaN	POINT (-73.9639159298446 40.7891774637988)
37	-73.957956	40.795934	38C-PM-1014-09	38C	PM	10142018	9	Adult	Black	NaN	...	False	False	False	False	False	True	False	False	NaN	POINT (-73.9579564338627 40.7959337795027)
38	-73.957465	40.789251	31H-PM-1008-02	31H	PM	10082018	2	Juvenile	Gray	Cinnamon	...	False	False	False	False	True	False	True	False	NaN	POINT (-73.9574648097543 40.78925084286221)
39	-73.970253	40.770600	8H-AM-1017-06	08H	AM	10172018	6	Adult	Gray	Cinnamon	...	False	False	False	False	False	False	True	False	NaN	POINT (-73.970252675331 40.77059995882221)
40	-73.957965	40.791861	34F-AM-1007-01	34F	AM	10072018	1	Adult	Gray	White	...	False	False	False	False	False	False	False	True	NaN	POINT (-73.9579645940414 40.7918613498407)
41	-73.967325	40.774951	13H-PM-1010-02	13H	PM	10102018	2	Adult	Gray	NaN	...	False	False	False	False	False	False	True	False	NaN	POINT (-73.9673253417305 40.7749505252981)
42	-73.959593	40.790585	32E-AM-1007-13	32E	AM	10072018	13	Adult	Gray	NaN	...	False	False	False	False	False	False	False	False	NaN	POINT (-73.9595933195409 40.7905847332369)
43	-73.960222	40.793780	35C-AM-1007-05	35C	AM	10072018	5	Adult	Gray	NaN	...	False	False	False	False	False	False	False	False	NaN	POINT (-73.9602222116708 40.793779699421)
44	-73.960261	40.794288	35C-PM-1013-03	35C	PM	10132018	3	NaN	Gray	Cinnamon	...	False	False	False	False	False	False	False	True	NaN	POINT (-73.9602609920814 40.79428830455661)
45	-73.968064	40.779250	17E-AM-1017-05	17E	AM	10172018	5	Adult	Black	NaN	...	False	False	False	False	False	False	True	False	NaN	POINT (-73.9680635070949 40.779249873647)
46	-73.980167	40.768691	2B-PM-1013-01	02B	PM	10132018	1	Adult	Gray	White	...	False	False	False	False	False	False	False	False	NaN	POINT (-73.9801666435401 40.7686905339212)
47	-73.967563	40.781348	19D-AM-1007-01	19D	AM	10072018	1	Juvenile	Gray	NaN	...	False	False	False	False	False	False	False	False	fenced off area can't approach	POINT (-73.9675634326877 40.7813477352507)
48	-73.958497	40.798289	40B-AM-1019-03	40B	AM	10192018	3	Juvenile	Gray	White	...	False	False	False	False	True	True	False	False	NaN	POINT (-73.9584970643213 40.7982886348696)
49	-73.976030	40.768195	3E-PM-1008-07	03E	PM	10082018	7	Adult	Gray	Cinnamon	...	False	False	False	False	False	False	True	False	NaN	POINT (-73.9760298241178 40.7681954366911)

	X	Y	Shift	Date	Age	Primary Fur Color	Highlight Fur Color	Location	Running	Chasing	Climbing	Foraging	Kuks	Quaas	Moans	Tail flags	Tail twitches	Approaches	Runs from
2731	-73.969419	40.778719	AM	10072018	Adult	Cinnamon	White	Ground Plane	True	False	False	True	False	False	False	False	False	False	False
359	-73.961482	40.791293	PM	10132018	Adult	Gray	NaN	Ground Plane	False	True	False	False	False	False	False	False	True	False	False
1143	-73.961145	40.791100	AM	10132018	Adult	Cinnamon	Gray	Above Ground	False	False	True	False	False	False	False	False	False	False	False
1545	-73.966921	40.779277	PM	10082018	Adult	Gray	White	Ground Plane	False	False	False	False	False	False	False	False	True	False	False
2611	-73.971167	40.774798	PM	10182018	Adult	Cinnamon	NaN	Ground Plane	False	False	False	True	False	False	False	False	True	False	False
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
2588	-73.969417	40.769779	PM	10132018	Adult	Gray	Cinnamon, White	Ground Plane	False	False	False	True	False	False	False	False	False	False	False
1114	-73.972985	40.774071	AM	10082018	Adult	Gray	White	Above Ground	False	False	True	False	False	False	False	False	False	False	False
280	-73.970158	40.773097	PM	10172018	Adult	Gray	Cinnamon	Ground Plane	False	False	False	True	False	False	False	False	False	False	False
2061	-73.954476	40.796965	PM	10062018	Adult	Gray	Cinnamon	Ground Plane	False	False	False	False	False	False	False	False	False	False	False
218	-73.973709	40.771373	AM	10062018	Adult	Gray	NaN	Ground Plane	False	False	False	False	False	False	False	False	False	False	False

	X	Y	Date	Running	Chasing	Climbing	Foraging	Kuks	Quaas	Moans	Tail flags	Tail twitches	Approaches	Runs from
X	1.000000	0.906838	0.104391	0.034092	-0.037457	0.011311	-0.006438	0.057685	0.057433	0.027252	0.003152	-0.008117	-0.038302	0.135509
Y	0.906838	1.000000	0.161824	0.041225	-0.015951	0.005565	-0.005280	0.063737	0.059990	0.022720	0.002229	-0.006876	-0.038296	0.157491
Date	0.104391	0.161824	1.000000	0.037451	0.024388	-0.070891	0.013283	0.024082	0.018114	0.029096	-0.034538	-0.015369	0.025829	0.051473
Running	0.034092	0.041225	0.037451	1.000000	0.025707	-0.055995	-0.220607	-0.006982	-0.012570	-0.017783	0.019521	0.029089	-0.016364	0.167285
Chasing	-0.037457	-0.015951	0.024388	0.025707	1.000000	-0.038022	-0.179526	0.060677	0.093064	0.026248	0.045055	0.022637	-0.040919	-0.061855
Climbing	0.011311	0.005565	-0.070891	-0.055995	-0.038022	1.000000	-0.297542	0.083457	0.019590	0.034294	-0.009952	-0.021641	-0.039993	0.048860
Foraging	-0.006438	-0.005280	0.013283	-0.220607	-0.179526	-0.297542	1.000000	-0.078581	-0.034980	-0.008922	-0.022759	0.058532	0.054887	-0.056925
Kuks	0.057685	0.063737	0.024082	-0.006982	0.060677	0.083457	-0.078581	1.000000	0.191233	-0.005890	0.064542	-0.003363	-0.015611	0.035680
Quaas	0.057433	0.059990	0.018114	-0.012570	0.093064	0.019590	-0.034980	0.191233	1.000000	0.078287	0.063930	-0.001319	-0.032438	-0.013767
Moans	0.027252	0.022720	0.029096	-0.017783	0.026248	0.034294	-0.008922	-0.005890	0.078287	1.000000	-0.007327	-0.012904	-0.007884	0.033428
Tail flags	0.003152	0.002229	-0.034538	0.019521	0.045055	-0.009952	-0.022759	0.064542	0.063930	-0.007327	1.000000	-0.009636	0.005564	0.054788
Tail twitches	-0.008117	-0.006876	-0.015369	0.029089	0.022637	-0.021641	0.058532	-0.003363	-0.001319	-0.012904	-0.009636	1.000000	0.037852	0.051256
Approaches	-0.038302	-0.038296	0.025829	-0.016364	-0.040919	-0.039993	0.054887	-0.015611	-0.032438	-0.007884	0.005564	0.037852	1.000000	-0.067117
Runs from	0.135509	0.157491	0.051473	0.167285	-0.061855	0.048860	-0.056925	0.035680	-0.013767	0.033428	0.054788	0.051256	-0.067117	1.000000

An Analysis of Central Park Squirrels in 2018¶

Tiffanie Choi¶

Outline¶

1. Introduction¶

2. Data Collection¶

3. Data Visualization¶

4. Data Analysis¶

5. Machine Learning¶

6. Conclusion¶

Introduction¶

1A. Project Overview¶

1B. Libraries¶

Data Collection¶

2A. Dataset Background Information¶

2B. Data Preparation: Load the Data¶

2B. Data Preparation: View the Data¶

2B. Data Preparation: Clean the Data¶

3. Data Visualization¶

3A. Visualize the area on a Map¶

3B. Visualize Squirrel Ages on a Map¶

3C. Visualize Squirrel Fur Colors on a Map¶

3D. Visualize Squirrel Sighting Times on a Map¶

4. Data Analysis¶

4A. Are there more adults or baby squirrels in Central Park, NY?¶

4B. What is the geographic spread of squirrel fur color in Central Park, NY?¶

4C. What is the most common time for squirrel sightings in Central Park, NY?¶

4D. Is there a correlation between a squirrel's age and their actions (and/or interactions with humans)?¶

Hypothesis 1: Older squirrels will be found running more than younger squirrels because they most likely see humans as a threat.¶

Hypothesis 2: Younger squirrels will approach humans more than older squirrels because they are seeking food from a source.¶

Hypothesis 3: Older squirrels will be found foraging more than younger squirrels because they are more independent in searching for food.¶

Machine Learning¶

5A. Training the Data¶

5B. Building the Model: K-Nearest Neighbors Classifier¶

5B. Building the Model: Decision Tree Classifier¶

6. Conclusion¶

6A. Key Takeaways¶

6B. Future Questions¶