In [24]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import pandas as pd
import warnings
warnings.filterwarnings("ignore")
import time
from matplotlib.animation import FuncAnimation

from matplotlib.animation import FFMpegWriter
# Tags passed to the movie writer below.
metadata = {'title': 'finalproject', 'artist': 'Emma Drake'}
# ffmpeg-backed writer configured for 15 frames per second.
# NOTE(review): nothing in the visible cells calls ani.save(writer=writer) — confirm it is still needed.
writer = FFMpegWriter(metadata=metadata, fps=15, bitrate=200000)

# Select the "osx" matplotlib backend so figures open in their own windows
# instead of inline in the notebook.
# NOTE(review): presumably macOS-only — confirm before running on another platform.
%matplotlib osx
# Ask the inline backend for 'retina' resolution output.
# NOTE(review): presumably has no effect while the osx backend is selected — confirm.
%config InlineBackend.figure_format = 'retina'

In [2]:
# Empty base map of the bay: coastline context only, no station data yet.

fig = plt.figure(figsize=(50, 30))
ax = plt.axes(projection=ccrs.PlateCarree())

# Bounding box as [west, east, south, north] in degrees.
extent = [-122.56, -121.64, 37.38, 38.32]
ax.set_extent(extent)

# Natural-feature layers, added in the same order as before.
for layer, layer_style in (
    (cfeature.OCEAN, {}),
    (cfeature.LAND, {"edgecolor": "black"}),
    (cfeature.LAKES, {"edgecolor": "black"}),
    (cfeature.RIVERS, {}),
):
    ax.add_feature(layer, **layer_style)

ax.gridlines()
plt.show()

In [3]:
# Load the water-quality workbook from the working directory, keeping only
# the columns needed below (selected by position in the sheet).

alldata = '3SanFranciscoBayWaterQualityData1969-2015v3.xlsx'
cols = [0, 1, 4, 11, 17, 18]
df = pd.read_excel(io=alldata, usecols=cols)
df.head(n=5)

Unnamed: 0,Date,Station_Number,Calculated_Chlorophyll,Temperature,latitude,longitude
0,1969-04-10,4.0,,13.1,,
1,1969-04-10,4.0,,13.1,,
2,1969-04-10,4.0,,13.0,,
3,1969-04-10,4.0,,13.0,,
4,1969-04-10,5.0,,14.1,,


In [4]:
# Calendar year of each sample, taken from the datetime accessor rather than
# by splitting the date's string form on "-" (which breaks on any value whose
# text does not start with "YYYY-", e.g. a missing date).
df['year'] = pd.to_datetime(df['Date']).dt.year
df.head()

Unnamed: 0,Date,Station_Number,Calculated_Chlorophyll,Temperature,latitude,longitude,year
0,1969-04-10,4.0,,13.1,,,1969
1,1969-04-10,4.0,,13.1,,,1969
2,1969-04-10,4.0,,13.0,,,1969
3,1969-04-10,4.0,,13.0,,,1969
4,1969-04-10,5.0,,14.1,,,1969


In [5]:
#assigning coordinates to the station numbers

df_coords = df.copy()

# Latitude (decimal degrees north) for each station number.
lats = {
    1.0: 38.04,
    2.0: 38.063333,
    3.0: 38.051667,
    4.0: 38.048333,
    5.0: 38.06,
    6.0: 38.065,
    7.0: 38.048333,
    8.0: 38.03,
    9.0: 38.056667,
    10.0: 38.06,
    11.0: 38.06,
    12.0: 38.051667,
    13.0: 38.028333,
    14.0: 38.006667,
    15.0: 37.973333,
    16.0: 37.916667,
    17.0: 37.878333,
    18.0: 37.846667,
    20.0: 37.82,
    21.0: 37.788333,
    22.0: 37.765,
    23.0: 37.728333,
    24.0: 37.698333,
    25.0: 37.67,
    26.0: 37.636667,
    27.0: 37.618333,
    28.0: 37.601667,
    29.0: 37.58,
    29.5: 37.568333,
    30.0: 37.555,
    31.0: 37.528333,
    32.0: 37.518333,
    33.0: 37.508333,
    34.0: 37.495,
    35.0: 37.48,
    36.0: 37.471667,
    12.5: 38.04,
    19.0: 37.818333,
    24.5: 37.685075,
    25.5: 37.6544,
    26.5: 37.626324,
    27.5: 37.60956,
    28.5: 37.596667,
    30.5: 37.542166,
    31.5: 37.522796,
    33.5: 37.500692,
    405.0: 38.048333,
    407.0: 38.071667,
    411.0: 38.096667,
    650.0: 38.071667,
    651.0: 38.078333,
    652.0: 38.086667,
    653.0: 38.105,
    654.0: 38.105,
    655.0: 38.121667,
    659.0: 38.178333,
    662.0: 38.226667,
    657.0: 38.151667,
    649.0: 38.06,
}

# .map() looks every station up in the table and yields NaN when the station
# is not listed.  .replace() would instead leave the station number itself in
# the 'lat' column, so a later notna() filter could never catch it.
df_coords['lat'] = df_coords['Station_Number'].map(lats)
# Longitude (decimal degrees, negative = west) for each station number.
longs = {
    657.0: -121.688333,
    649.0: -121.8,
    2.0: -121.851667,
    3.0: -121.88,
    4.0: -121.935,
    5.0: -121.98,
    6.0: -122.035,
    7.0: -122.096667,
    8.0: -122.151667,
    9.0: -122.185,
    10.0: -122.208333,
    11.0: -122.266667,
    12.0: -122.311667,
    13.0: -122.37,
    14.0: -122.405,
    15.0: -122.436667,
    16.0: -122.446667,
    17.0: -122.421667,
    18.0: -122.421667,
    20.0: -122.393333,
    21.0: -122.358333,
    22.0: -122.358333,
    23.0: -122.336667,
    24.0: -122.338333,
    25.0: -122.325,
    26.0: -122.313333,
    27.0: -122.291667,
    28.0: -122.27,
    29.0: -122.245,
    29.5: -122.21833,
    30.0: -122.19,
    31.0: -122.158333,
    32.0: -122.133333,
    33.0: -122.121667,
    34.0: -122.098333,
    35.0: -122.078333,
    36.0: -122.066667,
    1.0: -121.84,
    12.5: -122.315,
    19.0: -122.471667,
    24.5: -122.333328,
    25.5: -122.319169,
    26.5: -122.302245,
    27.5: -122.281678,
    28.5: -122.235,
    30.5: -122.177256,
    31.5: -122.145833,
    33.5: -122.112337,
    405.0: -122.123333,
    407.0: -122.093333,
    411.0: -122.058333,
    650.0: -121.775,
    651.0: -121.763333,
    652.0: -121.746667,
    653.0: -121.72,
    654.0: -121.708333,
    655.0: -121.701667,
    659.0: -121.666667,
    662.0: -121.673333,
}

# .map() yields NaN for a station missing from the table; .replace() would
# leave the raw station number behind as if it were a longitude.
df_coords['long'] = df_coords['Station_Number'].map(longs)
df_coords.head()

Unnamed: 0,Date,Station_Number,Calculated_Chlorophyll,Temperature,latitude,longitude,year,lat,long
0,1969-04-10,4.0,,13.1,,,1969,38.048333,-121.935
1,1969-04-10,4.0,,13.1,,,1969,38.048333,-121.935
2,1969-04-10,4.0,,13.0,,,1969,38.048333,-121.935
3,1969-04-10,4.0,,13.0,,,1969,38.048333,-121.935
4,1969-04-10,5.0,,14.1,,,1969,38.06,-121.98


In [6]:
# Columns averaged per station and year.  Naming them explicitly keeps the
# result independent of how the installed pandas treats the non-numeric
# 'Date' column and the 'latitude'/'longitude' columns during a group mean.
value_cols = ["Calculated_Chlorophyll", "Temperature", "lat", "long"]

# Drop samples missing a measurement or a station position, then take the
# mean of each value column for every (station, year) pair.
df_noNaN = (
    df_coords
    .dropna(subset=value_cols)
    .groupby(["Station_Number", "year"])[value_cols]
    .mean()
)
df_noNaN.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Calculated_Chlorophyll,Temperature,lat,long
Station_Number,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1.0,1978,4.9,21.0,38.04,-121.84
2.0,1977,7.222222,17.477778,38.063333,-121.851667
2.0,1978,6.5,17.244444,38.063333,-121.851667
2.0,1979,13.425,20.35,38.063333,-121.851667
2.0,1980,5.636364,18.072727,38.063333,-121.851667


In [7]:
# Per-year averages across stations.  Because df_noNaN already holds one
# averaged row per (station, year), this is a mean of station means.
avg_temp_df = df_noNaN.groupby("year").agg(np.mean)

# Attach each year's cross-station row to every (station, year) row.  Columns
# present on both sides get pandas' default suffixes: "_x" comes from
# df_noNaN (per station), "_y" from avg_temp_df (per year).
station_with_yearly = df_noNaN.merge(avg_temp_df, left_index=True, right_index=True)

# Keep per-station chlorophyll and position plus the per-year temperature.
unused_cols = ["Calculated_Chlorophyll_y", "Temperature_x", "lat_y", "long_y"]
map_df = station_with_yearly.drop(columns=unused_cols).reset_index()

In [8]:
# Base map for the station animation, drawn with the 1977 stations on it.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree())

# Bounding box as [west, east, south, north] in degrees.
extent = [-122.56, -121.64, 37.38, 38.32]
ax.set_extent(extent, crs=ccrs.PlateCarree())
ax.gridlines()

# Natural-feature layers, added in the same order as before.
for layer, layer_style in (
    (cfeature.OCEAN, {}),
    (cfeature.LAND, {"edgecolor": "black"}),
    (cfeature.LAKES, {"edgecolor": "black"}),
    (cfeature.RIVERS, {}),
):
    ax.add_feature(layer, **layer_style)

# First frame: every station that has a row for 1977.
testing_df = map_df.loc[map_df['year'] == 1977]
test_longs = testing_df['long_x'].to_numpy()
test_lats = testing_df['lat_x'].to_numpy()
ax.scatter(test_longs, test_lats, c='b', s=10, transform=ccrs.PlateCarree());
# Years shown after the initial 1977 frame, one per animation frame.
years_list = list(np.arange(1978,2016))

def animate(i):
    """Add the stations sampled in the i-th year of ``years_list`` to the map.

    The year is looked up from the frame index ``i`` instead of being popped
    off ``years_list``.  FuncAnimation may call this function more than once
    for the same frame (it draws frame 0 while initialising, and runs every
    frame again when the animation is saved), so consuming the list skipped
    years and eventually raised IndexError once it was empty.

    Points from earlier frames are left on the axes, so the map accumulates
    stations as the years advance.
    """
    curr_year = years_list[i]
    new_df = map_df[map_df['year']==curr_year]

    new_longs = new_df['long_x'].to_numpy()
    new_lats = new_df['lat_x'].to_numpy()
    ax.scatter(new_longs,new_lats,c='b',s=10,transform=ccrs.PlateCarree());

# One frame per listed year (a hard-coded 30 stopped before the list ended).
ani = FuncAnimation(fig, animate, frames=len(years_list), interval=200, repeat=False)
plt.show()

In [9]:
#put the chlorophyll values into bins

# Bin edges for Calculated_Chlorophyll_x; the open-ended outer bins catch
# anything at or below 0 or above 40.  (The earlier sort_values() call was
# dropped: its result was never assigned, so it had no effect.)
bins = [-np.inf, 0, 10, 20, 30, 40, np.inf]
j_df = map_df.copy()
j_df['Bins'] = pd.cut(j_df['Calculated_Chlorophyll_x'], bins)
# Which bins actually occur in the data, in order of first appearance.
j_df['Bins'].astype("str").unique()


array(['(0.0, 10.0]', '(10.0, 20.0]', '(20.0, 30.0]', '(30.0, 40.0]'],
      dtype=object)

In [10]:
#assign colors to chlorophyll bins

# Colour for each bin label, keyed by the bin's string form.
color_bins = {
    '(0.0, 10.0]':"red",
    '(10.0, 20.0]':"blue",
    '(20.0, 30.0]':"green",
    '(30.0, 40.0]':"yellow"
}
# .map() gives NaN for any bin without an entry above (e.g. values over 40);
# .replace() would have copied the bin label itself into 'colors' as if it
# were a colour name.
j_df['colors'] = j_df['Bins'].astype("str").map(color_bins)
j_df

Unnamed: 0,Station_Number,year,Calculated_Chlorophyll_x,lat_x,long_x,Temperature_y,Bins,colors
0,1.0,1978,4.900000,38.040000,-121.840000,16.338796,"(0.0, 10.0]",red
1,2.0,1977,7.222222,38.063333,-121.851667,16.155919,"(0.0, 10.0]",red
2,2.0,1978,6.500000,38.063333,-121.851667,16.338796,"(0.0, 10.0]",red
3,2.0,1979,13.425000,38.063333,-121.851667,16.282795,"(10.0, 20.0]",blue
4,2.0,1980,5.636364,38.063333,-121.851667,15.711149,"(0.0, 10.0]",red
...,...,...,...,...,...,...,...,...
1303,657.0,2011,3.110891,38.151667,-121.688333,15.629800,"(0.0, 10.0]",red
1304,657.0,2012,4.066071,38.151667,-121.688333,16.102738,"(0.0, 10.0]",red
1305,657.0,2013,2.533000,38.151667,-121.688333,15.570687,"(0.0, 10.0]",red
1306,657.0,2014,3.124719,38.151667,-121.688333,16.885206,"(0.0, 10.0]",red


In [27]:
#plot and animate
import cartopy.io.img_tiles as cimgt

fig = plt.figure(figsize=(12,7))

# Bounding box as [west, east, south, north] in degrees.
extent = [-122.56, -121.64, 37.38, 38.32]
ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree())
ax.set_extent(extent, crs=ccrs.PlateCarree())
ax.gridlines()
request = cimgt.GoogleTiles()
ax.add_feature(cfeature.OCEAN)
ax.add_feature(cfeature.LAND, edgecolor='black')
ax.add_feature(cfeature.LAKES, edgecolor='black')
ax.add_feature(cfeature.RIVERS)
# Map-tile background at zoom level 10.
ax.add_image(request, 10)

# One colour scale for the whole animation.  Without fixed limits every
# scatter call normalises to that year's own min/max, so the same colour
# would mean a different chlorophyll value in each frame.
chl_min = map_df["Calculated_Chlorophyll_x"].min()
chl_max = map_df["Calculated_Chlorophyll_x"].max()


def _temperature_label(year_df):
    """Return the label text for the yearly temperature stored in ``year_df``.

    Temperature_y is the per-year value, repeated on every row of that year,
    so the first row is representative.
    """
    return 'Temperature: ' + str(float(year_df['Temperature_y'].iloc[0]))[:5]


# Initial frame: 1977.  The scatter and text artists are created once here
# and updated in place by animate(), so nothing from an earlier year stays
# on screen under a later year's label.
testing_df = map_df[map_df['year']==1977]
test_longs = testing_df['long_x'].to_numpy()
test_lats = testing_df['lat_x'].to_numpy()
station_dots = ax.scatter(test_longs, test_lats, c=testing_df["Calculated_Chlorophyll_x"],
                          s=50, vmin=chl_min, vmax=chl_max, transform=ccrs.PlateCarree())
temp_label = ax.text(-122, 37.5, _temperature_label(testing_df),
                     fontsize=20, bbox=dict(facecolor='green', alpha=1))

# Years shown after the initial frame.  Years with no rows are skipped: they
# have no temperature to display and would raise IndexError in the label.
years_list = [year for year in range(1978, 2016) if (map_df['year'] == year).any()]

def animate(i):
    """Show the stations, chlorophyll colours and temperature for frame ``i``.

    The year comes from ``years_list[i]`` rather than from popping the list.
    FuncAnimation may call this more than once per frame (it draws frame 0
    while initialising, and re-runs every frame on save), which previously
    skipped years and ended in IndexError once the list was exhausted.
    """
    curr_year = years_list[i]
    new_df = map_df[map_df['year']==curr_year]

    # Replace the displayed points and their values with this year's data.
    station_dots.set_offsets(np.column_stack([new_df['long_x'], new_df['lat_x']]))
    station_dots.set_array(new_df["Calculated_Chlorophyll_x"].to_numpy())
    temp_label.set_text(_temperature_label(new_df))

# One frame per year that has data (a hard-coded 30 stopped early).
ani = FuncAnimation(fig, animate, frames=len(years_list), interval=200, repeat=False)
plt.show()