In [11]:
import pandas as pd
import plotly.express as px

# Step 1: Load the Dataset
file_path = 'eruptions.csv'  # Replace with the path to your CSV file
eruptions_data = pd.read_csv(file_path)

# Step 2: Data Cleaning and Transformation
# Drop rows with missing values in critical columns
cleaned_data = eruptions_data.dropna(subset=['latitude', 'longitude', 'start_year', 'start_month', 'start_day'])

# Ensure columns are in the correct data type
cleaned_data['start_year'] = pd.to_numeric(cleaned_data['start_year'], errors='coerce').astype('Int64')
cleaned_data['start_month'] = pd.to_numeric(cleaned_data['start_month'], errors='coerce').astype('Int64')
cleaned_data['start_day'] = pd.to_numeric(cleaned_data['start_day'], errors='coerce').astype('Int64')

# Drop rows with invalid dates
cleaned_data.dropna(subset=['start_year', 'start_month', 'start_day'], inplace=True)

# Create a date string and convert it to a datetime object
cleaned_data['date_string'] = cleaned_data['start_year'].astype(str) + '-' + \
                              cleaned_data['start_month'].astype(str) + '-' + \
                              cleaned_data['start_day'].astype(str)
cleaned_data['start_date'] = pd.to_datetime(cleaned_data['date_string'], errors='coerce')

# Drop rows with invalid start dates
cleaned_data.dropna(subset=['start_date'], inplace=True)

# Fill missing VEI values with a default value
cleaned_data['vei'] = cleaned_data['vei'].fillna(0)

# Step 3: Create the Animation
fig = px.scatter_geo(cleaned_data,
                     lat='latitude',
                     lon='longitude',
                     hover_name='volcano_name',  # Add volcano name to hover information
                     color='vei',
                     size='vei',
                     animation_frame=cleaned_data['start_date'].dt.year.astype(str),
                     projection='natural earth',
                     title='Global Volcanic Eruptions Over Time')

# Show the figure
fig.show()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a

In [12]:
# Group by year and count the number of eruptions each year
yearly_counts = cleaned_data['start_date'].dt.year.value_counts().sort_index().reset_index()
yearly_counts.columns = ['Year', 'Number of Eruptions']

# Create the line chart animation
line_fig = px.line(yearly_counts,
                   x='Year',
                   y='Number of Eruptions',
                   title='Number of Volcanic Eruptions Per Year Over Time',
                   labels={'Year': 'Year', 'Number of Eruptions': 'Number of Eruptions'})

# Show the line chart figure
line_fig.show()


In [13]:

# Ensure VEI is a numeric value for the histogram
cleaned_data['vei'] = pd.to_numeric(cleaned_data['vei'], errors='coerce')
cleaned_data.dropna(subset=['vei'], inplace=True)

# Create the histogram animation
hist_fig = px.histogram(cleaned_data,
                        x='vei',
                        animation_frame=cleaned_data['start_date'].dt.year.astype(str),
                        range_x=[0, cleaned_data['vei'].max() + 1],
                        title='Historical Distribution of Volcanic Explosivity Index (VEI) Over Time',
                        labels={'vei': 'Volcanic Explosivity Index (VEI)'})

# Show the histogram figure
hist_fig.show()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

