import pandas as pd
import numpy as np
from dateutil import parser
import plotly.express as px
import matplotlib.pyplot as plt
This analysis was conducted with my colleagues, Jackson Patrick and Aaron Graff, in the Spring of 2024.
The file includes all my individual contributions to the project. All code presented here was written by me.
You can find the full report, which encompasses an analysis of avalanche frequencies over time and correlations with evolving weather patterns, in the following GitHub repository: AvalancheAnalysis.
The purpose of this analysis is to investigate trends in avalanche frequencies in CO across geographic features and area. I aim to find out what kinds of geographic features make for dangerous avalanche conditions, and what parts of CO are especially prone to avalanches.
We utilize publicly available data from CAIC that can be generated here: https://avalanche.state.co.us/observations/avalanches.
avalanches_data = pd.read_csv("avalanche-record-CAIC.csv")
avalanches_data.head(3)
Observation ID | Date | Date Known | Landmark | First Name | Longitude | latitude | Last Name | # | Elev | ... | Status | Locked | Weak Layer | Avg Width | Width Units | Avg Vertical | Vertical Units | Avg Crown | Crown Units | Terminus | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | - | Thu Apr 11 2024 12:00:00 GMT-0600 (Mountain D... | Estimated | - | Mac | -106.1225117 | 39.4264253 | Peters | 1 | >TL | ... | approved | - | - | - | ft | - | ft | - | in | - |
1 | - | Thu Apr 11 2024 00:12:12 GMT-0600 (Mountain D... | Known | - | Scott | -105.6973734 | 40.3163493 | Eubank | 2 | >TL | ... | approved | - | - | - | ft | - | ft | - | in | - |
2 | - | Thu Apr 11 2024 00:00:00 GMT-0600 (Mountain D... | Estimated | - | Scott | -105.6527821 | 40.3091512 | Eubank | 2 | >TL | ... | approved | - | - | - | ft | - | ft | - | in | - |
3 rows × 32 columns
Trevor Chartier
avalanches_data = avalanches_data.drop(columns = ["Date Known","Observation ID","Comments","Description","Secondary Trigger",
"Location","Landmark", "Area", "First Name","Last Name", "Status","Locked",
"Weak Layer", "Avg Width", "Width Units","#", "Avg Vertical","Vertical Units",
"Avg Crown", "Crown Units", "Terminus", 'Sliding Sfc','Incident','sizeR'])
avalanches_data.head()
Date | Longitude | latitude | Elev | Asp | Type | Trig | sizeD | |
---|---|---|---|---|---|---|---|---|
0 | Thu Apr 11 2024 12:00:00 GMT-0600 (Mountain D... | -106.1225117 | 39.4264253 | >TL | E | L | AO | D1 |
1 | Thu Apr 11 2024 00:12:12 GMT-0600 (Mountain D... | -105.6973734 | 40.3163493 | >TL | SE | SS | AS | D1 |
2 | Thu Apr 11 2024 00:00:00 GMT-0600 (Mountain D... | -105.6527821 | 40.3091512 | >TL | E | SS | N | D1 |
3 | Thu Apr 11 2024 00:00:00 GMT-0600 (Mountain D... | -108.0197277 | 37.4168616 | >TL | W | WL | N | D1 |
4 | Wed Apr 10 2024 18:14:49 GMT-0600 (Mountain D... | -106.9405172 | 39.1316096 | >TL | E | WS | N | D2 |
# Missing values are represented by the string ' -' in the data. Replace them with NaN
avalanches_data.replace(' -',np.nan,inplace=True)
print(avalanches_data.isna().sum())
Date          0
Longitude    29
latitude     29
Elev        659
Asp         656
Type       1953
Trig        819
sizeD       912
dtype: int64
As you might expect, not every entry (avalanche report) has available data for all features. We don't want to remove all NA values before investigating trends in avalanche frequencies, because doing so may introduce report quality as a confounding variable: avalanches with more complete reports would be disproportionately represented over those with some missing values. Instead, we will deal with missing values on a case-by-case basis to include as many valid reports as possible.
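To make that trade-off concrete, here is a minimal sketch on a hypothetical toy DataFrame (not the CAIC data) contrasting a blanket `dropna` with per-question filtering:

```python
import numpy as np
import pandas as pd

# Toy reports: each row is an avalanche report with possibly missing fields
reports = pd.DataFrame({
    "Elev": [" >TL", np.nan, " TL", " <TL"],
    "Asp":  [" E", " NE", np.nan, " W"],
})

# A blanket dropna keeps only fully complete reports...
complete_only = reports.dropna()
print(len(complete_only))  # 2

# ...while filtering per question keeps every report that is valid for it
elev_reports = reports["Elev"].dropna()
print(len(elev_reports))   # 3
```

Here two of four reports would be discarded outright, even though three of them are perfectly usable for an elevation-frequency question.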
avalanches_data["Longitude"] = avalanches_data["Longitude"].astype(float)
avalanches_data["latitude"] = avalanches_data["latitude"].astype(float)
avalanches_data = avalanches_data[(avalanches_data['Longitude']< -101) & (avalanches_data['Longitude']> -110) &
(avalanches_data['latitude']>36) & (avalanches_data['latitude']<42)]
The general region of Colorado includes approximately:
Latitudes from 37 degrees to 41 degrees
Longitudes from -102 degrees to -109 degrees
Some entries have erroneous coordinates far outside this region, so we will remove these entries.
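An equivalent, slightly tidier way to express this filter is pandas' `between` (inclusive of both endpoints); a sketch on toy coordinates, where the second row is a clearly erroneous entry:

```python
import pandas as pd

# Toy coordinates; the -0.1 longitude is an obviously bad record
coords = pd.DataFrame({
    "Longitude": [-106.1, -0.1, -105.7],
    "latitude":  [39.4, 40.3, 40.3],
})

# between() replaces the pair of chained comparisons per column
in_colorado = coords[
    coords["Longitude"].between(-110, -101) & coords["latitude"].between(36, 42)
]
print(len(in_colorado))  # 2
```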
def codeTrig(x):
    if x == 'N':
        return "Natural"
    if x == 'A':
        return "Artificial"
# Extract the first letter from the Trig column and apply the codeTrig function.
# All values in the data are padded with a leading space, so the first letter is at index 1, not 0.
avalanches_data['Trig'] = avalanches_data['Trig'].str[1:2].apply(codeTrig)
avalanches_data.head(3)
Date | Longitude | latitude | Elev | Asp | Type | Trig | sizeD | |
---|---|---|---|---|---|---|---|---|
0 | Thu Apr 11 2024 12:00:00 GMT-0600 (Mountain D... | -106.122512 | 39.426425 | >TL | E | L | Artificial | D1 |
1 | Thu Apr 11 2024 00:12:12 GMT-0600 (Mountain D... | -105.697373 | 40.316349 | >TL | SE | SS | Artificial | D1 |
2 | Thu Apr 11 2024 00:00:00 GMT-0600 (Mountain D... | -105.652782 | 40.309151 | >TL | E | SS | Natural | D1 |
From the data source (Colorado Avalanche Information Center), the trigger column (labeled 'Trig') contains tags such as N, AS, AX, etc.
The first letter of each tag indicates whether the avalanche was triggered Naturally (N) or Artificially (A). The second letter indicates whether the artificial trigger was from a skier, explosive, etc.
To simplify this analysis, and because we were unable to receive confirmation from CAIC about what every trigger label meant, we only separate natural vs. artificial triggers.
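If finer granularity were ever wanted, the decoding could be extended along these lines. The second-letter mapping below is purely illustrative (we could not confirm the full label set with CAIC), and `decode_trigger` is a hypothetical helper, not part of the analysis:

```python
# Hypothetical mapping of second letters to sub-triggers (illustrative only,
# NOT confirmed by CAIC)
SUB_TRIGGER = {"S": "Skier", "E": "Explosive", "M": "Snowmobile"}

def decode_trigger(tag):
    """Decode a padded trigger tag like ' AS' into (origin, detail)."""
    tag = tag.strip()
    origin = "Natural" if tag[0] == "N" else "Artificial"
    detail = SUB_TRIGGER.get(tag[1], "Unknown") if len(tag) > 1 else None
    return origin, detail

print(decode_trigger(" AS"))  # ('Artificial', 'Skier')
print(decode_trigger(" N"))   # ('Natural', None)
```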
# Count frequencies for each elevation type (Relative to Tree Line)
elevations = pd.DataFrame(avalanches_data.groupby("Elev")['Asp'].count()).sort_values(by='Asp')
# Rename frequency column to be descriptive of content
elevations = elevations.rename(columns={"Asp": "Frequency"})
# We don't want frequencies for avalanches with All, Unknown, or Missing Values for elevations. Filter them out.
elevations = elevations.loc[[" <TL", " TL", " >TL"], :]
# Provide descriptive names for indexes
elevations.index = ["Below Tree Line", "Tree Line", "Above Tree Line"]
# Create barplot of elevation frequencies
plt.bar(elevations.index, elevations['Frequency'])
plt.xlabel('Elevation Relative to Tree Line')
plt.ylabel('Number of Avalanche Reports')
plt.title('Number of Avalanche Reports in the Last 14 Years For Various Elevations');
From the plot, it is clear that avalanche frequency increases with elevation; this follows the expected trend. Higher elevations see more snowfall, and above tree line there is more exposure to sun and wind, with nothing to help hold the snow in place. These factors all correlate with the increase in slide frequencies shown.
Slope aspect refers to the direction in which the slope faces. You have probably heard of the brand "The North Face"; the company name refers to the face of the slope that receives the least amount of sunlight. How will the direction in which a slope faces impact avalanche danger?
# Count number of avalanche reports for each slope aspect
aspects = pd.DataFrame(avalanches_data.groupby("Asp")['Elev'].count())
# Rename frequency column to be more descriptive
aspects = aspects.rename(columns={"Elev": "Frequency"})
# We don't care about frequencies for avalanches with unknown or missing slope aspects. Filter them out.
aspects = aspects.loc[[" SW", " S", " W", " NW", " SE", " N", " NE", " E"], :]
# Arrange the frequencies in their natural order according to compass direction
order = [" N", " NE", " E", " SE", " S", " SW", " W", " NW"]
aspects = aspects.reindex(order)
aspects
Frequency | |
---|---|
Asp | |
N | 3872 |
NE | 5601 |
E | 6879 |
SE | 3685 |
S | 1381 |
SW | 832 |
W | 1701 |
NW | 1774 |
angles = np.radians(np.arange(0, 360, 45)) # 8 directions evenly spaced (for the 8 aspects)
frequencies = aspects['Frequency'].tolist()
# Create a polar plot
fig, ax = plt.subplots(subplot_kw={'projection': 'polar'})
ax.set_theta_direction(-1) # Clockwise direction, so compass directions read N, E, S, W
ax.set_theta_offset(np.pi/2) # Start angle at 90 degrees (North)
# For coloring the bars based on frequencies
cmap = plt.colormaps['viridis']
norm = plt.Normalize(min(frequencies), max(frequencies))
# Plot the frequencies on the polar plot
bars = ax.bar(angles, frequencies, width=0.35, bottom=0, color=cmap(norm(frequencies)))
# Set the labels for each direction
ax.set_xticks(angles)
ax.set_xticklabels(aspects.index)
ax.set_rlabel_position(0)
ax.set_title("Number of Avalanche Reports in the Last 14 Years For Various Slope Aspects")
# Color bar legend
colorMap = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
colBar = plt.colorbar(colorMap, ax=ax, pad=0.2)
colBar.set_label('Frequency')
plt.show()
The results of this analysis were somewhat surprising. Personally, I had expected the Northern facing slopes to have the greatest frequency of avalanches because the lack of sunlight on these slopes leads to greater snowpack retention. This is likely the reason that the sunnier Southern and South-West facing slopes have relatively few avalanches, but what makes the Eastern facing slopes so avalanche prone?
After some research, we found that this pattern can be explained by wind patterns. In Colorado, the prevailing winds typically travel from West to East. This makes the Eastern slopes the leeward slopes (the slopes oriented away from the wind). These Eastern, leeward slopes are susceptible to becoming wind loaded as snow blows onto them from the Western, windward slopes. This wind loading is what makes them especially dangerous for avalanches.
To test this theory against our data, we should expect Eastern slopes to have a greater proportion of Cornice type avalanches as these formations are impacted by wind...
In order to see what is causing the great disparity in avalanche frequencies between Eastern and Western slopes, I have chosen to isolate Eastern and Western slope aspects and investigate the distribution of avalanche types for each aspect.
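As a side note, the same per-aspect percentages can also be computed in a single chain with `value_counts(normalize=True)`; a minimal sketch on toy data (the padded codes mirror the CAIC format):

```python
import pandas as pd

# Toy data with padded CAIC-style aspect and type codes
df = pd.DataFrame({
    "Asp":  [" E", " E", " E", " W", " W"],
    "Type": [" SS", " C", " SS", " SS", " WL"],
})

# Share of each avalanche type within each aspect, as a percentage
shares = (
    df[df["Asp"].isin([" E", " W"])]
      .groupby("Asp")["Type"]
      .value_counts(normalize=True)
      .mul(100)
      .unstack(level="Asp")
      .fillna(0)
)
print(round(shares.loc[" SS", " E"], 1))  # 66.7
```

This normalizes within each group automatically, avoiding the separate percentage helper per aspect.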
def calcPercentageWest(type):
    return len(type)/len(western_slope) * 100
def calcPercentageEast(type):
    return len(type)/len(eastern_slope) * 100
western_slope = avalanches_data[avalanches_data['Asp'] == ' W'] # Only avalanches with Western aspect
eastern_slope = avalanches_data[avalanches_data['Asp'] == ' E'] # Only avalanches with Eastern aspect
# Calculate the percentage of avalanches of each type for the Western slope
west = pd.DataFrame(western_slope.groupby('Type').apply(calcPercentageWest))
# Avalanche code abbreviations can be found here: https://www.americanavalancheassociation.org/swag
west.index = ['Cornice','Glide','Hard Slab','Loose','Roof', 'Soft Slab', 'Unknown', 'Wet-Loose','Wet Slab']
west.columns = ['Western']
# Repeat for Eastern Slope
east = pd.DataFrame(eastern_slope.groupby('Type').apply(calcPercentageEast))
east.index = ['Cornice','Glide','Hard Slab','Loose', 'Roof', 'Slush Flow', 'Soft Slab', 'Unknown', 'Wet-Loose','Wet Slab']
east.columns= ['Eastern']
# Merge into one dataframe
avalancheTypePercentage = pd.merge(east, west,left_index=True, right_index=True, how='outer')
avalancheTypePercentage.fillna(0, inplace=True)
avalancheTypePercentage
Eastern | Western | |
---|---|---|
Cornice | 1.306720 | 0.057670 |
Glide | 0.186674 | 0.115340 |
Hard Slab | 15.939115 | 11.649366 |
Loose | 4.092476 | 5.190311 |
Roof | 0.014360 | 0.115340 |
Slush Flow | 0.014360 | 0.000000 |
Soft Slab | 58.242389 | 54.671280 |
Unknown | 2.527283 | 1.499423 |
Wet Slab | 3.403217 | 8.650519 |
Wet-Loose | 7.021827 | 13.610150 |
ax = avalancheTypePercentage.plot(kind='barh', figsize=(10, 6), color=['blue', 'green'])
ax.set_xlabel('Percentage of Total')
ax.set_ylabel('Avalanche Type')
ax.set_title('Comparison of Avalanche Types between Eastern and Western Slopes')
plt.legend(title='Slope', labels=['Eastern', 'Western'])
plt.show()
It should be made explicit that the x-axis is a measurement of the percentage of the total avalanches for the given slope aspect, NOT the total number of avalanches.
As we expected, Cornice avalanches make up a more than 22-times greater percentage of the total avalanches on Eastern slopes (1.3%) than on Western slopes (0.058%). However, from the figure, we see that cornice avalanches still make up a relatively small portion of total avalanches.
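The 22-times figure is just the two table percentages divided; a quick arithmetic check using the values from the table above:

```python
# Cornice percentages from the Eastern/Western type table
east_cornice = 1.306720
west_cornice = 0.057670

ratio = east_cornice / west_cornice
print(round(ratio, 1))  # 22.7
```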
Although this visualization only shows East and West slope aspects for ease of direct comparison, it still provides some interesting insight into the distribution of avalanche types, with soft slabs being by far the most common. In case you're interested, an example of this avalanche type can be seen here: https://www.youtube.com/watch?v=yjDoE3trvOM
# Each point will also include information on size, aspect, trigger, and elevation when hovered over
# We only want to plot points that have all of this information
map_avalanche_data = avalanches_data.dropna(subset = ['sizeD','Asp','Elev','Trig'])
# Remove Entries With Unknown Destructive Size
validCodes = [' D1',' D2',' D1.5',' D3.5',' D3',' D4',' D5']
mask = map_avalanche_data['sizeD'].isin(validCodes)
map_avalanche_data = map_avalanche_data[mask].copy() # .copy() avoids a SettingWithCopyWarning when the new column is added below
# Extract Destructive Sizes Into Numeric Column
map_avalanche_data['Destructive Size'] = map_avalanche_data['sizeD'].str[2:].astype(float)
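The slice index in the extraction above matters because the codes carry a leading space; a tiny sketch on hypothetical values:

```python
import pandas as pd

# Codes are padded with a leading space, e.g. ' D1.5', so the number starts at index 2
sizes = pd.Series([" D1", " D1.5", " D3"])
numeric = sizes.str[2:].astype(float)
print(numeric.tolist())  # [1.0, 1.5, 3.0]
```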
# Define a custom color scale
custom_color_scale = [
(0.0, 'rgb(15, 4, 135)'), # Dark blue
(0.05, 'rgb(63, 13, 207)'),
(0.1, 'rgb(100, 21, 255)'),
(0.15, 'rgb(136, 35, 255)'), # Purplish
(0.20, 'rgb(170, 58, 252)'),
(0.25, 'rgb(200, 18, 240)'),
(0.35, 'rgb(190, 18, 190)'),
(0.50, 'rgb(239, 100, 100)'),
(0.68, 'rgb(242, 82, 24)'), # Orange towards the higher end
(0.80, 'rgb(253, 200, 8)'),
(1.0, 'rgb(255, 255, 0)') # Bright yellow
]
Points representing avalanche reports will be colored based on their size. There are far more D1 - D3 avalanches than the extremely large D4 - D5 avalanches. By defining a custom color scale, I can make the differences in color between the more common D1 - D3 avalanches easier to distinguish than a uniform color distribution would. Additionally, I can take care to avoid common colorblindness accessibility issues, such as pairing red and green.
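The same non-uniform anchoring idea can be sketched in matplotlib, which is already imported above. The hex colors here are approximations of a few of the RGB stops in the Plotly scale, chosen purely for illustration:

```python
from matplotlib.colors import LinearSegmentedColormap

# Non-uniform anchor positions spend most of the color range on the low,
# common end of the data (D1-D3) rather than spacing colors evenly
anchors = [
    (0.00, "#0f0487"),  # dark blue
    (0.25, "#c812f0"),  # purple
    (0.68, "#f25218"),  # orange
    (1.00, "#ffff00"),  # bright yellow
]
cmap = LinearSegmentedColormap.from_list("avalanche_sketch", anchors)

# By the midpoint of the data range we are already well past the purple stop,
# so D1-D3 values get most of the scale's color variation
r, g, b, _ = cmap(0.5)
```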
# Plot Avalanche reports over a map
# Size and color of points is dependent on avalanche size
fig = px.scatter_mapbox(map_avalanche_data,
lat="latitude",
lon="Longitude",
color="Destructive Size", # Color of point is mapped based on avalanche size
hover_name= "sizeD",
hover_data= {"Trig":True,"Asp":True,"Elev":True,"sizeD":False,"latitude": False,
"Longitude":False,'Destructive Size':False}, # What data appears when point is hovered over
size ="Destructive Size", # Size of point is proportional to avalanche size
size_max = 12,
zoom=8,
mapbox_style="open-street-map",
height=800,
width=800,
color_continuous_scale=custom_color_scale,
opacity= 0.5)
# Customize margins of figure
fig.update_layout(margin={"r":0,"t":20,"l":0,"b":0})
fig.write_html('plotlyplot.html')
Destructive size ranges from 1 to 5 with 1 being relatively harmless and 5 being the largest avalanches known.
Looking at the map, we see a prevalence of avalanches throughout the Rocky Mountain range in CO.