library(DBI)
library(bigrquery)
library(tidyverse) #loads dplyr and friends
library(tidyr)
library(mregions)
library(rgdal)
library(sqldf)
library(glue)
library(spdep)
library(ggplot2)
library(gridExtra)
library(rgdal)
library(MASS)

#connect to DB

BQ_connection <- dbConnect(bigquery(),
                           project = 'global-fishing-watch',
                           dataset = 'global_footprint_of_fisheries',
                           billing = 'fishinghotspots',
                           use_legacy_sql=FALSE)
DBI::dbListTables(BQ_connection)
#"fishing_effort"          "fishing_effort_byvessel" "fishing_vessels"         "vessels"


#store table into var

fishing_vessels <- dplyr::tbl(BQ_connection, "fishing_vessels")
# the variable created in your environment is not a dataframe or tibble but instead a list
# This is because tbl() creates a reference to the table in the remote database but does not bring the actual data into memory. 
class(fishing_vessels)

summary_by_country_and_gear <- fishing_vessels %>% 
  filter(active_2016) %>% 
  group_by(flag, geartype) %>% 
  summarize(n_vessels = n_distinct(mmsi),
            total_GT = sum(tonnage)) %>% 
  arrange(desc(n_vessels))

# <SQL>
#   SELECT `flag`, `geartype`, COUNT(DISTINCT `mmsi`) AS `n_vessels`, SUM(`tonnage`) AS `total_GT`
# FROM `fishing_vessels`
# WHERE (`active_2016`)
# GROUP BY `flag`, `geartype`
# ORDER BY `n_vessels` DESC

summary_by_country_and_gear <- collect(summary_by_country_and_gear)

top_20_flags <- summary_by_country_and_gear %>% group_by(flag) %>% summarize(n_vessels = sum(n_vessels)) %>% top_n(20, n_vessels)

# SELECT `flag`, `n_vessels`
# FROM (SELECT `flag`, `n_vessels`, rank() OVER (ORDER BY `n_vessels` DESC) AS `zzz1`
# FROM (SELECT `flag`, SUM(`n_vessels`) AS `n_vessels`

# FROM (SELECT *
# FROM (SELECT `flag`, `geartype`, COUNT(DISTINCT `mmsi`) AS `n_vessels`, SUM(`tonnage`) AS `total_GT`
# FROM (SELECT *
# FROM `fishing_vessels`
# WHERE (`active_2016`))
# GROUP BY `flag`, `geartype`) 
# ORDER BY `n_vessels` DESC)

# GROUP BY `flag`))
# WHERE (`zzz1` <= 20.0)

top_20_flags2 <- summary_by_country_and_gear %>% 
  group_by(flag) %>% 
  summarize(n_vessels = sum(n_vessels)) %>% 
  top_n(20, n_vessels)

top_20_flags2

summary_by_country_and_gear %>% 
  +     filter(flag %in% top_20_flags) %>% 
  +     na.omit() %>% 
  +     ggplot(aes(x = forcats::fct_reorder(flag, n_vessels), y = n_vessels, fill = geartype))+
  +     geom_col()+
  +     coord_flip()+
  +     labs(x  = "")+
  +     hrbrthemes::theme_ipsum()+
  +     ggsci::scale_fill_startrek()
# outputs image

# load shp

palau_eez <- readOGR(dsn=".", layer="eez",verbose=FALSE)

# get the bounding box of the shapefile
palau_bbox <- sf::st_bbox(palau_eez)

# extend the bounding box 1 degree in every direction.
min_lon <- palau_bbox[["xmin"]] - 1 
max_lon <- palau_bbox[["xmax"]] + 1
min_lat <- palau_bbox[["ymin"]] - 1
max_lat <- palau_bbox[["ymax"]] + 1 

# define mapping resoluion in degrees
resolution <- 0.1

fishing_effort <- dplyr::tbl(BQ_connection,"fishing_effort")
#`global-fishing-watch.global_footprint_of_fisheries.fishing_effort`

binned_effort_around_Palau <- glue::glue_sql('SELECT floor(lat_bin/{resolution})*{resolution}+0.5*{resolution} lat_bin_center, floor(lon_bin/{resolution})*{resolution}+0.5*{resolution} lon_bin_center, SUM(fishing_hours) fishing_hours, COUNT(flag) flag FROM (SELECT lat_bin/100 lat_bin, lon_bin/100 lon_bin, fishing_hours, flag FROM `global-fishing-watch.global_footprint_of_fisheries.fishing_effort` WHERE _PARTITIONTIME >= "2016-01-01 00:00:00" AND _PARTITIONTIME <"2016-12-31 00:00:00")WHERE lat_bin>={min_lat} AND lat_bin <={max_lat} AND lon_bin >={min_lon} AND lon_bin<={max_lon} GROUP BY lat_bin_center, lon_bin_center HAVING fishing_hours>0',.con=BQ_connection)
binned_effort_around_Palau_data <- dbGetQuery(BQ_connection, binned_effort_around_Palau)

# <SQL> SELECT floor(lat_bin/0.1)*0.1+0.5*0.1 lat_bin_center, floor(lon_bin/0.1)*0.1+0.5*0.1 lon_bin_center, SUM(fishing_hours) fishing_hours, COUNT(flag) flag, flag FROM (SELECT lat_bin/100 lat_bin, lon_bin/100 lon_bin, fishing_hours FROM `global-fishing-watch.global_footprint_of_fisheries.fishing_effort` WHERE _PARTITIONTIME >= "2016-01-01 00:00:00" AND _PARTITIONTIME <"2016-12-31 00:00:00")WHERE lat_bin>=0.621407206177707 AND lat_bin <=12.5587247264652 AND lon_bin >=128.508804817289 AND lon_bin<=137.954100211352 GROUP BY lat_bin_center, lon_bin_center HAVING fishing_hours>0

ggplot(binned_effort_around_Palau_data, aes(x=lat_bin_center, y=fishing_hours)) + 
  +       geom_point(alpha = 0.3, size = 1) +
  +       geom_smooth(size = 0.5) +
  +       ggtitle("% fishing hours")+
  +       theme(aspect.ratio=1)

ggplot(binned_effort_around_Palau_data, aes(x=lon_bin_center, y=fishing_hours)) + 
  +       geom_point(alpha = 0.3, size = 1) +
  +       geom_smooth(size = 0.5) +
  +       ggtitle("% fishing hours")+
  +       theme(aspect.ratio=1)

ggplot(binned_effort_around_Palau_data, aes(x=lat_bin_center, y=flag)) + 
  +       geom_point(alpha = 0.3, size = 1) +
  +       geom_smooth(size = 0.5) +
  +       ggtitle("#countries")+
  +       theme(aspect.ratio=1)

plotall <- binned_effort_around_Palau_data %>% filter(fishing_hours > 1) %>% ungroup() %>% ggplot()+ geom_raster(aes(x = lon_bin_center, y = lat_bin_center, fill = fishing_hours))

summary(binned_effort_around_Palau_data)
lat_min = 1.95
lat_max = 12.55
lon_min = 128.8
lon_max = 137.9

lat_n = (12.55 - 1.95)/0.1
lon_n = (137.9 - 128.8)/0.1

china <- fishing_effort %>% filter(flag == "CHN")
binned_effort_around_Palau_CHN <- glue::glue_sql('SELECT floor(lat_bin/{resolution})*{resolution}+0.5*{resolution} lat_bin_center, floor(lon_bin/{resolution})*{resolution}+0.5*{resolution} lon_bin_center, SUM(fishing_hours) fishing_hours, COUNT(flag) flag FROM (SELECT lat_bin/100 lat_bin, lon_bin/100 lon_bin, fishing_hours, flag FROM `global-fishing-watch.global_footprint_of_fisheries.fishing_effort` WHERE _PARTITIONTIME >= "2016-01-01 00:00:00" AND _PARTITIONTIME <"2016-12-31 00:00:00" AND flag = "CHN") WHERE lat_bin>={min_lat} AND lat_bin <={max_lat} AND lon_bin >={min_lon} AND lon_bin<={max_lon} GROUP BY lat_bin_center, lon_bin_center HAVING fishing_hours>0',.con=BQ_connection)
binned_effort_around_Palau_CHN_data <- dbGetQuery(BQ_connection, binned_effort_around_Palau_CHN)

plotCHN <-binned_effort_around_Palau_CHN_data %>% filter(fishing_hours > 1) %>% ungroup() %>% ggplot()+ geom_raster(aes(x = lon_bin_center, y = lat_bin_center, fill = fishing_hours))

taiwan <- fishing_effort %>% filter(flag == "TWN")  %>% filter(date >= "2016-01-01") %>% filter(date <="2016-12-31")
taiwan_desc <- fishing_effort %>% filter(flag == "TWN")  %>% filter(date >= "2016-01-01") %>% filter(date <="2016-12-31") %>%arrange(desc(date))

binned_effort_around_Palau_TWN <- glue::glue_sql('SELECT floor(lat_bin/{resolution})*{resolution}+0.5*{resolution} lat_bin_center, floor(lon_bin/{resolution})*{resolution}+0.5*{resolution} lon_bin_center, SUM(fishing_hours) fishing_hours, COUNT(flag) flag FROM (SELECT lat_bin/100 lat_bin, lon_bin/100 lon_bin, fishing_hours, flag FROM `global-fishing-watch.global_footprint_of_fisheries.fishing_effort` WHERE _PARTITIONTIME >= "2016-01-01 00:00:00" AND _PARTITIONTIME <"2016-12-31 00:00:00" AND flag = "TWN") WHERE lat_bin>={min_lat} AND lat_bin <={max_lat} AND lon_bin >={min_lon} AND lon_bin<={max_lon} GROUP BY lat_bin_center, lon_bin_center HAVING fishing_hours>0',.con=BQ_connection)
binned_effort_around_Palau_TWN_data <- dbGetQuery(BQ_connection, binned_effort_around_Palau_TWN)

plotTWN <-binned_effort_around_Palau_TWN_data %>% filter(fishing_hours > 1) %>% ungroup() %>% ggplot()+ geom_raster(aes(x = lon_bin_center, y = lat_bin_center, fill = fishing_hours))


binned_effort_around_Palau_JPN <- glue::glue_sql('SELECT floor(lat_bin/{resolution})*{resolution}+0.5*{resolution} lat_bin_center, floor(lon_bin/{resolution})*{resolution}+0.5*{resolution} lon_bin_center, SUM(fishing_hours) fishing_hours, COUNT(flag) flag FROM (SELECT lat_bin/100 lat_bin, lon_bin/100 lon_bin, fishing_hours, flag FROM `global-fishing-watch.global_footprint_of_fisheries.fishing_effort` WHERE _PARTITIONTIME >= "2016-01-01 00:00:00" AND _PARTITIONTIME <"2016-12-31 00:00:00" AND flag = "JPN") WHERE lat_bin>={min_lat} AND lat_bin <={max_lat} AND lon_bin >={min_lon} AND lon_bin<={max_lon} GROUP BY lat_bin_center, lon_bin_center HAVING fishing_hours>0',.con=BQ_connection)
binned_effort_around_Palau_JPN_data <- dbGetQuery(BQ_connection, binned_effort_around_Palau_JPN)

plotJPN <-binned_effort_around_Palau_JPN_data %>% filter(fishing_hours > 1) %>% ungroup() %>% ggplot()+ geom_raster(aes(x = lon_bin_center, y = lat_bin_center, fill = fishing_hours))


binned_effort_around_Palau_KOR <- glue::glue_sql('SELECT floor(lat_bin/{resolution})*{resolution}+0.5*{resolution} lat_bin_center, floor(lon_bin/{resolution})*{resolution}+0.5*{resolution} lon_bin_center, SUM(fishing_hours) fishing_hours, COUNT(flag) flag FROM (SELECT lat_bin/100 lat_bin, lon_bin/100 lon_bin, fishing_hours, flag FROM `global-fishing-watch.global_footprint_of_fisheries.fishing_effort` WHERE _PARTITIONTIME >= "2016-01-01 00:00:00" AND _PARTITIONTIME <"2016-12-31 00:00:00" AND flag = "KOR") WHERE lat_bin>={min_lat} AND lat_bin <={max_lat} AND lon_bin >={min_lon} AND lon_bin<={max_lon} GROUP BY lat_bin_center, lon_bin_center',.con=BQ_connection)
binned_effort_around_Palau_KOR_data <- dbGetQuery(BQ_connection, binned_effort_around_Palau_KOR)

plotKOR <-binned_effort_around_Palau_KOR_data %>% filter(fishing_hours > 1) %>% ungroup() %>% ggplot()+ geom_raster(aes(x = lon_bin_center, y = lat_bin_center, fill = fishing_hours))


binned_effort_around_Palau_ESP <- glue::glue_sql('SELECT floor(lat_bin/{resolution})*{resolution}+0.5*{resolution} lat_bin_center, floor(lon_bin/{resolution})*{resolution}+0.5*{resolution} lon_bin_center, SUM(fishing_hours) fishing_hours, COUNT(flag) flag FROM (SELECT lat_bin/100 lat_bin, lon_bin/100 lon_bin, fishing_hours, flag FROM `global-fishing-watch.global_footprint_of_fisheries.fishing_effort` WHERE _PARTITIONTIME >= "2016-01-01 00:00:00" AND _PARTITIONTIME <"2016-12-31 00:00:00" AND flag = "ESP") WHERE lat_bin>={min_lat} AND lat_bin <={max_lat} AND lon_bin >={min_lon} AND lon_bin<={max_lon} GROUP BY lat_bin_center, lon_bin_center HAVING fishing_hours>0',.con=BQ_connection)
binned_effort_around_Palau_ESP_data <- dbGetQuery(BQ_connection, binned_effort_around_Palau_ESP)

plotESP <-binned_effort_around_Palau_ESP_data %>% filter(fishing_hours > 1) %>% ungroup() %>% ggplot()+ geom_raster(aes(x = lon_bin_center, y = lat_bin_center, fill = fishing_hours))



grid.arrange(plotall, plotCHN, plotTWN, plotJPN, ncol=2, bottom="fishing effort around Palau, all vs China vs Taiwan vs Japan")



spain_desc <- fishing_effort %>% filter(flag == "ESP")  %>% filter(date >= "2016-01-01") %>% filter(date <="2016-12-31") %>%arrange(desc(date))

#conclusion: no fishing from korea and spain within Palau EEZ


# world fishing efforts in 2016

binned_effort_around_World<- glue::glue_sql('SELECT floor(lat_bin/{resolution})*{resolution}+0.5*{resolution} lat_bin_center, floor(lon_bin/{resolution})*{resolution}+0.5*{resolution} lon_bin_center, SUM(fishing_hours) fishing_hours, COUNT(flag) flag FROM (SELECT lat_bin/100 lat_bin, lon_bin/100 lon_bin, fishing_hours, flag FROM `global-fishing-watch.global_footprint_of_fisheries.fishing_effort` WHERE _PARTITIONTIME >= "2016-01-01 00:00:00" AND _PARTITIONTIME <"2016-12-31 00:00:00") GROUP BY lat_bin_center, lon_bin_center HAVING fishing_hours>0',.con=BQ_connection)
binned_effort_around_World_data <- dbGetQuery(BQ_connection,binned_effort_around_World)
binned_effort_around_World_data %>% filter(fishing_hours > 1) %>% ungroup() %>% ggplot()+ xlim(-180,180)+ ylim(-90,90)+ geom_raster(aes(x = lon_bin_center, y = lat_bin_center, fill = fishing_hours))


binned_effort_around_World_TWN <- glue::glue_sql('SELECT floor(lat_bin/{resolution})*{resolution}+0.5*{resolution} lat_bin_center, floor(lon_bin/{resolution})*{resolution}+0.5*{resolution} lon_bin_center, SUM(fishing_hours) fishing_hours, COUNT(flag) flag FROM (SELECT lat_bin/100 lat_bin, lon_bin/100 lon_bin, fishing_hours, flag FROM `global-fishing-watch.global_footprint_of_fisheries.fishing_effort` WHERE _PARTITIONTIME >= "2016-01-01 00:00:00" AND _PARTITIONTIME <"2016-12-31 00:00:00" AND flag = "TWN") GROUP BY lat_bin_center, lon_bin_center HAVING fishing_hours>0',.con=BQ_connection)
binned_effort_around_World_TWN_data <- dbGetQuery(BQ_connection, binned_effort_around_World_TWN)

plotTWN_world <-binned_effort_around_World_TWN_data %>% filter(fishing_hours > 1) %>% ungroup() %>% ggplot()+ xlim(-180,180)+ ylim(-90,90)+ geom_raster(aes(x = lon_bin_center, y = lat_bin_center, fill = fishing_hours))


binned_effort_around_World_CHN <- glue::glue_sql('SELECT floor(lat_bin/{resolution})*{resolution}+0.5*{resolution} lat_bin_center, floor(lon_bin/{resolution})*{resolution}+0.5*{resolution} lon_bin_center, SUM(fishing_hours) fishing_hours, COUNT(flag) flag FROM (SELECT lat_bin/100 lat_bin, lon_bin/100 lon_bin, fishing_hours, flag FROM `global-fishing-watch.global_footprint_of_fisheries.fishing_effort` WHERE _PARTITIONTIME >= "2016-01-01 00:00:00" AND _PARTITIONTIME <"2016-12-31 00:00:00" AND flag = "CHN") GROUP BY lat_bin_center, lon_bin_center HAVING fishing_hours>0',.con=BQ_connection)
binned_effort_around_World_CHN_data <- dbGetQuery(BQ_connection, binned_effort_around_World_CHN)

plotCHN_world <-binned_effort_around_World_CHN_data %>% filter(fishing_hours > 1) %>% ungroup() %>% ggplot()+ xlim(-180,180)+ ylim(-90,90)+ geom_raster(aes(x = lon_bin_center, y = lat_bin_center, fill = fishing_hours))

binned_effort_around_World_JPN <- glue::glue_sql('SELECT floor(lat_bin/{resolution})*{resolution}+0.5*{resolution} lat_bin_center, floor(lon_bin/{resolution})*{resolution}+0.5*{resolution} lon_bin_center, SUM(fishing_hours) fishing_hours, COUNT(flag) flag FROM (SELECT lat_bin/100 lat_bin, lon_bin/100 lon_bin, fishing_hours, flag FROM `global-fishing-watch.global_footprint_of_fisheries.fishing_effort` WHERE _PARTITIONTIME >= "2016-01-01 00:00:00" AND _PARTITIONTIME <"2016-12-31 00:00:00" AND flag = "JPN") GROUP BY lat_bin_center, lon_bin_center HAVING fishing_hours>0',.con=BQ_connection)
binned_effort_around_World_JPN_data <- dbGetQuery(BQ_connection, binned_effort_around_World_JPN)

plotJPN_world <-binned_effort_around_World_JPN_data %>% filter(fishing_hours > 1) %>% ungroup() %>% ggplot()+ xlim(-180,180)+ ylim(-90,90)+ geom_raster(aes(x = lon_bin_center, y = lat_bin_center, fill = fishing_hours))


binned_effort_around_World_KOR <- glue::glue_sql('SELECT floor(lat_bin/{resolution})*{resolution}+0.5*{resolution} lat_bin_center, floor(lon_bin/{resolution})*{resolution}+0.5*{resolution} lon_bin_center, SUM(fishing_hours) fishing_hours, COUNT(flag) flag FROM (SELECT lat_bin/100 lat_bin, lon_bin/100 lon_bin, fishing_hours, flag FROM `global-fishing-watch.global_footprint_of_fisheries.fishing_effort` WHERE _PARTITIONTIME >= "2016-01-01 00:00:00" AND _PARTITIONTIME <"2016-12-31 00:00:00" AND flag = "KOR") GROUP BY lat_bin_center, lon_bin_center HAVING fishing_hours>0',.con=BQ_connection)
binned_effort_around_World_KOR_data <- dbGetQuery(BQ_connection, binned_effort_around_World_KOR)

plotKOR_world <-binned_effort_around_World_KOR_data %>% filter(fishing_hours > 1) %>% ungroup() %>% ggplot()+ xlim(-180,180)+ ylim(-90,90)+ geom_raster(aes(x = lon_bin_center, y = lat_bin_center, fill = fishing_hours))


binned_effort_around_World_ESP <- glue::glue_sql('SELECT floor(lat_bin/{resolution})*{resolution}+0.5*{resolution} lat_bin_center, floor(lon_bin/{resolution})*{resolution}+0.5*{resolution} lon_bin_center, SUM(fishing_hours) fishing_hours, COUNT(flag) flag FROM (SELECT lat_bin/100 lat_bin, lon_bin/100 lon_bin, fishing_hours, flag FROM `global-fishing-watch.global_footprint_of_fisheries.fishing_effort` WHERE _PARTITIONTIME >= "2016-01-01 00:00:00" AND _PARTITIONTIME <"2016-12-31 00:00:00" AND flag = "ESP") GROUP BY lat_bin_center, lon_bin_center HAVING fishing_hours>0',.con=BQ_connection)
binned_effort_around_World_ESP_data <- dbGetQuery(BQ_connection, binned_effort_around_World_ESP)

plotESP_world <-binned_effort_around_World_ESP_data %>% filter(fishing_hours > 1) %>% ungroup() %>% ggplot()+ xlim(-180,180)+ ylim(-90,90)+ geom_raster(aes(x = lon_bin_center, y = lat_bin_center, fill = fishing_hours))

#hotspot analysis world

summary(binned_effort_around_World_data)
lat_bin_world_c <- c(binned_effort_around_World_data$lat_bin_center)
lon_bin_world_c <- c(binned_effort_around_World_data$lon_bin_center)

latlon_world <- do.call(rbind, Map(data.frame, lat=lat_bin_world_c, lon=lon_bin_world_c))
coords_world <- coordinates(latlon_world)

knn_world <- knearneigh(coords_world, k=4)
world_nb <- knn2nb(knn_world)
world_W <- nb2listw(world_nb, style="W")
str(world_W)

fishing_G_world <- localG(binned_effort_around_World_data$fishing_hours, world_W, zero.policy=NULL, spChk=NULL, return_internals=FALSE, GeoDa=FALSE)

fishing_G_world_data <- do.call(rbind, Map(data.frame, lat=lat_bin_world_c, lon=lon_bin_world_c, Z = fishing_G_world))

fishing_G_world_data %>% ggplot()+ xlim(-180,180)+ ylim(-90,90)+ geom_raster(aes(x = lon, y = lat, fill = Z))

summary(fishing_G_world_data)



#hotspot analysis Palau


summary(binned_effort_around_Palau_data)
lat_bin_c <- c(binned_effort_around_Palau_data$lat_bin_center)
lon_bin_c <- c(binned_effort_around_Palau_data$lon_bin_center)

latlon <- do.call(rbind, Map(data.frame, lat=lat_bin_c, lon=lon_bin_c))
coords <- coordinates(latlon)

knn <- knearneigh(coords, k=4)
Palau_nb <- knn2nb(knn)
Palau_W <- nb2listw(Palau_nb, style="W")
Palau_W <- nb2listw(Palau_nb, style="B")

str(Palau_W)


#hotspot analysis... how? 

fishing_G <- localG(binned_effort_around_Palau_data$fishing_hours, Palau_W, zero.policy=NULL, spChk=NULL, return_internals=FALSE, GeoDa=FALSE)

fishing_G_data <- do.call(rbind, Map(data.frame, lat=lat_bin_c, lon=lon_bin_c, Z = fishing_G))

fishing_G_data %>% ggplot()+ xlim(127,139)+ ylim(1,13)+ geom_raster(aes(x = lon, y = lat, fill = Z))

summary(fishing_G_data)

fishing_G_data_desc <- fishing_G_data %>% arrange(desc(Z))

# max-z: 12.09136
# min-z: -1.61692
# mean: 0.03924

#Geary C
set.seed(1)
geary.test(binned_effort_around_Palau_data$fishing_hours,Palau_W, alternative ="two.sided")

#Moran's I
set.seed(1)
moran.test(binned_effort_around_Palau_data$fishing_hours,Palau_W, alternative="two.sided")

# Moran I test under randomisation
# 
# data:  binned_effort_around_Palau_data$fishing_hours  
# weights: Palau_W    
# 
# Moran I statistic standard deviate = 41.046, p-value < 2.2e-16
# alternative hypothesis: two.sided
# sample estimates:
#   Moran I statistic       Expectation          Variance 
# 0.6167442560     -0.0005005005      0.0002261334 


#model: fishing effort ~ lat + lon
fish.lm <- lm(fishing_hours ~ lat_bin_center + lon_bin_center, data = binned_effort_around_Palau_data)

# Residuals:
#   Min      1Q  Median      3Q     Max 
# -19.774  -8.683  -3.940   3.423 120.691 
# 
# Coefficients:
#   Estimate Std. Error t value Pr(>|t|)    
# (Intercept)    -182.3674    22.6529  -8.051  1.4e-15 ***
#   lat_bin_center   -0.2686     0.1137  -2.363   0.0182 *  
#   lon_bin_center    1.4759     0.1674   8.815  < 2e-16 ***
#   ---
#   Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Residual standard error: 14.71 on 1996 degrees of freedom
# Multiple R-squared:  0.0495,	Adjusted R-squared:  0.04854 
# F-statistic: 51.97 on 2 and 1996 DF,  p-value: < 2.2e-16

#top fishing countries

flag_data <- glue::glue_sql('SELECT flag,SUM(fishing_hours) AS n_hours, COUNT(DISTINCT mmsi_present) AS n_vessels FROM `global-fishing-watch.global_footprint_of_fisheries.fishing_effort` WHERE _PARTITIONTIME >= "2016-01-01 00:00:00" AND _PARTITIONTIME <"2016-12-31 00:00:00" GROUP BY flag ORDER BY n_hours DESC')
flag_data_fetched <- dbGetQuery(BQ_connection, flag_data)

top_20_flags_hours <- flag_data_fetched %>% group_by(flag) %>% summarize(n_hours = sum(n_hours)) %>% top_n(20, n_hours)
top_20_flags_hours

top_20_flags_hours

flag_hours_data <-data.frame(flag = top_20_flags_hours$flag, n_hours = top_20_flags_hours$n_hours)
p <-ggplot(top_20_flags_hours, aes(flag, n_hours))
p +geom_bar(stat = "identity", aes(fill = flag)) 
#alternatively
top_20_flags_hours %>% ggplot(aes(flag, n_hours))+geom_bar(stat = "identity", aes(fill = flag))


#by vessel
top_20_flags_vessels <- flag_data_fetched %>% group_by(flag) %>% summarize(n_vessels = sum(n_vessels)) %>% top_n(20, n_vessels)

flag_vessels_data <-data.frame(flag = top_20_flags_vessels$flag, n_vessels = top_20_flags_vessels$n_vessels)
p2 <-ggplot(top_20_flags_vessels, aes(flag, n_vessels))
p2 +geom_bar(stat = "identity", aes(fill = flag)) 

#desc order num hours
top_20_flags_vessels_desc <- flag_data_fetched %>% group_by(flag) %>% summarize(n_vessels = sum(n_vessels)) %>% top_n(20, n_vessels) %>% arrange(desc(n_vessels))
vessels_data_desc <-data.frame(flag = top_20_flags_vessels_desc$flag, n_vessels = top_20_flags_vessels_desc$n_vessels)


#desc order num vessels
top_20_flags_hours_desc <- flag_data_fetched %>% group_by(flag) %>% summarize(n_hours = sum(n_hours)) %>% top_n(20, n_hours) %>% arrange(desc(n_hours))
flag_hours_data_desc <-data.frame(flag = top_20_flags_hours_desc$flag, n_hours = top_20_flags_hours_desc$n_hours)

#regression
fish_hours.lm <- lm(n_hours ~ flag, data = flag_hours_data)

flag.lm <- lm(n_vessels ~ flag,data=summary_by_country_and_gear)

