STA/OPR 9750 Week 11 In-Class Activity: HTML Import

Week 11 Slides

library(rvest)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter()         masks stats::filter()
✖ readr::guess_encoding() masks rvest::guess_encoding()
✖ dplyr::lag()            masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(leaflet)

# Build a data frame with one row per hyperlink found in the second column of
# the first <tbody> on the Wikipedia list page: the link text becomes `name`
# and the raw `href` attribute becomes `link`.
CUNYs <- local({
    list_page <- read_html("https://en.wikipedia.org/wiki/List_of_City_University_of_New_York_institutions")

    # First table body on the page, then the second cell of every row.
    second_cells <- html_elements(html_element(list_page, "tbody"),
                                  "tr td:nth-child(2)")

    # Every anchor nested inside those cells.
    anchors <- html_elements(second_cells, "a")

    data.frame(name = html_text(anchors),
               link = html_attr(anchors, "href"))
})

#' Read a latitude/longitude pair from a web page.
#'
#' Downloads `url`, takes the text of the first element matching the CSS
#' selector `.geo`, splits it on ";" and converts the first two pieces to
#' numbers. Presumably the text has the form "<lat>; <lon>" -- verify against
#' the pages being scraped.
#'
#' @param url Address of the page to read (a single string).
#' @return A list with numeric elements `LAT` and `LON`.
get_cuny_gps <- function(url){
    geo_text <- html_text(html_element(read_html(url), ".geo"))
    pieces <- str_split_1(geo_text, ";")

    # First piece is taken as latitude, second as longitude.
    list(LAT = as.numeric(pieces[1]),
         LON = as.numeric(pieces[2]))
}

# Turn each relative link into an absolute Wikipedia URL, look up the
# coordinates of every page, and spread the result into LAT / LON columns.
#
# - The host is pasted WITHOUT a trailing slash: the scraped hrefs are assumed
#   to be root-relative ("/wiki/..."), so adding one would produce
#   "https://en.wikipedia.org//wiki/...".
# - map() calls get_cuny_gps() once per link and yields a list-column, so no
#   rowwise() grouping is needed and none is left behind on the result.
CUNYs <- CUNYs |>
    mutate(link = paste0("https://en.wikipedia.org", link),
           gps = map(link, get_cuny_gps)) |>
    unnest_wider(gps)

# Interactive map with one marker per row of CUNYs; clicking a marker shows
# the institution name in a popup.
#
# Popup settings must be passed through `popupOptions =`. The `options =`
# argument of addMarkers() is reserved for markerOptions(), so a popupOptions()
# object supplied there never reaches the popup.
MAP <- leaflet() |>
    addTiles() |>
    addMarkers(lng = CUNYs$LON,
               lat = CUNYs$LAT,
               popup = CUNYs$name,
               popupOptions = popupOptions(closeOnClick = FALSE))

# Print the widget so it renders in the document / viewer.
MAP