使用 pdftools 套件擷取台灣各縣市明日白天雨量

以下 R 程式碼會先從中央氣象局網站下載各縣市明日白天天氣預報的 PDF 檔,將其內容轉成文字,再從中擷取各縣市對應的降雨機率 (%)。

作業系統:Windows 7
R 版本:R-3.5.1

# get rainfall for tomorrow daytime
require(pdftools)
require(BBmisc) # use "suppressAll" function
require(stringr)

# Download the forecast PDF and turn it into a character vector with one
# element per text line, keeping only the lines that start with an asterisk.
pdf_url <- "https://www.cwb.gov.tw/V7/forecast/taiwan/Data/W03.pdf"
pdf_path <- "d:/tmp/rains.pdf"

# download.file() fails if the target directory is missing.
dir.create(dirname(pdf_path), recursive = TRUE, showWarnings = FALSE)

# mode = "wb" is required on Windows: the default text mode rewrites line
# endings and can corrupt a binary file such as a PDF.
download.file(pdf_url, pdf_path, mode = "wb")

# suppressAll() takes only the expression to evaluate, so no extra
# arguments are passed to it.
text <- suppressAll(pdf_text(pdf_path))

# pdf_text() returns one string per page; split into lines, accepting both
# CRLF and LF line endings.
text <- unlist(strsplit(text, "\r?\n"))

# Keep only the lines that begin with an asterisk (optionally preceded by
# whitespace). A bare "*" in a regex is a quantifier, not a literal, so it
# is placed in a character class. "\uFF0A" is the full-width asterisk, in
# case the PDF text uses that form instead of the ASCII one.
text <- text[grepl("^\\s*[*\uFF0A]", text)]

# citiesTW1: the city/county labels used as lookup keys when matching a row
# of the extracted text. Several keys are shorter than the full name they
# map to (e.g. "北市" for "新北市").
# NOTE(review): presumably the short keys mirror how pdf_text() renders
# those names -- confirm against the actual PDF output.
citiesTW1 <- c(
  "台北市", "北市", "桃園市", "台中市", "台南市",
  "高雄市", "基隆市", "竹縣", "竹市", "苗縣", "彰化縣", "南投縣",
  "雲縣", "嘉義縣", "嘉義市", "屏縣", "宜蘭縣", "花蓮縣", "台縣",
  "澎湖縣", "金門縣", "連江縣"
)

# citiesTW2: the full names to print, in the same order as citiesTW1 and
# named by it, so that citiesTW2[key] returns the full name for a key.
citiesTW2 <- setNames(
  c(
    "台北市", "新北市", "桃園市", "台中市", "台南市",
    "高雄市", "基隆市", "新竹縣", "新竹市", "苗栗縣", "彰化縣", "南投縣",
    "雲林縣", "嘉義縣", "嘉義市", "屏東縣", "宜蘭縣", "花蓮縣", "台東縣",
    "澎湖縣", "金門縣", "連江縣"
  ),
  citiesTW1
)

# For every extracted line, print "<full city name> : <rain percentage>"
# when the line's first field is a known key of citiesTW2.
# Expected row shape (from the sample): "*臺北市 陰時多雲 20%".

# Full-width digits (U+FF10..U+FF19), which the PDF text may contain in
# place of ASCII digits; written as escapes so they cannot be confused with
# the visually similar ASCII characters.
fullwidth_digits <-
  "\uFF10\uFF11\uFF12\uFF13\uFF14\uFF15\uFF16\uFF17\uFF18\uFF19"

for (row in text) {
  # Keep what precedes the first percent sign (ASCII or full-width U+FF05).
  before_pct <- unlist(strsplit(row, "[%\uFF05]"))[1]
  if (is.na(before_pct)) {
    next
  }

  # Drop the leading asterisk marker (ASCII or full-width U+FF0A). A bare
  # "*" pattern is a regex quantifier and would not remove it, so it is
  # matched inside a character class.
  cleaned <- sub("^[*\uFF0A]+[[:space:]]*", "", str_trim(before_pct))

  # Split on runs of whitespace so repeated spaces do not create empty fields.
  fields <- unlist(strsplit(cleaned, "[[:space:]]+"))
  if (length(fields) == 0) {
    next
  }

  # Normalise the city label and map it to its full name; unknown labels
  # yield NA and the row is skipped.
  xcity <- gsub("臺", "台", fields[1], fixed = TRUE)
  xcity <- citiesTW2[xcity]
  if (is.na(xcity)) {
    next
  }

  # The last field is the percentage; convert full-width digits to ASCII
  # before parsing. Done only for recognised cities so unrelated lines do
  # not trigger coercion warnings.
  rainTmp <- chartr(fullwidth_digits, "0123456789", tail(fields, 1))
  rainTmp <- as.integer(str_trim(rainTmp))

  cat(xcity, ":", rainTmp, "\n")
}