작성
·
280
0
Error in which(str_detect(b, "id=\"newEndContents\">")):which(str_detect(b, :
argument of length 0
위 에러 메시지가 발생하는데 원인이 무엇인지 못 찾겠네요. 아래는 코드입니다.
# Collect article ids and titles from the Naver Sports KBO news list.
# For each of the 5 days before today, list pages 1 and 2 are requested, the
# response is parsed as JSON, and the oid, aid and title of every entry in its
# `list` element are kept.
# Result: final_data, a character matrix with columns a1 (oid), a2 (aid) and
# a3 (title), one row per article.
library(RJSONIO)

# One slot per (day, page) request; bound together once after the loop
# instead of growing final_data with rbind() on every iteration.
page_rows <- vector("list", 5L * 2L)
slot <- 0L
for (j in 1:5) {
  # The date depends only on j, so it is computed once per day.
  date <- Sys.Date() - j
  date2 <- gsub("-", "", date)
  for (i in 1:2) {
    url <- paste0("https://sports.news.naver.com/kbaseball/news/list?isphoto=N&date=", date2, "&page=", i)
    # "UTF-8" (upper case) is the spelling readLines() documents for marking
    # the input as UTF-8.
    b <- readLines(url, encoding = "UTF-8")
    b2 <- fromJSON(b)
    # vapply() always returns a character vector, even for an empty list,
    # so cbind() below always produces a three-column matrix.
    a1 <- vapply(b2$list, function(x) as.character(x$oid), character(1))
    a2 <- vapply(b2$list, function(x) as.character(x$aid), character(1))
    a3 <- vapply(b2$list, function(x) as.character(x$title), character(1))
    slot <- slot + 1L
    page_rows[[slot]] <- cbind(a1, a2, a3)
    cat("\n", date2, "-", i, "데이터 수집 중")
  }
}
final_data <- do.call(rbind, page_rows)
# Save the collected list (oid, aid, title) as CSV and build one article URL
# per row from the oid (column 1) and aid (column 2).
# NOTE(review): setwd() ties the script to one machine's directory layout; it
# is kept because the output location depends on it.
setwd("/Users/??/Workspace/R/Projects/CrawlingPro/05")
# FALSE is spelled out: F is an ordinary variable that can be reassigned.
write.csv(final_data, "baseball_news.csv", row.names = FALSE)
# Print the oid and aid columns for a quick visual check.
final_data[, 1]
final_data[, 2]
con_url <- paste0("https://sports.news.naver.com/news?oid=", final_data[, 1], "&aid=", final_data[, 2])
# stringr provides str_detect(), used when the article pages are parsed.
library(stringr)
con_url
# Download each article page in con_url and store its cleaned body text in con.
# The body is taken as the lines from the one containing id="newEndContents">
# through the one containing "news_end_btn".
# This initial value of k is overwritten by the for loop below.
k<-1
# con starts empty and gains one element per iteration through con[k] <- b3.
con<-c()
for (k in 1:length(con_url)) {
# Read the page as a character vector, one element per line.
# NOTE(review): readLines() documents "UTF-8" in upper case for `encoding`;
# confirm that the lower-case spelling is honoured.
b<-readLines(con_url[k], encoding="utf-8")
# Keep the lines between the two markers. If no line matches a marker,
# which() returns integer(0) and `:` stops with "argument of length 0".
b2<-b[which(str_detect(b, "id=\"newEndContents\">")):which(str_detect(b, "news_end_btn"))]
# Join the selected lines into one string separated by single spaces.
b3<-paste(b2, collapse = " ")
# A bare expression is not auto-printed inside a loop, so this line has no
# visible effect.
b3
# Remove HTML tags (non-greedy match from "<" to the next ">").
b3<-gsub("<.*?>", "",b3)
# Remove tabs, ">", spaces and "<".
# NOTE(review): this also deletes every space between words - confirm that
# this is intended.
b3<-gsub("\t|>| |<", "",b3)
con[k] <- b3
# Progress output: the index of the article just processed.
cat("\n", k)
}
# Attach the article bodies to the collected list as a fourth column, name the
# columns, and write the combined table to baseball.csv in the working
# directory.
baseball_data <- cbind(final_data, con)
colnames(baseball_data) <- c("oid", "aid", "head", "cont")
# FALSE is spelled out: F is an ordinary variable that can be reassigned.
write.csv(baseball_data, "baseball.csv", row.names = FALSE)
답변 2
0
0
안녕하세요. 먼길님
뉴스 페이지 내에서 HTML 규칙이 바뀐 것 같습니다.
b2<-b[which(str_detect(b, "id=\"newEndContents\">")):which(str_detect(b, "news_end_btn"))]
여기서 "newEndContents" 부분이 "newsEndContents"로 바뀐 듯합니다.
아래 코드를 참고해 주세요.
# Download each article page in con_url and store its cleaned body text in con.
# The body is the run of lines from the first one containing
# id="newsEndContents"> through the first later line containing
# "news_end_btn".
# A page where either marker is missing gets NA and a warning, instead of
# stopping the whole loop with "argument of length 0".
con <- character(length(con_url))
for (k in seq_along(con_url)) {
  b <- readLines(con_url[k], encoding = "UTF-8")
  body_start <- which(str_detect(b, "id=\"newsEndContents\">"))
  body_end <- which(str_detect(b, "news_end_btn"))
  # Only an end marker at or after the start marker can close the body.
  if (length(body_start) > 0L) {
    body_end <- body_end[body_end >= body_start[1L]]
  }
  if (length(body_start) == 0L || length(body_end) == 0L) {
    warning("article body markers not found: ", con_url[k], call. = FALSE)
    con[k] <- NA_character_
    cat("\n", k)
    next
  }
  # `:` needs exactly one value on each side, so take the first match of each.
  b2 <- b[body_start[1L]:body_end[1L]]
  b3 <- paste(b2, collapse = " ")
  # Remove HTML tags (non-greedy match from "<" to the next ">").
  b3 <- gsub("<.*?>", "", b3)
  # Remove tabs, ">", spaces and "<".
  # NOTE(review): this also deletes every space between words - confirm that
  # this is intended.
  b3 <- gsub("\t|>| |<", "", b3)
  con[k] <- b3
  cat("\n", k)
}