# Collect Naver Sports KBO news list entries (oid, aid, title) for the most
# recent days and save them to "baseball_news.csv".
#
# Result: `final_data`, a character matrix with columns a1 (oid), a2 (aid)
# and a3 (title); it has zero rows when nothing could be collected.
library(RJSONIO)

n_days <- 5L   # number of days back from today to crawl (yesterday = 1)
n_pages <- 2L  # number of list pages requested per day

# Extract one field of a parsed list item as a single string.
# `[[` is used instead of `$` so the lookup is an exact match and also works
# if the item happens to be a named atomic vector rather than a list.
# A missing or empty field yields NA so every row keeps three columns.
get_field <- function(item, name) {
  if (!(name %in% names(item))) {
    return(NA_character_)
  }
  value <- item[[name]]
  if (length(value) == 0L) NA_character_ else as.character(value[[1L]])
}

# One slot per (day, page) request; filled slots are bound together once at
# the end instead of growing the matrix on every iteration.
pages <- vector("list", n_days * n_pages)
slot <- 0L

for (j in seq_len(n_days)) {
  # The endpoint takes the date as yyyymmdd.
  date2 <- format(Sys.Date() - j, "%Y%m%d")

  for (i in seq_len(n_pages)) {
    slot <- slot + 1L
    list_url <- paste0(
      "https://sports.news.naver.com/kbaseball/news/list.nhn?date=",
      date2, "&isphoto=N&page=", i
    )

    # A failed download or unparsable response skips this page with a
    # warning rather than aborting the whole crawl.
    parsed <- tryCatch(
      fromJSON(
        paste(readLines(list_url, encoding = "UTF-8", warn = FALSE),
              collapse = "\n")
      ),
      error = function(e) {
        warning("Failed to read ", list_url, ": ", conditionMessage(e),
                call. = FALSE)
        NULL
      }
    )

    items <- if (is.list(parsed)) parsed[["list"]] else NULL
    if (length(items) > 0L) {
      pages[[slot]] <- cbind(
        a1 = vapply(items, get_field, character(1), name = "oid",
                    USE.NAMES = FALSE),
        a2 = vapply(items, get_field, character(1), name = "aid",
                    USE.NAMES = FALSE),
        a3 = vapply(items, get_field, character(1), name = "title",
                    USE.NAMES = FALSE)
      )
    }

    cat("\n",date2,"-",i,"page 수집중")
  }
}

# rbind() ignores the NULL slots of skipped pages; if every page was skipped
# it returns NULL, so fall back to an empty matrix with the same columns.
final_data <- do.call(rbind, pages)
if (is.null(final_data)) {
  final_data <- matrix(
    character(0),
    nrow = 0L, ncol = 3L,
    dimnames = list(NULL, c("a1", "a2", "a3"))
  )
}

# Relative output paths from here on resolve against this directory.
setwd("D:\\인프런\\crawling")
write.csv(final_data, "baseball_news.csv", row.names = FALSE)
# Download the article body for every collected news entry and save the
# combined table (oid, aid, head, cont) to "baseball.csv".
#
# Expects `final_data` to hold oid in column 1, aid in column 2 and the
# title in column 3.
library(stringr)

# Without any entries there is nothing to download, and paste0() would still
# build one bogus URL from zero-length ids.
if (NROW(final_data) == 0L) {
  stop("No news entries collected; nothing to download.", call. = FALSE)
}

# Reduce the raw HTML lines of one article page to plain body text.
# The body is taken from the first line containing the "newsEndContents"
# container up to the first following line containing "news_end_btn".
# Returns NA when either marker is missing.
extract_body <- function(html_lines) {
  first_line <- which(str_detect(html_lines, fixed("id=\"newsEndContents\">")))
  if (length(first_line) == 0L) {
    return(NA_character_)
  }
  first_line <- first_line[1L]

  last_line <- which(str_detect(html_lines, fixed("news_end_btn")))
  last_line <- last_line[last_line >= first_line]
  if (length(last_line) == 0L) {
    return(NA_character_)
  }

  body <- paste(html_lines[first_line:last_line[1L]], collapse = " ")
  body <- gsub("<.*?>", "", body)  # drop HTML tags (non-greedy match)

  # Drop tabs, stray angle brackets, their entity forms and non-breaking
  # spaces. Plain spaces are kept so words stay separated.
  # NOTE(review): confirm nothing downstream expects space-free text.
  body <- gsub("\t|<|>|&lt;|&gt;|&nbsp;|\u00a0", "", body)

  # Collapse the runs of spaces left behind by markup indentation.
  trimws(gsub(" +", " ", body))
}

con_url <- paste0(
  "https://sports.news.naver.com/news.nhn?oid=", final_data[, 1],
  "&aid=", final_data[, 2]
)

# Preallocate one slot per article; a failed download or parse leaves NA in
# that slot (with a warning) instead of aborting the whole run.
con <- character(length(con_url))
for (k in seq_along(con_url)) {
  con[k] <- tryCatch(
    extract_body(readLines(con_url[k], encoding = "UTF-8", warn = FALSE)),
    error = function(e) {
      warning("Failed to read ", con_url[k], ": ", conditionMessage(e),
              call. = FALSE)
      NA_character_
    }
  )
  cat("\n",k)
}

baseball_data <- cbind(final_data, con)
colnames(baseball_data) <- c("oid", "aid", "head", "cont")
write.csv(baseball_data, "baseball.csv", row.names = FALSE)
# 'R-크롤링' 카테고리의 다른 글
# dotax (다음카페) 크롤링 (0) | 2019.12.12 |
# ---|---|
# 네이버 블로그 크롤링 (0) | 2019.12.12 |
# 내 주변 카페리스트 수집하기 (0) | 2019.12.12 |
# 오늘의 유머 크롤링 코드 (0) | 2019.12.12 |
# 네이버 스포츠 뉴스 제목 및 댓글 크롤링 (Rselenium) (0) | 2019.04.26 |
# 댓글