티스토리 뷰
#영화평점에 따른 타이틀 선정건
library(rvest)
library(stringr)
library(dplyr)
library(ggplot2)
# Crawl the Naver movie rating list page by page and collect, for every
# review row, the movie title and the rating that was given.
# Result: two parallel character vectors, `title` and `grade`.
base_url <- "https://movie.naver.com/movie/point/af/list.nhn?&page="
# The encoding must be handed to read_html(); pasting it into the URL (as the
# script did before) only corrupts the page number in the query string.
# NOTE(review): EUC-KR is the encoding the original script tried to declare;
# confirm it matches the charset the site actually serves.
page_encoding <- "EUC-KR"
n_pages <- 200
rows_per_page <- 10
t_css <- ".color_b"          # nodes holding the movie titles
g_css <- "#old_content em"   # nodes holding the ratings

# Collect per-page results in preallocated lists instead of growing vectors.
title_pages <- vector("list", n_pages)
grade_pages <- vector("list", n_pages)

for (i in seq_len(n_pages)) {
  craw_url <- paste0(base_url, i)
  # A single failing request should not throw away the pages already fetched.
  hdoc <- tryCatch(
    read_html(craw_url, encoding = page_encoding),
    error = function(e) {
      warning("Skipping page ", i, ": ", conditionMessage(e), call. = FALSE)
      NULL
    }
  )
  if (is.null(hdoc)) {
    next
  }

  title_part <- html_text(html_nodes(hdoc, t_css))
  grade_part <- html_text(html_nodes(hdoc, g_css))

  # Only the leading matches of the rating selector are kept (at most
  # `rows_per_page`), and both vectors are cut to the same length so that
  # titles and ratings stay aligned and no NA padding is introduced.
  n_rows <- min(length(title_part), length(grade_part), rows_per_page)
  title_pages[[i]] <- title_part[seq_len(n_rows)]
  grade_pages[[i]] <- grade_part[seq_len(n_rows)]
}

# as.character() keeps the type stable even if no page returned any row.
title <- as.character(unlist(title_pages))
grade <- as.character(unlist(grade_pages))
# Combine the scraped vectors into one table, save it as CSV and read it
# back in; the round trip through read.csv() re-parses the columns, so the
# ratings scraped as text come back as numbers when they look numeric.
movie <- data.frame(title, grade)
# View() opens a data viewer and is only meaningful in an interactive session.
if (interactive()) {
  View(movie)
}
write.csv(movie, "movie.csv", row.names = FALSE)
data <- read.csv("movie.csv")
# Rank the titles by the sum of their ratings (ties broken by the number of
# reviews) and keep the best five.
# NOTE(review): the result is named `top10` but only five rows are kept;
# confirm which of the two is intended.
title_stats <- summarise(
  group_by(data, title),
  grade_sum = sum(grade),
  count = n()
)
ranked_titles <- arrange(title_stats, desc(grade_sum), desc(count))
top10 <- head(ranked_titles, 5)
top10
반응형
LIST
'공부합시다 > 찍먹' 카테고리의 다른 글
[R] barplot - 막대에 색상 넣기 (0) | 2021.04.13 |
---|---|
[R] 성경크롤링에 따른 텍스트마이닝 (0) | 2021.04.13 |
[R] rvest pkg for Crawling (0) | 2021.04.13 |
[R] 한국복지패널 데이터 분석하기 (0) | 2021.04.13 |
[R] Barplot (0) | 2021.04.13 |
댓글