티스토리 뷰

#영화평점에 따른 타이틀 선정건
library(rvest)
library(stringr)
library(dplyr)
library(ggplot2)

# Scrape (movie title, rating) pairs from the Naver Movie rating list pages,
# save them to "movie.csv", and rank titles by their summed rating.

base_url <- "https://movie.naver.com/movie/point/af/list.nhn?&page="
n_pages <- 200

t_css <- ".color_b"         # CSS selector used for the title nodes
g_css <- "#old_content em"  # CSS selector used for the rating nodes

# Collect per-page results in preallocated lists instead of growing vectors
# with c() on every iteration.
title_parts <- vector("list", n_pages)
grade_parts <- vector("list", n_pages)

for (i in seq_len(n_pages)) {
  # The original pasted Encoding("EUC_KR") onto the URL. Encoding() only
  # reports the declared encoding mark of its argument ("unknown" for an ASCII
  # literal), so it corrupted the page number and never set any encoding.
  # If the page charset has to be forced, pass it to read_html(encoding = ...).
  craw_url <- paste0(base_url, i)
  hdoc <- read_html(craw_url)

  title_part <- html_text(html_nodes(hdoc, t_css))
  grade_part <- html_text(html_nodes(hdoc, g_css))

  # Keep exactly one rating per title (NA-padded if fewer ratings are found)
  # so both columns always have equal length. The original hard-coded 1:10.
  # NOTE(review): assumes the leading rating nodes line up with the title
  # nodes in document order -- confirm against the page markup.
  grade_part <- grade_part[seq_along(title_part)]

  title_parts[[i]] <- title_part
  grade_parts[[i]] <- grade_part
}

title <- as.character(unlist(title_parts))
grade <- as.character(unlist(grade_parts))

movie <- data.frame(title, grade)

# View() opens a data viewer; only call it in an interactive session.
if (interactive()) {
  View(movie)
}

write.csv(movie, "movie.csv", row.names = FALSE)

# Ratings were scraped as text; reading the CSV back lets read.csv() convert
# the grade column to a numeric type so it can be summed below.
data <- read.csv("movie.csv")

# NOTE(review): the result is named top10 but only 5 rows are kept --
# confirm which is intended.
top10 <- data %>%
  group_by(title) %>%
  summarise(
    grade_sum = sum(grade),
    count = n()
  ) %>%
  arrange(desc(grade_sum), desc(count)) %>%
  head(5)
top10

 

반응형
LIST

'공부합시다 > 찍먹' 카테고리의 다른 글

[R] barplot - 막대에 색상 넣기  (0) 2021.04.13
[R] 성경크롤링에 따른 텍스트마이닝  (0) 2021.04.13
[R] rvest pkg for Crawling  (0) 2021.04.13
[R] 한국복지패널 데이터 분석하기  (0) 2021.04.13
[R] Barplot  (0) 2021.04.13
댓글
링크
공지사항
최근에 올라온 글