# R | RLab_chap06(DoIt) | 2020-11-09 | RLab04~RLab05

# RLab04 ----

# Load dplyr before any of its verbs (summarise(), %>%) are used.
library(dplyr)

# summarise(): collapse rows into summary values.
# Each summary function receives a whole column and returns one value.

# Mean of sales in the order table.
# NOTE(review): order_info_r is not created in this script; it is assumed to
# already exist in the session -- confirm.
summarise(order_info_r, avg = mean(sales))
summarise(order_info_r, min_value = min(sales), max_value = max(sales))

# The same min/max summary written with the pipe.
order_info_r %>% summarise(min_value = min(sales), max_value = max(sales))

# Exam data used by the grouped-summary examples; read from the working
# directory.
exam <- read.csv("csv_exam.csv")

exam

# Grouped summaries
# Overall mean of math
exam %>% summarise(mean_math = mean(math))

# Mean of math for each class
exam %>%
  group_by(class) %>%
  summarise(mean_math = mean(math))

# Several summary statistics in one summarise() call
exam %>%
  group_by(class) %>%                    # split rows by class
  summarise(mean_math = mean(math),      # mean of math
            sum_math = sum(math),        # sum of math
            median_math = median(math),  # median of math
            n = n())                     # rows (students) per class

# Nested grouping: by manufacturer, then by drive type (drv).
# mpg is taken from ggplot2 here so this section does not rely on an object
# that the script only creates further down.
mpg <- as.data.frame(ggplot2::mpg)
mpg
mpg %>%
  group_by(manufacturer, drv) %>%
  summarise(mean_cty = mean(cty)) %>%
  head(10)

# Combining dplyr verbs: for SUVs only, average the combined city/highway
# mileage per manufacturer and show the five highest.
mpg %>%
  group_by(manufacturer) %>%
  filter(class == "suv") %>%             # keep only SUV rows
  mutate(tot = (cty + hwy) / 2) %>%      # combined mileage per row
  summarise(mean_tot = mean(tot)) %>%    # mean per manufacturer
  arrange(desc(mean_tot)) %>%            # highest first
  head(5)

# Practice exercises, p. 150
library(ggplot2)
# Load the mpg data as a plain data frame
mpg <- as.data.frame(ggplot2::mpg)

# Q1: mean cty for each vehicle class
mpg %>%
  group_by(class) %>%
  summarise(mean_cty = mean(cty))

# Q2: the same per-class means, sorted from highest to lowest
mpg %>%
  group_by(class) %>%
  summarise(mean_cty = mean(cty)) %>%
  arrange(desc(mean_cty))

# Q3: the three manufacturers with the highest mean hwy
mpg %>%
  group_by(manufacturer) %>%
  summarise(mean_hwy = mean(hwy)) %>%
  arrange(desc(mean_hwy)) %>%
  head(n = 3)

# Q4: number of compact cars per manufacturer, most frequent first
mpg %>%
  filter(class == "compact") %>%
  group_by(manufacturer) %>%
  summarise(count = n()) %>%
  arrange(desc(count))

# Inspect the reservation table: full print, then the first rows.
reservation_r
head(reservation_r)

# group_by(): group rows; used together with summarise().
# Group the reservation table by customer_id and compute the mean of
# visitor_cnt for each customer.
reservation_r %>%
  group_by(customer_id) %>%
  summarise(
    avg = mean(visitor_cnt)
  )

# RLab05 ----

# Joining horizontally (adding columns)
# Example data
# Midterm scores for ids 1-5
test1 <- data.frame(id = c(1, 2, 3, 4, 5),
                    midterm = c(60, 80, 70, 90, 85))
test1

# Final-exam scores for the same ids
test2 <- data.frame(id = c(1, 2, 3, 4, 5),
                    final = c(70, 83, 65, 95, 80))

test2

# Join the two score tables on id.
# The column name passed to `by` must be quoted.
total <- test1 %>% left_join(test2, by = "id")
total

# Homeroom teacher for each class
name <- data.frame(
  class = c(1, 2, 3, 4, 5),
  teacher = c("kim", "lee", "park", "choi", "chu")
)
name
exam

# Teacher table on the left, exam rows attached to it
exam_new <- name %>% left_join(exam, by = "class")
exam_new

# Exam table on the left, teacher column attached to it
exam_new2 <- exam %>% left_join(name, by = "class")
exam_new2

# Joining the two tables
reservation_r  # reservation data
order_info_r   # order data

# left_join(): keep every row of the left table and attach matching rows
str(reservation_r)
library(dplyr)
reservation_new <- reservation_r %>%
  left_join(order_info_r, by = "reserv_no") %>%
  arrange(reserv_no, item_id) %>%
  select(reserv_no, visitor_cnt, cancel, order_no, item_id, sales)
reservation_new

# Stacking vertically (adding rows)
# Example data
# Test scores for students 1-5
group_a <- data.frame(
  id = c(1, 2, 3, 4, 5),
  test = c(60, 80, 70, 90, 85)
)
group_a

# Test scores for students 6-10
group_b <- data.frame(
  id = c(6, 7, 8, 9, 10),
  test = c(70, 83, 65, 95, 80)
)
group_b

# Stack group_b underneath group_a
group_all <- group_a %>% bind_rows(group_b)
group_all

# Practice exercises, p. 156
# Q1
head(mpg)
str(mpg)

# Fuel price lookup table keyed by the fuel code `fl`. It is created only when
# no `fuel` object exists yet, so a table prepared earlier in the session is
# left untouched.
# NOTE(review): the prices below are assumed to follow the textbook exercise --
# confirm against the exercise text.
if (!exists("fuel")) {
  fuel <- data.frame(fl = c("c", "d", "e", "p", "r"),
                     price_fl = c(2.35, 2.38, 2.11, 2.76, 2.22),
                     stringsAsFactors = FALSE)
}

# Attach the price column to mpg by matching on fl
mpg_fl <- left_join(mpg, fuel, by = "fl")
mpg_fl

# Q2: show model, fl and price_fl for the first five rows
mpg_fl %>%
  select(model, fl, price_fl) %>%
  head(5)