# select all rows where sales_2019 was more than 100budget %>%filter(sales_2019 >100)
# A tibble: 6 × 8
region product sales_2019 sales_2020 expenses_2019 expenses_…¹ satis…² satis…³
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
1 North widgets 2129 -517 822 -897 high very h…
2 North gadgets 723 77 1037 1115 very h… very h…
3 South widgets 1123 -1450 1004 672 high neutral
4 South gadgets 2022 -945 -610 200 low low
5 West widgets 633 790 783 -315 neutral neutral
6 West gadgets 1204 426 433 -136 low low
# … with abbreviated variable names ¹expenses_2020, ²satisfaction_2019,
# ³satisfaction_2020
# everything but the Northbudget %>%filter(region !="North")
# A tibble: 6 × 8
region product sales_2019 sales_2020 expenses_2019 expenses_…¹ satis…² satis…³
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
1 South widgets 1123 -1450 1004 672 high neutral
2 South gadgets 2022 -945 -610 200 low low
3 East widgets -728 -51 -801 -342 very l… very l…
4 East gadgets -423 -354 94 2036 neutral high
5 West widgets 633 790 783 -315 neutral neutral
6 West gadgets 1204 426 433 -136 low low
# … with abbreviated variable names ¹expenses_2020, ²satisfaction_2019,
# ³satisfaction_2020
# regions and products with profit in both 2019 and 2020profit_both <- budget %>%filter( sales_2019 > expenses_2019, sales_2020 > expenses_2020 )# the same as above, using & instead of a commaprofit_both <- budget %>%filter( sales_2019 > expenses_2019 & sales_2020 > expenses_2020 )# regions and products with profit in 2019 or 2020profit_either <- budget %>%filter( sales_2019 > expenses_2019 |#logical operator for or sales_2020 > expenses_2020 )# 2020 profit greater than 1000profit_1000 <- budget %>%filter(sales_2020 - expenses_2020 >1000)
%in% operator
# retain any rows where region is north or south, and where product equals widgetbudget %>%filter(region %in%c("North", "South"), product =="widgets")
# A tibble: 2 × 8
region product sales_2019 sales_2020 expenses_2019 expenses_…¹ satis…² satis…³
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
1 North widgets 2129 -517 822 -897 high very h…
2 South widgets 1123 -1450 1004 672 high neutral
# … with abbreviated variable names ¹expenses_2020, ²satisfaction_2019,
# ³satisfaction_2020
# retain any rows where the region is not east or west, and where the product does not equal gadgetsbudget %>%filter(!region %in%c("East", "West"), product !="gadgets")
# A tibble: 2 × 8
region product sales_2019 sales_2020 expenses_2019 expenses_…¹ satis…² satis…³
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
1 North widgets 2129 -517 822 -897 high very h…
2 South widgets 1123 -1450 1004 672 high neutral
# … with abbreviated variable names ¹expenses_2020, ²satisfaction_2019,
# ³satisfaction_2020
Arrange
budget %>%mutate(region =factor(region, levels =c("North", "South", "East", "West"))) %>%filter(product =="gadgets") %>%arrange(region) # arrange function sorts out data
# A tibble: 4 × 8
region product sales_2019 sales_2020 expenses_2019 expenses_…¹ satis…² satis…³
<fct> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
1 North gadgets 723 77 1037 1115 very h… very h…
2 South gadgets 2022 -945 -610 200 low low
3 East gadgets -423 -354 94 2036 neutral high
4 West gadgets 1204 426 433 -136 low low
# … with abbreviated variable names ¹expenses_2020, ²satisfaction_2019,
# ³satisfaction_2020
Mutate
budget2 <- budget %>%mutate(sales = sales_2019 + sales_2020,expenses = expenses_2019 + expenses_2020,profit = sales - expenses,region =paste(region, "Office") )#mutate with logical operator budget2 <- budget2 %>%mutate(profit_category = profit >0,product =as.factor(product))# you do not have to use TRUE or FALSE all the time and you can use case_when function budget3 <- budget2 %>%mutate(profit_category =case_when(profit >0~"PROFIT", profit <0~"NO PROFIT"))# use tilda and %in% operator # create a column where people get a bonus if customer satisfaction was overall high or very highbonus <- budget3 %>%mutate(bonus_2019 =case_when(satisfaction_2019 %in%c("very low", "low", "neutral") ~"no bonus", satisfaction_2019 %in%c("high", "very high") ~"bonus"))
Summarise
## first we need to adjust the data for analysisbudget4 <- budget %>%select(-satisfaction_2019, -satisfaction_2020) %>%pivot_longer(cols = sales_2019:expenses_2020,names_to =c("type", "year"),names_sep ="_",values_to ="value") %>%pivot_wider(names_from = type,values_from = value)head(budget4) # check the format
# A tibble: 6 × 5
region product year sales expenses
<chr> <chr> <chr> <dbl> <dbl>
1 North widgets 2019 2129 822
2 North widgets 2020 -517 -897
3 North gadgets 2019 723 1037
4 North gadgets 2020 77 1115
5 South widgets 2019 1123 1004
6 South widgets 2020 -1450 672
#!!!!!! redefining round so 5s round up !!!!!! round <-function(x, digits =0) { posneg =sign(x) z =abs(x)*10^digits z = z +0.5+sqrt(.Machine$double.eps) z =trunc(z) z = z/10^digits z*posneg}
# remove new round() methodrm(round)
missing_bad <- budget4 %>%mutate(expenses =ifelse(test = year ==2020& region =="South", yes =0, # value if above conditions are metno = expenses # value if above conditions are not met ))
missing_bad <- budget4 %>%mutate(expenses =case_when(# set to 0 when year is 2020 and region is North year ==2020& region =="South"~0, # otherwise, set to the value in the expenses columnTRUE~ expenses ))
# set sales values to "missing" for North 2020 rowsmissing_bad <- missing_bad %>%mutate(sales =ifelse(year ==2020& region =="North", "missing", sales))# check structure of data, sales now characterstr(missing_bad)