customers <- tibble(
id = 1:5,
city = c("Port Ellen", "Dufftown", NA, "Aberlour", "Tobermory"),
postcode = c("PA42 7DU", "AB55 4DH", NA, "AB38 7RY", "PA75 6NR")
)
orders <- tibble(
id = c(2, 3, 4, 4, 5, 5, 6, 6, 7),
items = c(10, 18, 21, 23, 9, 11, 11, 12, 3)
)Create data
Mutating Joins
Left join
left_data <- left_join(customers, orders, by = "id")Warning in left_join(customers, orders, by = "id"): Each row in `x` is expected to match at most 1 row in `y`.
ℹ Row 4 of `x` matches multiple rows.
ℹ If multiple matches are expected, set `multiple = "all"` to silence this
warning.
left_data# A tibble: 7 × 4
id city postcode items
<dbl> <chr> <chr> <dbl>
1 1 Port Ellen PA42 7DU NA
2 2 Dufftown AB55 4DH 10
3 3 <NA> <NA> 18
4 4 Aberlour AB38 7RY 21
5 4 Aberlour AB38 7RY 23
6 5 Tobermory PA75 6NR 9
7 5 Tobermory PA75 6NR 11
left2_data <- left_join(orders, customers, by = "id")
left2_data# A tibble: 9 × 4
id items city postcode
<dbl> <dbl> <chr> <chr>
1 2 10 Dufftown AB55 4DH
2 3 18 <NA> <NA>
3 4 21 Aberlour AB38 7RY
4 4 23 Aberlour AB38 7RY
5 5 9 Tobermory PA75 6NR
6 5 11 Tobermory PA75 6NR
7 6 11 <NA> <NA>
8 6 12 <NA> <NA>
9 7 3 <NA> <NA>
right join
right_data <- right_join(customers, orders, by = "id")Warning in right_join(customers, orders, by = "id"): Each row in `x` is expected to match at most 1 row in `y`.
ℹ Row 4 of `x` matches multiple rows.
ℹ If multiple matches are expected, set `multiple = "all"` to silence this
warning.
right_data# A tibble: 9 × 4
id city postcode items
<dbl> <chr> <chr> <dbl>
1 2 Dufftown AB55 4DH 10
2 3 <NA> <NA> 18
3 4 Aberlour AB38 7RY 21
4 4 Aberlour AB38 7RY 23
5 5 Tobermory PA75 6NR 9
6 5 Tobermory PA75 6NR 11
7 6 <NA> <NA> 11
8 6 <NA> <NA> 12
9 7 <NA> <NA> 3
inner join
inner_data <- inner_join(customers, orders, by = "id")Warning in inner_join(customers, orders, by = "id"): Each row in `x` is expected to match at most 1 row in `y`.
ℹ Row 4 of `x` matches multiple rows.
ℹ If multiple matches are expected, set `multiple = "all"` to silence this
warning.
inner_data# A tibble: 6 × 4
id city postcode items
<dbl> <chr> <chr> <dbl>
1 2 Dufftown AB55 4DH 10
2 3 <NA> <NA> 18
3 4 Aberlour AB38 7RY 21
4 4 Aberlour AB38 7RY 23
5 5 Tobermory PA75 6NR 9
6 5 Tobermory PA75 6NR 11
full join
full_data <- full_join(customers, orders, by = "id")Warning in full_join(customers, orders, by = "id"): Each row in `x` is expected to match at most 1 row in `y`.
ℹ Row 4 of `x` matches multiple rows.
ℹ If multiple matches are expected, set `multiple = "all"` to silence this
warning.
full_data# A tibble: 10 × 4
id city postcode items
<dbl> <chr> <chr> <dbl>
1 1 Port Ellen PA42 7DU NA
2 2 Dufftown AB55 4DH 10
3 3 <NA> <NA> 18
4 4 Aberlour AB38 7RY 21
5 4 Aberlour AB38 7RY 23
6 5 Tobermory PA75 6NR 9
7 5 Tobermory PA75 6NR 11
8 6 <NA> <NA> 11
9 6 <NA> <NA> 12
10 7 <NA> <NA> 3
Filtering Joins
semi join
#they can keep or remove rows from table
semi_data <- semi_join(customers, orders, by = "id")
semi_data# A tibble: 4 × 3
id city postcode
<int> <chr> <chr>
1 2 Dufftown AB55 4DH
2 3 <NA> <NA>
3 4 Aberlour AB38 7RY
4 5 Tobermory PA75 6NR
semi2_data <- semi_join(orders, customers, by = "id")
semi2_data# A tibble: 6 × 2
id items
<dbl> <dbl>
1 2 10
2 3 18
3 4 21
4 4 23
5 5 9
6 5 11
anti join
anti_data <- anti_join(customers, orders, by = "id")
anti_data# A tibble: 1 × 3
id city postcode
<int> <chr> <chr>
1 1 Port Ellen PA42 7DU
anti2_data <- anti_join(orders, customers, by = "id")
anti2_data# A tibble: 3 × 2
id items
<dbl> <dbl>
1 6 11
2 6 12
3 7 3
Mutliple joins
satisfaction <- tibble(
id = 1:5,
satisfaction = c(4, 3, 2, 3, 1)
)
join_1 <- left_join(customers, orders, by = "id")Warning in left_join(customers, orders, by = "id"): Each row in `x` is expected to match at most 1 row in `y`.
ℹ Row 4 of `x` matches multiple rows.
ℹ If multiple matches are expected, set `multiple = "all"` to silence this
warning.
join_2 <- left_join(join_1, satisfaction,
by = "id")
pipe_join <- customers %>%
left_join(orders, by = "id") %>%
left_join(satisfaction, by = "id")Warning in left_join(., orders, by = "id"): Each row in `x` is expected to match at most 1 row in `y`.
ℹ Row 4 of `x` matches multiple rows.
ℹ If multiple matches are expected, set `multiple = "all"` to silence this
warning.
Binding joins
new_customers <- tibble(
id = 6:9,
city = c("Falkirk", "Ardbeg", "Doogal", "Kirkwall"),
postcode = c("FK1 4RS", "PA42 7EA", "G81 4SJ", "KW15 1SE")
)
bindr_data <- bind_rows(customers, new_customers)
bindr_data# A tibble: 9 × 3
id city postcode
<int> <chr> <chr>
1 1 Port Ellen PA42 7DU
2 2 Dufftown AB55 4DH
3 3 <NA> <NA>
4 4 Aberlour AB38 7RY
5 5 Tobermory PA75 6NR
6 6 Falkirk FK1 4RS
7 7 Ardbeg PA42 7EA
8 8 Doogal G81 4SJ
9 9 Kirkwall KW15 1SE