join.tbl_sql {dbplyr} | R Documentation |
See join for a description of the general purpose of the functions.
## S3 method for class 'tbl_lazy' inner_join(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), auto_index = FALSE, ..., sql_on = NULL) ## S3 method for class 'tbl_lazy' left_join(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), auto_index = FALSE, ..., sql_on = NULL) ## S3 method for class 'tbl_lazy' right_join(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), auto_index = FALSE, ..., sql_on = NULL) ## S3 method for class 'tbl_lazy' full_join(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), auto_index = FALSE, ..., sql_on = NULL) ## S3 method for class 'tbl_lazy' semi_join(x, y, by = NULL, copy = FALSE, auto_index = FALSE, ..., sql_on = NULL) ## S3 method for class 'tbl_lazy' anti_join(x, y, by = NULL, copy = FALSE, auto_index = FALSE, ..., sql_on = NULL)
x |
tbls to join |
y |
tbls to join |
by |
a character vector of variables to join by. If To join by different variables on x and y use a named vector.
For example, |
copy |
If This allows you to join tables across srcs, but it's potentially expensive operation so you must opt into it. |
suffix |
If there are non-joined duplicate variables in |
auto_index |
if |
... |
other parameters passed onto methods, for instance, |
sql_on |
A custom join predicate as an SQL expression. The SQL
can refer to the |
Semi-joins are implemented using WHERE EXISTS
, and anti-joins with
WHERE NOT EXISTS
.
All joins use column equality by default.
An arbitrary join predicate can be specified by passing
an SQL expression to the sql_on
argument.
Use LHS
and RHS
to refer to the left-hand side or
right-hand side table, respectively.
## Not run: library(dplyr) if (has_lahman("sqlite")) { # Left joins ---------------------------------------------------------------- lahman_s <- lahman_sqlite() batting <- tbl(lahman_s, "Batting") team_info <- select(tbl(lahman_s, "Teams"), yearID, lgID, teamID, G, R:H) # Combine player and whole team statistics first_stint <- select(filter(batting, stint == 1), playerID:H) both <- left_join(first_stint, team_info, type = "inner", by = c("yearID", "teamID", "lgID")) head(both) explain(both) # Join with a local data frame grid <- expand.grid( teamID = c("WAS", "ATL", "PHI", "NYA"), yearID = 2010:2012) top4a <- left_join(batting, grid, copy = TRUE) explain(top4a) # Indices don't really help here because there's no matching index on # batting top4b <- left_join(batting, grid, copy = TRUE, auto_index = TRUE) explain(top4b) # Semi-joins ---------------------------------------------------------------- people <- tbl(lahman_s, "Master") # All people in half of fame hof <- tbl(lahman_s, "HallOfFame") semi_join(people, hof) # All people not in the hall of fame anti_join(people, hof) # Find all managers manager <- tbl(lahman_s, "Managers") semi_join(people, manager) # Find all managers in hall of fame famous_manager <- semi_join(semi_join(people, manager), hof) famous_manager explain(famous_manager) # Anti-joins ---------------------------------------------------------------- # batters without person covariates anti_join(batting, people) # Arbitrary predicates ------------------------------------------------------ # Find all pairs of awards given to the same player # with at least 18 years between the awards: awards_players <- tbl(lahman_s, "AwardsPlayers") inner_join( awards_players, awards_players, sql_on = paste0( "(LHS.playerID = RHS.playerID) AND ", "(LHS.yearID < RHS.yearID - 18)" ) ) } ## End(Not run)