In pure SQL I could do something like this:
SELECT
A.id,
CASE WHEN B.id IS NOT NULL THEN NULL ELSE A.load END AS load
FROM
A
LEFT JOIN
B ON A.id = B.id;
How would I translate that to the tidyverse syntax?
library(dplyr)
library(dbplyr)
con <- simulate_mssql()
A <- tbl_lazy(data.frame(id = 1:10, load = "x"), con, name = "A")
B <- tbl_lazy(data.frame(id = 1:5), con, name = "B")
left_join(A, B, "id") ## shows only A.id
# SELECT `A`.*
# FROM `A`
# LEFT JOIN `B`
# ON (`A`.`id` = `B`.`id`)
Is the only way to add a helper column to B
like this:
left_join(A, B %>% mutate(flag = "x"), "id") %>%
mutate(load = if_else(flag == "x", NA_character_ load))
# <SQL>
# SELECT `id`, IIF(`flag` = 'x', NULL, `load`) AS `load`, `flag`
# FROM (
# SELECT `A`.*, `flag`
# FROM `A`
# LEFT JOIN (
# SELECT `B`.*, 'x' AS `flag`
# FROM `B`
# ) AS `RHS`
# ON (`A`.`id` = `RHS`.`id`)
# ) AS `q01`
You can keep your B.id using keep = TRUE
in the join, i.e.
library(dplyr)
A2 <- data.frame(id = 1:10, load = "x")
B2 <- data.frame(id = 1:5)
left_join(A2, B2, "id", keep = TRUE) %>%
mutate(load = if_else(!is.na(id.y), NA_character_, load))
#> id.x load id.y
#> 1 1 <NA> 1
#> 2 2 <NA> 2
#> 3 3 <NA> 3
#> 4 4 <NA> 4
#> 5 5 <NA> 5
#> 6 6 x NA
#> 7 7 x NA
#> 8 8 x NA
#> 9 9 x NA
#> 10 10 x NA
library(dbplyr)
con <- simulate_mssql()
A <- tbl_lazy(data.frame(id = 1:10, load = "x"), con, name = "A")
B <- tbl_lazy(data.frame(id = 1:5), con, name = "B")
left_join(A, B, "id", keep = TRUE) %>%
mutate(load = if_else(!is.na(id.y), NA_character_, load))
#> <SQL>
#> SELECT `id.x`, IIF(NOT((`id.y` IS NULL)), NULL, `load`) AS `load`, `id.y`
#> FROM (
#> SELECT `A`.`id` AS `id.x`, `load`, `B`.`id` AS `id.y`
#> FROM `A`
#> LEFT JOIN `B`
#> ON (`A`.`id` = `B`.`id`)
#> ) AS `q01`
Created on 2024-02-22 with reprex v2.1.0
Not sure if that helps though...