-
Notifications
You must be signed in to change notification settings - Fork 182
Description
Hi @hadley @mgirlich, I might be misunderstanding but I think there might be some inconsistency in the way that stringr::str_like() is being translated with respect to the argument ignore_case
In the documentation it says
ignore_case: Ignore case of matches? Defaults to TRUE to match the SQL LIKE operator.
In postgres and redshift I see what I would expect - that ILIKE is used when ignore_case is true, while LIKE is used when ignore_case is false. But for the backends without ILIKE we have LIKE being used when ignore_case is true and error thrown when false. But for these latter backends, isn't this the wrong way around (as in I would have expected an error if ignore_case was true [or maybe cast both to lower and use LIKE] and use LIKE if ignore_case is false)
library(dbplyr)
lf_postgres <- lazy_frame(a = TRUE, b = 1, c = 2, d = "zzzyzzz",
con = simulate_postgres())
lf_redshift <- lazy_frame(a = TRUE, b = 1, c = 2, d = "zzzyzzz",
con = simulate_redshift())
lf_mssql <- lazy_frame(a = TRUE, b = 1, c = 2, d = "zzzyzzz",
con = simulate_mssql())
lf_snowflake <- lazy_frame(a = TRUE, b = 1, c = 2, d = "zzzyzzz",
con = simulate_snowflake())
lf_spark <- lazy_frame(a = TRUE, b = 1, c = 2, d = "zzzyzzz",
con = simulate_spark_sql())
# ilike if we ignore case - should we not get an error here for backends without ilike?
lf_postgres |>
dplyr::filter(stringr::str_like(d, "Y", ignore_case = TRUE))
#> <SQL>
#> SELECT `df`.*
#> FROM `df`
#> WHERE (`d` ILIKE 'Y')
lf_redshift |>
dplyr::filter(stringr::str_like(d, "Y", ignore_case = TRUE))
#> <SQL>
#> SELECT `df`.*
#> FROM `df`
#> WHERE (`d` ILIKE 'Y')
lf_mssql |>
dplyr::filter(stringr::str_like(d, "Y", ignore_case = TRUE))
#> <SQL>
#> SELECT `df`.*
#> FROM `df`
#> WHERE (`d` LIKE 'Y')
lf_snowflake |>
dplyr::filter(stringr::str_like(d, "Y", ignore_case = TRUE))
#> <SQL>
#> SELECT `df`.*
#> FROM `df`
#> WHERE (`d` LIKE 'Y')
lf_spark |>
dplyr::filter(stringr::str_like(d, "Y", ignore_case = TRUE))
#> <SQL>
#> SELECT `df`.*
#> FROM `df`
#> WHERE (`d` LIKE 'Y')
# like if we do not ignore case - should we not get like here for all?
lf_postgres |>
dplyr::filter(stringr::str_like(d, "Y", ignore_case = FALSE))
#> <SQL>
#> SELECT `df`.*
#> FROM `df`
#> WHERE (`d` LIKE 'Y')
lf_redshift |>
dplyr::filter(stringr::str_like(d, "Y", ignore_case = FALSE))
#> <SQL>
#> SELECT `df`.*
#> FROM `df`
#> WHERE (`d` LIKE 'Y')
lf_mssql |>
dplyr::filter(stringr::str_like(d, "Y", ignore_case = FALSE))
#> Error in `stringr::str_like()`:
#> ! Backend only supports case insensitve `str_like()`.
lf_snowflake |>
dplyr::filter(stringr::str_like(d, "Y", ignore_case = FALSE))
#> Error in `stringr::str_like()`:
#> ! Backend only supports case insensitve `str_like()`.
lf_spark |>
dplyr::filter(stringr::str_like(d, "Y", ignore_case = FALSE))
#> Error in `stringr::str_like()`:
#> ! Backend only supports case insensitve `str_like()`.
Created on 2024-04-04 with reprex v2.0.2
Here is an additional reprex with a live snowflake database
library(dplyr)
#> Warning: package 'dplyr' was built under R version 4.2.3
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(dbplyr)
#>
#> Attaching package: 'dbplyr'
#> The following objects are masked from 'package:dplyr':
#>
#> ident, sql
con <- DBI::dbConnect(odbc::odbc(),
"ohdsi_omop_snowflake",
PWD = Sys.getenv("SNOWFLAKE_PASSWORD"))
concept_db <- tbl(con, in_catalog("OMOP_SYNTHETIC_DATASET", "CDM53", "CONCEPT"))
concept_db |>
head(5)
#> # Source: SQL [5 x 10]
#> # Database: Snowflake 8.13.1[ohdsi@Snowflake/OMOP_SYNTHETIC_DATASET]
#> CONCEPT_ID CONCEPT_NAME DOMAIN_ID VOCABULARY_ID CONCEPT_CLASS_ID
#> <dbl> <chr> <chr> <chr> <chr>
#> 1 0 No matching concept Metadata None Undefined
#> 2 1 Domain Metadata Domain Domain
#> 3 2 Gender Metadata Domain Domain
#> 4 3 Race Metadata Domain Domain
#> 5 4 Ethnicity Metadata Domain Domain
#> # ℹ 5 more variables: STANDARD_CONCEPT <chr>, CONCEPT_CODE <chr>,
#> # VALID_START_DATE <date>, VALID_END_DATE <date>, INVALID_REASON <chr>
# get record back if we bring locally, but not in db
concept_db |>
head(5) |>
collect() |>
dplyr::filter(stringr::str_like(CONCEPT_NAME, "ETHNICITY", ignore_case = TRUE))
#> # A tibble: 1 × 10
#> CONCEPT_ID CONCEPT_NAME DOMAIN_ID VOCABULARY_ID CONCEPT_CLASS_ID
#> <dbl> <chr> <chr> <chr> <chr>
#> 1 4 Ethnicity Metadata Domain Domain
#> # ℹ 5 more variables: STANDARD_CONCEPT <chr>, CONCEPT_CODE <chr>,
#> # VALID_START_DATE <date>, VALID_END_DATE <date>, INVALID_REASON <chr>
concept_db |>
dplyr::filter(stringr::str_like(CONCEPT_NAME, "ETHNICITY", ignore_case = TRUE))
#> # Source: SQL [0 x 10]
#> # Database: Snowflake 8.13.1[ohdsi@Snowflake/OMOP_SYNTHETIC_DATASET]
#> # ℹ 10 variables: CONCEPT_ID <dbl>, CONCEPT_NAME <chr>, DOMAIN_ID <chr>,
#> # VOCABULARY_ID <chr>, CONCEPT_CLASS_ID <chr>, STANDARD_CONCEPT <chr>,
#> # CONCEPT_CODE <chr>, VALID_START_DATE <date>, VALID_END_DATE <date>,
#> # INVALID_REASON <chr>
# get no record back if we bring locally, not supported in db
concept_db |>
head(5) |>
collect() |>
dplyr::filter(stringr::str_like(CONCEPT_NAME, "ETHNICITY", ignore_case = FALSE))
#> # A tibble: 0 × 10
#> # ℹ 10 variables: CONCEPT_ID <dbl>, CONCEPT_NAME <chr>, DOMAIN_ID <chr>,
#> # VOCABULARY_ID <chr>, CONCEPT_CLASS_ID <chr>, STANDARD_CONCEPT <chr>,
#> # CONCEPT_CODE <chr>, VALID_START_DATE <date>, VALID_END_DATE <date>,
#> # INVALID_REASON <chr>
# db use of stringr
concept_db |>
dplyr::filter(stringr::str_like(CONCEPT_NAME, "ETHNICITY", ignore_case = FALSE))
#> Error in `stringr::str_like()`:
#> ! Backend only supports case insensitve `str_like()`.
Created on 2024-04-04 with reprex v2.0.2