Skip to content

Possible inconsistency in translation of stringr::str_like() with respect to ignore_case argument #1488

@edward-burn

Description

@edward-burn

Hi @hadley @mgirlich, I might be misunderstanding but I think there might be some inconsistency in the way that stringr::str_like() is being translated with respect to the argument ignore_case

In the documentation it says
ignore_case: Ignore case of matches? Defaults to TRUE to match the SQL LIKE operator.

In postgres and redshift I see what I would expect - that ILIKE is used when ignore_case is true, while LIKE is used when ignore_case is false. But for the backends without ILIKE we have LIKE being used when ignore_case is true and error thrown when false. But for these latter backends, isn't this the wrong way around (as in I would have expected an error if ignore_case was true [or maybe cast both to lower and use LIKE] and use LIKE if ignore_case is false)

library(dbplyr)

lf_postgres <- lazy_frame(a = TRUE, b = 1, c = 2, d = "zzzyzzz", 
                 con = simulate_postgres())
lf_redshift <- lazy_frame(a = TRUE, b = 1, c = 2, d = "zzzyzzz", 
                          con = simulate_redshift())
lf_mssql <- lazy_frame(a = TRUE, b = 1, c = 2, d = "zzzyzzz", 
                 con = simulate_mssql()) 
lf_snowflake <- lazy_frame(a = TRUE, b = 1, c = 2, d = "zzzyzzz", 
                       con = simulate_snowflake()) 
lf_spark <- lazy_frame(a = TRUE, b = 1, c = 2, d = "zzzyzzz", 
                           con = simulate_spark_sql()) 

# ilike if we ignore case - should we not get an error here for backends without ilike?
lf_postgres |> 
  dplyr::filter(stringr::str_like(d, "Y", ignore_case = TRUE))
#> <SQL>
#> SELECT `df`.*
#> FROM `df`
#> WHERE (`d` ILIKE 'Y')
lf_redshift |> 
  dplyr::filter(stringr::str_like(d, "Y", ignore_case = TRUE))
#> <SQL>
#> SELECT `df`.*
#> FROM `df`
#> WHERE (`d` ILIKE 'Y')
lf_mssql |> 
  dplyr::filter(stringr::str_like(d, "Y", ignore_case = TRUE))
#> <SQL>
#> SELECT `df`.*
#> FROM `df`
#> WHERE (`d` LIKE 'Y')
lf_snowflake |> 
  dplyr::filter(stringr::str_like(d, "Y", ignore_case = TRUE))
#> <SQL>
#> SELECT `df`.*
#> FROM `df`
#> WHERE (`d` LIKE 'Y')
lf_spark |> 
  dplyr::filter(stringr::str_like(d, "Y", ignore_case = TRUE))
#> <SQL>
#> SELECT `df`.*
#> FROM `df`
#> WHERE (`d` LIKE 'Y')

# like if we do not ignore case - should we not get like here for all?
lf_postgres |> 
  dplyr::filter(stringr::str_like(d, "Y", ignore_case = FALSE))
#> <SQL>
#> SELECT `df`.*
#> FROM `df`
#> WHERE (`d` LIKE 'Y')
lf_redshift |> 
  dplyr::filter(stringr::str_like(d, "Y", ignore_case = FALSE))
#> <SQL>
#> SELECT `df`.*
#> FROM `df`
#> WHERE (`d` LIKE 'Y')
lf_mssql |> 
  dplyr::filter(stringr::str_like(d, "Y", ignore_case = FALSE))
#> Error in `stringr::str_like()`:
#> ! Backend only supports case insensitve `str_like()`.
lf_snowflake |> 
  dplyr::filter(stringr::str_like(d, "Y", ignore_case = FALSE))
#> Error in `stringr::str_like()`:
#> ! Backend only supports case insensitve `str_like()`.
lf_spark |> 
  dplyr::filter(stringr::str_like(d, "Y", ignore_case = FALSE))
#> Error in `stringr::str_like()`:
#> ! Backend only supports case insensitve `str_like()`.

Created on 2024-04-04 with reprex v2.0.2

Here is an additional reprex with a live snowflake database

library(dplyr)
#> Warning: package 'dplyr' was built under R version 4.2.3
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(dbplyr)
#> 
#> Attaching package: 'dbplyr'
#> The following objects are masked from 'package:dplyr':
#> 
#>     ident, sql

con <- DBI::dbConnect(odbc::odbc(),
                      "ohdsi_omop_snowflake",
                      PWD = Sys.getenv("SNOWFLAKE_PASSWORD"))

concept_db <- tbl(con, in_catalog("OMOP_SYNTHETIC_DATASET", "CDM53", "CONCEPT"))
concept_db |> 
  head(5)
#> # Source:   SQL [5 x 10]
#> # Database: Snowflake 8.13.1[ohdsi@Snowflake/OMOP_SYNTHETIC_DATASET]
#>   CONCEPT_ID CONCEPT_NAME        DOMAIN_ID VOCABULARY_ID CONCEPT_CLASS_ID
#>        <dbl> <chr>               <chr>     <chr>         <chr>           
#> 1          0 No matching concept Metadata  None          Undefined       
#> 2          1 Domain              Metadata  Domain        Domain          
#> 3          2 Gender              Metadata  Domain        Domain          
#> 4          3 Race                Metadata  Domain        Domain          
#> 5          4 Ethnicity           Metadata  Domain        Domain          
#> # ℹ 5 more variables: STANDARD_CONCEPT <chr>, CONCEPT_CODE <chr>,
#> #   VALID_START_DATE <date>, VALID_END_DATE <date>, INVALID_REASON <chr>

# get record back if we bring locally, but not in db
concept_db |> 
  head(5) |> 
  collect() |> 
  dplyr::filter(stringr::str_like(CONCEPT_NAME, "ETHNICITY", ignore_case = TRUE))
#> # A tibble: 1 × 10
#>   CONCEPT_ID CONCEPT_NAME DOMAIN_ID VOCABULARY_ID CONCEPT_CLASS_ID
#>        <dbl> <chr>        <chr>     <chr>         <chr>           
#> 1          4 Ethnicity    Metadata  Domain        Domain          
#> # ℹ 5 more variables: STANDARD_CONCEPT <chr>, CONCEPT_CODE <chr>,
#> #   VALID_START_DATE <date>, VALID_END_DATE <date>, INVALID_REASON <chr>
concept_db |>  
  dplyr::filter(stringr::str_like(CONCEPT_NAME, "ETHNICITY", ignore_case = TRUE)) 
#> # Source:   SQL [0 x 10]
#> # Database: Snowflake 8.13.1[ohdsi@Snowflake/OMOP_SYNTHETIC_DATASET]
#> # ℹ 10 variables: CONCEPT_ID <dbl>, CONCEPT_NAME <chr>, DOMAIN_ID <chr>,
#> #   VOCABULARY_ID <chr>, CONCEPT_CLASS_ID <chr>, STANDARD_CONCEPT <chr>,
#> #   CONCEPT_CODE <chr>, VALID_START_DATE <date>, VALID_END_DATE <date>,
#> #   INVALID_REASON <chr>

# get no record back if we bring locally, not supported in db
concept_db |> 
  head(5) |> 
  collect() |> 
  dplyr::filter(stringr::str_like(CONCEPT_NAME, "ETHNICITY", ignore_case = FALSE))
#> # A tibble: 0 × 10
#> # ℹ 10 variables: CONCEPT_ID <dbl>, CONCEPT_NAME <chr>, DOMAIN_ID <chr>,
#> #   VOCABULARY_ID <chr>, CONCEPT_CLASS_ID <chr>, STANDARD_CONCEPT <chr>,
#> #   CONCEPT_CODE <chr>, VALID_START_DATE <date>, VALID_END_DATE <date>,
#> #   INVALID_REASON <chr>
# db use of stringr
concept_db |>  
  dplyr::filter(stringr::str_like(CONCEPT_NAME, "ETHNICITY", ignore_case = FALSE)) 
#> Error in `stringr::str_like()`:
#> ! Backend only supports case insensitve `str_like()`.

Created on 2024-04-04 with reprex v2.0.2

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions