sql regex oracle-database regexp-replace

REGEXP_REPLACE result depends on NLS parameters

I wrote an ORACLE PL-SQL function to convert a string from camelCase to snake_case using REGEXP_REPLACE. Below is the function itself:

create function camel_to_snake(str varchar2) return varchar2 as
begin
    return
        lower(
            regexp_replace(
                regexp_replace (
                    str,
                    '([A-Z])([A-Z][a-z])',
                    '\1_\2',
                    1, 0, 'c'
                ),
                '([a-z])([A-Z])',
                '\1_\2',
                1, 0, 'c'
            )
        );
end;

And after some time it turned out that the result of the function depends on the client side parameter NLS_LANG.

For the function to work correctly, the language and territory must be specified - AMERICAN_AMERICA

select
    column_value as camel_case,
    camel_to_snake(column_value) as snake_case
from
    table(t_varchar_table(
        'Foo',
        'FooBar',
        'foobar'
    ));


CAMEL_CASE                               SNAKE_CASE
---------------------------------------- ----------------------------------------
Foo                                      foo
FooBar                                   foo_bar
foobar                                   foobar

Otherwise it returns nonsense (cyrillic languages):

select
    column_value as camel_case,
    camel_to_snake(column_value) as snake_case
from
    table(t_varchar_table(
        'Foo',
        'FooBar',
        'foobar'
    ));


CAMEL_CASE                               SNAKE_CASE
---------------------------------------- ----------------------------------------
Foo                                      f_o_o
FooBar                                   f_o_ob_a_r
foobar                                   f_o_ob_a_r

How to fix it so that the result of the function does not depend on the client parameters?

Solution

Character ranges like [A-Z] are language-dependent. You can use character classes like [[:upper:]] instead:

create function camel_to_snake(str varchar2) return varchar2 as
begin
    return
        lower(
            regexp_replace(
                regexp_replace (
                    str,
                    '([[:upper:]])([[:upper:]][[:lower:]])',
                    '\1_\2',
                    1, 0, 'c'
                ),
                '([[:lower:]])([[:upper:]])',
                '\1_\2',
                1, 0, 'c'
            )
        );
end;
/

alter session set nls_language = 'Russian'

select
    column_value as camel_case,
    camel_to_snake(column_value) as snake_case
from
    table(t_varchar_table(
        'Foo',
        'FooBar',
        'foobar'
    ));

CAMEL_CASE	SNAKE_CASE
Foo	foo
FooBar	foo_bar
foobar	foobar

fiddle