# Copyright Materialize, Inc. and contributors. All rights reserved. # # Use of this software is governed by the Business Source License # included in the LICENSE file at the root of this repository. # # As of the Change Date specified in that file, in accordance with # the Business Source License, use of this software will be governed # by the Apache License, Version 2.0. mode cockroach # Run all tests on a multi-process cluster, because there were some serialization issues in the past that happened # only on multi-process clusters. statement ok CREATE CLUSTER multiprocess REPLICAS (a (SIZE '2-2')); statement ok SET cluster = 'multiprocess'; statement ok CREATE TABLE data (input text) statement ok INSERT INTO data VALUES ('asdfjkl'), ('foo'), ('asdf'), (NULL), ('jkl') query TTT SELECT data.*, reg.* FROM data, regexp_extract('(asdf)|(?Pjkl)', data.input) reg ORDER BY data.input ---- asdf asdf NULL asdfjkl asdf NULL jkl NULL jkl # TODO - Test that the columns have the correct nullability, once they actually do (database-issues#612) # Standard regex matching. query TTT SELECT 'foo' ~ 'foo?', 'fo' ~ 'foo?', 'f' ~ 'foo?' ---- true true false # Standard regex not-matching. query TTT SELECT 'foo' !~ 'foo?', 'fo' !~ 'foo?', 'f' !~ 'foo?' ---- false false true # Case-insensitive matching. query TT SELECT 'Foo' ~ 'foo', 'Foo' ~* 'foo' ---- false true # Case-insensitive not-matching. query TT SELECT 'Foo' !~ 'foo', 'Foo' !~* 'foo' ---- true false mode standard # Check that literal regexes are pre-compiled... query T multiline EXPLAIN OPTIMIZED PLAN WITH(humanized expressions, arity, join implementations) AS VERBOSE TEXT FOR SELECT input ~ 'foo?' FROM data ---- Explained Query: Project (#1) // { arity: 1 } Map (is_regexp_match["foo?", case_insensitive=false](#0{input})) // { arity: 2 } ReadStorage materialize.public.data // { arity: 1 } Source materialize.public.data Target cluster: multiprocess EOF # ...but not variable regexes. query T multiline EXPLAIN OPTIMIZED PLAN WITH(humanized expressions, arity, join implementations) AS VERBOSE TEXT FOR SELECT input ~ input FROM data ---- Explained Query: Project (#1) // { arity: 1 } Map ((#0{input} ~ #0{input})) // { arity: 2 } ReadStorage materialize.public.data // { arity: 1 } Source materialize.public.data Target cluster: multiprocess EOF query T multiline EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR SELECT regexp_match('ABC', 'a.*'); ---- Explained Query (fast path): Constant - (null) Target cluster: multiprocess EOF query T SELECT regexp_match('ABC', 'a.*'); ---- NULL query T multiline EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR SELECT regexp_match('ABC', 'a.*', 'i'); ---- Explained Query (fast path): Constant - ({"ABC"}) Target cluster: multiprocess EOF query T SELECT regexp_match('ABC', 'a.*', 'i'); ---- {ABC} # We have to accept it when both flags are present (taking the last one), because Postgres also does the same. query T SELECT regexp_match('ABC', 'a.*', 'ic'); ---- NULL query T SELECT regexp_match('ABC', 'a.*', 'ci'); ---- {ABC} query error invalid regular expression: regex parse error: SELECT 'abs' ~ '\'; # Case-insensitive vs. case-sensitive regexes when there is no full const folding, but MirScalarExpr::reduce changes # from the binary to unary versions. statement ok CREATE TABLE t(s string, like_pat string, regex_pat string); statement ok INSERT INTO t VALUES ('abc', 'a%', 'a.*'), ('ABC', 'a%', 'a.*'), ('ccc', 'a%', 'a.*'); # In Postgres' regexes, `.` matches newlines by default, so let's test this as well. # See https://www.postgresql.org/docs/current/functions-matching.html#POSIX-MATCHING-RULES # "newline-sensitive matching" statement ok INSERT INTO t VALUES ( E'test line 1\ntest line 2', '%1%2%', '1.*2' ); # Note that sqllogictest can't handle newlines in string output, so replace those when displaying `s` query T multiline EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR SELECT replace(s, E'\n', '') FROM t WHERE s~'a.*'; ---- Explained Query: Project (#3) Filter is_regexp_match["a.*", case_insensitive=false](#0{s}) Map (replace(#0{s}, "\n", "")) ReadStorage materialize.public.t Source materialize.public.t filter=(is_regexp_match["a.*", case_insensitive=false](#0{s})) Target cluster: multiprocess EOF query T SELECT replace(s, E'\n', '') FROM t WHERE s~'a.*'; ---- abc query T multiline EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR SELECT replace(s, E'\n', '') FROM t WHERE s~*'a.*'; ---- Explained Query: Project (#3) Filter is_regexp_match["a.*", case_insensitive=true](#0{s}) Map (replace(#0{s}, "\n", "")) ReadStorage materialize.public.t Source materialize.public.t filter=(is_regexp_match["a.*", case_insensitive=true](#0{s})) Target cluster: multiprocess EOF query T SELECT replace(s, E'\n', '') FROM t WHERE s~*'a.*'; ---- ABC abc query T multiline EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR SELECT replace(s, E'\n', ''), regexp_match(s, 'a.*') FROM t; ---- Explained Query: Project (#3, #4) Map (replace(#0{s}, "\n", ""), regexp_match["a.*", case_insensitive=false](#0{s})) ReadStorage materialize.public.t Source materialize.public.t Target cluster: multiprocess EOF query TT valuesort SELECT replace(s, E'\n', ''), regexp_match(s, 'a.*') FROM t; ---- ABC NULL ccc NULL test line 1test line 2 NULL abc {abc} query T multiline EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR SELECT replace(s, E'\n', ''), regexp_match(s, 'a.*', 'i') FROM t; ---- Explained Query: Project (#3, #4) Map (replace(#0{s}, "\n", ""), regexp_match["a.*", case_insensitive=true](#0{s})) ReadStorage materialize.public.t Source materialize.public.t Target cluster: multiprocess EOF query TT valuesort SELECT replace(s, E'\n', ''), regexp_match(s, 'a.*', 'i') FROM t; ---- ccc NULL test line 1test line 2 NULL ABC {ABC} abc {abc} query TT valuesort SELECT replace(s, E'\n', ''), regexp_match(s, 'a.*', 'ic') FROM t; ---- ABC NULL ccc NULL test line 1test line 2 NULL abc {abc} query TT valuesort SELECT replace(s, E'\n', ''), regexp_match(s, 'a.*', 'ci') FROM t; ---- ccc NULL test line 1test line 2 NULL ABC {ABC} abc {abc} query error db error: ERROR: Evaluation error: invalid regular expression: regex parse error: SELECT replace(s, E'\n', '') FROM t WHERE s ~ '\'; # Dynamic regexes (binary (or variadic) versions) query T multiline EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR SELECT replace(s, E'\n', '') FROM t WHERE s ~ regex_pat; ---- Explained Query: Project (#3) Filter (#0{s} ~ #2{regex_pat}) Map (replace(#0{s}, "\n", "")) ReadStorage materialize.public.t Source materialize.public.t filter=((#0{s} ~ #2{regex_pat})) Target cluster: multiprocess EOF query T SELECT replace(s, E'\n', '') FROM t WHERE s ~ regex_pat; ---- abc test line 1test line 2 query T multiline EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR SELECT replace(s, E'\n', '') FROM t WHERE s ~* regex_pat; ---- Explained Query: Project (#3) Filter (#0{s} ~* #2{regex_pat}) Map (replace(#0{s}, "\n", "")) ReadStorage materialize.public.t Source materialize.public.t filter=((#0{s} ~* #2{regex_pat})) Target cluster: multiprocess EOF query T valuesort SELECT replace(s, E'\n', '') FROM t WHERE s ~* regex_pat; ---- ABC abc test line 1test line 2 query T multiline EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR SELECT replace(s, E'\n', ''), regex_pat, regexp_match(s, regex_pat) FROM t; ---- Explained Query: Project (#3, #2{regex_pat}, #4) Map (replace(#0{s}, "\n", ""), regexp_match(#0{s}, #2{regex_pat})) ReadStorage materialize.public.t Source materialize.public.t Target cluster: multiprocess EOF # We need to also replace newlines in the output here to not confuse sqllogictest. query TTT valuesort SELECT replace(s, E'\n', ''), regex_pat, replace(regexp_match(s, regex_pat)::text, E'\n', '') FROM t; ---- ABC a.* NULL ccc a.* NULL abc a.* {abc} test line 1test line 2 1.*2 {"1test line 2"} query T multiline EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR SELECT replace(s, E'\n', ''), regex_pat, regexp_match(s, regex_pat, 'i') FROM t; ---- Explained Query: Project (#3, #2{regex_pat}, #4) Map (replace(#0{s}, "\n", ""), regexp_match(#0{s}, #2{regex_pat}, "i")) ReadStorage materialize.public.t Source materialize.public.t Target cluster: multiprocess EOF query TTT valuesort SELECT replace(s, E'\n', ''), regex_pat, replace(regexp_match(s, regex_pat, 'i')::text, E'\n', '') FROM t; ---- ccc a.* NULL ABC a.* {ABC} abc a.* {abc} test line 1test line 2 1.*2 {"1test line 2"} query TTT valuesort SELECT replace(s, E'\n', ''), regex_pat, replace(regexp_match(s, regex_pat, 'ic')::text, E'\n', '') FROM t; ---- ABC a.* NULL ccc a.* NULL abc a.* {abc} test line 1test line 2 1.*2 {"1test line 2"} query TTT valuesort SELECT replace(s, E'\n', ''), regex_pat, replace(regexp_match(s, regex_pat, 'ci')::text, E'\n', '') FROM t; ---- ccc a.* NULL ABC a.* {ABC} abc a.* {abc} test line 1test line 2 1.*2 {"1test line 2"} statement ok INSERT INTO T VALUES ('this is gonna be an invalid regex', '', '\'); # Note: The actual error msg shows the regex itself (as it should), but it seems sqllogictest can't handle multiline # error msgs. query error db error: ERROR: Evaluation error: invalid regular expression: regex parse error: SELECT replace(s, E'\n', '') FROM t WHERE s ~* regex_pat; query T multiline EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR SELECT *, s~'\' FROM t; ---- Explained Query: Map (error("invalid regular expression: regex parse error:\n \\n ^\nerror: incomplete escape sequence, reached end of pattern prematurely")) ReadStorage materialize.public.t Source materialize.public.t Target cluster: multiprocess EOF mode cockroach query T SELECT regexp_split_to_array(txt, re) FROM (VALUES ('hello world'), ('12 34'), (' 12 34 '), (''), (' ') ) TXT(txt), (VALUES (''), (' '), ('\s+'), ('\s*'), ('\s') ) RE(re) ORDER BY 1 ---- {""} {""} {""} {""} {""} {"",""} {"",""} {"",""} {"",""} {"",1,2,3,4,""} {"",12,"",34,""} {"",12,"",34,""} {"",12,34,""} {" "} {" ",1,2," "," ",3,4," "} {1,2," "," ",3,4} {1,2,3,4} {12,"",34} {12,"",34} {12,34} {h,e,l,l,o," ",w,o,r,l,d} {h,e,l,l,o,w,o,r,l,d} {hello,world} {hello,world} {hello,world} # TODO: Correct ordering when database-issues#4764 is fixed. query T SELECT foo FROM regexp_split_to_table('the quick brown fox jumps over the lazy dog', '\s+') AS foo ORDER BY 1 ---- brown dog fox jumps lazy over quick the the # TODO: Correct ordering when database-issues#4764 is fixed. query T SELECT foo FROM regexp_split_to_table('the quick brown fox', '\s*') AS foo ORDER BY 1 ---- b c e f h i k n o o q r t u w x query T SELECT regexp_split_to_array('no match', 'nope') ---- {"no match"} query T SELECT regexp_split_to_table('no match', 'nope') ---- no match # Test regexp_replace success paths query T multiline EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR SELECT regexp_replace('FOOBARBAZ', 'b..', 'X'); ---- Explained Query (fast path): Constant - ("FOOBARBAZ") Target cluster: multiprocess EOF query T SELECT regexp_replace('FOOBARBAZ', 'b..', 'X'); ---- FOOBARBAZ query T multiline EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR SELECT regexp_replace('FOOBARBAZ', 'b..', 'X', 'i'); ---- Explained Query (fast path): Constant - ("FOOXBAZ") Target cluster: multiprocess EOF query T SELECT regexp_replace('FOOBARBAZ', 'b..', 'X', 'i'); ---- FOOXBAZ query T SELECT regexp_replace('FOOBARBAZ', 'b..', 'X', 'ig'); ---- FOOXX query T SELECT regexp_replace('foobarbaz', 'b(..)', 'X${1}Y', 'g') ---- fooXarYXazY query T SELECT regexp_replace('foobarbaz', 'no match', 'X') ---- foobarbaz statement ok CREATE TABLE r(s string, regex string, replacement string); statement ok INSERT INTO r VALUES ('foobarbaz', 'b..', 'X'), ('foobarbaz', 'b(..)', '>${1}<'), ('FOOBARBAZ', 'b..', 'X'), ('FOOZARZAZ', 'b..', 'X'); query T multiline EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR SELECT regexp_replace(s, 'b..', 'X') FROM r; ---- Explained Query: Project (#3) Map (regexp_replace["b..", case_insensitive=false, limit=1](#0{s}, "X")) ReadStorage materialize.public.r Source materialize.public.r Target cluster: multiprocess EOF query TT rowsort SELECT s, regexp_replace(s, 'b..', 'X') FROM r; ---- foobarbaz fooXbaz foobarbaz fooXbaz FOOBARBAZ FOOBARBAZ FOOZARZAZ FOOZARZAZ query T multiline EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR SELECT regexp_replace(s, 'b..', 'X', 'i') FROM r; ---- Explained Query: Project (#3) Map (regexp_replace["b..", case_insensitive=true, limit=1](#0{s}, "X")) ReadStorage materialize.public.r Source materialize.public.r Target cluster: multiprocess EOF query TT rowsort SELECT s, regexp_replace(s, 'b..', 'X', 'i') FROM r; ---- FOOBARBAZ FOOXBAZ foobarbaz fooXbaz foobarbaz fooXbaz FOOZARZAZ FOOZARZAZ query T multiline EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR SELECT regexp_replace(s, 'b..', 'X', 'ig') FROM r; ---- Explained Query: Project (#3) Map (regexp_replace["b..", case_insensitive=true, limit=0](#0{s}, "X")) ReadStorage materialize.public.r Source materialize.public.r Target cluster: multiprocess EOF query TT rowsort SELECT s, regexp_replace(s, 'b..', 'X', 'ig') FROM r; ---- FOOBARBAZ FOOXX foobarbaz fooXX foobarbaz fooXX FOOZARZAZ FOOZARZAZ query T multiline EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR SELECT regexp_replace(s, regex, replacement, 'ig') FROM r; ---- Explained Query: Project (#3) Map (regexp_replace(#0{s}, #1{regex}, #2{replacement}, "ig")) ReadStorage materialize.public.r Source materialize.public.r Target cluster: multiprocess EOF query TT rowsort SELECT s, regexp_replace(s, regex, replacement, 'ig') FROM r; ---- FOOBARBAZ FOOXX foobarbaz fooXX FOOZARZAZ FOOZARZAZ foobarbaz foo>ar<>az< # Test regexp_replace error paths query T multiline EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR SELECT regexp_replace('foobarbaz', 'b(..', 'X', 'ig'); ---- Explained Query (fast path): Error "invalid regular expression: regex parse error:\n b(..\n ^\nerror: unclosed group" Target cluster: multiprocess EOF query T SELECT regexp_replace(null, 'b(..', 'X', 'ig'); ---- NULL query error invalid regular expression: regex parse error: SELECT regexp_replace('foobarbaz', 'b(..', 'X', 'ig'); query error invalid regular expression flag: x SELECT regexp_replace('foobarbaz', 'b(..', 'X', 'igx'); statement ok CREATE TABLE e(s string, regex string, replacement string); statement ok INSERT INTO e VALUES ('foobarbaz', 'b(..', 'X'); query T multiline EXPLAIN OPTIMIZED PLAN WITH (humanized expressions) AS VERBOSE TEXT FOR SELECT s, regexp_replace(s, 'b(..', replacement, 'ig') FROM e; ---- Explained Query: Project (#0{s}, #3) Map (case when (#0{s}) IS NULL then null else error("invalid regular expression: regex parse error:\n b(..\n ^\nerror: unclosed group") end) ReadStorage materialize.public.e Source materialize.public.e Target cluster: multiprocess EOF query error invalid regular expression: regex parse error: SELECT s, regexp_replace(s, 'b(..', replacement, 'ig') FROM e; query error invalid regular expression: regex parse error: SELECT s, regexp_replace(s, regex, replacement, 'ig') FROM e; query error invalid regular expression flag: x SELECT regexp_replace(s, regex, replacement, 'igx') FROM e; # regexp_matches query T SELECT regexp_matches('foobarbaz', '(b..)') ---- {bar} query T SELECT regexp_matches('foobarbaz', '(b..)', 'g') ---- {bar} {baz} query T SELECT regexp_matches('foobarbequebazilbarfbonkbilbobagginsbarbellbeachball', '(b[^b]+)(b[^b]+)(b[^b]+)'); ---- {bar,beque,bazil} query T rowsort SELECT regexp_matches('foobarbequebazilbarfbonkbilbobagginsbarbellbeachball', '(b[^b]+)(b[^b]+)(b[^b]+)', 'g'); ---- {barf,bonk,bil} {bo,baggins,bar} {bar,beque,bazil} {bell,beach,ball} query error invalid regular expression flag: x SELECT regexp_matches('foobarbaz', '(b..)', 'igx'); query T SELECT regexp_matches('orangeoculus', '(b[^b]+)(b[^b]+)(b[^b]+)', 'g'); ---- query T rowsort SELECT regexp_matches('fooBarBequebazilbarfBonkbilbobagginsbarbellbeachball', '(b[^b]+)(b[^b]+)(b[^b]+)', 'ig'); ---- {barf,Bonk,bil} {bo,baggins,bar} {Bar,Beque,bazil} {bell,beach,ball} statement OK CREATE TABLE foo(bar text, pattern text); statement OK insert into foo values ('fooBarBequebazilbarfBonkbilbobagginsbarbellbeachball', '(b[^b]+)(b[^b]+)(b[^b]+)'), ('bellbeachball', '(b[^b]+)(b[^b]+)(b[^b]+)'); query T rowsort SELECT regexp_matches(bar, pattern, 'ig') FROM foo; ---- {barf,Bonk,bil} {bo,baggins,bar} {Bar,Beque,bazil} {bell,beach,ball} {bell,beach,ball} query T SELECT regexp_matches('abc', '', 'ig'); ---- {} {} {} {} statement ok CREATE TABLE foo2(bar int2); statement ok insert into foo2 values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10); simple SELECT regexp_matches(bar, '(b[^b]+)(b[^b]+)(b[^b]+)', 'ig') FROM foo2; ---- db error: ERROR: function regexp_matches(smallint, unknown, unknown) does not exist HINT: No function matches the given name and argument types. You might need to add explicit type casts. statement ok DROP CLUSTER multiprocess;