copy-to-s3-minio.td

# Copyright Materialize, Inc. and contributors. All rights reserved.
#
# Use of this software is governed by the Business Source License
# included in the LICENSE file at the root of this repository.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0.

# Tests for COPY TO expr.

# COPY TO expressions should immediately succeed or fail on their first run.
$ set-max-tries max-tries=1
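
# COPY ... TO <expr> is gated behind the enable_copy_to_expr feature flag,
# so enable it before running the statements below.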
$ postgres-execute connection=postgres://mz_system:materialize@${testdrive.materialize-internal-sql-addr}
ALTER SYSTEM SET enable_copy_to_expr = true;

# Prepare table data
> CREATE TABLE t (a int);
> INSERT INTO t VALUES (1);
> INSERT INTO t VALUES (2);

> CREATE SECRET aws_secret AS '${arg.aws-secret-access-key}'

> CREATE CONNECTION aws_conn
  TO AWS (
    ACCESS KEY ID = '${arg.aws-access-key-id}',
    SECRET ACCESS KEY = SECRET aws_secret,
    ENDPOINT = '${arg.aws-endpoint}',
    REGION = 'us-east-1'
  );
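
# Invalid options and targets are rejected with the errors below.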
! COPY t TO 's3://path/to/dir' WITH (FORMAT = 'csv');
contains:AWS CONNECTION is required for COPY ... TO <expr>

! COPY t TO 's3://path/to/dir'
  WITH (
    AWS CONNECTION = aws_conn,
    FORMAT = 'text'
  );
contains:FORMAT TEXT not yet supported

! COPY t TO 's3://path/to/dir'
  WITH (
    AWS CONNECTION = aws_conn,
    FORMAT = 'binary'
  );
contains:FORMAT BINARY not yet supported

! COPY t TO 's3://path/to/dir'
  WITH (
    AWS CONNECTION = aws_conn,
    MAX FILE SIZE = '20MB'
  );
contains:COPY TO <expr> requires a FORMAT option

! COPY t TO '/path/'
  WITH (
    AWS CONNECTION = aws_conn,
    FORMAT = 'csv'
  );
contains:only 's3://...' urls are supported as COPY TO target

! COPY t TO NULL
  WITH (
    AWS CONNECTION = aws_conn,
    FORMAT = 'csv'
  );
contains:COPY TO target value can not be null

! COPY t TO 1234
  WITH (
    AWS CONNECTION = aws_conn,
    FORMAT = 'csv'
  );
contains:COPY TO target must have type text, not type integer

! COPY (SELECT * FROM t ORDER BY 1) TO NULL
  WITH (
    AWS CONNECTION = aws_conn,
    FORMAT = 'csv'
  );
contains:ORDER BY is not supported in SELECT query for COPY statements

! COPY t TO 's3://path/to/dir'
  WITH (
    AWS CONNECTION = aws_conn,
    FORMAT = 'csv',
    MAX FILE SIZE = '1kB'
  );
contains:MAX FILE SIZE cannot be less than 16MB

# Create a cluster with multiple replicas, each with multiple workers.
> CREATE CLUSTER c1 REPLICAS (r1 (size '2'), r2 (size '2'));
> SET cluster = c1;
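
# Capture the current date before and after the COPY so we can detect whether
# the day rolled over while the statement ran.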
$ set-from-sql var=key-1
SELECT TO_CHAR(now(), 'YYYY-MM-DD')

# functions like now() should work in the s3 path
> COPY t TO 's3://copytos3/test/1/' || TO_CHAR(now(), 'YYYY-MM-DD')
  WITH (
    AWS CONNECTION = aws_conn,
    MAX FILE SIZE = "100MB",
    FORMAT = 'csv'
  );

$ set-from-sql var=key-2
SELECT TO_CHAR(now(), 'YYYY-MM-DD')

# The test depends on the day not changing at this specific time
> SELECT '${key-1}' = '${key-2}'
true

> SELECT a FROM t
1
2

> COPY (SELECT a FROM t) TO 's3://copytos3/test/2'
  WITH (
    AWS CONNECTION = aws_conn,
    MAX FILE SIZE = "100MB",
    FORMAT = 'csv'
  );

# copy should work on non-table objects (views, etc)
> CREATE VIEW my_view AS SELECT a FROM t WHERE a < 2;

> COPY my_view TO 's3://copytos3/test/2_5'
  WITH (
    AWS CONNECTION = aws_conn,
    MAX FILE SIZE = "100MB",
    FORMAT = 'csv',
    HEADER = true
  );
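
# Exercise CSV encoding across a wide range of column types.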
> COPY (SELECT array[1,2]::int[], false::bool, 'Inf'::double, '{"s": "abc"}'::jsonb, 1::mz_timestamp, 32767::smallint, 2147483647::integer, 9223372036854775807::bigint, 12345678901234567890123.4567890123456789::numeric(39,16), '2010-10-10'::date, '10:10:10'::time, '2010-10-10 10:10:10+00'::timestamp, '2010-10-10 10:10:10+02'::timestamptz, '0 day'::interval, 'aaaa'::text, '\\xAAAA'::bytea, 'това е'::text, 'текст'::bytea) TO 's3://copytos3/test/3'
  WITH (
    AWS CONNECTION = aws_conn,
    MAX FILE SIZE = "100MB",
    FORMAT = 'csv'
  );
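
# Copying to a prefix that already contains data is rejected.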
! COPY (SELECT a FROM t) TO 's3://copytos3'
  WITH (
    AWS CONNECTION = aws_conn,
    MAX FILE SIZE = "100MB",
    FORMAT = 'csv'
  );
contains:S3 bucket path is not empty
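
# Copy a larger result set (one million rows).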
> COPY (SELECT * FROM generate_series(1, 1000000)) TO 's3://copytos3/test/4'
  WITH (
    AWS CONNECTION = aws_conn,
    MAX FILE SIZE = "100MB",
    FORMAT = 'csv'
  );

# test CSV format options
> COPY (SELECT array[1,2]::int[], 83647::integer, '{"s": "ab`c"}'::jsonb, '2010-10-10 10:10:10+00'::timestamp) TO 's3://copytos3/test/4_5'
  WITH (
    AWS CONNECTION = aws_conn,
    FORMAT = 'csv',
    DELIMITER = ';',
    QUOTE = '`',
    HEADER = true
  )
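
# Copy an empty result set; the check below verifies that a file is still written.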
> COPY (SELECT 1 WHERE FALSE) TO 's3://copytos3/test/5'
  WITH (
    AWS CONNECTION = aws_conn,
    MAX FILE SIZE = "100MB",
    FORMAT = 'csv'
  );
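
# Verify the CSV output written above.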
$ s3-verify-data bucket=copytos3 key=test/1/${key-1} sort-rows=true
1
2

$ s3-verify-data bucket=copytos3 key=test/2 sort-rows=true
1
2

# The double `a` here is a result of the header being written once per file.
$ s3-verify-data bucket=copytos3 key=test/2_5 sort-rows=true
a
a
1

$ s3-verify-data bucket=copytos3 key=test/3 sort-rows=true
"{1,2}",f,Infinity,"{""s"":""abc""}",1,32767,2147483647,9223372036854775807,12345678901234567890123.4567890123456789,2010-10-10,10:10:10,2010-10-10 10:10:10,2010-10-10 08:10:10+00,00:00:00,aaaa,\x5c7841414141,това е,\xd182d0b5d0bad181d182

$ s3-verify-data bucket=copytos3 key=test/4_5 sort-rows=true
array;int4;jsonb;timestamp
{1,2};83647;`{"s":"ab``c"}`;2010-10-10 10:10:10

# Ensure that at least one file is written even when the input is empty.
$ s3-verify-keys bucket=copytos3 prefix-path=test/5 key-pattern=^test/5/mz.*\.csv$

# Copy a large amount of data in the background and check to see that the INCOMPLETE
# sentinel object is written during the copy
# TODO(database-issues#7984): Enable this test once it is more reliable
# $ postgres-execute background=true connection=postgres://materialize:materialize@${testdrive.materialize-sql-addr}
# COPY (SELECT * FROM generate_series(1, 50000000)) TO 's3://copytos3/test/5' WITH (AWS CONNECTION = aws_conn, MAX FILE SIZE = "100MB", FORMAT = 'csv');
# $ s3-verify-keys bucket=copytos3 prefix-path=test/5 key-pattern=INCOMPLETE

# Test with parquet formatting
$ set-from-sql var=key-1
SELECT TO_CHAR(now(), 'YYYY-MM-DD')

> COPY t TO 's3://copytos3/parquet_test/1/' || TO_CHAR(now(), 'YYYY-MM-DD')
  WITH (
    AWS CONNECTION = aws_conn,
    MAX FILE SIZE = "100MB",
    FORMAT = 'parquet'
  );

$ set-from-sql var=key-2
SELECT TO_CHAR(now(), 'YYYY-MM-DD')

# The test depends on the day not changing at this specific time
> SELECT '${key-1}' = '${key-2}'
true

> COPY (SELECT a FROM t) TO 's3://copytos3/parquet_test/2'
  WITH (
    AWS CONNECTION = aws_conn,
    MAX FILE SIZE = "100MB",
    FORMAT = 'parquet'
  );
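
# Exercise Parquet encoding across a wide range of column types, including
# multi-dimensional arrays and uuid.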
> COPY (SELECT array[1,2]::int[], array[array[1, 2], array[NULL, 4]], false::bool, 'Inf'::double, '{"s": "abc"}'::jsonb, '85907cb9-ac9b-4e35-84b8-60dc69368aca'::uuid, 1::mz_timestamp, 32767::smallint, 2147483647::integer, 9223372036854775807::bigint, 1234567890123456789012.4567890123456789::numeric(38,16), '2010-10-10'::date, '10:10:10'::time, '2010-10-10 10:10:10+00'::timestamp, '2010-10-10 10:10:10+02'::timestamptz, 'aaaa'::text, '\\xAAAA'::bytea, 'това е'::text, 'текст'::bytea) TO 's3://copytos3/parquet_test/3'
  WITH (
    AWS CONNECTION = aws_conn,
    MAX FILE SIZE = "100MB",
    FORMAT = 'parquet'
  );
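
# Copy an empty result set with Parquet formatting; the check below verifies
# that a file is still written.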
> COPY (SELECT 1 WHERE FALSE) TO 's3://copytos3/parquet_test/4'
  WITH (
    AWS CONNECTION = aws_conn,
    MAX FILE SIZE = "100MB",
    FORMAT = 'parquet'
  );

$ s3-verify-data bucket=copytos3 key=parquet_test/1/${key-1} sort-rows=true
1
2

$ s3-verify-data bucket=copytos3 key=parquet_test/2 sort-rows=true
1
2

$ s3-verify-data bucket=copytos3 key=parquet_test/3 sort-rows=true
{items: [1, 2], dimensions: 1} {items: [1, 2, , 4], dimensions: 2} false inf {"s":"abc"} 85907cb9ac9b4e3584b860dc69368aca 1 32767 2147483647 9223372036854775807 1234567890123456789012.4567890123456789 2010-10-10 10:10:10 2010-10-10T10:10:10 2010-10-10T08:10:10Z aaaa 5c7841414141 това е d182d0b5d0bad181d182

# Ensure that at least one file is written even when the input is empty.
$ s3-verify-keys bucket=copytos3 prefix-path=parquet_test/4 key-pattern=^parquet_test/4/mz.*\.parquet$

# Confirm that unimplemented types exit early, before anything is written to S3.
! COPY (SELECT '0 day'::interval) TO 's3://copytos3/parquet_test/5'
  WITH (
    AWS CONNECTION = aws_conn,
    MAX FILE SIZE = "100MB",
    FORMAT = 'parquet'
  );
contains:Cannot encode the following columns/types: ["interval: Interval"]

# This should now succeed, since the INCOMPLETE sentinel was never written.
> COPY (SELECT 1::int) TO 's3://copytos3/parquet_test/5'
  WITH (
    AWS CONNECTION = aws_conn,
    MAX FILE SIZE = "100MB",
    FORMAT = 'parquet'
  );

$ s3-verify-data bucket=copytos3 key=parquet_test/5 sort-rows=true
1

# Tests for decimal / numeric type
! COPY (SELECT .123456789012345678901234567890123456789::numeric(39,39)) TO 's3://copytos3/parquet_test/6'
  WITH (AWS CONNECTION = aws_conn, FORMAT = 'parquet');
contains:Cannot encode the following columns/types: ["numeric: Numeric { max_scale: Some(NumericMaxScale(39)) }"]

! COPY (SELECT 12345678901234567890123.4567890123456789::numeric(38,16)) TO 's3://copytos3/parquet_test/6'
  WITH (AWS CONNECTION = aws_conn, FORMAT = 'parquet');
contains:out of range for column with precision

! COPY (SELECT 'NaN'::numeric(10,5)) TO 's3://copytos3/parquet_test/7'
  WITH (AWS CONNECTION = aws_conn, FORMAT = 'parquet');
contains:Cannot represent special numeric value

# The default column scale is set to 10, so a value with more than 10 digits
# after the decimal point cannot be represented in this column.
! COPY (SELECT 5.4567890123456789::numeric) TO 's3://copytos3/parquet_test/8'
  WITH (AWS CONNECTION = aws_conn, FORMAT = 'parquet');
contains:cannot represent decimal value
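
# Copy a table with several numeric precisions and scales and check how the
# values are rendered in the Parquet output.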
> CREATE TABLE t2 (a NUMERIC, b NUMERIC(38, 15), c NUMERIC(38, 0), d NUMERIC(38, 5));
> INSERT INTO t2 VALUES (1234, -1234, 1234, 1234);
> INSERT INTO t2 VALUES (123456789.123456789, -123456789.12345678912345, 1234567890000, 123456789.12345);
> INSERT INTO t2 VALUES (10000000000000000001, 100000000000000.0000001, -100000000, 100000000000000000000000000000000.00001)

> SELECT a, b, c, d FROM t2
1234 -1234 1234 1234
123456789.123456789 -123456789.12345678912345 1234567890000 123456789.12345
10000000000000000001 100000000000000.0000001 -100000000 100000000000000000000000000000000.00001

> COPY t2 TO 's3://copytos3/parquet_test/9'
  WITH (AWS CONNECTION = aws_conn, FORMAT = 'parquet');

# Each column uses scale 10 or the declared max scale of the input column, so many
# values are padded with trailing zeros after the decimal point to fit the column
# scale, but they still represent the same number.
$ s3-verify-data bucket=copytos3 key=parquet_test/9 sort-rows=true
1234.0000000000 -1234.000000000000000 1234 1234.00000
123456789.1234567890 -123456789.123456789123450 1234567890000 123456789.12345
10000000000000000001.0000000000 100000000000000.000000100000000 -100000000 100000000000000000000000000000000.00001

# Tests for nested types
> CREATE TABLE t3 (c1 real list list, c2 varchar list, c3 MAP[text=>MAP[text=>double]], c4 int[][]);
> INSERT INTO t3 VALUES (LIST[[1.25, 2.5],[223.3333]], LIST['a', 'b'], '{a=>{b=>2.5}}', ARRAY[[1, 2],[3, 5]]);
> INSERT INTO t3 VALUES (LIST[[0.0], NULL], NULL, NULL, NULL);
> INSERT INTO t3 VALUES (NULL, LIST[NULL], '{a=>NULL}', ARRAY[[1, 2], [NULL, NULL]]::int[][]);

> COPY t3 TO 's3://copytos3/parquet_test/10' WITH (AWS CONNECTION = aws_conn, FORMAT = 'parquet');

$ s3-verify-data bucket=copytos3 key=parquet_test/10 sort-rows=true
[[0.0], ] // allow-trailing-whitespace
[] {a: } {items: [1, 2, , ], dimensions: 2}
[[1.25, 2.5], [223.3333]] [a, b] {a: {b: 2.5}} {items: [1, 2, 3, 5], dimensions: 2}