03-after-rehydration.td 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. # Copyright Materialize, Inc. and contributors. All rights reserved.
  2. #
  3. # Use of this software is governed by the Business Source License
  4. # included in the LICENSE file at the root of this repository.
  5. #
  6. # As of the Change Date specified in that file, in accordance with
  7. # the Business Source License, use of this software will be governed
  8. # by the Apache License, Version 2.0.
  # Avro schema for message keys: a record named "Key" with a single string
  # field, "key". The kafka-ingest commands in this file pass it as key-schema.
  9. $ set keyschema={
  10. "type": "record",
  11. "name": "Key",
  12. "fields": [
  13. {"name": "key", "type": "string"}
  14. ]
  15. }
  # Avro schema for message values: a record named "test" with a string field
  # f1 and a long field f2. The kafka-ingest commands in this file pass it as
  # schema.
  16. $ set schema={
  17. "type" : "record",
  18. "name" : "test",
  19. "fields" : [
  20. {"name":"f1", "type":"string"},
  21. {"name":"f2", "type":"long"}
  22. ]
  23. }
  24. # Ensure we rehydrate properly
  # The table is expected to hold three rows (keys fish, birdmore and
  # mammalmore) with columns key, f1 and f2.
  # NOTE(review): these rows are presumably written by an earlier file in this
  # test sequence, before the restart - confirm.
  25. > SELECT * from upsert_tbl
  26. key f1 f2
  27. ---------------------------
  28. fish fish 1001
  29. birdmore geese 56
  30. mammalmore moose 2
  31. # Ensure that statistics are correctly updated. Note that the
  32. # byte count could be lower or higher than before restarting,
  33. # as rehydration has to store values differently.
  # Aggregates the raw source statistics rows that join to upsert_tbl by id and
  # expects: a positive total bytes_indexed, a total records_indexed of 3 (the
  # number of rows in the table at this point), and a non-NULL
  # rehydration_latency on every joined row.
  34. > SELECT
  35. SUM(u.bytes_indexed) > 0,
  36. SUM(u.records_indexed),
  37. bool_and(u.rehydration_latency IS NOT NULL)
  38. FROM mz_tables t
  39. JOIN mz_internal.mz_source_statistics_raw u ON t.id = u.id
  40. WHERE t.name IN ('upsert_tbl')
  41. GROUP BY t.name
  42. ORDER BY t.name
  43. true 3 true
  44. # Save the size of the rehydrated state.
  # Stores the current total bytes_indexed for upsert_tbl, cast to text, in the
  # variable rehydrated-state-bytes; a later statistics check compares against
  # it.
  45. $ set-from-sql var=rehydrated-state-bytes
  46. SELECT
  47. (SUM(u.bytes_indexed))::text
  48. FROM mz_tables t
  49. JOIN mz_internal.mz_source_statistics_raw u ON t.id = u.id
  50. WHERE t.name IN ('upsert_tbl')
  51. # Ensure we process updates correctly.
  # Overwrite the value for key "fish" with a longer f1 string and a new f2,
  # then check that the table shows the new value and that the other two rows
  # are unchanged.
  52. $ kafka-ingest format=avro topic=upsert key-format=avro key-schema=${keyschema} schema=${schema}
  53. {"key": "fish"} {"f1": "muchlongerfish", "f2": 9000}
  54. > SELECT * from upsert_tbl
  55. key f1 f2
  56. --------------------------------------
  57. fish muchlongerfish 9000
  58. birdmore geese 56
  59. mammalmore moose 2
  60. # Wait for the new value's statistics to propagate. We can't simply
  61. # assert that the byte count grew after writing `muchlongerfish`,
  62. # because the rehydrated value may be more costly to store. So the
  63. # check takes two steps: wait here for the byte count to change, then
  64. # write a longer value and check that the count grows. The first step
  65. # uses != because different implementations use space differently
  66. # during rehydration and normal operation.
  # Passes once the total bytes_indexed no longer equals the value saved in
  # rehydrated-state-bytes, while records_indexed still totals 3.
  # NOTE(review): this relies on testdrive retrying the query until the
  # expected output appears - confirm against the testdrive documentation.
  67. > SELECT
  68. SUM(u.bytes_indexed) != ${rehydrated-state-bytes},
  69. SUM(u.records_indexed)
  70. FROM mz_tables t
  71. JOIN mz_internal.mz_source_statistics_raw u ON t.id = u.id
  72. WHERE t.name IN ('upsert_tbl')
  73. GROUP BY t.name
  74. ORDER BY t.name
  75. true 3
  # Stores the post-update total bytes_indexed for upsert_tbl, cast to text, in
  # the variable state-bytes; the next statistics check uses it as its
  # baseline.
  76. $ set-from-sql var=state-bytes
  77. SELECT
  78. (SUM(u.bytes_indexed))::text
  79. FROM mz_tables t
  80. JOIN mz_internal.mz_source_statistics_raw u ON t.id = u.id
  81. WHERE t.name IN ('upsert_tbl')
  # Write an even longer f1 value for key "fish" (f2 stays 9000) and expect the
  # total bytes_indexed to exceed the saved state-bytes baseline, with
  # records_indexed still totalling 3.
  82. $ kafka-ingest format=avro topic=upsert key-format=avro key-schema=${keyschema} schema=${schema}
  83. {"key": "fish"} {"f1": "MUCHMUCHMUCHLONGERVALUE", "f2": 9000}
  84. > SELECT
  85. SUM(u.bytes_indexed) > ${state-bytes},
  86. SUM(u.records_indexed)
  87. FROM mz_tables t
  88. JOIN mz_internal.mz_source_statistics_raw u ON t.id = u.id
  89. WHERE t.name IN ('upsert_tbl')
  90. GROUP BY t.name
  91. ORDER BY t.name
  92. true 3
  93. # Ensure deletes work.
  # Ingest a record for key "fish" that carries no value; the expected result
  # that follows no longer contains the fish row.
  # NOTE(review): a key-only record is presumably treated as a delete
  # (tombstone) by the upsert source - confirm.
  94. $ kafka-ingest format=avro topic=upsert key-format=avro key-schema=${keyschema} schema=${schema}
  95. {"key": "fish"}
  96. > SELECT * from upsert_tbl
  97. key f1 f2
  98. --------------------------------------
  99. birdmore geese 56
  100. mammalmore moose 2
  # With the fish row gone, records_indexed for upsert_tbl is expected to total
  # 2, matching the two rows that remain in the table.
  101. > SELECT
  102. SUM(u.records_indexed)
  103. FROM mz_tables t
  104. JOIN mz_internal.mz_source_statistics_raw u ON t.id = u.id
  105. WHERE t.name IN ('upsert_tbl')
  106. GROUP BY t.name
  107. ORDER BY t.name
  108. 2