q15.sql 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177
  1. \set person1Id 1450::bigint
  2. \set person2Id 15393162796819
  3. \set startDate '\'2012-11-06\''::timestamp
  4. \set endDate '\'2012-11-10\''::timestamp
  5. -- coalesce
  6. -- ----------
  7. -- 4
  8. -- (1 row)
  9. --
  10. -- Time: 144025.594 ms (02:24.026)
  11. /* Q15. Trusted connection paths through forums created in a given timeframe
  12. \set person1Id 21990232564808
  13. \set person2Id 26388279076936
  14. \set startDate '\'2010-11-01\''::timestamp
  15. \set endDate '\'2010-12-01\''::timestamp
  16. */
  17. WITH
  18. -- forums within the date range
  19. myForums AS (
  20. SELECT id FROM Forum f WHERE f.creationDate BETWEEN :startDate AND :endDate
  21. ),
  22. -- the (inverse) interaction scores between folks who know each other
  23. mm AS (
  24. SELECT least(msg.CreatorPersonId, reply.CreatorPersonId) AS src,
  25. greatest(msg.CreatorPersonId, reply.CreatorPersonId) AS dst,
  26. sum(case when msg.ParentMessageId is null then 10 else 5 end) AS w
  27. FROM Person_knows_Person pp, Message msg, Message reply
  28. WHERE true
  29. AND pp.person1id = msg.CreatorPersonId
  30. AND pp.person2id = reply.CreatorPersonId
  31. AND reply.ParentMessageId = msg.MessageId
  32. AND EXISTS (SELECT * FROM myForums f WHERE f.id = msg.containerforumid)
  33. AND EXISTS (SELECT * FROM myForums f WHERE f.id = reply.containerforumid)
  34. GROUP BY src, dst
  35. ),
  36. -- the true interaction scores, with 0 default for folks with no interactions
  37. edge AS (
  38. SELECT pp.person1id AS src,
  39. pp.person2id AS dst,
  40. 10::double precision / (coalesce(w, 0) + 10) AS w
  41. FROM Person_knows_Person pp
  42. LEFT JOIN mm
  43. ON least(pp.person1id, pp.person2id) = mm.src
  44. AND greatest(pp.person1id, pp.person2id) = mm.dst
  45. ),
  46. completed_paths AS (
  47. WITH MUTUALLY RECURSIVE
  48. paths (src bigint, dst bigint, w double precision) AS (
  49. SELECT :person1Id AS src, :person1Id AS dst, 0::double precision AS w
  50. UNION
  51. SELECT paths1.src, paths2.dst, paths1.w + paths2.w
  52. FROM minimal_paths paths1
  53. JOIN edge paths2 -- step-transitive closure
  54. ON paths1.dst = paths2.src
  55. ),
  56. minimal_paths (src bigint, dst bigint, w double precision) AS (
  57. SELECT src, dst, min(w)
  58. FROM paths
  59. GROUP BY src, dst
  60. )
  61. SELECT src, dst, w
  62. FROM minimal_paths
  63. WHERE dst = :person2Id),
  64. results AS (
  65. SELECT dst, w
  66. FROM completed_paths
  67. WHERE w IN (SELECT min(w) FROM completed_paths)
  68. )
  69. SELECT coalesce(w, -1) FROM results ORDER BY w ASC LIMIT 20;
  70. /*
  71. EXPLAIN WITH MUTUALLY RECURSIVE
  72. srcs (f bigint) AS (SELECT :person1Id),
  73. dsts (t bigint) AS (SELECT :person2Id),
  74. myForums (id bigint) AS (
  75. SELECT id FROM Forum f WHERE f.creationDate BETWEEN :startDate AND :endDate
  76. ),
  77. mm (src bigint, dst bigint, w bigint) AS (
  78. SELECT least(msg.CreatorPersonId, reply.CreatorPersonId) AS src,
  79. greatest(msg.CreatorPersonId, reply.CreatorPersonId) AS dst,
  80. sum(case when msg.ParentMessageId is null then 10 else 5 end) AS w
  81. FROM Person_knows_Person pp, Message msg, Message reply
  82. WHERE true
  83. AND pp.person1id = msg.CreatorPersonId
  84. AND pp.person2id = reply.CreatorPersonId
  85. AND reply.ParentMessageId = msg.MessageId
  86. AND EXISTS (SELECT * FROM myForums f WHERE f.id = msg.containerforumid)
  87. AND EXISTS (SELECT * FROM myForums f WHERE f.id = reply.containerforumid)
  88. GROUP BY src, dst
  89. ),
  90. path (src bigint, dst bigint, w double precision) AS (
  91. SELECT pp.person1id, pp.person2id, 10::double precision / (coalesce(w, 0) + 10)
  92. FROM Person_knows_Person pp left join mm on least(pp.person1id, pp.person2id) = mm.src AND greatest(pp.person1id, pp.person2id) = mm.dst
  93. ),
  94. -- bidirectional bfs for nonexistant paths
  95. pexists (src bigint, dir bool) AS (
  96. (
  97. SELECT f, true FROM srcs
  98. UNION
  99. SELECT t, false FROM dsts
  100. )
  101. UNION
  102. (
  103. WITH
  104. ss (src, dir) AS (SELECT src, dir FROM pexists),
  105. ns (src, dir) AS (SELECT p.dst, ss.dir FROM ss, path p WHERE ss.src = p.src),
  106. bb (src, dir) AS (SELECT src, dir FROM ns UNION ALL SELECT src, dir FROM ss),
  107. found (found) AS (
  108. SELECT 1 AS found
  109. FROM bb b1, bb b2
  110. WHERE b1.dir AND (NOT b2.dir) AND b1.src = b2.src
  111. )
  112. SELECT src, dir
  113. FROM ns
  114. WHERE NOT EXISTS (SELECT 1 FROM found)
  115. UNION
  116. SELECT -1, true
  117. WHERE EXISTS (SELECT 1 FROM found)
  118. )
  119. ),
  120. pathfound (c bool) AS (
  121. SELECT true AS c
  122. FROM pexists
  123. WHERE src = -1 AND dir
  124. ),
  125. shorts (dir bool, gsrc bigint, dst bigint, w double precision, dead bool, iter bigint) AS (
  126. (
  127. SELECT false, f, f, 0::double precision, false, 0 FROM srcs WHERE EXISTS (SELECT 1 FROM pathfound)
  128. UNION ALL
  129. SELECT true, t, t, 0::double precision, false, 0 FROM dsts WHERE EXISTS (SELECT 1 FROM pathfound)
  130. )
  131. UNION
  132. (
  133. WITH
  134. ss (dir, gsrc, dst, w, dead, iter) AS
  135. (SELECT * FROM shorts),
  136. toExplore (dir, gsrc, dst, w, dead, iter) AS
  137. (SELECT * FROM ss WHERE dead = false ORDER BY w limit 1000),
  138. -- assumes graph is undirected
  139. newPoints (dir, gsrc, dst, w, dead) AS (
  140. SELECT e.dir, e.gsrc AS gsrc, p.dst AS dst, e.w + p.w AS w, false AS dead
  141. FROM path p join toExplore e on (e.dst = p.src)
  142. UNION ALL
  143. SELECT dir, gsrc, dst, w, dead OR EXISTS (SELECT * FROM toExplore e WHERE e.dir = o.dir AND e.gsrc = o.gsrc AND e.dst = o.dst) FROM ss o
  144. ),
  145. fullTable (dir, gsrc, dst, w, dead) AS (
  146. SELECT DISTINCT ON(dir, gsrc, dst) dir, gsrc, dst, w, dead
  147. FROM newPoints
  148. ORDER BY dir, gsrc, dst, w, dead DESC
  149. ),
  150. found AS (
  151. SELECT min(l.w + r.w) AS w
  152. FROM fullTable l, fullTable r
  153. WHERE l.dir = false AND r.dir = true AND l.dst = r.dst
  154. )
  155. SELECT dir,
  156. gsrc,
  157. dst,
  158. w,
  159. dead OR (coalesce(t.w > (SELECT f.w/2 FROM found f), false)),
  160. e.iter + 1 AS iter
  161. FROM fullTable t, (SELECT iter FROM toExplore limit 1) e
  162. )
  163. ),
  164. ss (dir bool, gsrc bigint, dst bigint, w double precision, iter bigint) AS (
  165. SELECT dir, gsrc, dst, w, iter FROM shorts WHERE iter = (SELECT max(iter) FROM shorts)
  166. ),
  167. results(f bigint, t bigint, w double precision) AS (
  168. SELECT l.gsrc, r.gsrc, min(l.w + r.w)
  169. FROM ss l, ss r
  170. WHERE l.dir = false AND r.dir = true AND l.dst = r.dst
  171. GROUP BY l.gsrc, r.gsrc
  172. )
  173. SELECT coalesce(min(w), -1) FROM results;
  174. */