analyzer.js 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. // █████╗ ███╗ ██╗ █████╗ ██╗ ██╗ ██╗███████╗███████╗██████╗
  2. // ██╔══██╗████╗ ██║██╔══██╗██║ ╚██╗ ██╔╝╚══███╔╝██╔════╝██╔══██╗
  3. // ███████║██╔██╗ ██║███████║██║ ╚████╔╝ ███╔╝ █████╗ ██████╔╝
  4. // ██╔══██║██║╚██╗██║██╔══██║██║ ╚██╔╝ ███╔╝ ██╔══╝ ██╔══██╗
  5. // ██║ ██║██║ ╚████║██║ ██║███████╗██║ ███████╗███████╗██║ ██║
  6. // ╚═╝ ╚═╝╚═╝ ╚═══╝╚═╝ ╚═╝╚══════╝╚═╝ ╚══════╝╚══════╝╚═╝ ╚═╝
  7. //
  8. // Analyze a set of "tokens" and group them together based on functionality.
  9. // Tokens come from the "Tokenizer" helper which is responsible for taking a
  10. // deeply nested Waterline Statement and breaking it down into a flat list
  11. // of keyed tokens that are easier to parse and work with.
  12. //
  13. // Once the tokens have been created the analyzer goes through and groups the
  14. // tokens into discrete pieces of query logic. These groups are then used by
  15. // other helpers such as the SQL builder or Mongo query builder to generate a
  16. // native query. The point of the analyzer isn't to re-create the original nested
  17. // statement but to group related pieces of the query that will be processed as
  18. // chunks. So an OR clause will have each set in the clause grouped or a subquery
  19. // will have its contents grouped.
  20. //
  21. // In most cases this will not be implemented by adapter authors but will be used
  22. // inside a database driver's `compileStatement` machine.
  23. var _ = require('@sailshq/lodash');
  24. module.exports = function analyzer(tokens) {
  25. if (!tokens) {
  26. throw new Error('Missing tokens argument.');
  27. }
  28. // If any of these identifiers is encountered, push a new array onto the stack.
  29. // When the subsequent "ENDIDENTIFIER" is encountered, pop the array off.
  30. var WRAPPED_IDENTIFIERS = [
  31. 'JOIN',
  32. 'INNERJOIN',
  33. 'OUTERJOIN',
  34. 'CROSSJOIN',
  35. 'LEFTJOIN',
  36. 'LEFTOUTERJOIN',
  37. 'RIGHTJOIN',
  38. 'RIGHTOUTERJOIN',
  39. 'FULLOUTERJOIN'
  40. ];
  41. // If any of these conditions is encountered, push them onto the current stack.
  42. var OUTPUTTING_CONDITIONS = [
  43. 'NOT',
  44. 'IN',
  45. 'NOTIN',
  46. 'AND'
  47. ];
  48. // Analyze the tokens and return the result
  49. var result = (function analyzer(tokens) {
  50. // Start a stack with one array in it. That array will hold the final result.
  51. // As tokens which require nesting are encountered, more arrays will be pushed
  52. // onto `stack`. Tokens that don't require nesting are pushed directly onto
  53. // the last array in the stack, referenced by `curChunk`.
  54. var stack = [[]];
  55. // Reference to the current writable chunk.
  56. var curChunk;
  57. // Another stack to keep track of union/subquery madness. See comments in
  58. // the UNION, ENDUNION, SUBQUERY and ENDSUBQUERY branches of the `switch` below.
  59. var unionSubqueryStack = [];
  60. // Function to push a new array onto the stack, and set `curChunk` to that array,
  61. // so that new tokens will added there there until `popStack` is called.
  62. var pushStack = function() {
  63. curChunk = [];
  64. stack.push(curChunk);
  65. };
  66. // Function to pop a chunk off the stack and fold it into the last array remaining in the stack,
  67. // which then becomes `curChunk`.
  68. var popStack = function() {
  69. stack.pop();
  70. _.last(stack).push(curChunk);
  71. curChunk = _.last(stack);
  72. };
  73. // Loop through the tokens.
  74. _.each(tokens, function(token) {
  75. switch (token.type) {
  76. // Each IDENTIFIER token gets its own array, of which the token is the first element.
  77. case 'IDENTIFIER':
  78. pushStack();
  79. curChunk.push(token);
  80. // If this is one of the "wrapped" identifiers, then add another level of nesting for it.
  81. if (_.contains(WRAPPED_IDENTIFIERS, token.value)) {
  82. pushStack();
  83. }
  84. break;
  85. // CONDITION tokens don't get their own array, but some of them (like AND and IN) are outputted
  86. // to the current chunk.
  87. case 'CONDITION':
  88. if (_.contains(OUTPUTTING_CONDITIONS, token.value)) {
  89. curChunk.push(token);
  90. }
  91. break;
  92. // The UNION token gets its own array and, like wrapped identifiers, gets another level of nesting
  93. // besides. Each UNION token is followed by one or more subquery groups, but we don't want those
  94. // subquery groups to output a SUBQUERY token into the current chunk like they normally would.
  95. // We push "UNION" onto `unionSubqueryStack` to indicate that the immediate ancestor of the next
  96. // subquery group is a union, so that the analyzer behaves accordingly.
  97. case 'UNION':
  98. pushStack();
  99. curChunk.push(token);
  100. pushStack();
  101. unionSubqueryStack.push('UNION');
  102. break;
  103. // Pop the "UNION" string off the `unionSubqueryStack` to reset the behaviour when encountering
  104. // subquery groups, then pop two levels off the main stack to account for those that we pushed
  105. // when we saw the UNION token.
  106. case 'ENDUNION':
  107. unionSubqueryStack.pop();
  108. popStack();
  109. popStack();
  110. break;
  111. // So long as we're not immediately inside of a union group, a SUBQUERY token will get pushed
  112. // onto the current stack, and then get another level of nesting. Otherwise, it's ignored.
  113. // Either way, we'll push "SUBQUERY" onto `unionSubqueryStack` so that any nested subqueries
  114. // will be treated appropriately.
  115. case 'SUBQUERY':
  116. if (unionSubqueryStack.length === 0 || _.last(unionSubqueryStack) === 'SUBQUERY') {
  117. curChunk.push(token);
  118. pushStack();
  119. }
  120. unionSubqueryStack.push('SUBQUERY');
  121. break;
  122. // As above; if this is a normal subquery, then pop a level off the main stack to account for the
  123. // one we added when we saw the SUBQUERY token. Either way, pop the `unionSubqueryStack` so that
  124. // subsequent behavior when encoutnering a SUBQUERY token is correct.
  125. case 'ENDSUBQUERY':
  126. unionSubqueryStack.pop();
  127. if (unionSubqueryStack.length === 0 || _.last(unionSubqueryStack) === 'SUBQUERY') {
  128. popStack();
  129. }
  130. break;
  131. // GROUP tokens don't get outputted, but they do cause a new level of nesting to pushed onto
  132. // the main stack.
  133. case 'GROUP':
  134. pushStack();
  135. break;
  136. // Pop the main stack to account for the level added when the GROUP token was encountered.
  137. case 'ENDGROUP':
  138. popStack();
  139. break;
  140. default:
  141. // Is this the "END" token of an identifier?
  142. if (token.type === 'ENDIDENTIFIER') {
  143. // Pop one level off the stack.
  144. popStack();
  145. // Is the identifier we're ending a "wrapped" token?
  146. // If so, pop another level off the stack to account for the wrapping.
  147. if (_.contains(WRAPPED_IDENTIFIERS, token.value)) {
  148. popStack();
  149. }
  150. break;
  151. }
  152. // All other "END" tokens (like ENDCONDITION) can be ignored.
  153. if (token.type.substr(0,3) === 'END') {
  154. break;
  155. }
  156. // All other tokens (like KEY and VALUE) are outputted to the current chunk.
  157. curChunk.push(token);
  158. }
  159. });
  160. // The stack should now be completely collapsed into one (possibly nested) array item. If not, we've got issues.
  161. if (stack.length > 1) {
  162. throw new Error('Consistency violation: final stack in analyzer contains more than one item. Stack is: ', require('util').inspect(stack, {depth: null}));
  163. }
  164. // Return the result.
  165. return stack[0];
  166. })(tokens); // </analyzer>
  167. // Return the result from the analyzer.
  168. return result;
  169. };