MatchDocumentSource.js 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289
  1. "use strict";
  2. var async = require("async"),
  3. matcher = require("../matcher/Matcher2.js"),
  4. DocumentSource = require("./DocumentSource");
  5. /**
  6. * A match document source built off of DocumentSource
  7. *
  8. * NOTE: THIS IS A DEVIATION FROM THE MONGO IMPLEMENTATION.
  9. * TODO: internally uses `sift` to fake it, which has bugs, so we need to reimplement this by porting the MongoDB implementation
  10. *
  11. * @class MatchDocumentSource
  12. * @namespace mungedb-aggregate.pipeline.documentSources
  13. * @module mungedb-aggregate
  14. * @constructor
  15. * @param {Object} query the match query to use
  16. * @param [ctx] {ExpressionContext}
  17. **/
  18. var MatchDocumentSource = module.exports = function MatchDocumentSource(query, ctx){
  19. if (arguments.length > 2) throw new Error("up to two args expected");
  20. if (!query) throw new Error("arg `query` is required");
  21. base.call(this, ctx);
  22. this.query = query; // save the query, so we can check it for deps later. THIS IS A DEVIATION FROM THE MONGO IMPLEMENTATION
  23. this.matcher = new matcher(query);
  24. }, klass = MatchDocumentSource, base = require('./DocumentSource'), proto = klass.prototype = Object.create(base.prototype, {constructor:{value:klass}});
  // Name under which this stage appears in a pipeline specification.
  klass.matchName = "$match";

  /**
   * Reports the name of this source.
   *
   * @method getSourceName
   * @returns {String} the current value of `klass.matchName`
   **/
  proto.getSourceName = function getSourceName(){
    var stageName = klass.matchName;
    return stageName;
  };
  /**
   * Pulls documents from `this.source` until one satisfies the matcher or the
   * upstream source reports EOF, then hands that value to `callback`.
   *
   * Completion is tracked with an explicit flag rather than by the truthiness
   * of the document, so the loop also terminates when `DocumentSource.EOF` is
   * a falsy sentinel.
   *
   * @method getNext
   * @param {Function} callback invoked as `callback(err, doc)`; `doc` is either the
   *   next matching document or `DocumentSource.EOF`. Upstream errors are passed as
   *   `callback(err)`.
   * @returns {Object} the value of `next` at the moment `async.doUntil` returns; it is
   *   only set if the upstream source invoked its callback synchronously
   * @throws {Error} when no callback is supplied
   **/
  proto.getNext = function getNext(callback) {
    if (!callback) throw new Error(this.getSourceName() + ' #getNext() requires callback');

    var self = this,
      next,
      finished = false; // set once a matching document or EOF has been seen

    async.doUntil(
      function pullOne(cb) {
        self.source.getNext(function(err, doc) {
          if (err) return callback(err);
          // EOF is checked first so the matcher is never handed the sentinel.
          if (doc === DocumentSource.EOF || self.matcher.matches(doc)) {
            next = doc;
            finished = true;
          }
          return cb();
        });
      },
      function isFinished() {
        return finished; // keep going until a doc is found or the source is exhausted
      },
      function done(err) {
        return callback(err, next);
      }
    );

    return next;
  };
  /**
   * Attempts to merge the following pipeline stage into this one.
   * Two adjacent matches are equivalent to a single match on the `$and` of both queries.
   *
   * Both the matcher and the saved `query` are replaced, so the two views of
   * this source's query stay in agreement after merging.
   *
   * @method coalesce
   * @param {Object} nextSource the stage that follows this one
   * @returns {Boolean} true if `nextSource` was absorbed, false if it is not a MatchDocumentSource
   **/
  proto.coalesce = function coalesce(nextSource) {
    if (!(nextSource instanceof MatchDocumentSource))
      return false;

    var combined = {"$and": [this.getQuery(), nextSource.getQuery()]};
    this.query = combined;
    this.matcher = new matcher(combined);
    return true;
  };
  /**
   * Builds the serialized form of this stage: a single-key object mapping
   * the source name to the current query.
   *
   * @method serialize
   * @param {Boolean} explain accepted but not used by this implementation
   * @returns {Object} e.g. `{"$match": <query>}`
   **/
  proto.serialize = function(explain) {
    var stageName = this.getSourceName(),
      serialized = {};
    serialized[stageName] = this.getQuery();
    return serialized;
  };
  /**
   * Throws if the query uses an operator that is not permitted inside a $match stage.
   *
   * Operators are identified by field NAME at every nesting level, including
   * inside arrays such as the clause lists of `$or` / `$and`. Values that merely
   * happen to equal an operator name (e.g. `{name: "$where"}`) are accepted.
   *
   * @static
   * @method uassertNoDisallowedClauses
   * @param {Object} query the match query (or a nested part of it) to validate
   * @throws {Error} code 16395 for $where, 16424 for $near, 16425 for $within, 16426 for $nearSphere
   **/
  klass.uassertNoDisallowedClauses = function uassertNoDisallowedClauses(query) {
    if (query === null || query === undefined) return;

    // can't use the Matcher API because this would segfault the constructor
    // geo ($near, $within, $nearSphere) breaks if it is not the first portion of the pipeline
    var disallowed = {
      "$where": "code 16395; $where is not allowed inside of a $match aggregation expression",
      "$near": "code 16424; $near is not allowed inside of a $match aggregation expression",
      "$within": "code 16425; $within is not allowed inside of a $match aggregation expression",
      "$nearSphere": "code 16426; $nearSphere is not allowed inside of a $match aggregation expression"
    };
    var hasOwn = Object.prototype.hasOwnProperty;

    Object.keys(query).forEach(function(key) {
      // Own-property lookup so field names like "constructor" never hit the table's prototype.
      if (hasOwn.call(disallowed, key)) throw new Error(disallowed[key]);

      var value = query[key],
        isPlainObject = value instanceof Object && value.constructor === Object;
      // Recurse by function name rather than `this`, so the check works when called detached.
      if (isPlainObject || value instanceof Array) uassertNoDisallowedClauses(value);
    });
  };
  /**
   * Factory that validates a JSON match specification and wraps it in a MatchDocumentSource.
   *
   * @static
   * @method createFromJson
   * @param {Object} jsonElement the match filter; must be a plain object
   * @param [ctx] {ExpressionContext} passed through to the constructor
   * @returns {MatchDocumentSource} the newly created source
   * @throws {Error} code 15959 when `jsonElement` is not a plain object, plus anything
   *   raised by `klass.uassertNoDisallowedClauses`
   **/
  klass.createFromJson = function createFromJson(jsonElement, ctx) {
    var isPlainObject = jsonElement instanceof Object && jsonElement.constructor === Object;
    if (!isPlainObject) {
      throw new Error("code 15959 ; the match filter must be an expression in an object");
    }
    klass.uassertNoDisallowedClauses(jsonElement);
    return new MatchDocumentSource(jsonElement, ctx);
  };
  /**
   * Exposes the query currently held by this source's matcher.
   *
   * @method getQuery
   * @returns {Object} the matcher's `_pattern` value
   **/
  proto.getQuery = function getQuery() {
    var activeMatcher = this.matcher;
    return activeMatcher._pattern;
  };
  /**
   * Returns the portion of the match that can safely be promoted to before a $redact.
   * If this returns an empty object, no part of this match may safely be promoted.
   *
   * To be safe to promote, removing a field from a document to be matched must not cause
   * that document to be accepted when it would otherwise be rejected. As an example,
   * {name: {$ne: "bob smith"}} accepts documents without a name field, which means that
   * running this filter before a redact that would remove the name field would leak
   * information. On the other hand, {age: {$gt:5}} is ok because it doesn't accept documents
   * that have had their age field removed.
   *
   * @method redactSafePortion
   * @returns {Object} the redact-safe subset of `this.getQuery()` (possibly empty)
   **/
  proto.redactSafePortion = function redactSafePortion() {
    // The helpers below make up the implementation of DocumentSourceMatch::redactSafePortion().
    // They are only called after the match expression has been successfully parsed, so they
    // assume that input is well formed.

    // True only for a non-empty run of decimal digits, i.e. something usable as an array index.
    function isAllDigits(str) {
      return /^[0-9]+$/.test(str);
    }

    // A dotted path is unsafe when any component is numeric: redacting elements from an
    // array would change the indexes.
    function isFieldnameRedactSafe(field) {
      return field.split('.').every(function(part) {
        return !isAllDigits(part);
      });
    }

    // Arrays, sub-documents, null and undefined may not be used as comparison operands.
    function isTypeRedactSafeInComparison(value) {
      if (value === null || value === undefined) return false;
      if (value instanceof Array) return false;
      if (value instanceof Object && value.constructor === Object) return false;
      return true;
    }

    // Values of an array (or object) in key order.
    function listValues(container) {
      return Object.keys(container).map(function(key) {
        return container[key];
      });
    }

    function isEmpty(doc) {
      return !doc || Object.keys(doc).length === 0;
    }

    // Returns the redact-safe portion of an "inner" match expression. This is the layer like
    // {$gt: 5} which does not include the field name. Returns an empty document if none of the
    // expression can safely be promoted in front of a $redact.
    function redactSafePortionDollarOps(expr) {
      var output = {};
      Object.keys(expr).forEach(function(op) {
        var operand = expr[op],
          safeElems, firstKey, sub;
        if (op[0] !== '$') return;

        switch (op) {
        case '$type':
        case '$regex':
        case '$options':
        case '$mod':
          // These are always ok.
          output[op] = operand;
          break;

        case '$lte':
        case '$gte':
        case '$lt':
        case '$gt':
          // Ok as long as the operand (not the operator name) has a safe type.
          if (isTypeRedactSafeInComparison(operand))
            output[op] = operand;
          break;

        case '$in':
          // $in is only ok if every candidate is ok.
          if (listValues(operand).every(isTypeRedactSafeInComparison))
            output[op] = operand;
          break;

        case '$all':
          // $all is ok as long as some elements are ok; only those are kept.
          safeElems = listValues(operand).filter(isTypeRedactSafeInComparison);
          if (safeElems.length)
            output[op] = safeElems;
          break;

        case '$elemMatch':
          // The first field name decides whether the body is operator-style or field-style.
          firstKey = Object.keys(operand)[0];
          if (firstKey !== undefined && firstKey[0] === '$')
            sub = redactSafePortionDollarOps(operand);
          else
            sub = redactSafePortionTopLevel(operand);
          if (!isEmpty(sub))
            output[op] = sub;
          break;

        default:
          // never allowed:
          // equality, maxDist, near, ne, opSize, nin, exists, within, geoIntersects
          break;
        }
      });
      return output;
    }

    // Returns the redact-safe portion of an "outer" match expression. This is the layer like
    // {fieldName: {...}} which does include the field name. Returns an empty document if none of
    // the expression can safely be promoted in front of a $redact.
    function redactSafePortionTopLevel(topQuery) {
      var output = {};
      if (!topQuery) return output;

      Object.keys(topQuery).forEach(function(field) {
        var value = topQuery[field],
          clauses, sub;

        if (field[0] === '$') {
          if (field === '$or') {
            // One unsafe clause makes the whole $or unsafe.
            clauses = listValues(value).map(function(clause) {
              return redactSafePortionTopLevel(clause);
            });
            if (clauses.length && !clauses.some(isEmpty))
              output.$or = clauses;
          } else if (field === '$and') {
            // Any safe clause of an $and can be promoted on its own.
            clauses = listValues(value).map(function(clause) {
              return redactSafePortionTopLevel(clause);
            }).filter(function(clause) {
              return !isEmpty(clause);
            });
            if (clauses.length)
              output.$and = clauses;
          }
          // Every other top-level operator is dropped.
          return;
        }

        if (!isFieldnameRedactSafe(field)) return;

        // Exact matches on arrays are never allowed, and null/undefined would look for
        // missing fields. Other falsy values (0, false, "") are ordinary equality matches.
        if (value === null || value === undefined || value instanceof Array) return;

        if (value instanceof Object && value.constructor === Object) {
          // subobjects (not regex etc)
          sub = redactSafePortionDollarOps(value);
          if (!isEmpty(sub))
            output[field] = sub;
          return;
        }

        // All other types are ok to pass through.
        output[field] = value;
      });
      return output;
    }

    return redactSafePortionTopLevel(this.getQuery());
  };