// MatchDocumentSource.js (10 KB)


  1. "use strict";
  2. var async = require("async"),
  3. matcher = require("../matcher/Matcher2.js"),
  4. DocumentSource = require("./DocumentSource");
  /**
   * Pipeline document source that only lets through documents satisfying a match query.
   * Inherits from DocumentSource.
   *
   * NOTE: THIS IS A DEVIATION FROM THE MONGO IMPLEMENTATION.
   * TODO (carried over from the original header): matching was described as being faked with
   * `sift`, which has bugs, and as needing a port of the MongoDB implementation. The constructor
   * below instantiates the `Matcher2` module -- confirm whether this note is still current.
   *
   * @class MatchDocumentSource
   * @namespace mungedb-aggregate.pipeline.documentSources
   * @module mungedb-aggregate
   * @constructor
   * @param {Object} query the match query to use (required)
   * @param [ctx] {ExpressionContext} forwarded to the base constructor
   * @throws {Error} when more than two arguments are given, when `query` is falsy, or when the
   *                 query contains a `$text` clause (not supported)
   **/
  var MatchDocumentSource = function MatchDocumentSource(query, ctx) {
      if (arguments.length > 2) {
          throw new Error("up to two args expected");
      }
      if (!query) {
          throw new Error("arg `query` is required");
      }

      base.call(this, ctx);

      // Keep the raw query so it can be checked for deps later.
      // THIS IS A DEVIATION FROM THE MONGO IMPLEMENTATION
      this.query = query;
      this.matcher = new matcher(query);

      // The $text operator is not supported yet, so such queries are rejected and the flag
      // is always false.
      // TODO: update after we implement $text.
      if (klass.isTextQuery(query)) {
          throw new Error("$text pipeline operation not supported");
      }
      this._isTextQuery = false;
  };
  module.exports = MatchDocumentSource;

  // Short aliases used by the rest of this file: the constructor, its base class and its prototype.
  var klass = MatchDocumentSource;
  var base = require('./DocumentSource');
  var proto = Object.create(base.prototype, {constructor: {value: klass}});
  klass.prototype = proto;
  /** Name of the pipeline stage implemented by this source. */
  klass.matchName = "$match";

  /**
   * @method getSourceName
   * @returns {String} the stage name stored on the constructor as `matchName`
   **/
  proto.getSourceName = function getSourceName() {
      var stageName = klass.matchName;
      return stageName;
  };
  /**
   * Pulls documents from the upstream source until one satisfies the matcher or the upstream
   * source reports `DocumentSource.EOF`, then hands that value to `callback`.
   *
   * @method getNext
   * @param {Function} callback invoked as `callback(err)` on failure or `callback(err, doc)` on
   *                            success, where `doc` is the matching document or `DocumentSource.EOF`
   * @returns the value found so far; only meaningful when the upstream source invoked its
   *          callback synchronously (kept for compatibility with the original implementation)
   * @throws {Error} when no callback is supplied
   **/
  proto.getNext = function getNext(callback) {
      if (!callback) throw new Error(this.getSourceName() + ' #getNext() requires callback');
      if (this.expCtx.checkForInterrupt && this.expCtx.checkForInterrupt() === false) {
          return callback(new Error('Interrupted'));
      }

      var self = this,
          next,
          // Explicit flag instead of a truthiness test on the pulled value: the loop must stop on
          // EOF no matter which sentinel value DocumentSource.EOF happens to be (a falsy sentinel
          // would otherwise never be recorded and the loop would keep pulling past the end).
          found = false;

      async.doUntil(
          function pullOne(cb) {
              self.source.getNext(function(err, doc) {
                  // Route the failure through async so the loop is terminated properly.
                  if (err) return cb(err);
                  if (doc === DocumentSource.EOF || self.matcher.matches(doc)) {
                      next = doc;
                      found = true;
                  }
                  return cb();
              });
          },
          function isDone() {
              return found; // keep going until a matching doc or EOF is found
          },
          function finish(err) {
              if (err) return callback(err);
              return callback(err, next);
          }
      );
      return next;
  };
  /**
   * Attempts to merge the following pipeline stage into this one. Two adjacent matches are
   * combined into a single `$and` of both queries.
   *
   * @method coalesce
   * @param {Object} nextSource the stage that follows this one in the pipeline
   * @returns {Boolean} true when `nextSource` was a MatchDocumentSource and has been absorbed,
   *                    false otherwise (in which case nothing is changed)
   **/
  proto.coalesce = function coalesce(nextSource) {
      if (!(nextSource instanceof MatchDocumentSource)) {
          return false;
      }
      var combined = {"$and": [this.getQuery(), nextSource.getQuery()]};
      this.matcher = new matcher(combined);
      // Keep the saved query in step with the matcher; it was previously left pointing at the
      // pre-merge query, so anything inspecting `this.query` missed the absorbed stage.
      this.query = combined;
      return true;
  };
  /**
   * Builds the plain-object form of this stage: a single key (the source name) mapped to the
   * current query.
   *
   * @method serialize
   * @param explain accepted for interface compatibility; not consulted here
   * @returns {Object} e.g. `{"$match": <query>}`
   **/
  proto.serialize = function(explain) {
      var stageName = this.getSourceName(),
          serialized = {};
      serialized[stageName] = this.getQuery();
      return serialized;
  };
  /**
   * Walks a match query and throws if it contains a clause that is not allowed inside `$match`
   * (`$where`, `$near`, `$within`, `$nearSphere`) at any depth.
   *
   * Nested plain objects AND arrays are searched, so a disallowed clause hidden inside the
   * operand list of `$or` / `$and` / `$nor` is rejected as well.
   *
   * @static
   * @method uassertNoDisallowedClauses
   * @param {Object|Array} query the query (or sub-query) to inspect; non-objects are ignored
   * @throws {Error} with code 16395, 16424, 16425 or 16426 in the message
   **/
  klass.uassertNoDisallowedClauses = function uassertNoDisallowedClauses(query) {
      if (query === null || typeof query !== "object") return;

      var keys = Object.keys(query);
      for (var i = 0; i < keys.length; i++) {
          var key = keys[i],
              value = query[key];

          // can't use the Matcher API because this would segfault the constructor
          if (key === "$where") throw new Error("code 16395; $where is not allowed inside of a $match aggregation expression");
          // geo breaks if it is not the first portion of the pipeline
          if (key === "$near") throw new Error("code 16424; $near is not allowed inside of a $match aggregation expression");
          if (key === "$within") throw new Error("code 16425; $within is not allowed inside of a $match aggregation expression");
          if (key === "$nearSphere") throw new Error("code 16426; $nearSphere is not allowed inside of a $match aggregation expression");

          // Recurse by function name rather than through `this`, so the check also works when
          // the function is called detached from the constructor.
          var isPlainObject = value instanceof Object && value.constructor === Object;
          if (isPlainObject || Array.isArray(value)) uassertNoDisallowedClauses(value);
      }
  };
  /**
   * Factory that validates a `$match` specification and wraps it in a MatchDocumentSource.
   *
   * @static
   * @method createFromJson
   * @param {Object} jsonElement the match filter; must be a plain object
   * @param [ctx] {ExpressionContext} passed through to the constructor
   * @returns {MatchDocumentSource} the newly created source
   * @throws {Error} code 15959 when `jsonElement` is not a plain object, or any error raised by
   *                 `uassertNoDisallowedClauses` / the constructor
   **/
  klass.createFromJson = function createFromJson(jsonElement, ctx) {
      var isPlainObject = jsonElement instanceof Object && jsonElement.constructor === Object;
      if (!isPlainObject) {
          throw new Error("code 15959 ; the match filter must be an expression in an object");
      }
      klass.uassertNoDisallowedClauses(jsonElement);
      return new MatchDocumentSource(jsonElement, ctx);
  };
  /**
   * @method isTextQuery
   * @returns the `_isTextQuery` flag stored on this instance by the constructor
   **/
  proto.isTextQuery = function isTextQuery() {
      var flag = this._isTextQuery;
      return flag;
  };
  /**
   * Reports whether a match query contains a `$text` clause at any depth.
   *
   * Nested plain objects AND arrays are searched, so `{$or: [{$text: ...}]}` is detected too.
   *
   * @static
   * @method isTextQuery
   * @param {Object|Array} query the query (or sub-query) to inspect
   * @returns {Boolean} true when a `$text` key is found, false otherwise (including when
   *                    `query` is not an object)
   **/
  klass.isTextQuery = function isTextQuery(query) {
      if (query === null || typeof query !== "object") return false;

      var keys = Object.keys(query);
      for (var i = 0; i < keys.length; i++) {
          var fieldName = keys[i],
              value = query[fieldName];
          if (fieldName === "$text") return true;

          // Recurse by function name rather than through `this`, so the check also works when
          // the function is called detached from the constructor.
          var isPlainObject = value instanceof Object && value.constructor === Object;
          if ((isPlainObject || Array.isArray(value)) && isTextQuery(value)) {
              return true;
          }
      }
      return false;
  };
  /**
   * Sets the upstream source this stage reads from.
   *
   * This used to be attached to the constructor and call itself, so any invocation recursed until
   * the stack overflowed. It is now an instance method that defers to the base class.
   * NOTE(review): assumes the base DocumentSource prototype provides `setSource` -- confirm.
   *
   * @method setSource
   * @param {DocumentSource} source the source to read documents from
   **/
  proto.setSource = function setSource(source) {
      base.prototype.setSource.call(this, source);
  };
  /**
   * @method getQuery
   * @returns the `_pattern` property of the matcher currently held by this source
   **/
  proto.getQuery = function getQuery() {
      var activeMatcher = this.matcher;
      return activeMatcher._pattern;
  };
  /**
   * Returns the portion of the match that can safely be promoted to before a $redact.
   * If this returns an empty object, no part of this match may safely be promoted.
   *
   * To be safe to promote, removing a field from a document to be matched must not cause
   * that document to be accepted when it would otherwise be rejected. As an example,
   * {name: {$ne: "bob smith"}} accepts documents without a name field, which means that
   * running this filter before a redact that would remove the name field would leak
   * information. On the other hand, {age: {$gt:5}} is ok because it doesn't accept documents
   * that have had their age field removed.
   *
   * The helpers below are only meant to run on a query that has already been parsed
   * successfully, so they assume well formed input.
   *
   * @method redactSafePortion
   * @returns {Object} a new query object containing only the redact-safe clauses
   */
  proto.redactSafePortion = function redactSafePortion() {
      // True for `{...}` style objects only (not arrays, regexes, dates, ...).
      function isPlainObject(value) {
          return value instanceof Object && value.constructor === Object;
      }

      // True when the object exists and has at least one own enumerable key.
      function hasKeys(obj) {
          return !!obj && Object.keys(obj).length > 0;
      }

      // True only for non-empty strings made up entirely of decimal digits. (A coercing
      // `!isNaN(str)` test would also accept "", "1e3", "Infinity", " 7 ", ...)
      function isAllDigits(str) {
          return /^[0-9]+$/.test(str);
      }

      // A dotted path is unsafe as soon as one of its parts is numeric: redacting elements from
      // an array would change the indexes.
      function isFieldnameRedactSafe(fieldName) {
          var parts = fieldName.split('.');
          for (var i = 0; i < parts.length; i++) {
              if (isAllDigits(parts[i])) return false;
          }
          return true;
      }

      // Comparison operands may not be arrays, sub-objects, null or undefined.
      function isTypeRedactSafeInComparison(value) {
          if (value instanceof Array || isPlainObject(value)) return false;
          return value !== null && value !== undefined;
      }

      // Runs redactSafePortionTopLevel over every clause of an `$or` / `$and` operand list and
      // returns the non-empty results. With `allOrNothing` set, a single unsafe clause discards
      // the whole list.
      function collectSafeClauses(clauses, allOrNothing) {
          var safe = [];
          if (!(clauses instanceof Array)) return safe;
          for (var i = 0; i < clauses.length; i++) {
              var clause = redactSafePortionTopLevel(clauses[i]);
              if (hasKeys(clause)) {
                  safe.push(clause);
              } else if (allOrNothing) {
                  return [];
              }
          }
          return safe;
      }

      // Returns the redact-safe portion of an "inner" match expression. This is the layer like
      // {$gt: 5} which does not include the field name. Returns an empty object if none of the
      // expression can safely be promoted in front of a $redact.
      function redactSafePortionDollarOps(expr) {
          var output = {},
              ops = Object.keys(expr);
          for (var i = 0; i < ops.length; i++) {
              var op = ops[i],
                  operand = expr[op];
              if (op[0] !== '$') continue;

              if (op === '$type' || op === '$regex' || op === '$options' || op === '$mod') {
                  // These are always ok.
                  output[op] = operand;
              } else if (op === '$lte' || op === '$gte' || op === '$lt' || op === '$gt') {
                  // Ok if the type of the right-hand side (the operand, not the operator name)
                  // is allowed in comparisons.
                  if (isTypeRedactSafeInComparison(operand)) output[op] = operand;
              } else if (op === '$in') {
                  // $in must be all-or-nothing (like $or). Can't include a subset of elements.
                  if (operand instanceof Array && operand.every(function(elem) { return isTypeRedactSafeInComparison(elem); })) {
                      output[op] = operand;
                  }
              } else if (op === '$all') {
                  // $all can include a subset of elements (like $and).
                  var safeElems = operand instanceof Array ? operand.filter(function(elem) { return isTypeRedactSafeInComparison(elem); }) : [];
                  if (safeElems.length) output[op] = safeElems;
              } else if (op === '$elemMatch') {
                  // Decide by the FIRST KEY of the sub-expression whether it is an operator
                  // document ({$gt: 1}) or a nested query ({a: 1}).
                  var firstKey = isPlainObject(operand) ? Object.keys(operand)[0] : undefined,
                      sub = (firstKey !== undefined && firstKey[0] === '$') ?
                          redactSafePortionDollarOps(operand) :
                          redactSafePortionTopLevel(operand);
                  if (hasKeys(sub)) output[op] = sub;
              }
              // Everything else is never allowed:
              // equality, maxDist, near, ne, opSize, nin, exists, within, geoIntersects
          }
          return output;
      }

      // Returns the redact-safe portion of an "outer" match expression. This is the layer like
      // {fieldName: {...}} which does include the field name. Returns an empty object if none of
      // the expression can safely be promoted in front of a $redact.
      function redactSafePortionTopLevel(topQuery) {
          var output = {},
              fields = topQuery ? Object.keys(topQuery) : [];
          for (var i = 0; i < fields.length; i++) {
              var field = fields[i],
                  value = topQuery[field];

              if (field[0] === '$') {
                  if (field === '$or') {
                      // $or must be all-or-nothing (like $in).
                      var orClauses = collectSafeClauses(value, true);
                      if (orClauses.length) output.$or = orClauses;
                  } else if (field === '$and') {
                      // $and can include a subset of its clauses.
                      var andClauses = collectSafeClauses(value, false);
                      if (andClauses.length) output.$and = andClauses;
                  }
                  continue;
              }

              if (!isFieldnameRedactSafe(field)) continue;

              // Exact matches on arrays are never allowed, and null / undefined would look for
              // missing fields. Other falsy values (0, false, "") are ordinary equality matches.
              if (value instanceof Array || value === null || value === undefined) continue;

              if (isPlainObject(value)) {
                  // subobjects (not regex etc)
                  var sub = redactSafePortionDollarOps(value);
                  if (hasKeys(sub)) output[field] = sub;
              } else {
                  // All other types are ok to pass through.
                  output[field] = value;
              }
          }
          return output;
      }

      return redactSafePortionTopLevel(this.getQuery());
  };