GroupDocumentSource.js 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537
  1. "use strict";
  2. var DocumentSource = require("./DocumentSource"),
  3. Accumulators = require("../accumulators/"),
  4. Document = require("../Document"),
  5. Expression = require("../expressions/Expression"),
  6. ConstantExpression = require("../expressions/ConstantExpression"),
  7. FieldPathExpression = require("../expressions/FieldPathExpression"),
  8. Variables = require("../expressions/Variables"),
  9. VariablesIdGenerator = require("../expressions/VariablesIdGenerator"),
  10. VariablesParseState = require("../expressions/VariablesParseState"),
  11. async = require("async");
  /**
   * A document source that groups its input documents by a computed _id and
   * keeps one set of accumulators per group.
   *
   * @class GroupDocumentSource
   * @namespace mungedb-aggregate.pipeline.documentSources
   * @module mungedb-aggregate
   * @constructor
   * @param [expCtx] {ExpressionContext} Optional; an empty object is used when omitted
   **/
  var GroupDocumentSource = module.exports = function GroupDocumentSource(expCtx) {
    if (arguments.length > 1) throw new Error("up to one arg expected");
    expCtx = expCtx || {};
    base.call(this, expCtx);

    // state flags
    this.populated = false; // set to true by populate() once all input has been grouped
    this.doingMerge = false;
    this.spilled = false;
    this.extSortAllowed = expCtx.extSortAllowed && !expCtx.inRouter;

    // accumulator bookkeeping
    this.accumulatorFactories = [];
    this.currentAccumulators = [];

    // group storage
    this.groups = {}; // stringified group id -> array of accumulators
    this.groupsKeys = []; // stringified group ids, in first-seen order
    this.originalGroupsKeys = []; // the un-stringified group ids, parallel to groupsKeys

    this.variables = null;

    // parallel arrays describing the output fields and the _id
    this.fieldNames = [];
    this.idFieldNames = [];
    this.expressions = [];
    this.idExpressions = [];

    // position of the next group getNext() will return
    this.currentGroupsKeysIndex = 0;
  };
  var klass = GroupDocumentSource;
  var base = DocumentSource;
  var proto = klass.prototype = Object.create(base.prototype, {constructor: {value: klass}});
  // Static flag; this class also provides getShardSource() and getMergeSource().
  klass.isSplittableDocumentSource = true;

  // TODO: Do we need this?
  // Maps each supported group operator name to the factory that creates its accumulator.
  klass.groupOps = {
    $addToSet: Accumulators.AddToSetAccumulator.create,
    $avg: Accumulators.AvgAccumulator.create,
    $first: Accumulators.FirstAccumulator.create,
    $last: Accumulators.LastAccumulator.create,
    // $min and $max have special constructors because they share base features
    $max: Accumulators.MinMaxAccumulator.createMax,
    $min: Accumulators.MinMaxAccumulator.createMin,
    $push: Accumulators.PushAccumulator.create,
    $sum: Accumulators.SumAccumulator.create
  };

  // The pipeline stage name reported by getSourceName() and used as the key in serialize().
  klass.groupName = "$group";
  /**
   * Static factory; builds a new, empty GroupDocumentSource.
   *
   * @method create
   * @static
   * @param [expCtx] {ExpressionContext} Passed straight through to the constructor
   * @return {GroupDocumentSource} The newly constructed source
   **/
  klass.create = function create(expCtx) {
    var source = new GroupDocumentSource(expCtx);
    return source;
  };
  /**
   * Gets the name of this document source.
   *
   * @method getSourceName
   * @return {String} The value of the static groupName property
   **/
  proto.getSourceName = function getSourceName() {
    var sourceName = klass.groupName;
    return sourceName;
  };
  /**
   * Hands the next grouped output document to the callback.
   *
   * If the source has not been populated yet, populate() is run first so that
   * all input is grouped before any output is produced. Each call then emits
   * one group, in the order the groups were first seen, and calls dispose()
   * after the last one.
   *
   * @method getNext
   * @param callback {Function} Required. callback(err, doc); doc is null once every group has been returned
   * @throws {Error} Synchronously, when no callback is supplied
   * @async
   **/
  proto.getNext = function getNext(callback) {
    if (!callback) throw new Error(this.getSourceName() + ' #getNext() requires callback.');
    if (this.expCtx.checkForInterrupt && this.expCtx.checkForInterrupt() === false)
      return callback(new Error("Interrupted"));
    var self = this;
    async.series([
      // step 1: make sure the groups have been built
      function(next) {
        if (self.populated) return next();
        self.populate(function(err) {
          return next(err);
        });
      },
      // step 2: emit the group at the current index
      function(next) {
        // NOTE: Skipped the spilled functionality
        if (self.spilled) {
          // report through the callback chain; throwing here would escape
          // the caller's callback entirely
          return next(new Error("Spilled is not implemented."));
        }
        if (self.currentGroupsKeysIndex === self.groupsKeys.length) {
          return next(null, null); // no groups left
        }
        var out;
        try {
          var id = self.originalGroupsKeys[self.currentGroupsKeysIndex],
            stringifiedId = self.groupsKeys[self.currentGroupsKeysIndex],
            accumulators = self.groups[stringifiedId];
          out = self.makeDocument(id, accumulators, self.expCtx.inShard);
          if (++self.currentGroupsKeysIndex === self.groupsKeys.length) {
            self.dispose();
          }
        } catch (ex) {
          return next(ex);
        }
        return next(null, out);
      }
    ], function(err, results) {
      // results[1] is whatever step 2 produced; it is absent when step 1 failed
      callback(err, results ? results[1] : undefined);
    });
  };
  /**
   * Marks this source as exhausted and disposes of the upstream source.
   *
   * @method dispose
   **/
  proto.dispose = function dispose() {
    //NOTE: Skipped 'freeing' our resources; at best we could remove some references to things, but our parent will probably forget us anyways!
    // moving the cursor past the last group makes getNext() report no more documents
    var exhaustedIndex = this.groupsKeys.length;
    this.currentGroupsKeysIndex = exhaustedIndex;
    // let the source we read from release its resources as well
    this.source.dispose();
  };
  /**
   * Replaces every _id expression and every accumulator expression, in place,
   * with the result of calling optimize() on it.
   *
   * @method optimize
   **/
  proto.optimize = function optimize() {
    // TODO if all _idExpressions are ExpressionConstants after optimization, then we know there
    // will only be one group. We should take advantage of that to avoid going through the hash
    // table.
    var idExprs = this.idExpressions,
      accExprs = this.expressions,
      i;
    for (i = 0; i < idExprs.length; i++) {
      idExprs[i] = idExprs[i].optimize();
    }
    for (i = 0; i < accExprs.length; i++) {
      accExprs[i] = accExprs[i].optimize();
    }
  };
  /**
   * Builds a plain object describing this source: a single key, the source's
   * name, whose value holds the serialized _id and one entry per accumulated
   * field of the form {fieldName: {opName: serializedExpression}}.
   *
   * @method serialize
   * @param explain {Boolean} Create explain output; forwarded to each expression's serialize()
   * @return {Object} The serialized source
   * @throws {Error} When the _id expressions and _id field names are inconsistent
   **/
  proto.serialize = function serialize(explain) {
    var spec = {},
      idExprs = this.idExpressions,
      idNames = this.idFieldNames;

    // the _id: either a single bare expression or a document of named expressions
    if (idNames.length === 0) {
      if (idExprs.length !== 1) throw new Error("Should only have one _id field");
      spec._id = idExprs[0].serialize(explain);
    } else {
      if (idExprs.length !== idNames.length)
        throw new Error("Should have the same number of idExpressions and idFieldNames.");
      var idDoc = {};
      for (var k = 0; k < idExprs.length; k++) {
        idDoc[idNames[k]] = idExprs[k].serialize(explain);
      }
      spec._id = idDoc;
    }

    // the accumulated fields
    var factories = this.accumulatorFactories,
      factoryCount = factories.length;
    for (var i = 0; i < factoryCount; i++) {
      // a throwaway accumulator is created only to ask it for its operator name
      var accumulator = new factories[i](),
        serializedExpression = this.expressions[i].serialize(explain),
        opSpec = {};
      opSpec[accumulator.getOpName()] = serializedExpression;
      spec[this.fieldNames[i]] = opSpec;
    }

    var wrapped = {};
    wrapped[this.getSourceName()] = spec;
    return wrapped;
  };
  /**
   * Creates a GroupDocumentSource from the given group specification.
   *
   * The specification must contain an _id. A "$doingMerge" key is accepted
   * only with the value true and switches the new source into merge mode.
   * Every other key names an output field and must map to an object holding
   * exactly one known group operator, e.g. {total: {$sum: "$amount"}}.
   *
   * @method createFromJson
   * @static
   * @param elem {Object} The group specification object; the right hand side of the $group
   * @param [expCtx] {ExpressionContext} Passed to GroupDocumentSource.create
   * @return {GroupDocumentSource} The configured source
   * @throws {Error} When the specification is malformed (messages carry the numeric codes below)
   **/
  klass.createFromJson = function createFromJson(elem, expCtx) {
    if (!(elem instanceof Object && elem.constructor === Object)) throw new Error("a group's fields must be specified in an object");
    var group = GroupDocumentSource.create(expCtx),
      idSet = false,
      idGenerator = new VariablesIdGenerator(),
      vps = new VariablesParseState(idGenerator);
    for (var groupFieldName in elem) {
      if (!elem.hasOwnProperty(groupFieldName)) continue;
      var groupField = elem[groupFieldName];
      if (groupFieldName === "_id") {
        if (idSet) throw new Error("15948 a group's _id may only be specified once");
        group.parseIdExpression(groupField, vps);
        idSet = true;
      } else if (groupFieldName === "$doingMerge") {
        // only the literal true is acceptable; anything else is a malformed spec
        if (groupField !== true) throw new Error("17030 $doingMerge should be true if present");
        group.doingMerge = true;
      } else {
        /*
          Treat as a projection field with the additional ability to
          add aggregation operators.
        */
        if (groupFieldName.indexOf(".") !== -1) throw new Error("16414 the group aggregate field name '" + groupFieldName + "' cannot contain '.'");
        if (groupFieldName[0] === "$") throw new Error("15950 the group aggregate field name '" + groupFieldName + "' cannot be an operator name");
        // the field's value (not its name) has to be an object wrapping the operator
        if (group._getTypeStr(groupField) !== "Object") throw new Error("15951 the group aggregate field '" + groupFieldName + "' must be defined as an expression inside an object");
        var subElementCount = 0;
        for (var subElementName in groupField) {
          if (!groupField.hasOwnProperty(subElementName)) continue;
          var subElement = groupField[subElementName],
            op = klass.groupOps[subElementName];
          if (!op) throw new Error("15952 unknown group operator '" + subElementName + "'");
          var groupExpression,
            subElementTypeStr = group._getTypeStr(subElement);
          if (subElementTypeStr === "Object") {
            var subElementObjCtx = new Expression.ObjectCtx({isDocumentOk:true});
            groupExpression = Expression.parseObject(subElement, subElementObjCtx, vps);
          } else if (subElementTypeStr === "Array") {
            throw new Error("15953 aggregating group operators are unary (" + subElementName + ")");
          } else { /* assume its an atomic single operand */
            groupExpression = Expression.parseOperand(subElement, vps);
          }
          group.addAccumulator(groupFieldName, op, groupExpression);
          ++subElementCount;
        }
        if (subElementCount !== 1) throw new Error("15954 the computed aggregate '" + groupFieldName + "' must specify exactly one operator");
      }
    }
    if (!idSet) throw new Error("15955 a group specification must include an _id");
    group.variables = new Variables(idGenerator.getIdCount());
    return group;
  };
  /**
   * Populates the GroupDocumentSource by grouping all of the input documents at once.
   *
   * Pulls documents from this.source until it yields null. For each document
   * the group id is computed, the group's accumulators are created on first
   * sight, and every accumulator is fed the value of its matching expression.
   *
   * @method populate
   * @param callback {Function} Required. callback(err) when done populating.
   * @async
   **/
  proto.populate = function populate(callback) {
    var numAccumulators = this.accumulatorFactories.length;
    // NOTE: this is not in mongo, does it belong here?
    if (numAccumulators !== this.expressions.length) {
      // return here so the callback cannot be invoked a second time by the loop below
      return callback(new Error("Must have equal number of accumulators and expressions"));
    }
    var input,
      self = this;
    async.whilst(
      function() {
        // input stays undefined until the first read and becomes null at end of input
        return input !== null;
      },
      function(cb) {
        self.source.getNext(function(err, doc) {
          if (err) return cb(err);
          input = doc;
          if (doc === null) return cb(); //Need to stop now, no new input
          try {
            self.variables.setRoot(input);
            /* get the _id value */
            var id = self.computeId(self.variables);
            if (undefined === id) id = null;
            // groups are keyed by the JSON text of the id
            var groupKey = JSON.stringify(id),
              group = self.groups[groupKey];
            if (!group) {
              self.originalGroupsKeys.push(id);
              self.groupsKeys.push(groupKey);
              group = [];
              self.groups[groupKey] = group;
              // Add the accumulators
              for (var afi = 0; afi < self.accumulatorFactories.length; afi++) {
                group.push(new self.accumulatorFactories[afi]());
              }
            }
            //NOTE: Skipped memory usage stuff for case when group already existed
            if (numAccumulators !== group.length) {
              throw new Error('Group must have one of each accumulator');
            }
            //NOTE: passing the input to each accumulator
            // The merge flag belongs to the accumulator, not the expression.
            // NOTE(review): assumes process(value, merging) as in MongoDB's Accumulator - confirm against ../accumulators
            for (var gi = 0; gi < group.length; gi++) {
              group[gi].process(self.expressions[gi].evaluate(self.variables), self.doingMerge);
            }
            // We are done with the ROOT document so release it.
            self.variables.clearRoot();
            //NOTE: Skipped the part about sorted files
          } catch (ex) {
            return cb(ex);
          }
          return cb();
        });
      },
      function(err) {
        if (err) return callback(err);
        self.populated = true;
        return callback();
      }
    );
  };
  /**
   * Collects the dependencies of the group by asking every _id expression and
   * every accumulator expression to add its dependencies to deps.
   *
   * @method getDependencies
   * @param deps {Object} The dependency collection handed to each expression's addDependencies()
   * @return {DocumentSource.GetDepsReturn} Always EXHAUSTIVE_ALL
   **/
  proto.getDependencies = function getDependencies(deps) {
    var i;
    // dependencies of the _id
    for (i = 0; i < this.idExpressions.length; i++) {
      this.idExpressions[i].addDependencies(deps);
    }
    // dependencies of the accumulated fields (one expression per field name)
    for (i = 0; i < this.fieldNames.length; i++) {
      this.expressions[i].addDependencies(deps);
    }
    return DocumentSource.GetDepsReturn.EXHAUSTIVE_ALL;
  };
  /**
   * Called internally only. Registers one output field by appending to the
   * three parallel arrays fieldNames, accumulatorFactories and expressions.
   *
   * @method addAccumulator
   * @param fieldName {String} The name of the field where the accumulated value will be placed
   * @param accumulatorFactory {Function} The factory used to create an accumulator for each group
   * @param expression {Expression} The expression evaluated on incoming documents before they are accumulated
   **/
  proto.addAccumulator = function addAccumulator(fieldName, accumulatorFactory, expression) {
    var names = this.fieldNames,
      factories = this.accumulatorFactories,
      exprs = this.expressions;
    names.push(fieldName);
    factories.push(accumulatorFactory);
    exprs.push(expression);
  };
  /**
   * Builds the output document for one group.
   *
   * @method makeDocument
   * @param id The internal group key; expanded into the output _id by expandId()
   * @param accums {Array} The group's accumulators, parallel to fieldNames
   * @param mergeableOutput Forwarded to each accumulator's getValue()
   * @return {Object} A document with _id followed by one field per accumulator; undefined values become null
   **/
  proto.makeDocument = function makeDocument(id, accums, mergeableOutput) {
    var names = this.fieldNames,
      doc = {_id: this.expandId(id)};
    for (var i = 0; i < names.length; i++) {
      var value = accums[i].getValue(mergeableOutput);
      doc[names[i]] = (value === undefined) ? null : value;
    }
    return doc;
  };
  /**
   * Computes the internal representation of the group key.
   *
   * @method computeId
   * @param vars The variables object handed to each _id expression's evaluate()
   * @return The single expression's value when there is exactly one _id expression, otherwise an array of all the values
   */
  proto.computeId = function computeId(vars) {
    var idExprs = this.idExpressions;
    // a lone expression is not wrapped
    if (idExprs.length === 1) {
      return idExprs[0].evaluate(vars); // NOTE: self will probably need to be async soon
    }
    // anything else yields one array entry per expression
    return idExprs.map(function(idExpr) {
      return idExpr.evaluate(vars);
    });
  };
  /**
   * Converts the internal representation of the group key to the _id shape
   * specified by the user.
   *
   * @method expandId
   * @param val The internal group key, as produced by computeId()
   * @return val itself when no _id field names were recorded, otherwise a document keyed by those names
   */
  proto.expandId = function expandId(val) {
    var names = this.idFieldNames,
      expanded = {};
    switch (names.length) {
      case 0:
        // _id doesn't get wrapped in a document
        return val;
      case 1:
        // _id is a single-field document containing val
        expanded[names[0]] = val;
        return expanded;
      default:
        // _id is a multi-field document containing the elements of val
        val.forEach(function(element, index) {
          expanded[names[index]] = element;
        });
        return expanded;
    }
  };
  /**
   * Parses the raw _id specification into idExpressions and, for a document
   * of named sub-fields, idFieldNames.
   *
   * @method parseIdExpression
   * @param groupField The raw _id value from the group specification
   * @param vps {VariablesParseState} Passed through to the expression parsers
   */
  proto.parseIdExpression = function parseIdExpression(groupField, vps) {
    var typeStr = this._getTypeStr(groupField);

    // {_id: {}} is treated as grouping on a constant, not an expression
    if (typeStr === 'Object') {
      var keys = Object.keys(groupField);
      if (keys.length !== 0) {
        if (keys[0].charAt(0) === '$') {
          // the object is itself an operator expression
          var objCtx = new Expression.ObjectCtx({});
          this.idExpressions.push(Expression.parseObject(groupField, objCtx, vps));
          return;
        }
        // otherwise each key becomes a named part of the _id
        for (var i = 0; i < keys.length; i++) {
          this.idFieldNames.push(keys[i]);
          this.idExpressions.push(Expression.parseOperand(groupField[keys[i]], vps));
        }
        return;
      }
    }

    if (typeStr === 'string' && groupField[0] === '$') {
      this.idExpressions.push(FieldPathExpression.parse(groupField, vps));
      return;
    }

    // everything else groups on a constant
    this.idExpressions.push(ConstantExpression.create(groupField));
  };
  /**
   * Get the type of something. Handles objects specially to return their true type; i.e. their constructor
   *
   * Non-objects and null report their typeof string (so null yields "object").
   * Objects report their constructor's name. Objects without a usable
   * constructor function, such as those made with Object.create(null),
   * report "Object" instead of raising a TypeError.
   *
   * @method _getTypeStr
   * @param obj {Object} The object to get the type of
   * @return {String} The type of the object as a string
   **/
  proto._getTypeStr = function _getTypeStr(obj) {
    var typeofStr = typeof obj;
    if (typeofStr !== "object" || obj === null) return typeofStr;
    var ctor = obj.constructor;
    // a missing or non-function constructor cannot supply a name
    return (typeof ctor === "function") ? ctor.name : "Object";
  };
  /**
   * Gets the source to use for the shard half of a split; this source itself.
   *
   * @method getShardSource
   * @return {GroupDocumentSource} This same instance
   **/
  proto.getShardSource = function getShardSource() {
    var shardSource = this;
    return shardSource;
  };
  /**
   * Builds a second GroupDocumentSource that re-groups the output of this one.
   *
   * The merger groups on the incoming document's _id and, for every output
   * field of this source, applies the same accumulator factory to the field
   * of the same name in the incoming document. It is created in merge mode
   * and gets its own Variables so that populate() can run on it.
   *
   * @method getMergeSource
   * @return {GroupDocumentSource} The merging source
   **/
  proto.getMergeSource = function getMergeSource() {
    var self = this,
      merger = klass.create(self.expCtx),
      idGenerator = new VariablesIdGenerator(),
      vps = new VariablesParseState(idGenerator);
    merger.doingMerge = true;
    // the merger uses the already-computed group key
    merger.idExpressions.push(FieldPathExpression.parse("$$ROOT._id", vps));
    for (var i = 0; i < self.fieldNames.length; i++) {
      var fieldName = self.fieldNames[i];
      // parse(), as for the _id path above, because the path is in "$$ROOT." variable form
      merger.addAccumulator(fieldName, self.accumulatorFactories[i], FieldPathExpression.parse("$$ROOT." + fieldName, vps));
    }
    // populate() calls variables.setRoot(), so the merger needs its own Variables
    merger.variables = new Variables(idGenerator.getIdCount());
    return merger;
  };
  464. };