GroupDocumentSource.js 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548
  1. "use strict";
  2. var DocumentSource = require("./DocumentSource"),
  3. accumulators = require("../accumulators/"),
  4. Expression = require("../expressions/Expression"),
  5. ConstantExpression = require("../expressions/ConstantExpression"),
  6. FieldPathExpression = require("../expressions/FieldPathExpression"),
  7. Variables = require("../expressions/Variables"),
  8. VariablesIdGenerator = require("../expressions/VariablesIdGenerator"),
  9. VariablesParseState = require("../expressions/VariablesParseState"),
  10. async = require("async");
  /**
   * A document source that groups its input documents by a computed _id and
   * reduces every group through a list of accumulators.
   *
   * @class GroupDocumentSource
   * @namespace mungedb-aggregate.pipeline.documentSources
   * @module mungedb-aggregate
   * @constructor
   * @param [expCtx] {ExpressionContext} Optional; an empty object is substituted when falsy
   * @throws {Error} When more than one argument is passed
   **/
  var GroupDocumentSource = module.exports = function GroupDocumentSource(expCtx) {
      if (arguments.length > 1) throw new Error("up to one arg expected");
      if (!expCtx) expCtx = {};
      base.call(this, expCtx);

      // State flags
      this.populated = false;   // flipped to true once populate() has finished
      this.doingMerge = false;
      this.spilled = false;     // spilling is not implemented; getNext() rejects it
      this.extSortAllowed = expCtx.extSortAllowed && !expCtx.inRouter;

      // Accumulator bookkeeping; index i of accumulatorFactories, fieldNames and
      // expressions all describe the same output field (see addAccumulator)
      this.accumulatorFactories = [];
      this.currentAccumulators = [];

      // Group storage: stringified group key -> array of accumulators
      this.groups = {};
      // Stringified keys in first-seen order, used to walk the groups
      this.groupsKeys = [];
      // The un-stringified ids, index-aligned with groupsKeys
      this.originalGroupsKeys = [];

      this.variables = null;    // assigned by createFromJson
      this.fieldNames = [];
      this.idFieldNames = [];
      this.expressions = [];
      this.idExpressions = [];

      // Cursor into groupsKeys for getNext()
      this.currentGroupsKeysIndex = 0;
  };
  var klass = GroupDocumentSource, base = DocumentSource, proto = klass.prototype = Object.create(base.prototype, {constructor:{value:klass}}); //jshint ignore:line
  klass.isSplittableDocumentSource = true;
  // TODO: Do we need this?
  // Lookup table from $group operator name to the factory registered for it;
  // createFromJson consults it for every operator found in a group spec.
  klass.groupOps = {
      $addToSet: accumulators.AddToSetAccumulator.create,
      $avg: accumulators.AvgAccumulator.create,
      $first: accumulators.FirstAccumulator.create,
      $last: accumulators.LastAccumulator.create,
      // $min and $max have special constructors because they share base features
      $max: accumulators.MinMaxAccumulator.createMax,
      $min: accumulators.MinMaxAccumulator.createMin,
      $push: accumulators.PushAccumulator.create,
      $sum: accumulators.SumAccumulator.create
  };
  // Name reported by getSourceName() and used as the key of serialize() output
  klass.groupName = "$group";
  /**
   * Static factory: builds a fresh GroupDocumentSource.
   *
   * @method create
   * @static
   * @param [expCtx] {ExpressionContext} Passed straight to the constructor
   * @return {GroupDocumentSource} The newly constructed source
   **/
  klass.create = function create(expCtx) {
      var source = new GroupDocumentSource(expCtx);
      return source;
  };
  /**
   * Reports the pipeline name of this source.
   *
   * @method getSourceName
   * @return {String} The value of the static groupName property
   **/
  proto.getSourceName = function getSourceName() {
      var sourceName = klass.groupName;
      return sourceName;
  };
  /**
   * Produces the next grouped output document.
   *
   * The first call populates the groups from the underlying source; each call
   * then emits one group, in the order the groups were first seen. After the
   * last group has been emitted the source is disposed and subsequent calls
   * yield null.
   *
   * @method getNext
   * @param callback {Function} Required. callback(err, doc); doc is null when no groups remain
   * @throws {Error} Synchronously, when no callback is supplied
   * @async
   **/
  proto.getNext = function getNext(callback) {
      if (!callback) throw new Error(this.getSourceName() + " #getNext() requires callback.");
      if (this.expCtx.checkForInterrupt && this.expCtx.checkForInterrupt() === false)
          return callback(new Error("Interrupted"));
      var self = this;
      async.series([
          function ensurePopulated(next) {
              if (self.populated) return next();
              self.populate(function(err) {
                  return next(err);
              });
          },
          function emitNextGroup(next) {
              // NOTE: Skipped the spilled functionality. The error goes through
              // next() rather than a throw so that it reaches the caller's
              // callback even when this task runs on a later tick.
              if (self.spilled) return next(new Error("Spilled is not implemented."));
              // Every group has already been handed out
              if (self.currentGroupsKeysIndex === self.groupsKeys.length) return next(null, null);
              var out;
              try {
                  var index = self.currentGroupsKeysIndex,
                      id = self.originalGroupsKeys[index],
                      stringifiedId = self.groupsKeys[index],
                      groupAccumulators = self.groups[stringifiedId];
                  out = self.makeDocument(id, groupAccumulators, self.expCtx.inShard);
                  // Release the source as soon as the final group has been built
                  if (++self.currentGroupsKeysIndex === self.groupsKeys.length) {
                      self.dispose();
                  }
              } catch (ex) {
                  return next(ex);
              }
              return next(null, out);
          }
      ], function(err, results) {
          // results[1] is the value of emitNextGroup; it is absent when an
          // earlier task failed
          callback(err, results ? results[1] : undefined);
      });
  };
  /**
   * Marks this source as exhausted and disposes the upstream source.
   *
   * @method dispose
   **/
  proto.dispose = function dispose() {
      //NOTE: Skipped 'freeing' our resources; at best we could remove some refs
      // Moving the cursor to the end makes getNext() report that nothing is left
      var endOfGroups = this.groupsKeys.length;
      this.currentGroupsKeysIndex = endOfGroups;
      // Let the upstream source release whatever it holds
      this.source.dispose();
  };
  /**
   * Replaces every _id expression and every accumulator input expression with
   * the result of its own optimize() call.
   *
   * @method optimize
   **/
  proto.optimize = function optimize() {
      // TODO if all _idExpressions are ExpressionConstants after optimization, then we know there
      // will only be one group. We should take advantage of that to avoid going through the hash
      // table.
      var idExprs = this.idExpressions,
          inputExprs = this.expressions,
          i;
      for (i = 0; i < idExprs.length; i++) {
          idExprs[i] = idExprs[i].optimize();
      }
      for (i = 0; i < inputExprs.length; i++) {
          inputExprs[i] = inputExprs[i].optimize();
      }
  };
  /**
   * Serializes this source into an object with a single key, the source name,
   * whose value holds the _id specification followed by one entry per
   * accumulated field.
   *
   * @method serialize
   * @param explain {Boolean} Create explain output; forwarded to every expression's serialize()
   * @return {Object} The serialized source
   * @throws {Error} When the _id expressions and _id field names are inconsistent
   **/
  proto.serialize = function serialize(explain) {
      var self = this,
          insides = {},
          idNames = self.idFieldNames,
          idExprs = self.idExpressions;

      // _id: either a single bare expression or a document of named expressions
      if (idNames.length === 0) {
          if (idExprs.length !== 1) throw new Error("Should only have one _id field");
          insides._id = idExprs[0].serialize(explain);
      } else {
          if (idExprs.length !== idNames.length)
              throw new Error("Should have the same number of idExpressions and idFieldNames.");
          var idDoc = {};
          for (var k = 0; k < idExprs.length; k++) {
              idDoc[idNames[k]] = idExprs[k].serialize(explain);
          }
          insides._id = idDoc;
      }

      // Remaining fields: { <fieldName>: { <opName>: <serialized expression> } }
      self.accumulatorFactories.forEach(function(Factory, i) {
          // An accumulator is instantiated only to ask it for its operator name
          var accumulator = new Factory(),
              serialExpression = self.expressions[i].serialize(explain),
              serialAccumulator = {};
          serialAccumulator[accumulator.getOpName()] = serialExpression;
          insides[self.fieldNames[i]] = serialAccumulator;
      });

      var serialSource = {};
      serialSource[self.getSourceName()] = insides;
      return serialSource;
  };
  /**
   * Creates a GroupDocumentSource from the given group specification.
   *
   * The specification must contain an _id. A "$doingMerge" entry, when present,
   * must be exactly true and switches the new source into merge mode. Every
   * other entry must be an object holding exactly one known group operator.
   *
   * @method createFromJson
   * @static
   * @param elem {Object} The group specification object; the right hand side of the $group
   * @param [expCtx] {ExpressionContext} Forwarded to the new source
   * @return {GroupDocumentSource} The configured source
   * @throws {Error} When the specification is malformed
   **/
  klass.createFromJson = function createFromJson(elem, expCtx) { //jshint maxcomplexity:17
      if (!(elem instanceof Object && elem.constructor === Object)) throw new Error("a group's fields must be specified in an object");
      var hasOwn = Object.prototype.hasOwnProperty,
          group = GroupDocumentSource.create(expCtx),
          idSet = false,
          idGenerator = new VariablesIdGenerator(),
          vps = new VariablesParseState(idGenerator);
      for (var groupFieldName in elem) {
          if (!hasOwn.call(elem, groupFieldName)) continue;
          var groupField = elem[groupFieldName];
          if (groupFieldName === "_id") {
              if (idSet) throw new Error("15948 a group's _id may only be specified once");
              group.parseIdExpression(groupField, vps);
              idSet = true;
              continue;
          }
          if (groupFieldName === "$doingMerge") {
              // The marker is only meaningful when it is literally true; anything
              // else is a malformed specification.
              if (groupField !== true) throw new Error("17030 $doingMerge should be true if present");
              group.doingMerge = true;
              continue;
          }
          /*
              Treat as a projection field with the additional ability to
              add aggregation operators.
          */
          if (groupFieldName.indexOf(".") !== -1)
              throw new Error("the group aggregate field name '" + groupFieldName +
                  "' cannot be used because $group's field names cannot contain '.'; uassert code 16414");
          if (groupFieldName[0] === "$")
              throw new Error("the group aggregate field name '" +
                  groupFieldName + "' cannot be an operator name; uassert 15950");
          // Validate the field's VALUE (not its name): it has to be an object
          // wrapping the operator, e.g. {$sum: "$x"}.
          if (group._getTypeStr(groupField) !== "Object")
              throw new Error("the group aggregate field '" + groupFieldName +
                  "' must be defined as an expression inside an object; uassert 15951");
          var subElementCount = 0;
          for (var subElementName in groupField) {
              if (!hasOwn.call(groupField, subElementName)) continue;
              var subElement = groupField[subElementName],
                  // Own-property lookup only, so names such as "constructor" are
                  // not mistaken for operators through Object.prototype.
                  op = hasOwn.call(klass.groupOps, subElementName) ? klass.groupOps[subElementName] : undefined;
              if (!op) throw new Error("15952 unknown group operator '" + subElementName + "'");
              var groupExpression,
                  subElementTypeStr = group._getTypeStr(subElement);
              if (subElementTypeStr === "Object") {
                  var subElementObjCtx = new Expression.ObjectCtx({isDocumentOk:true});
                  groupExpression = Expression.parseObject(subElement, subElementObjCtx, vps);
              } else if (subElementTypeStr === "Array") {
                  throw new Error("15953 aggregating group operators are unary (" + subElementName + ")");
              } else { /* assume its an atomic single operand */
                  groupExpression = Expression.parseOperand(subElement, vps);
              }
              group.addAccumulator(groupFieldName, op, groupExpression);
              ++subElementCount;
          }
          if (subElementCount !== 1)
              throw new Error("the computed aggregate '" +
                  groupFieldName + "' must specify exactly one operator; uassert code 15954");
      }
      if (!idSet) throw new Error("15955 a group specification must include an _id");
      group.variables = new Variables(idGenerator.getIdCount());
      return group;
  };
  /**
   * Populates the GroupDocumentSource by grouping all of the input documents at once.
   *
   * Documents are pulled from this.source until it yields null. Each document's
   * _id is computed and JSON-stringified to find its group, so ids that
   * serialize to the same JSON text share a group. New groups receive one fresh
   * accumulator per registered factory.
   *
   * @method populate
   * @param callback {Function} Required. callback(err) when done populating.
   * @async
   **/
  proto.populate = function populate(callback) {
      var numAccumulators = this.accumulatorFactories.length;
      // NOTE: this is not in mongo, does it belong here?
      if (numAccumulators !== this.expressions.length) {
          // Return here so the callback fires exactly once and no input is consumed
          return callback(new Error("Must have equal number of accumulators and expressions"));
      }
      var input,
          self = this;
      async.whilst(
          function() {
              // input starts out undefined and becomes null once the source is drained
              return input !== null;
          },
          function(cb) {
              self.source.getNext(function(err, doc) {
                  if (err) return cb(err);
                  input = doc;
                  if (doc === null) return cb(); //Need to stop now, no new input
                  try {
                      self.variables.setRoot(input);
                      /* get the _id value */
                      var id = self.computeId(self.variables);
                      if (undefined === id) id = null;
                      var groupKey = JSON.stringify(id),
                          group = self.groups[groupKey];
                      if (!group) {
                          // First document of this group: remember the key and
                          // create its accumulators
                          self.originalGroupsKeys.push(id);
                          self.groupsKeys.push(groupKey);
                          group = [];
                          self.groups[groupKey] = group;
                          for (var afi = 0; afi < self.accumulatorFactories.length; afi++) {
                              group.push(new self.accumulatorFactories[afi]());
                          }
                      }
                      //NOTE: Skipped memory usage stuff for case when group already existed
                      if (numAccumulators !== group.length) {
                          throw new Error("Group must have one of each accumulator");
                      }
                      //NOTE: passing the input to each accumulator
                      // NOTE(review): doingMerge is handed to evaluate() here; it may have
                      // been intended as the second argument of process() - confirm
                      // against the accumulator API.
                      for (var gi = 0; gi < group.length; gi++) {
                          group[gi].process(self.expressions[gi].evaluate(self.variables, self.doingMerge));
                      }
                      // We are done with the ROOT document so release it.
                      self.variables.clearRoot();
                      //NOTE: Skipped the part about sorted files
                  } catch (ex) {
                      return cb(ex);
                  }
                  return cb();
              });
          },
          function(err) {
              if (err) return callback(err);
              self.populated = true;
              return callback();
          }
      );
  };
  /**
   * Collects the dependencies of the group by asking every _id expression and
   * every accumulator input expression to add its own.
   *
   * @method getDependencies
   * @param deps {Object} The dependency collector handed to each expression's addDependencies()
   * @return {DocumentSource.GetDepsReturn} Always EXHAUSTIVE_ALL
   **/
  proto.getDependencies = function getDependencies(deps) {
      var idExprs = this.idExpressions,
          fieldCount = this.fieldNames.length,
          i;
      // add _id
      for (i = 0; i < idExprs.length; i++) {
          idExprs[i].addDependencies(deps);
      }
      // add the rest; expressions is index-aligned with fieldNames
      for (i = 0; i < fieldCount; i++) {
          this.expressions[i].addDependencies(deps);
      }
      return DocumentSource.GetDepsReturn.EXHAUSTIVE_ALL;
  };
  /**
   * Called internally only. Registers one accumulated output field by appending
   * to the three index-aligned arrays fieldNames, accumulatorFactories and
   * expressions.
   *
   * @method addAccumulator
   * @param fieldName {String} The name of the field where the accumulated value will be placed
   * @param accumulatorFactory {Function} Invoked with `new` to create the accumulator for each group
   * @param expression {Expression} Evaluated on incoming documents; its result is fed to the accumulator
   **/
  proto.addAccumulator = function addAccumulator(fieldName, accumulatorFactory, expression) {
      var names = this.fieldNames,
          factories = this.accumulatorFactories,
          inputs = this.expressions;
      names.push(fieldName);
      factories.push(accumulatorFactory);
      inputs.push(expression);
  };
  /**
   * Builds the output document for one group: the expanded _id followed by the
   * value of each accumulator under its field name. Undefined accumulator
   * values are emitted as null.
   *
   * @method makeDocument
   * @param id The internal group key, as produced by computeId
   * @param accums {Array} The group's accumulators, index-aligned with fieldNames
   * @param mergeableOutput Passed through to each accumulator's getValue()
   * @return {Object} The output document
   **/
  proto.makeDocument = function makeDocument(id, accums, mergeableOutput) {
      /* the _id field goes first */
      var doc = {_id: this.expandId(id)},
          names = this.fieldNames;
      /* then the accumulated fields */
      for (var i = 0; i < names.length; i++) {
          var value = accums[i].getValue(mergeableOutput);
          doc[names[i]] = (value === undefined) ? null : value;
      }
      return doc;
  };
  /**
   * Computes the internal representation of the group key.
   *
   * With exactly one _id expression the key is that expression's value;
   * otherwise it is an array holding one value per _id expression.
   *
   * @method computeId
   * @param vars The variables handed to each expression's evaluate(); populate() passes this.variables
   * @return The single value, or an array of values
   */
  proto.computeId = function computeId(vars) {
      var idExprs = this.idExpressions;
      // NOTE: this will probably need to be async soon
      if (idExprs.length === 1) return idExprs[0].evaluate(vars);
      return idExprs.map(function(idExpr) {
          return idExpr.evaluate(vars);
      });
  };
  /**
   * Converts the internal representation of the group key back into the _id
   * shape specified by the user.
   *
   * @method expandId
   * @param val The internal key: a bare value, or an array when there are several _id field names
   * @return val itself when there are no _id field names, otherwise a document keyed by them
   */
  proto.expandId = function expandId(val) {
      var names = this.idFieldNames;
      // no field names: the key is the _id as-is
      if (names.length === 0) return val;
      // one field name: wrap the key in a single-field document
      if (names.length === 1) {
          var single = {};
          single[names[0]] = val;
          return single;
      }
      // several field names: spread the elements of val across them
      return val.reduce(function(doc, element, i) {
          doc[names[i]] = element;
          return doc;
      }, {});
  };
  /**
   * Parses the raw _id specification into idExpressions and, for a document of
   * named sub-expressions, idFieldNames.
   *
   * - non-empty object whose first key starts with "$": parsed as one expression object
   * - any other non-empty object: one field name plus one operand expression per key
   * - string starting with "$": parsed as a field path
   * - anything else (including {}): treated as a constant
   *
   * @method parseIdExpression
   * @param groupField The value given for _id in the group specification
   * @param vps {VariablesParseState} Parse state handed to the expression parsers
   */
  proto.parseIdExpression = function parseIdExpression(groupField, vps) {
      var self = this,
          typeStr = self._getTypeStr(groupField);
      if (typeStr === "Object" && Object.keys(groupField).length !== 0) {
          // {_id: {}} is treated as grouping on a constant, not an expression
          var keys = Object.keys(groupField);
          if (keys[0][0] === "$") {
              var objCtx = new Expression.ObjectCtx({});
              self.idExpressions.push(Expression.parseObject(groupField, objCtx, vps));
          } else {
              keys.forEach(function(key) {
                  self.idFieldNames.push(key);
                  // Read the operand straight from the spec. Copying it through a
                  // scratch object would route a "__proto__" key through the
                  // Object.prototype accessor and yield the wrong value.
                  self.idExpressions.push(Expression.parseOperand(groupField[key], vps));
              });
          }
      } else if (typeStr === "string" && groupField[0] === "$") {
          self.idExpressions.push(FieldPathExpression.parse(groupField, vps));
      } else {
          self.idExpressions.push(ConstantExpression.create(groupField));
      }
  };
  /**
   * Get the type of something. Non-null objects report the name of their
   * constructor (e.g. "Object", "Array"); everything else reports its typeof
   * string, so null yields "object" and strings yield "string".
   *
   * Objects whose constructor is not a function, such as those created with
   * Object.create(null), are reported as "Object" instead of raising a TypeError.
   *
   * @method _getTypeStr
   * @param obj The value to get the type of
   * @return {String} The type of the value as a string
   **/
  proto._getTypeStr = function _getTypeStr(obj) {
      var typeofStr = typeof obj;
      if (typeofStr !== "object" || obj === null) return typeofStr;
      var ctor = obj.constructor;
      return (typeof ctor === "function") ? ctor.name : "Object";
  };
  /**
   * Returns the source to use for the shard half of a split pipeline; this
   * implementation hands back the very same instance.
   *
   * @method getShardSource
   * @return {GroupDocumentSource} this
   **/
  proto.getShardSource = function getShardSource() {
      return this;
  };
  /**
   * Builds the source that merges the outputs of this group.
   *
   * The merger groups on the "_id" of the documents it receives and keeps the
   * same output field names and accumulator factories; each accumulator reads
   * the field of the same name from the incoming (already grouped) document.
   *
   * @method getMergeSource
   * @return {GroupDocumentSource} A new source configured for merging
   **/
  proto.getMergeSource = function getMergeSource() {
      var self = this,
          merger = klass.create(this.expCtx),
          idGenerator = new VariablesIdGenerator(),
          vps = new VariablesParseState(idGenerator);
      // Flag the new source as a merger, mirroring the "$doingMerge" marker that
      // createFromJson recognizes
      merger.doingMerge = true;
      /* the merger will use the same grouping key */
      merger.idExpressions.push(FieldPathExpression.parse("$$ROOT._id", vps));
      for (var i = 0; i < self.fieldNames.length; i++) {
          // "$$ROOT.<name>" is a variable-prefixed path, so it goes through
          // parse() exactly like the _id path above
          merger.addAccumulator(
              self.fieldNames[i], self.accumulatorFactories[i],
              FieldPathExpression.parse("$$ROOT." + self.fieldNames[i], vps)
          );
      }
      // populate() dereferences this.variables, so the merger needs its own,
      // built the same way createFromJson builds it
      merger.variables = new Variables(idGenerator.getIdCount());
      return merger;
  };