GroupDocumentSource.js 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512
  1. "use strict";
  2. var DocumentSource = require("./DocumentSource"),
  3. accumulators = require("../accumulators/"),
  4. Expression = require("../expressions/Expression"),
  5. ConstantExpression = require("../expressions/ConstantExpression"),
  6. FieldPathExpression = require("../expressions/FieldPathExpression"),
  7. Variables = require("../expressions/Variables"),
  8. VariablesIdGenerator = require("../expressions/VariablesIdGenerator"),
  9. VariablesParseState = require("../expressions/VariablesParseState"),
  10. async = require("async");
  /**
   * Pipeline stage that buckets incoming documents by a computed _id and runs
   * each bucket's documents through a list of accumulators.
   *
   * @class GroupDocumentSource
   * @namespace mungedb-aggregate.pipeline.documentSources
   * @module mungedb-aggregate
   * @constructor
   * @param [expCtx] {ExpressionContext} Optional context; an empty object is used when omitted
   **/
  var GroupDocumentSource = module.exports = function GroupDocumentSource(expCtx) {
    if (arguments.length > 1) throw new Error("up to one arg expected");

    var ctx = expCtx ? expCtx : {};
    base.call(this, ctx);

    // Processing state
    this.populated = false;
    this.doingMerge = false;
    this.spilled = false;
    this.extSortAllowed = ctx.extSortAllowed && !ctx.inRouter;

    // Accumulator bookkeeping
    this.accumulatorFactories = [];
    this.currentAccumulators = [];

    // Group storage: stringified _id -> array of accumulators
    this.groups = {};
    // Stringified keys in first-seen order, used to walk `groups`
    this.groupsKeys = [];
    // The un-stringified _id values, index-aligned with `groupsKeys`
    this.originalGroupsKeys = [];

    this.variables = null;

    // Output field names / _id sub-field names and their index-aligned expressions
    this.fieldNames = [];
    this.idFieldNames = [];
    this.expressions = [];
    this.idExpressions = [];

    // Cursor into `groupsKeys` used by getNext()
    this.currentGroupsKeysIndex = 0;
  };
  var klass = GroupDocumentSource,
    base = DocumentSource,
    proto = klass.prototype = Object.create(base.prototype, {constructor: {value: klass}}); //jshint ignore:line
  // NOTE(review): presumably advertises that getShardSource()/getMergeSource() are available - confirm against the pipeline code
  klass.isSplittableDocumentSource = true;

  // Accumulator factories keyed by the operator name used inside a $group specification.
  klass.groupOps = {};
  klass.groupOps["$addToSet"] = accumulators.AddToSetAccumulator.create;
  klass.groupOps["$avg"] = accumulators.AvgAccumulator.create;
  klass.groupOps["$first"] = accumulators.FirstAccumulator.create;
  klass.groupOps["$last"] = accumulators.LastAccumulator.create;
  // $max and $min come from one accumulator class that exposes a dedicated factory for each
  klass.groupOps["$max"] = accumulators.MinMaxAccumulator.createMax;
  klass.groupOps["$min"] = accumulators.MinMaxAccumulator.createMin;
  klass.groupOps["$push"] = accumulators.PushAccumulator.create;
  klass.groupOps["$sum"] = accumulators.SumAccumulator.create;

  // Name returned by getSourceName() and used as the key of the serialized stage.
  klass.groupName = "$group";
  /**
   * Builds a new, empty GroupDocumentSource.
   *
   * @method create
   * @static
   * @param [expCtx] {ExpressionContext} Forwarded unchanged to the constructor
   * @return {GroupDocumentSource} The newly constructed source
   **/
  klass.create = function create(expCtx) {
    var source = new GroupDocumentSource(expCtx);
    return source;
  };
  /**
   * Reports the name of this stage.
   *
   * @method getSourceName
   * @return {String} The value of the static `groupName` ("$group")
   **/
  proto.getSourceName = function getSourceName() {
    var sourceName = klass.groupName;
    return sourceName;
  };
  /**
   * Returns the output document for the next group, or null once every group
   * has been emitted. The first call triggers populate().
   *
   * @method getNext
   * @return {Object} The next grouped document, or null when exhausted
   **/
  proto.getNext = function getNext() {
    if (this.expCtx && this.expCtx.checkForInterrupt) this.expCtx.checkForInterrupt();

    if (!this.populated) this.populate();

    // NOTE: the spill-to-disk path was not ported
    if (this.spilled) throw new Error("Spilled is not implemented.");

    var position = this.currentGroupsKeysIndex;
    if (position === this.groupsKeys.length) return null;

    var groupId = this.originalGroupsKeys[position],
      groupKey = this.groupsKeys[position],
      groupAccumulators = this.groups[groupKey],
      result = this.makeDocument(groupId, groupAccumulators, this.expCtx.inShard);

    // Advance the cursor; dispose as soon as the last group has been handed out.
    this.currentGroupsKeysIndex += 1;
    if (this.currentGroupsKeysIndex === this.groupsKeys.length) this.dispose();

    return result;
  };
  /**
   * Marks this source as exhausted and disposes the upstream source.
   *
   * @method dispose
   **/
  proto.dispose = function dispose() {
    // NOTE: nothing is actually freed here; the group data stays referenced.
    // Moving the cursor to the end makes getNext() report null from now on.
    var exhaustedPosition = this.groupsKeys.length;
    this.currentGroupsKeysIndex = exhaustedPosition;

    // let the upstream source release its resources
    this.source.dispose();
  };
  /**
   * Replaces every _id expression and every accumulator expression with the
   * result of its own optimize() call.
   *
   * @method optimize
   **/
  proto.optimize = function optimize() {
    // TODO if all _idExpressions are ExpressionConstants after optimization, then we know there
    // will only be one group. We should take advantage of that to avoid going through the hash
    // table.
    var idExprs = this.idExpressions,
      accumulatorExprs = this.expressions,
      idx,
      count;

    for (idx = 0, count = idExprs.length; idx < count; idx++) {
      idExprs[idx] = idExprs[idx].optimize();
    }
    for (idx = 0, count = accumulatorExprs.length; idx < count; idx++) {
      accumulatorExprs[idx] = accumulatorExprs[idx].optimize();
    }
  };
  /**
   * Builds a plain-object representation of this stage: a single-key object
   * whose key is the source name and whose value holds `_id` plus one
   * `{<operator>: <expression>}` entry per accumulated field.
   *
   * NOTE(review): the `doingMerge` flag is not part of the serialized output.
   *
   * @method serialize
   * @param explain {Boolean} Forwarded to each expression's serialize()
   * @return {Object} The serialized stage
   **/
  proto.serialize = function serialize(explain) {
    var spec = {},
      idNames = this.idFieldNames,
      idExprs = this.idExpressions;

    // _id: either the lone expression, or a document of named sub-expressions
    if (idNames.length === 0) {
      if (idExprs.length !== 1) throw new Error("Should only have one _id field");
      spec._id = idExprs[0].serialize(explain);
    } else {
      if (idExprs.length !== idNames.length)
        throw new Error("Should have the same number of idExpressions and idFieldNames.");
      spec._id = idExprs.reduce(function(idDoc, idExpr, idx) {
        idDoc[idNames[idx]] = idExpr.serialize(explain);
        return idDoc;
      }, {});
    }

    // accumulated fields: a throwaway accumulator is instantiated only to learn its operator name
    for (var idx = 0, count = this.accumulatorFactories.length; idx < count; idx++) {
      var accumulator = new this.accumulatorFactories[idx](),
        serializedExpr = this.expressions[idx].serialize(explain),
        operatorSpec = {};
      operatorSpec[accumulator.getOpName()] = serializedExpr;
      spec[this.fieldNames[idx]] = operatorSpec;
    }

    var stage = {};
    stage[this.getSourceName()] = spec;
    return stage;
  };
  /**
   * Creates a GroupDocumentSource from a $group specification.
   *
   * Each key of `elem` is handled as one of:
   *  - `_id`: the grouping key, handed to parseIdExpression()
   *  - `$doingMerge`: must be exactly `true`; sets the `doingMerge` flag on the new source
   *  - anything else: an output field, which must be an object holding exactly one
   *    known group operator (a key of `klass.groupOps`) applied to a single operand
   *
   * @method createFromJson
   * @static
   * @param elem {Object} The group specification object; the right hand side of the $group
   * @param [expCtx] {ExpressionContext} Passed through to the new source
   * @return {GroupDocumentSource} The configured source
   * @throws {Error} When the specification is malformed; messages carry the uassert code
   **/
  klass.createFromJson = function createFromJson(elem, expCtx) { //jshint maxcomplexity:17
    if (!(elem instanceof Object && elem.constructor === Object)) throw new Error("a group's fields must be specified in an object");

    var group = GroupDocumentSource.create(expCtx),
      idSet = false,
      idGenerator = new VariablesIdGenerator(),
      vps = new VariablesParseState(idGenerator),
      hasOwn = Object.prototype.hasOwnProperty;

    for (var groupFieldName in elem) {
      if (!hasOwn.call(elem, groupFieldName)) continue;
      var groupField = elem[groupFieldName];

      if (groupFieldName === "_id") {
        if (idSet) throw new Error("15948 a group's _id may only be specified once");
        group.parseIdExpression(groupField, vps);
        idSet = true;
        continue;
      }

      if (groupFieldName === "$doingMerge") {
        // The flag is only meaningful when true; any other value is a malformed spec.
        if (groupField !== true) throw new Error("17030 $doingMerge should be true if present");
        group.doingMerge = true;
        continue;
      }

      /*
        Treat as a projection field with the additional ability to
        add aggregation operators.
      */
      if (groupFieldName.indexOf(".") !== -1)
        throw new Error("the group aggregate field name '" + groupFieldName +
          "' cannot be used because $group's field names cannot contain '.'; uassert code 16414");
      if (groupFieldName[0] === "$")
        throw new Error("the group aggregate field name '" +
          groupFieldName + "' cannot be an operator name; uassert 15950");
      // The field's VALUE (not its name) has to be an object of the form {<operator>: <operand>}.
      if (group._getTypeStr(groupField) !== "Object")
        throw new Error("the group aggregate field '" + groupFieldName +
          "' must be defined as an expression inside an object; uassert 15951");

      var subElementCount = 0;
      for (var subElementName in groupField) {
        if (!hasOwn.call(groupField, subElementName)) continue;
        var subElement = groupField[subElementName],
          // own-property lookup so inherited names such as "toString" are not mistaken for operators
          op = hasOwn.call(klass.groupOps, subElementName) ? klass.groupOps[subElementName] : undefined;
        if (!op) throw new Error("15952 unknown group operator '" + subElementName + "'");

        var groupExpression,
          subElementTypeStr = group._getTypeStr(subElement);
        if (subElementTypeStr === "Object") {
          var subElementObjCtx = new Expression.ObjectCtx({isDocumentOk:true});
          groupExpression = Expression.parseObject(subElement, subElementObjCtx, vps);
        } else if (subElementTypeStr === "Array") {
          throw new Error("15953 aggregating group operators are unary (" + subElementName + ")");
        } else { /* assume its an atomic single operand */
          groupExpression = Expression.parseOperand(subElement, vps);
        }
        group.addAccumulator(groupFieldName, op, groupExpression);
        ++subElementCount;
      }
      if (subElementCount !== 1)
        throw new Error("the computed aggregate '" +
          groupFieldName + "' must specify exactly one operator; uassert code 15954");
    }

    if (!idSet) throw new Error("15955 a group specification must include an _id");

    group.variables = new Variables(idGenerator.getIdCount());
    return group;
  };
  /**
   * Drains the upstream source and groups all of its documents at once.
   *
   * For every input document the _id is computed, the matching group is looked
   * up (or created with one fresh accumulator per factory), and each accumulator
   * is fed the value of its index-aligned expression. Runs synchronously and
   * sets `populated` when done.
   *
   * @method populate
   * @throws {Error} When accumulator and expression counts disagree
   **/
  proto.populate = function populate() {
    var numAccumulators = this.accumulatorFactories.length;
    if (numAccumulators !== this.expressions.length) throw new Error("dassert: Must have equal number of accumulators and expressions");

    // SKIPPED: DEVIATION FROM MONGO: spill, sorted-file and memory-usage tracking are not ported

    // This loop consumes all input from the source and buckets it based on the _id expressions.
    var input;
    while ((input = this.source.getNext())) {
      this.variables.setRoot(input);

      // get the _id value; treat missing values the same as NULL SERVER-4674
      var id = this.computeId(this.variables);
      if (id === undefined)
        id = null;

      // Look for the _id value in the map; if it's not there, add a new entry
      // with blank accumulators.
      // NOTE(review): JSON.stringify maps some distinct ids to the same key
      // (e.g. a Date and its ISO string, NaN and null).
      var groupKey = JSON.stringify(id),
        group = this.groups[groupKey];
      if (!group) {
        this.originalGroupsKeys.push(id);
        this.groupsKeys.push(groupKey);
        group = [];
        this.groups[groupKey] = group;
        for (var afi = 0, afl = this.accumulatorFactories.length; afi < afl; afi++) {
          group.push(new this.accumulatorFactories[afi]());
        }
      }

      // tickle all the accumulators for the group we found
      if (numAccumulators !== group.length) throw new Error("Group must have one of each accumulator");
      for (var gi = 0, gl = group.length; gi < gl; gi++) {
        // The merge flag belongs to process(), not to evaluate(): the accumulator
        // needs it to know whether the value is a partial result from another group stage.
        group[gi].process(this.expressions[gi].evaluate(this.variables), this.doingMerge);
      }

      // We are done with the ROOT document so release it.
      this.variables.clearRoot();
    }

    this.populated = true;
  };
  /**
   * Collects the dependencies of every _id expression and every accumulator
   * expression into `deps`.
   *
   * @method getDependencies
   * @param deps {Object} The collector handed to each expression's addDependencies()
   * @return {DocumentSource.GetDepsReturn} Always `EXHAUSTIVE_ALL`
   **/
  proto.getDependencies = function getDependencies(deps) {
    var idx, count;

    // the _id expressions
    for (idx = 0, count = this.idExpressions.length; idx < count; idx++) {
      this.idExpressions[idx].addDependencies(deps);
    }

    // one accumulator expression per output field
    for (idx = 0, count = this.fieldNames.length; idx < count; idx++) {
      this.expressions[idx].addDependencies(deps);
    }

    return DocumentSource.GetDepsReturn.EXHAUSTIVE_ALL;
  };
  /**
   * Called internally only. Registers one accumulated output field by appending
   * to the three index-aligned lists (field names, factories, expressions).
   *
   * @method addAccumulator
   * @param fieldName {String} The name of the field where the accumulated value will be placed
   * @param accumulatorFactory {Function} Creates the accumulator used for each group
   * @param expression {Expression} Evaluated on each incoming document; its value is what gets accumulated
   **/
  proto.addAccumulator = function addAccumulator(fieldName, accumulatorFactory, expression) {
    var names = this.fieldNames,
      factories = this.accumulatorFactories,
      exprs = this.expressions;
    names[names.length] = fieldName;
    factories[factories.length] = accumulatorFactory;
    exprs[exprs.length] = expression;
  };
  /**
   * Builds the output document for one group.
   *
   * @method makeDocument
   * @param id The internal group key, as produced by computeId()
   * @param accums {Array} The group's accumulators, index-aligned with `fieldNames`
   * @param mergeableOutput Passed through to each accumulator's getValue()
   * @return {Object} A document with `_id` plus one entry per field name; undefined values become null
   **/
  proto.makeDocument = function makeDocument(id, accums, mergeableOutput) {
    var doc = {};
    doc._id = this.expandId(id);

    for (var idx = 0, count = this.fieldNames.length; idx < count; idx++) {
      var accumulated = accums[idx].getValue(mergeableOutput);
      doc[this.fieldNames[idx]] = (accumulated === undefined) ? null : accumulated;
    }

    return doc;
  };
  /**
   * Computes the internal representation of the group key.
   *
   * @method computeId
   * @param vars The variables object handed to each _id expression's evaluate()
   * @return The lone expression's value when there is exactly one _id expression,
   *         otherwise an array holding every expression's value in order
   */
  proto.computeId = function computeId(vars) {
    var idExprs = this.idExpressions;

    // A single expression is returned bare. NOTE: this will probably need to be async soon
    if (idExprs.length === 1) return idExprs[0].evaluate(vars);

    // Anything else is wrapped in an array, one slot per expression
    return idExprs.map(function(idExpr) {
      return idExpr.evaluate(vars);
    });
  };
  /**
   * Converts the internal representation of the group key back into the _id
   * shape specified by the user.
   *
   * @method expandId
   * @param val The internal key produced by computeId()
   * @return `val` itself when there are no _id field names, otherwise a document
   *         keyed by those field names
   */
  proto.expandId = function expandId(val) {
    var names = this.idFieldNames,
      idDoc;

    switch (names.length) {
      case 0:
        // the _id is not a document; hand the value back untouched
        return val;
      case 1:
        // single-field document: val is the field's value
        idDoc = {};
        idDoc[names[0]] = val;
        return idDoc;
      default:
        // multi-field document: val is an array aligned with the field names
        idDoc = {};
        val.forEach(function(part, idx) {
          idDoc[names[idx]] = part;
        });
        return idDoc;
    }
  };
  /**
   * Parses the raw _id specification into `idExpressions` and, for a document
   * of named sub-expressions, `idFieldNames`.
   *
   * @method parseIdExpression
   * @param groupField The value given for `_id` in the group specification
   * @param vps {VariablesParseState} Parse state forwarded to the expression parsers
   */
  proto.parseIdExpression = function parseIdExpression(groupField, vps) {
    var fieldType = this._getTypeStr(groupField),
      idExprs = this.idExpressions;

    if (fieldType === "Object") {
      var keys = Object.keys(groupField);
      // {_id: {}} is treated as grouping on a constant, not an expression, so it falls through
      if (keys.length !== 0) {
        if (keys[0][0] === "$") {
          // a single operator expression, e.g. {$op: ...}
          var objCtx = new Expression.ObjectCtx({});
          idExprs.push(Expression.parseObject(groupField, objCtx, vps));
          return;
        }
        // a document of named sub-expressions: remember each name alongside its expression
        for (var idx = 0; idx < keys.length; idx++) {
          this.idFieldNames.push(keys[idx]);
          idExprs.push(Expression.parseOperand(groupField[keys[idx]], vps));
        }
        return;
      }
    }

    if (fieldType === "string" && groupField[0] === "$") {
      idExprs.push(FieldPathExpression.parse(groupField, vps));
      return;
    }

    idExprs.push(ConstantExpression.create(groupField));
  };
  /**
   * Get the type of something. Non-null objects report the name of their
   * constructor; everything else reports its `typeof` string (so null is "object").
   *
   * Objects without a usable constructor function (e.g. `Object.create(null)` or
   * data that carries its own non-function `constructor` key) fall back to their
   * built-in type tag such as "Object" or "Array" instead of throwing.
   *
   * @method _getTypeStr
   * @param obj {Object} The object to get the type of
   * @return {String} The type of the object as a string
   **/
  proto._getTypeStr = function _getTypeStr(obj) {
    var typeofStr = typeof obj;
    if (typeofStr !== "object" || obj === null) return typeofStr;

    var ctor = obj.constructor;
    if (typeof ctor === "function") return ctor.name;

    // "[object Xyz]" -> "Xyz"
    return Object.prototype.toString.call(obj).slice(8, -1);
  };
  /**
   * Returns the source to use for the shard half of a split pipeline, which is
   * this very instance.
   *
   * @method getShardSource
   * @return {GroupDocumentSource} This source
   **/
  proto.getShardSource = function getShardSource() {
    var shardSource = this;
    return shardSource;
  };
  /**
   * Builds the group stage that merges the outputs of this stage.
   *
   * The merger groups on the incoming `_id` and keeps the same output field
   * names and accumulator factories, but each accumulator reads the field of the
   * same name from the incoming document rather than the original expression.
   * The merger is flagged with `doingMerge` and receives its own `variables`,
   * which populate() needs before it can process any input.
   *
   * @method getMergeSource
   * @return {GroupDocumentSource} A new source configured for merging
   **/
  proto.getMergeSource = function getMergeSource() {
    var merger = klass.create(this.expCtx),
      idGenerator = new VariablesIdGenerator(),
      vps = new VariablesParseState(idGenerator);

    merger.doingMerge = true;

    // the merger uses the already-computed _id as its grouping key
    merger.idExpressions.push(FieldPathExpression.parse("$$ROOT._id", vps));

    for (var i = 0; i < this.fieldNames.length; i++) {
      // "$$"-prefixed paths go through parse(), exactly like the _id path above
      merger.addAccumulator(
        this.fieldNames[i], this.accumulatorFactories[i],
        FieldPathExpression.parse("$$ROOT." + this.fieldNames[i], vps)
      );
    }

    // without this populate() would dereference a null `variables`
    merger.variables = new Variables(idGenerator.getIdCount());

    return merger;
  };