|
|
@@ -1,5 +1,5 @@
|
|
|
"use strict";
|
|
|
-
|
|
|
+var async = require('async');
|
|
|
/**
|
|
|
* mongodb "commands" (sent via db.$cmd.findOne(...)) subclass to make a command. define a singleton object for it.
|
|
|
* @class Pipeline
|
|
|
@@ -13,7 +13,6 @@ var Pipeline = module.exports = function Pipeline(theCtx){
|
|
|
this.explain = false;
|
|
|
this.splitMongodPipeline = false;
|
|
|
this.ctx = theCtx;
|
|
|
- this.SYNC_MODE = false;
|
|
|
}, klass = Pipeline, base = Object, proto = klass.prototype = Object.create(base.prototype, {constructor:{value:klass}});
|
|
|
|
|
|
var DocumentSource = require("./documentSources/DocumentSource"),
|
|
|
@@ -26,7 +25,8 @@ var DocumentSource = require("./documentSources/DocumentSource"),
|
|
|
OutDocumentSource = require('./documentSources/OutDocumentSource'),
|
|
|
GeoNearDocumentSource = require('./documentSources/GeoNearDocumentSource'),
|
|
|
RedactDocumentSource = require('./documentSources/RedactDocumentSource'),
|
|
|
- SortDocumentSource = require('./documentSources/SortDocumentSource');
|
|
|
+ SortDocumentSource = require('./documentSources/SortDocumentSource'),
|
|
|
+ DepsTracker = require('./DepsTracker');
|
|
|
|
|
|
klass.COMMAND_NAME = "aggregate";
|
|
|
klass.PIPELINE_NAME = "pipeline";
|
|
|
@@ -50,13 +50,14 @@ klass.nStageDesc = Object.keys(klass.stageDesc).length;
|
|
|
|
|
|
klass.optimizations = {};
|
|
|
klass.optimizations.local = {};
|
|
|
+klass.optimizations.sharded = {};
|
|
|
|
|
|
/**
|
|
|
* Moves $match before $sort when they are placed next to one another
|
|
|
* @static
|
|
|
* @method moveMatchBeforeSort
|
|
|
* @param pipelineInst An instance of a Pipeline
|
|
|
- **/
|
|
|
+ */
|
|
|
klass.optimizations.local.moveMatchBeforeSort = function moveMatchBeforeSort(pipelineInst) {
|
|
|
var sources = pipelineInst.sources;
|
|
|
for(var srcn = sources.length, srci = 1; srci < srcn; ++srci) {
|
|
|
@@ -77,7 +78,7 @@ klass.optimizations.local.moveMatchBeforeSort = function moveMatchBeforeSort(pip
|
|
|
* @static
|
|
|
* @method moveLimitBeforeSkip
|
|
|
* @param pipelineInst An instance of a Pipeline
|
|
|
- **/
|
|
|
+ */
|
|
|
klass.optimizations.local.moveLimitBeforeSkip = function moveLimitBeforeSkip(pipelineInst) {
|
|
|
var sources = pipelineInst.sources;
|
|
|
if(sources.length === 0) return;
|
|
|
@@ -110,7 +111,7 @@ klass.optimizations.local.moveLimitBeforeSkip = function moveLimitBeforeSkip(pip
|
|
|
* @static
|
|
|
* @method coalesceAdjacent
|
|
|
* @param pipelineInst An instance of a Pipeline
|
|
|
- **/
|
|
|
+ */
|
|
|
klass.optimizations.local.coalesceAdjacent = function coalesceAdjacent(pipelineInst) {
|
|
|
var sources = pipelineInst.sources;
|
|
|
if(sources.length === 0) return;
|
|
|
@@ -147,7 +148,7 @@ klass.optimizations.local.coalesceAdjacent = function coalesceAdjacent(pipelineI
|
|
|
* @static
|
|
|
* @method optimizeEachDocumentSource
|
|
|
* @param pipelineInst An instance of a Pipeline
|
|
|
- **/
|
|
|
+ */
|
|
|
klass.optimizations.local.optimizeEachDocumentSource = function optimizeEachDocumentSource(pipelineInst) {
|
|
|
var sources = pipelineInst.sources;
|
|
|
for(var srci = 0, srcn = sources.length; srci < srcn; ++srci) {
|
|
|
@@ -160,7 +161,7 @@ klass.optimizations.local.optimizeEachDocumentSource = function optimizeEachDocu
|
|
|
* @static
|
|
|
* @method duplicateMatchBeforeInitalRedact
|
|
|
* @param pipelineInst An instance of a Pipeline
|
|
|
- **/
|
|
|
+ */
|
|
|
klass.optimizations.local.duplicateMatchBeforeInitalRedact = function duplicateMatchBeforeInitalRedact(pipelineInst) {
|
|
|
var sources = pipelineInst.sources;
|
|
|
if(sources.length >= 2 && sources[0].constructor === RedactDocumentSource) {
|
|
|
@@ -174,6 +175,106 @@ klass.optimizations.local.duplicateMatchBeforeInitalRedact = function duplicateM
|
|
|
}
|
|
|
};
|
|
|
|
|
|
+//SKIPPED: addRequiredPrivileges
|
|
|
+
|
|
|
+/**
|
|
|
+ * Perform optimizations for a pipeline through sharding
|
|
|
+ * @method splitForSharded
|
|
|
+ */
|
|
|
+proto.splitForSharded = function splitForSharded() {
|
|
|
+ var shardPipeline = new Pipeline({});
|
|
|
+ shardPipeline.explain = this.explain;
|
|
|
+
|
|
|
+ klass.optimizations.sharded.findSplitPoint(shardPipeline, this);
|
|
|
+ klass.optimizations.sharded.moveFinalUnwindFromShardsToMerger(shardPipeline, this);
|
|
|
+ //klass.optimizations.sharded.limitFieldsSentFromShardsToMerger(shardPipeline, this);
|
|
|
+ return shardPipeline;
|
|
|
+};
|
|
|
+
|
|
|
+/**
|
|
|
+ * Split the source into Merge sources and Shard sources
|
|
|
+ * @static
|
|
|
+ * @method findSplitPoint
|
|
|
+ * @param shardPipe Shard sources
|
|
|
+ * @param mergePipe Merge sources
|
|
|
+ */
|
|
|
+klass.optimizations.sharded.findSplitPoint = function findSplitPoint(shardPipe, mergePipe) {
|
|
|
+ while(mergePipe.sources.length > 0) {
|
|
|
+ var current = mergePipe.sources[0];
|
|
|
+ mergePipe.sources.splice(0, 1);
|
|
|
+
|
|
|
+ if (current.isSplittable && current.isSplittable()) {
|
|
|
+ var shardSource = current.getShardSource(),
|
|
|
+ mergeSource = current.getMergeSource();
|
|
|
+ if (typeof shardSource != "undefined") { shardPipe.sources.push(shardSource); } //push_back
|
|
|
+ if (typeof mergeSource != "undefined") { mergePipe.sources.unshift(mergeSource); } //push_front
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ // shardPipe.sources may be undefined on a freshly constructed Pipeline; create it lazily
|
|
|
+ if (!shardPipe.sources) { shardPipe.sources = []; }
|
|
|
+ shardPipe.sources.push(current);
|
|
|
+ }
|
|
|
+ }
|
|
|
+};
|
|
|
+
|
|
|
+/**
|
|
|
+ * Optimize pipeline through moving unwind to the end
|
|
|
+ * @static
|
|
|
+ * @method moveFinalUnwindFromShardsToMerger
|
|
|
+ * @param shardPipe shard sources
|
|
|
+ * @param mergePipe merge sources
|
|
|
+ */
|
|
|
+klass.optimizations.sharded.moveFinalUnwindFromShardsToMerger = function moveFinalUnwindFromShardsToMerger(shardPipe, mergePipe) {
|
|
|
+ while((shardPipe.sources != null) && (shardPipe.sources.length > 0
|
|
|
+ && shardPipe.sources[shardPipe.sources.length - 1].constructor === UnwindDocumentSource)) {
|
|
|
+ mergePipe.sources.unshift(shardPipe.sources[shardPipe.sources.length - 1]);
|
|
|
+ shardPipe.sources.pop();
|
|
|
+ }
|
|
|
+};
|
|
|
+
|
|
|
+//SKIPPED: optimizations.sharded.limitFieldsSentFromShardsToMerger. Somehow what this produces is not handled by Expression.js (err 16404)
|
|
|
+/**
|
|
|
+ * Optimize pipeline by adding $project stage if shard fields are not exhaustive
|
|
|
+ * @static
|
|
|
+ * @method limitFieldsSentFromShardsToMerger
|
|
|
+ * @param shardPipe shard sources
|
|
|
+ * @param mergePipe merge sources
|
|
|
+ */
|
|
|
+// klass.optimizations.sharded.limitFieldsSentFromShardsToMerger = function limitFieldsSentFromShardsToMerger(shardPipe, mergePipe) {
|
|
|
+// var mergeDeps = mergePipe.getDependencies(shardPipe.getInitialQuery());
|
|
|
+// if (mergeDeps.needWholeDocument) {
|
|
|
+// return;
|
|
|
+// }
|
|
|
+// if (mergeDeps.fields == null) {
|
|
|
+// mergeDeps.fields = {};
|
|
|
+// }
|
|
|
+// if (Object.keys(mergeDeps.fields).length === 0) {
|
|
|
+// mergeDeps.fields["_id"] = 0;
|
|
|
+// }
|
|
|
+// if (shardPipe.sources == null) {
|
|
|
+// shardPipe.sources = [];
|
|
|
+// }
|
|
|
+// //NOTE: Deviation from Mongo: not setting mergeDeps.needTextScore because we aren't handling that (Document meta stuff)
|
|
|
+
|
|
|
+// // HEURISTIC: only apply optimization if none of the shard stages have an exhaustive list of
|
|
|
+// // field dependencies. While this may not be 100% ideal in all cases, it is simple and
|
|
|
+// // avoids the worst cases by ensuring that:
|
|
|
+// // 1) Optimization IS applied when the shards wouldn't have known their exhaustive list of
|
|
|
+// // dependencies. This situation can happen when a $sort is before the first $project or
|
|
|
+// // $group. Without the optimization, the shards would have to reify and transmit full
|
|
|
+// // objects even though only a subset of fields are needed.
|
|
|
+// // 2) Optimization IS NOT applied immediately following a $project or $group since it would
|
|
|
+// // add an unnecessary project (and therefore a deep-copy).
|
|
|
+// for (var i = 0; i < shardPipe.sources.length; i++) {
|
|
|
+// if (shardPipe.sources[i].getDependencies() & DocumentSource.GetDepsReturn.EXHAUSTIVE_FIELDS)
|
|
|
+// return;
|
|
|
+// }
|
|
|
+
|
|
|
+// // if we get here, add the project.
|
|
|
+// shardPipe.sources.push(ProjectDocumentSource.createFromJson({$project: mergeDeps.toProjection()[0]}, shardPipe.ctx));
|
|
|
+// };
|
|
|
+
|
|
|
/**
|
|
|
* Create an `Array` of `DocumentSource`s from the given JSON pipeline
|
|
|
* // NOTE: DEVIATION FROM MONGO: split out into a separate function to better allow extensions (was in parseCommand)
|
|
|
@@ -181,7 +282,7 @@ klass.optimizations.local.duplicateMatchBeforeInitalRedact = function duplicateM
|
|
|
* @method parseDocumentSources
|
|
|
* @param pipeline {Array} The JSON pipeline
|
|
|
* @returns {Array} The parsed `DocumentSource`s
|
|
|
- **/
|
|
|
+ */
|
|
|
klass.parseDocumentSources = function parseDocumentSources(pipeline, ctx){
|
|
|
var sources = [];
|
|
|
for (var nSteps = pipeline.length, iStep = 0; iStep < nSteps; ++iStep) {
|
|
|
@@ -197,7 +298,7 @@ klass.parseDocumentSources = function parseDocumentSources(pipeline, ctx){
|
|
|
|
|
|
// Create a DocumentSource pipeline stage from 'stageSpec'.
|
|
|
var desc = klass.stageDesc[stageName];
|
|
|
- if (!desc) throw new Error("Unrecognized pipeline stage name: '" + stageName + "'; code 16435");
|
|
|
+ if (!desc) throw new Error("Unrecognized pipeline stage name: '" + stageName + "'; uassert code 16436");
|
|
|
|
|
|
// Parse the stage
|
|
|
var stage = desc(stageSpec, ctx);
|
|
|
@@ -205,7 +306,7 @@ klass.parseDocumentSources = function parseDocumentSources(pipeline, ctx){
|
|
|
sources.push(stage);
|
|
|
|
|
|
if(stage.constructor === OutDocumentSource && iStep !== nSteps - 1) {
|
|
|
- throw new Error("$out can only be the final stage in the pipeline; code 16435");
|
|
|
+ throw new Error("$out can only be the final stage in the pipeline; code 16991");
|
|
|
}
|
|
|
}
|
|
|
return sources;
|
|
|
@@ -223,7 +324,7 @@ klass.parseDocumentSources = function parseDocumentSources(pipeline, ctx){
|
|
|
* @param cmdObj.splitMongodPipeline {Boolean} should split?
|
|
|
* @param ctx {Object} Not used yet in mungedb-aggregate
|
|
|
* @returns {Array} the pipeline, if created, otherwise a NULL reference
|
|
|
- **/
|
|
|
+ */
|
|
|
klass.parseCommand = function parseCommand(cmdObj, ctx){
|
|
|
var pipelineNamespace = require("./"),
|
|
|
Pipeline = pipelineNamespace.Pipeline, // using require in case Pipeline gets replaced with an extension
|
|
|
@@ -248,10 +349,9 @@ klass.parseCommand = function parseCommand(cmdObj, ctx){
|
|
|
/**
|
|
|
* If we get here, we've harvested the fields we expect for a pipeline
|
|
|
* Set up the specified document source pipeline.
|
|
|
- **/
|
|
|
+ */
|
|
|
// NOTE: DEVIATION FROM MONGO: split this into a separate function to simplify and better allow for extensions (now in parseDocumentSources)
|
|
|
- var sources = pipelineInst.sources = Pipeline.parseDocumentSources(pipeline, ctx);
|
|
|
-
|
|
|
+ pipelineInst.sources = Pipeline.parseDocumentSources(pipeline, ctx);
|
|
|
klass.optimizations.local.moveMatchBeforeSort(pipelineInst);
|
|
|
klass.optimizations.local.moveLimitBeforeSkip(pipelineInst);
|
|
|
klass.optimizations.local.coalesceAdjacent(pipelineInst);
|
|
|
@@ -261,12 +361,6 @@ klass.parseCommand = function parseCommand(cmdObj, ctx){
|
|
|
return pipelineInst;
|
|
|
};
|
|
|
|
|
|
-// sync callback for Pipeline#run if omitted
|
|
|
-klass.SYNC_CALLBACK = function(err, results){
|
|
|
- if (err) throw err;
|
|
|
- return results.result;
|
|
|
-};
|
|
|
-
|
|
|
function ifError(err) {
|
|
|
if (err) throw err;
|
|
|
}
|
|
|
@@ -277,7 +371,7 @@ function ifError(err) {
|
|
|
* @param inputSource {DocumentSource} The input document source for the pipeline
|
|
|
* @param [callback] {Function} Optional callback function if using async extensions
|
|
|
* @return {Object} An empty object or the match spec
|
|
|
-**/
|
|
|
+ */
|
|
|
proto.getInitialQuery = function getInitialQuery() {
|
|
|
var sources = this.sources;
|
|
|
if(sources.length === 0) {
|
|
|
@@ -297,15 +391,18 @@ proto.getInitialQuery = function getInitialQuery() {
|
|
|
* @param inputSource {DocumentSource} The input document source for the pipeline
|
|
|
* @param [callback] {Function} Optional callback function if using async extensions
|
|
|
* @return {Object} An empty object or the match spec
|
|
|
-**/
|
|
|
+ */
|
|
|
proto.serialize = function serialize() {
|
|
|
var serialized = {},
|
|
|
array = [];
|
|
|
|
|
|
// create an array out of the pipeline operations
|
|
|
- this.sources.forEach(function(source) {
|
|
|
- source.serializeToArray(array);
|
|
|
- });
|
|
|
+ if (this.sources) {
|
|
|
+ for (var i = 0; i < this.sources.length; i++) {
|
|
|
+ //this.sources.forEach(function(source) {
|
|
|
+ this.sources[i].serializeToArray(array);
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
serialized[klass.COMMAND_NAME] = this.ctx && this.ctx.ns && this.ctx.ns.coll ? this.ctx.ns.coll : '';
|
|
|
serialized[klass.PIPELINE_NAME] = array;
|
|
|
@@ -318,7 +415,7 @@ proto.serialize = function serialize() {
|
|
|
/**
|
|
|
* Points each source at its previous source
|
|
|
* @method stitch
|
|
|
-**/
|
|
|
+ */
|
|
|
proto.stitch = function stitch() {
|
|
|
if(this.sources.length <= 0) throw new Error("should not have an empty pipeline; massert code 16600");
|
|
|
|
|
|
@@ -334,20 +431,31 @@ proto.stitch = function stitch() {
|
|
|
/**
|
|
|
* Run the pipeline
|
|
|
* @method run
|
|
|
- * @param callback {Function} Optional. Run the pipeline in async mode; callback(err, result)
|
|
|
- * @return result {Object} The result of executing the pipeline
|
|
|
-**/
|
|
|
+ * @param callback {Function} gets called once for each document result from the pipeline
|
|
|
+ */
|
|
|
proto.run = function run(callback) {
|
|
|
// should not get here in the explain case
|
|
|
if(this.explain) throw new Error("Should not be running a pipeline in explain mode!");
|
|
|
-
|
|
|
- /* NOTE: DEVIATION FROM MONGO SOURCE. WE'RE SUPPORTING SYNC AND ASYNC */
|
|
|
- if(this.SYNC_MODE) {
|
|
|
- callback();
|
|
|
- return this._runSync();
|
|
|
- } else {
|
|
|
- return this._runAsync(callback);
|
|
|
- }
|
|
|
+
|
|
|
+ var doc = null,
|
|
|
+ error = null,
|
|
|
+ finalSource = this._getFinalSource();
|
|
|
+
|
|
|
+ async.doWhilst(
|
|
|
+ function iterator(next){
|
|
|
+ return finalSource.getNext(function (err, obj){
|
|
|
+ callback(err, obj);
|
|
|
+ doc = obj;
|
|
|
+ error = err;
|
|
|
+ next();
|
|
|
+ });
|
|
|
+ },
|
|
|
+ function test(){
|
|
|
+ return doc !== null && !error;
|
|
|
+ },
|
|
|
+ function done(err){
|
|
|
+ //nothing to do here
|
|
|
+ });
|
|
|
};
|
|
|
|
|
|
/**
|
|
|
@@ -355,58 +463,16 @@ proto.run = function run(callback) {
|
|
|
* @method _getFinalSource
|
|
|
* @return {Object} The DocumentSource at the end of the pipeline
|
|
|
* @private
|
|
|
-**/
|
|
|
+ */
|
|
|
proto._getFinalSource = function _getFinalSource() {
|
|
|
return this.sources[this.sources.length - 1];
|
|
|
};
|
|
|
|
|
|
-/**
|
|
|
- * Run the pipeline synchronously
|
|
|
- * @method _runSync
|
|
|
- * @return {Object} The results object {result:resultArray}
|
|
|
- * @private
|
|
|
-**/
|
|
|
-proto._runSync = function _runSync(callback) {
|
|
|
- var resultArray = [],
|
|
|
- finalSource = this._getFinalSource(),
|
|
|
- handleErr = function(err) {
|
|
|
- if(err) throw err;
|
|
|
- },
|
|
|
- next;
|
|
|
- while((next = finalSource.getNext(handleErr)) !== DocumentSource.EOF) {
|
|
|
- resultArray.push(next);
|
|
|
- }
|
|
|
- return {result:resultArray};
|
|
|
-};
|
|
|
-
|
|
|
-/**
|
|
|
- * Run the pipeline asynchronously
|
|
|
- * @method _runAsync
|
|
|
- * @param callback {Function} callback(err, resultObject)
|
|
|
- * @private
|
|
|
-**/
|
|
|
-proto._runAsync = function _runAsync(callback) {
|
|
|
- var resultArray = [],
|
|
|
- finalSource = this._getFinalSource(),
|
|
|
- gotNext = function(err, doc) {
|
|
|
- if(err) return callback(err);
|
|
|
- if(doc !== DocumentSource.EOF) {
|
|
|
- resultArray.push(doc);
|
|
|
- return setImmediate(function() { //setImmediate to avoid callstack size issues
|
|
|
- finalSource.getNext(gotNext);
|
|
|
- });
|
|
|
- } else {
|
|
|
- return callback(null, {result:resultArray});
|
|
|
- }
|
|
|
- };
|
|
|
- finalSource.getNext(gotNext);
|
|
|
-};
|
|
|
-
|
|
|
/**
|
|
|
* Get the pipeline explanation
|
|
|
* @method writeExplainOps
|
|
|
* @return {Array} An array of source explanations
|
|
|
-**/
|
|
|
+ */
|
|
|
proto.writeExplainOps = function writeExplainOps() {
|
|
|
var array = [];
|
|
|
this.sources.forEach(function(source) {
|
|
|
@@ -419,7 +485,44 @@ proto.writeExplainOps = function writeExplainOps() {
|
|
|
* Set the source of documents for the pipeline
|
|
|
* @method addInitialSource
|
|
|
* @param source {DocumentSource}
|
|
|
-**/
|
|
|
+ */
|
|
|
proto.addInitialSource = function addInitialSource(source) {
|
|
|
this.sources.unshift(source);
|
|
|
};
|
|
|
+
|
|
|
+//SKIPPED: canRunInMongos
|
|
|
+
|
|
|
+//Note: Deviation from Mongo: Mongo 2.6.5 passes a param to getDependencies
|
|
|
+// to calculate TextScore. mungedb-aggregate doesn't do this, so no param is needed.
|
|
|
+proto.getDependencies = function getDependencies () {
|
|
|
+ var deps = new DepsTracker(),
|
|
|
+ knowAllFields = false;
|
|
|
+
|
|
|
+ //NOTE: Deviation from Mongo -- We aren't using Meta and textscore
|
|
|
+ for (var i=0; i < this.sources.length && !knowAllFields; i++) {
|
|
|
+ var localDeps = new DepsTracker(),
|
|
|
+ status = this.sources[i].getDependencies(localDeps);
|
|
|
+
|
|
|
+ if (status === DocumentSource.GetDepsReturn.NOT_SUPPORTED) {
|
|
|
+ // Assume this stage needs everything. We may still know something about our
|
|
|
+ // dependencies if an earlier stage returned either EXHAUSTIVE_FIELDS or
|
|
|
+ // EXHAUSTIVE_META.
|
|
|
+ break;
|
|
|
+ }
|
|
|
+
|
|
|
+ if (!knowAllFields) {
|
|
|
+ for (var key in localDeps.fields)
|
|
|
+ deps.fields[key] = localDeps.fields[key];
|
|
|
+
|
|
|
+ if (localDeps.needWholeDocument)
|
|
|
+ deps.needWholeDocument = true;
|
|
|
+ knowAllFields = status & DocumentSource.GetDepsReturn.EXHAUSTIVE_FIELDS;
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ if (!knowAllFields)
|
|
|
+ deps.needWholeDocument = true; // don't know all fields we need
|
|
|
+
|
|
|
+ return deps;
|
|
|
+};
|