CursorDocumentSource.js 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278
  1. "use strict";
  2. var async = require('async'),
  3. Value = require('../Value'),
  4. Runner = require('../../query/Runner'),
  5. DocumentSource = require('./DocumentSource'),
  6. LimitDocumentSource = require('./LimitDocumentSource');
  7. /**
  8. * Constructs and returns Documents from the BSONObj objects produced by a supplied Runner.
  9. * An object of this type may only be used by one thread, see SERVER-6123.
  10. *
  11. * This is usually put at the beginning of a chain of document sources
  12. * in order to fetch data from the database.
  13. *
  14. * @class CursorDocumentSource
  15. * @namespace mungedb-aggregate.pipeline.documentSources
  16. * @module mungedb-aggregate
  17. * @constructor
  18. * @param {CursorDocumentSource.CursorWithContext} cursorWithContext the cursor to use to fetch data
  19. **/
  20. var CursorDocumentSource = module.exports = CursorDocumentSource = function CursorDocumentSource(namespace, runner, expCtx){
  21. base.call(this, expCtx);
  22. this._docsAddedToBatches = 0;
  23. this._ns = namespace;
  24. this._runner = runner;
  25. this._firstRun = true;
  26. }, klass = CursorDocumentSource, base = DocumentSource, proto = klass.prototype = Object.create(base.prototype, {constructor:{value:klass}});
  27. klass.MaxDocumentsToReturnToClientAtOnce = 150; //DEVIATION: we are using documents instead of bytes
  28. klass.create = function create(ns, runner, expCtx) {
  29. return new CursorDocumentSource(ns, runner, expCtx);
  30. };
// Prototype-level defaults for per-cursor state.
// NOTE: this default array lives on the prototype, so it is shared by all
// instances until an instance assigns its own (getNext and dispose both do).
proto._currentBatch = [];
proto._currentBatchIndex = 0; // read position within _currentBatch
// BSONObj members must outlive _projection and cursor.
proto._query = undefined;        // query recorded for explain output (see setQuery)
proto._sort = undefined;         // sort recorded for explain output (see setSort)
proto._projection = undefined;   // projection recorded for explain output (see setProjection)
proto._dependencies = undefined; // when set, extractFields() narrows each fetched doc (see loadBatch)
proto._limit = undefined;        // coalesced LimitDocumentSource, if any (see coalesce)
proto._docsAddedToBatches = undefined; // for _limit enforcement
proto._ns = undefined;           // namespace documents are read from
proto._runner = undefined; // PipelineRunner holds a weak_ptr to this.
  42. proto.isValidInitialSource = function(){
  43. return true;
  44. };
  45. /**
  46. * Release the Cursor and the read lock it requires, but without changing the other data.
  47. * Releasing the lock is required for proper concurrency, see SERVER-6123. This
  48. * functionality is also used by the explain version of pipeline execution.
  49. *
  50. * @method dispose
  51. **/
  52. proto.dispose = function dispose() {
  53. if (this._runner) this._runner.reset();
  54. this._currentBatch = [];
  55. };
  56. /**
  57. * Get the source's name.
  58. * @method getSourceName
  59. * @returns {String} the string name of the source as a constant string; this is static, and there's no need to worry about adopting it
  60. **/
  61. proto.getSourceName = function getSourceName() {
  62. return "$cursor";
  63. };
  64. /**
  65. * Returns the next Document if there is one
  66. *
  67. * @method getNext
  68. **/
  69. proto.getNext = function getNext(callback) {
  70. if (this.expCtx && this.expCtx.checkForInterrupt && this.expCtx.checkForInterrupt()){
  71. return callback(new Error('Interrupted'));
  72. }
  73. var self = this;
  74. if (self._currentBatchIndex >= self._currentBatch.length) {
  75. self._currentBatchIndex = 0;
  76. self._currentBatch = [];
  77. return self.loadBatch(function(err){
  78. if (err) return callback(err);
  79. if (self._currentBatch.length === 0)
  80. return callback(null, null);
  81. return callback(null, self._currentBatch[self._currentBatchIndex++]);
  82. });
  83. }
  84. return callback(null, self._currentBatch[self._currentBatchIndex++]);
  85. };
  86. /**
  87. * Attempt to coalesce this DocumentSource with any $limits that it encounters
  88. *
  89. * @method coalesce
  90. * @param {DocumentSource} nextSource the next source in the document processing chain.
  91. * @returns {Boolean} whether or not the attempt to coalesce was successful or not; if the attempt was not successful, nothing has been changed
  92. **/
  93. proto.coalesce = function coalesce(nextSource) {
  94. // Note: Currently we assume the $limit is logically after any $sort or
  95. // $match. If we ever pull in $match or $sort using this method, we
  96. // will need to keep track of the order of the sub-stages.
  97. if (!this._limit) {
  98. if (nextSource instanceof LimitDocumentSource) {
  99. this._limit = nextSource;
  100. return this._limit;
  101. }
  102. return false;// false if next is not a $limit
  103. }
  104. else {
  105. return this._limit.coalesce(nextSource);
  106. }
  107. return false;
  108. };
  109. /**
  110. * Record the query that was specified for the cursor this wraps, if
  111. * any.
  112. *
  113. * This should be captured after any optimizations are applied to
  114. * the pipeline so that it reflects what is really used.
  115. *
  116. * This gets used for explain output.
  117. *
  118. * @method setQuery
  119. * @param {Object} pBsonObj the query to record
  120. **/
  121. proto.setQuery = function setQuery(query) {
  122. this._query = query;
  123. };
  124. /**
  125. * Record the sort that was specified for the cursor this wraps, if
  126. * any.
  127. *
  128. * This should be captured after any optimizations are applied to
  129. * the pipeline so that it reflects what is really used.
  130. *
  131. * This gets used for explain output.
  132. *
  133. * @method setSort
  134. * @param {Object} pBsonObj the query to record
  135. **/
  136. proto.setSort = function setSort(sort) {
  137. this._sort = sort;
  138. };
  139. /**
  140. * Informs this object of projection and dependency information.
  141. *
  142. * @method setProjection
  143. * @param {Object} projection
  144. **/
  145. proto.setProjection = function setProjection(projection, deps) {
  146. this._projection = projection;
  147. this._dependencies = deps;
  148. };
  149. /**
  150. *
  151. * @method setSource
  152. * @param source {DocumentSource} the underlying source to use
  153. * @param callback {Function} a `mungedb-aggregate`-specific extension to the API to half-way support reading from async sources
  154. **/
  155. proto.setSource = function setSource(theSource) {
  156. throw new Error('this doesnt take a source');
  157. };
  158. proto.serialize = function serialize(explain) {
  159. // we never parse a documentSourceCursor, so we only serialize for explain
  160. if (!explain)
  161. return {};
  162. var out = {};
  163. out[this.getSourceName()] = {
  164. query: this._query,
  165. sort: this._sort ? this._sort : null,
  166. limit: this._limit ? this._limit.getLimit() : null,
  167. fields: this._projection ? this._projection : null,
  168. plan: this._runner.getInfo(explain)
  169. };
  170. return out;
  171. };
  172. /**
  173. * returns -1 for no limit
  174. *
  175. * @method getLimit
  176. **/
  177. proto.getLimit = function getLimit() {
  178. return this._limit ? this._limit.getLimit() : -1;
  179. };
  180. /**
  181. * Load a batch of documents from the Runner into the internal array
  182. *
  183. * @method loadBatch
  184. **/
  185. proto.loadBatch = function loadBatch(callback) {
  186. if (!this._runner) {
  187. this.dispose();
  188. return callback;
  189. }
  190. this._runner.restoreState();
  191. var self = this,
  192. whileBreak = false, // since we are in an async loop instead of a normal while loop, need to mimic the
  193. whileReturn = false; // functionality. These flags are similar to saying 'break' or 'return' from inside the loop
  194. return async.whilst(
  195. function test(){
  196. return !whileBreak && !whileReturn;
  197. },
  198. function(next) {
  199. return self._runner.getNext(function(err, obj, state){
  200. if (err) return next(err);
  201. if (state === Runner.RunnerState.RUNNER_ADVANCED) {
  202. if (self._dependencies) {
  203. self._currentBatch.push(self._dependencies.extractFields(obj));
  204. } else {
  205. self._currentBatch.push(obj);
  206. }
  207. if (self._limit) {
  208. if (++self._docsAddedToBatches === self._limit.getLimit()) {
  209. whileBreak = true;
  210. return next();
  211. }
  212. //this was originally a 'verify' in the mongo code
  213. if (self._docsAddedToBatches > self._limit.getLimit()){
  214. return next(new Error('documents collected past the end of the limit'));
  215. }
  216. }
  217. if (self._currentBatch.length >= klass.MaxDocumentsToReturnToClientAtOnce) {
  218. // End self batch and prepare Runner for yielding.
  219. self._runner.saveState();
  220. whileReturn = true;
  221. }
  222. } else {
  223. whileBreak = true;
  224. }
  225. return next();
  226. });
  227. },
  228. function(err){
  229. if (!whileReturn){
  230. self._runner.reset();
  231. }
  232. //this is a deviation to ensure that the callstack does not get too large if the Runner does things syncronously
  233. if (self._firstRun || !self._currentBatch.length){
  234. self._firstRun = false;
  235. callback(err);
  236. } else {
  237. setImmediate(function(){
  238. callback(err);
  239. });
  240. }
  241. }
  242. );
  243. };