CouchDB-style MapReduce
usage: word count example
// Word-count example for the MapReduce API sketched above.
var fs = require('fs');
var wordCounter = new MapReduce({
  // Map: tokenize each chunk on runs of non-word chars / digits and
  // emit (word, 1) for every non-empty token.
  map: function(chunk){
    chunk.toString().split(/\W+|\d+/).forEach(function(word){
      word && this.emit(word.toLowerCase(), 1);
    }, this); // thisArg so emit() targets the MapReduce instance — TODO confirm emit semantics
  },
  // Reduce: collapse the emitted 1s for each word into a count.
  reduce: function(key, values){
    return this.count(values);
  },
  // NOTE: do not pass fs.createReadStream directly to .map — Array#map calls
  // the callback with (value, index, array), so the numeric index would be
  // misinterpreted as createReadStream's `options` argument. Wrap it instead.
  inputs: fs.readdirSync('./').map(function(file){
    return fs.createReadStream(file);
  }),
  fork: false // should each input fork a cluster.worker for its map job or not
});
wordCounter.run(function(result){
  console.log(result);
});
Further thoughts:
- should reduce run incrementally during mapping rather than waiting until mapping is done?
- use Node.js ChildProcess/Cluster fork to run the map/reduce jobs?
- for processing and generating large data sets with a parallel, distributed algorithm on a cluster, consider Hadoop instead