// Once the MongoDB connection has opened, define the schema and the
// `c_39_url` model used for a scraped question page.
db.once('open',function(){
    var m39 = mongoose.Schema({
        id:Number,// scraped address
        date:{type:Date,default:Date.now},
        question:{
            content:String,
            author:String
        },
        // NOTE(review): this declares a single nested `id` object; if a page
        // can carry several answers, an array of {content, author} may be
        // what is intended - confirm before saving documents.
        answer:{
            id:{
                content:String,
                author:String
            }
        },
        url:String
    })
    var c_39_url = mongoose.model('c_39_url',m39)
})
求改进
1 回复
完整代码
/**
* 采集39问答网
*/
var n = require('needle')
var $ = require('jquery').create()
var iconv = require('iconv').Iconv;
var lite = require('iconv-lite')
var mongoose = require('mongoose');
var async = require('async')
// Open the default mongoose connection to the local `cj` database.
mongoose.connect('mongodb://localhost/cj');
var db = mongoose.connection;
// Report connection failures on the console with a recognizable prefix.
db.on('error', console.error.bind(console, 'connection error:'));
// Scrape results filled in by `one`. `question` is only ever accessed through
// named keys (`title`, `question`), so it is a plain object rather than an
// array; `answer` collects one {content, author} entry per reply.
var question = {}
var answer = []
//一。采集第一集分类的链接地址
/**
 * needle response callback: decodes a GBK-encoded question page and extracts
 * the page title, the question and its answers.
 *
 * Results are written to the module-level `question` and `answer` variables.
 * The first `.tboxs` element is treated as the question, every later one as
 * an answer. Afterwards the mongoose schema/model for a scraped page is
 * defined as soon as the database connection is available.
 *
 * @param {Error|null} error - request error passed by the HTTP client, if any
 * @param {Object} response - response object (unused)
 * @param {string} body - raw page body as a binary string
 */
var one = function(error,response,body){
    // Bail out early instead of crashing while decoding a missing body.
    if(error || body == null)
    {
        console.error('request error:', error || 'empty response body')
        return
    }
    var content = new iconv('gbk','UTF-8//TRANSLIT//IGNORE').convert(new Buffer(body,'binary')).toString()
    // Parse the markup once and reuse the result for every lookup.
    var page = $(content)
    // page title
    question['title'] = page.find('title').text()
    $.each(page.find('.tboxs'),function(i,v){
        var box = $(v)
        // Strip the button bar, everything after it and the report link so
        // only the post content remains in `.tbrig`.
        box.find('.anniu').nextAll().remove()
        box.find('.tousu').remove()
        box.find('.anniu').remove()
        var cc = box.find('.tbrig').html()// content
        var a = box.find('.username').text()
        if(i === 0)
        {
            question['question'] = {content:cc,author:a}
        } else {
            answer.push({content:cc,author:a})
        }
    })
    console.log(question)
    var defineModel = function(){
        var m39 = mongoose.Schema({
            id:Number,// scraped address
            date:{type:Date,default:Date.now},
            question:{
                content:String,
                author:String
            },
            // NOTE(review): this declares a single nested `id` object while
            // `answer` above is collected as an array - confirm the intended
            // shape before saving documents.
            answer:{
                id:{
                    content:String,
                    author:String
                }
            },
            url:String
        })
        var c_39_url = mongoose.model('c_39_url',m39)
    }
    // 'open' is emitted only once per connection. If mongoose connected
    // before this HTTP response arrived, a listener registered now would
    // never fire, so run the setup directly in that case.
    if(db.readyState === 1)
    {
        defineModel()
    } else {
        db.once('open',defineModel)
    }
}
// Start the scrape: request the page body undecoded, as a binary string, so
// that `one` can convert it from GBK itself.
n.get(
    'http://ask.39.net/question/24749283.html',
    {
        decode_response:false,
        encoding:'binary'
    },
    one
)