小弟学node才不久,打算写个爬虫试试,本职工作是小前端一枚,具体情况是这样:我想爬某个网站的空气质量数据,测试每10秒钟刷新一次,请求代码: var http=require('http'); function download(url, callback) { http.get(url, function(res) { console.log(url); var data = ""; res.on('data', function (chunk) { console.log(data+'first'); //测试是否chunk没请求到 console.log(chunk); console.log(1); data += chunk; console.log(data+'two'); console.log(chunk); }); res.on("end", function() { console.log(2); callback(data); }); }).on("error", function(e) { callback(null); }); } exports.download = download; 然后请求到空数据时,日志前面打印为 first <Buffer 3c 73 63 72 69 70 74 3e 64 6f 63 75 6d 65 6e 74 2e 6c 6f 63 61 74 69 6f 6e 20 3d 20 22 68 74 74 70 3a 2f 2f 61 71 69 63 6e 2e 6f 72 67 2f 63 69 74 79 2f ...> 1
<script>document.location = "http://aqicn.org/city/beijing/";</script>two <Buffer 3c 73 63 72 69 70 74 3e 64 6f 63 75 6d 65 6e 74 2e 6c 6f 63 61 74 69 6f 6e 20 3d 20 22 68 74 74 70 3a 2f 2f 61 71 69 63 6e 2e 6f 72 67 2f 63 69 74 79 2f ...> 2 而正常情况是: first <Buffer 3c 73 63 72 69 70 74 3e 64 6f 63 75 6d 65 6e 74 2e 6c 6f 63 61 74 69 6f 6e 20 3d 20 22 68 74 74 70 3a 2f 2f 61 71 69 63 6e 2e 6f 72 67 2f 63 69 74 79 2f ...> 1 ----html文档,省略很多字-------- <Buffer------------------------> ----html文档,省略很多字------ <Buffer------------------------> 1 ----html文档,省略很多字-------- <Buffer------------------------> ----html文档,省略很多字------ <Buffer------------------------> ----------------- -------------- ---------- 第一次发帖,水平有限,求解答,表拍砖,我比较玻璃心哈!推荐使用这样的方式来获取,不要直接对Buffer相加。然后使用iconv(iconv-lite)之类的package进行编码
// Fetch a response body by collecting the raw Buffer chunks and decoding them
// once, after the whole body has arrived. Decoding only at the end means a
// multi-byte character split across two chunks is never decoded in halves.
// NOTE(review): assumes `http`, `options` and `iconv` (e.g. iconv-lite) are
// defined by the surrounding code -- they are not declared in this snippet.
var req = http.request(options, function(res) {
  var chunks = [];
  res.on('data', function(chunk) {
    // Keep the raw bytes; do not stringify per chunk.
    chunks.push(chunk);
  });
  res.on('end', function() {
    // Join all bytes first, then decode exactly once.
    var decodedBody = iconv.decode(Buffer.concat(chunks), 'utf8');
    console.log(decodedBody);
  });
});
// Report connection-level failures (DNS, refused connection, reset, ...);
// without a listener the 'error' event would be thrown as an exception.
req.on('error', function(err) {
  console.error(err);
});
// http.request() only prepares the request; it is not sent until end() is called.
req.end();
不知道這個可不可以 :
// Fetch a response body as text, decoding incrementally with StringDecoder.
// StringDecoder holds back the bytes of an incomplete multi-byte character
// until the rest arrives, so chunk boundaries cannot corrupt the text.
// NOTE(review): assumes `http` and `reqOptions` are defined by the surrounding
// code -- they are not declared in this snippet.
var StringDecoder = require('string_decoder').StringDecoder;
var textChunk = '';
var req = http.request(reqOptions, function(res) {
  var decoder = new StringDecoder('utf8');
  res.on('data', function(chunk) {
    textChunk += decoder.write(chunk);
  });
  res.on('end', function() {
    // Flush whatever the decoder is still buffering (e.g. a truncated trailing
    // character); after this point textChunk holds the complete body.
    textChunk += decoder.end();
  });
});
// Report connection-level failures; without a listener the 'error' event
// would be thrown as an exception.
req.on('error', function(err) {
  console.error(err);
});
// http.request() only prepares the request; it is not sent until end() is called.
req.end();
或者這樣?
// Appends the chunk, decoded as UTF-8, to the accumulated text.
// NOTE(review): presumably `chunk` is a Buffer delivered by a 'data' event --
// confirm against the enclosing handler. If so, decoding each chunk on its own
// is unsafe: a multi-byte UTF-8 character that straddles two chunks is decoded
// as replacement characters. Prefer StringDecoder#write, or collect the
// Buffers and decode once after 'end'.
textChunk += chunk.toString('utf8');