Crawler only obtains part of the data when scraping

  node.js, question

The code source is node-lessons.

var eventproxy = require('eventproxy');
 var superagent = require('superagent');
 var cheerio = require('cheerio');
 var url = require('url');
 
 var cnodeUrl =  'https://cnodejs.org/' ;
 
 superagent.get(cnodeUrl)
 .end(function (err, res) {
 if (err) {
 return console.error(err);
 bracket
 var topicUrls = [];
 var $ = cheerio.load(res.text);
 $('#topic_list .topic_title').each(function (idx, element) {
 var $element = $(element);
 var href = url.resolve(cnodeUrl, $element.attr('href'));
 topicUrls.push(href);
 });
 
 var ep = new eventproxy();
 
 ep.after('topic_html', topicUrls.length, function (topics) {
 topics = topics.map(function (topicPair) {
 var topicUrl = topicPair[0];
 var topicHtml = topicPair[1];
 var $ = cheerio.load(topicHtml);
 return ({
 title: $('.topic_full_title').text().trim(),
 href: topicUrl,
 comment1: $('.reply_content').eq(0).text().trim(),
 });
 });
 
 console.log('final:');
 console.log(topics);
 });
 
 topicUrls.forEach(function (topicUrl) {
 superagent.get(topicUrl)
 .end(function (err, res) {
 Log ('fetch' plus topicUrl plus' successful');
 ep.emit('topic_html', [topicUrl, res.text]);
 });
 });
 });

This is the captured data. As you can see, every entry has a URL, but only some entries have comments while others have none.
[screenshot of the captured output]

The requests are being sent too fast, so add a pause between them. Print out the HTML you actually received for the failing topics — you will see the server refused those requests.