872 lines
31 KiB
JavaScript
872 lines
31 KiB
JavaScript
//let puppeteer = require('puppeteer');
|
|
let fs = require('fs');
|
|
let path = require('path');
|
|
let https = require('https');
|
|
let sep = path.sep;
|
|
let delays = {
|
|
baidu: {
|
|
base: 1 * 1000,
|
|
delay: 1 * 1000
|
|
},
|
|
bing: {
|
|
base: 1 * 1000,
|
|
delay: 1 * 1000
|
|
},
|
|
so360: {
|
|
base: 3 * 1000,
|
|
delay: 1 * 1000
|
|
},
|
|
google: {
|
|
base: 1.2 * 1000,
|
|
delay: 0.8 * 1000
|
|
}
|
|
};
|
|
|
|
let fixLessNums = {
|
|
baidu: [0, 100],
|
|
google: [0, 500],
|
|
bing: [0, 200],//
|
|
bing_en: [0, 200],
|
|
so360: [0, 200]
|
|
};
|
|
let bingSpecialNumbers = {
|
|
// 12300: 1,
|
|
// 12400: 1,
|
|
// 12500: 1,
|
|
// 12600: 1,
|
|
// 12700: 1,
|
|
// 12800: 1,
|
|
// 12900: 1
|
|
};
|
|
let Zeros = {};
|
|
let ZerosFile = {};
|
|
let format = (type, complete, total, zero, tail) => {
|
|
let diff = 6 - type.length;
|
|
let pad = '';
|
|
while (diff--) {
|
|
pad += ' ';
|
|
}
|
|
type = pad + type;
|
|
diff = 4 - String(complete).length;
|
|
pad = '';
|
|
while (diff--) {
|
|
pad += ' ';
|
|
}
|
|
complete = pad + complete;
|
|
diff = 4 - String(total).length;
|
|
pad = '';
|
|
while (diff--) {
|
|
pad += ' ';
|
|
}
|
|
total = pad + total;
|
|
return `${type}:${complete}/${total} [${zero}]${tail ? ' ' + tail : ''}`;
|
|
};
|
|
|
|
let canVisitGoogle = false;
|
|
let checkGoogle = async () => {
|
|
return new Promise(resolve => {
|
|
setTimeout(resolve, 3000);
|
|
https.get('https://www.google.com/search?q=xinglie', res => {
|
|
if (res.statusCode == 200) {
|
|
canVisitGoogle = true;
|
|
}
|
|
resolve();
|
|
}).on('error', resolve);
|
|
});
|
|
};
|
|
//使用chrome模拟访问
|
|
let headless = {
|
|
before() {
|
|
return new Promise(async resolve => {
|
|
if (!this.$local) {
|
|
let brower = await puppeteer.launch({
|
|
headless: false,
|
|
defaultViewport: {
|
|
width: 1440,
|
|
height: 900
|
|
}
|
|
});
|
|
this.$local = brower;
|
|
let pages = await brower.pages();
|
|
let ps = [],
|
|
diff = 5 - pages.length;
|
|
while (diff--) {
|
|
ps.push(brower.newPage());
|
|
}
|
|
Promise.all(ps).then(resolve);
|
|
} else {
|
|
resolve();
|
|
}
|
|
});
|
|
},
|
|
async toUrl(url, pi, kd) {
|
|
let text = '',
|
|
retry = 3;
|
|
let sleep = time => {
|
|
return new Promise(resolve => {
|
|
setTimeout(resolve, time);
|
|
});
|
|
}
|
|
let request = async () => {
|
|
let local = this.$local;
|
|
let pages = await local.pages();
|
|
let page = pages[pi];
|
|
let d = await page.goto(url, {
|
|
waitUntil: 'domcontentloaded',
|
|
timeout: 10000
|
|
});
|
|
//console.log(url)
|
|
//let cookies = await page.cookies();
|
|
//await page.deleteCookie(...cookies);
|
|
if (url.includes('baidu.com')) {
|
|
if (!headless.$btf) {
|
|
await page.bringToFront();
|
|
headless.$btf = true;
|
|
}
|
|
await page.waitFor('#kw');
|
|
await page.type('#kw', decodeURIComponent(kd), { delay: 60 });
|
|
await sleep(1000 + (canVisitGoogle ? 0 : 3000));
|
|
|
|
text = await page.$eval('body', e => e.innerHTML);
|
|
let nums = 0;
|
|
text = String(text).replace(/<(script|style)[^>]*>[\s\S]*?<\/\1>/g, '');
|
|
text.replace(/百度为您找到相关结果约([\d,]+)个/, (_, m) => {
|
|
nums = parseInt(m.replace(/,/g, ''), 10);
|
|
});
|
|
if (nums === 0) {
|
|
input = await page.$('#su');
|
|
input.click();
|
|
await sleep(1000 + (canVisitGoogle ? 0 : 4000));
|
|
text = await page.$eval('body', e => e.innerHTML);
|
|
} else {
|
|
//console.log('baidu auto query');
|
|
}
|
|
//console.log(text);
|
|
} else if (url.includes('bing.com')) {
|
|
await sleep(canVisitGoogle ? 300 : 1000);
|
|
text = await page.$eval('body', e => e.innerHTML);
|
|
} else {
|
|
text = await d.text();
|
|
}
|
|
};
|
|
do {
|
|
try {
|
|
await request();
|
|
break;
|
|
} catch (ex) {
|
|
await sleep(5000);
|
|
console.log('retry', retry, ex);
|
|
retry--;
|
|
}
|
|
} while (retry);
|
|
return text;
|
|
},
|
|
after() {
|
|
if (this.$local) {
|
|
this.$local.close();
|
|
delete this.$local;
|
|
if (this.$browsers) {
|
|
for (let b of this.$browsers) {
|
|
b.close();
|
|
}
|
|
delete this.$browsers;
|
|
}
|
|
}
|
|
}
|
|
};
|
|
let rank = {
|
|
remote(kd, type) {
|
|
return new Promise(async r => {
|
|
let maps = {
|
|
baidu: {
|
|
url: `https://www.baidu.com/`,
|
|
reg: /百度为您找到相关结果约([\d,]+)个/,
|
|
page: 0,
|
|
},
|
|
so360: {
|
|
url: `https://www.so.com/s?q=${kd}`,
|
|
reg: /找到相关结果约([\d,]+)个/,
|
|
page: 1,
|
|
},
|
|
google: {
|
|
url: `https://www.google.com/search?q=${kd}`,
|
|
reg: /(?:找到约|About)?\s*([\d,]+)\s*(?:条结果|results)/,
|
|
page: 2
|
|
},
|
|
bing: {
|
|
url: `https://cn.bing.com/search?q=${kd}&FORM=BESBTB&ensearch=0`,
|
|
reg: />([\d,]+)\s*(?:results|条结果)<\/span>/i,
|
|
page: 3
|
|
},
|
|
bing_en: {
|
|
url: `https://cn.bing.com/search?q=${kd}&FORM=BESBTB&ensearch=1`,
|
|
reg: />([\d,]+)\s*(?:results|条结果)<\/span>/i,
|
|
page: 4
|
|
}
|
|
};
|
|
let i = maps[type];
|
|
let ii = maps.bing_en;
|
|
let [text, newText] = await Promise.all([headless.toUrl(i.url, i.page, kd), type == 'bing' ? headless.toUrl(ii.url, ii.page, kd) : Promise.resolve('')]);
|
|
let nums = 0;
|
|
text = String(text).replace(/<(script|style)[^>]*>[\s\S]*?<\/\1>/g, '');
|
|
text.replace(i.reg, (_, m) => {
|
|
nums = parseInt(m.replace(/,/g, ''), 10);
|
|
});
|
|
let enNum = 0;
|
|
if (type == 'bing') {
|
|
newText.replace(ii.reg, (_, m) => {
|
|
enNum = parseInt(m.replace(/,/g, ''), 10);
|
|
});
|
|
}
|
|
let res = {
|
|
[type]: nums
|
|
};
|
|
if (type == 'bing') {
|
|
res.bing_en = enNum;
|
|
}
|
|
r(res);
|
|
});
|
|
},
|
|
read(file) {
|
|
let c = fs.readFileSync(file);
|
|
return c.toString();
|
|
},
|
|
list(folder, callback) {
|
|
if (fs.existsSync(folder)) {
|
|
let files = fs.readdirSync(folder);
|
|
files.forEach(file => {
|
|
let p = folder + sep + file;
|
|
let stat = fs.lstatSync(p);
|
|
if (stat.isDirectory()) {
|
|
//walk(p, callback);
|
|
} else {
|
|
callback(p);
|
|
}
|
|
});
|
|
}
|
|
},
|
|
write(to, content) {
|
|
let folders = path.dirname(to).split(sep);
|
|
let p = '';
|
|
while (folders.length) {
|
|
p += folders.shift() + sep;
|
|
if (!fs.existsSync(p)) {
|
|
fs.mkdirSync(p);
|
|
}
|
|
}
|
|
fs.writeFileSync(to, content);
|
|
}
|
|
};
|
|
|
|
let ciReg = /ci\.song\.\d+y?\.json$/;
|
|
let ciTask = async () => {
|
|
return new Promise(resolve => {
|
|
let readList = {};
|
|
let caches = {};
|
|
let cList = ['so360', 'baidu'];
|
|
if (canVisitGoogle) {
|
|
cList.push('google', 'bing');
|
|
}
|
|
for (let c of cList) {
|
|
caches[c] = Object.create(null);
|
|
}
|
|
rank.list('../ci', f => {
|
|
if (ciReg.test(f)) {
|
|
readList[path.resolve(f)] = 1;
|
|
}
|
|
});
|
|
for (let c of cList) {
|
|
if (fs.existsSync('./s.' + c + '.cache')) {
|
|
let d = rank.read('./s.' + c + '.cache');
|
|
let j = JSON.parse(d);
|
|
if (!caches[c]) caches[c] = {};
|
|
Object.assign(caches[c], j);
|
|
}
|
|
}
|
|
let loadList = Object.keys(readList);
|
|
let singleWork = (file, type) => {
|
|
let ranks = [];
|
|
let zeros = [];
|
|
return new Promise(resolve => {
|
|
let list = JSON.parse(rank.read(file));
|
|
let start = 0;
|
|
let task = async () => {
|
|
if (start < list.length) {
|
|
let r = list[start];
|
|
let kd = encodeURIComponent(`${r.author} ${r.rhythmic}`);
|
|
let data,
|
|
delay = 0;
|
|
if (caches[type][kd]) {
|
|
data = caches[type][kd];
|
|
} else {
|
|
data = await rank.remote(kd, type);
|
|
data.author = r.author;
|
|
data.rhythmic = r.rhythmic;
|
|
caches[type][kd] = data;
|
|
let dl = delays[type];
|
|
delay = dl.base + dl.delay * Math.random();
|
|
if (start && ((start % 2) === 0)) {
|
|
rank.write('./s.' + type + '.cache', JSON.stringify(caches[type]));
|
|
}
|
|
}
|
|
let less = fixLessNums[type],
|
|
bingEnLess = fixLessNums.bing_en;
|
|
if ((data[type] >= less[0] &&
|
|
data[type] <= less[1]) ||
|
|
(type == 'bing' &&
|
|
data.bing_en <= bingEnLess[1] &&
|
|
data.bing_en >= bingEnLess[0])) {
|
|
zeros.push(data);
|
|
Zeros[type]++;
|
|
}
|
|
ranks.push(data);
|
|
let tail = `(${data[type]}`;
|
|
if (type == 'bing') {
|
|
tail += `,${data.bing_en}`;
|
|
}
|
|
tail += ')';
|
|
console.log(format(type, start, list.length, Zeros[type], tail));
|
|
start++;
|
|
setTimeout(task, delay);
|
|
} else {
|
|
rank.write('./s.' + type + '.cache', JSON.stringify(caches[type]));
|
|
resolve([ranks, zeros]);
|
|
}
|
|
};
|
|
task();
|
|
});
|
|
};
|
|
let finised = {};
|
|
let check = () => {
|
|
let all = true;
|
|
for (let c of cList) {
|
|
if (finised[c] !== true) {
|
|
all = false;
|
|
}
|
|
}
|
|
if (all) {
|
|
resolve();
|
|
}
|
|
};
|
|
let work = async (type, index, list) => {
|
|
let one = list[index];
|
|
if (one) {
|
|
console.log(`${type} current ${index} ,total:${list.length}`);
|
|
let f = path.basename(one);
|
|
let aim = f.replace('ci.song', 'ci.song.rank.' + type);
|
|
if (!fs.existsSync('./ci_temp/' + aim)) {
|
|
//let zeroAim = f.replace(/poet\.(song|tang)/, 'poet.$1.zero.' + type);
|
|
|
|
Zeros[type] = 0;
|
|
let [ranks, zeros] = await singleWork(one, type);
|
|
rank.write('./ci_temp/' + aim, JSON.stringify(ranks, null, 4));
|
|
// if (zeros.length) {
|
|
// rank.write('./poet/' + zeroAim, JSON.stringify(zeros, null, 4));
|
|
// }
|
|
|
|
if (Zeros[type] > 0) {
|
|
if (!ZerosFile[type]) {
|
|
ZerosFile[type] = {};
|
|
}
|
|
ZerosFile[type][aim] = Zeros[type];
|
|
rank.write('./s.zero.cache', JSON.stringify(ZerosFile, null, 4));
|
|
}
|
|
caches[type] = Object.create(null);//文件写入后,清理缓存
|
|
setTimeout(() => {
|
|
work(type, index + 1, list);
|
|
}, 2 * 1000);
|
|
} else {
|
|
console.log('ignore ' + aim);
|
|
work(type, index + 1, list);
|
|
}
|
|
} else {
|
|
finised[type] = true;
|
|
check();
|
|
}
|
|
};
|
|
for (let c of cList) {
|
|
work(c, 0, loadList);
|
|
}
|
|
check();
|
|
});
|
|
};
|
|
let poetReg = /poet\.(song|tang)\.(\d+)\.json$/;
|
|
let poetTask = async () => {
|
|
return new Promise(resolve => {
|
|
let readList = {};
|
|
let caches = {};
|
|
let cList = ['so360', 'baidu', 'bing'];
|
|
if (canVisitGoogle) {
|
|
cList.push('google');
|
|
if (!cList.includes('baidu')) {
|
|
cList.push('baidu');
|
|
}
|
|
}
|
|
|
|
for (let c of cList) {
|
|
caches[c] = Object.create(null);
|
|
}
|
|
rank.list('../json', f => {
|
|
if (poetReg.test(f)) {
|
|
readList[path.resolve(f)] = 1;
|
|
}
|
|
});
|
|
for (let c of cList) {
|
|
if (fs.existsSync('./s.' + c + '.cache')) {
|
|
let d = rank.read('./s.' + c + '.cache');
|
|
let j = JSON.parse(d);
|
|
if (!caches[c]) caches[c] = {};
|
|
Object.assign(caches[c], j);
|
|
}
|
|
}
|
|
let loadList = Object.keys(readList);
|
|
let singleWork = (file, type) => {
|
|
let ranks = [];
|
|
let zeros = [];
|
|
return new Promise(resolve => {
|
|
let list = JSON.parse(rank.read(file));
|
|
let start = 0;
|
|
let task = async () => {
|
|
if (start < list.length) {
|
|
let r = list[start];
|
|
let kd = encodeURIComponent(`${r.author} ${r.title}`);
|
|
let data,
|
|
delay = 0
|
|
if (caches[type][kd]) {
|
|
data = caches[type][kd];
|
|
//console.log(`${type} from cache`);
|
|
} else {
|
|
data = await rank.remote(kd, type);
|
|
data.author = r.author;
|
|
data.title = r.title;
|
|
caches[type][kd] = data;
|
|
let dl = delays[type];
|
|
delay = dl.base + dl.delay * Math.random();
|
|
|
|
if (start && ((start % 2) === 0)) {
|
|
rank.write('./s.' + type + '.cache', JSON.stringify(caches[type]));
|
|
}
|
|
}
|
|
let less = fixLessNums[type],
|
|
bingEnLess = fixLessNums.bing_en;
|
|
if ((data[type] >= less[0] &&
|
|
data[type] <= less[1]) ||
|
|
(type == 'bing' &&
|
|
data.bing_en <= bingEnLess[1] &&
|
|
data.bing_en >= bingEnLess[0])) {
|
|
zeros.push(data);
|
|
Zeros[type]++;
|
|
}
|
|
ranks.push(data);
|
|
let tail = `(${data[type]}`;
|
|
if (type == 'bing') {
|
|
tail += `,${data.bing_en}`;
|
|
}
|
|
tail += ')';
|
|
console.log(format(type, start, list.length, Zeros[type], tail));
|
|
start++;
|
|
setTimeout(task, delay);
|
|
} else {
|
|
rank.write('./s.' + type + '.cache', JSON.stringify(caches[type]));
|
|
resolve([ranks, zeros]);
|
|
}
|
|
};
|
|
task();
|
|
});
|
|
};
|
|
let finised = {};
|
|
let check = () => {
|
|
let all = true;
|
|
for (let c of cList) {
|
|
if (finised[c] !== true) {
|
|
all = false;
|
|
}
|
|
}
|
|
if (all) {
|
|
resolve();
|
|
}
|
|
};
|
|
let work = async (type, index, list) => {
|
|
let one = list[index];
|
|
if (one) {
|
|
console.log(`${type} current ${index} ,total:${list.length}`);
|
|
let f = path.basename(one);
|
|
let aim = f.replace(/poet\.(song|tang)/, 'poet.$1.rank.' + type);
|
|
if (!fs.existsSync('./poet_temp/' + aim)) {
|
|
console.log('dest', aim);
|
|
//let zeroAim = f.replace(/poet\.(song|tang)/, 'poet.$1.zero.' + type);
|
|
Zeros[type] = 0;
|
|
let [ranks, zeros] = await singleWork(one, type);
|
|
rank.write('./poet_temp/' + aim, JSON.stringify(ranks, null, 4));
|
|
if (Zeros[type] > 0) {
|
|
if (!ZerosFile[type]) {
|
|
ZerosFile[type] = {};
|
|
}
|
|
ZerosFile[type][aim] = Zeros[type];
|
|
rank.write('./s.zero.cache', JSON.stringify(ZerosFile, null, 4));
|
|
}
|
|
// if (zeros.length) {
|
|
// rank.write('./poet/' + zeroAim, JSON.stringify(zeros, null, 4));
|
|
// }
|
|
caches[type] = Object.create(null);//文件写入后,清理缓存
|
|
setTimeout(() => {
|
|
work(type, index + 1, list);
|
|
}, 2 * 1000);
|
|
} else {
|
|
console.log('ignore ' + aim);
|
|
work(type, index + 1, list);
|
|
}
|
|
} else {
|
|
finised[type] = true;
|
|
check();
|
|
}
|
|
};
|
|
for (let c of cList) {
|
|
work(c, 0, loadList);
|
|
}
|
|
check();
|
|
});
|
|
};
|
|
|
|
let runLessNumberTask = async (cat = 'poet') => {
|
|
let caches = Object.create(null);
|
|
let writeCtrl = 0;
|
|
return new Promise(resolve => {
|
|
let cList = ['so360'];//'bing','baidu', 'so360',
|
|
if (canVisitGoogle) {
|
|
cList.push('google', 'baidu', 'bing');
|
|
}
|
|
for (let c of cList) {
|
|
caches[c] = Object.create(null);
|
|
}
|
|
let taskList = {};
|
|
let taskIndex = {};
|
|
rank.list(`./${cat}_temp`, f => {
|
|
for (let c of cList) {
|
|
if (f.includes(`.${c}.`)) {
|
|
if (!taskList[c]) {
|
|
taskList[c] = [];
|
|
}
|
|
taskList[c].push(f);
|
|
}
|
|
}
|
|
});
|
|
for (let c of cList) {
|
|
if (fs.existsSync('./s.' + c + '.index.cache')) {
|
|
let d = rank.read('./s.' + c + '.index.cache');
|
|
let i = d.split(',');
|
|
taskIndex[c] = [parseInt(i[0], 10), parseInt(i[1], 10)];
|
|
} else {
|
|
taskIndex[c] = [0, 0];
|
|
}
|
|
}
|
|
let singleWork = (file, type, start) => {
|
|
let oldCount = 0;
|
|
return new Promise(resolve => {
|
|
let list = JSON.parse(rank.read(file));
|
|
let task = async () => {
|
|
if (start < list.length) {
|
|
let r = list[start];
|
|
let less = fixLessNums[type],
|
|
bingEnLess = fixLessNums.bing_en;
|
|
if ((r[type] >= less[0] &&
|
|
r[type] <= less[1]) ||
|
|
(type == 'bing' &&
|
|
((r.bing_en >= bingEnLess[0] &&
|
|
r.bing_en <= bingEnLess[1]) ||
|
|
bingSpecialNumbers[r[type]] === 1 ||
|
|
bingSpecialNumbers[r.bing_en] === 1)
|
|
)
|
|
) {
|
|
writeCtrl++;
|
|
oldCount++;
|
|
//console.log(type, 'checked zero at', start);
|
|
let title = cat == 'ci' ? r.rhythmic : r.title;
|
|
let kd = encodeURIComponent(`${r.author} ${title}`);
|
|
let data,
|
|
delay = 0,
|
|
old = r[type],
|
|
old_en = r.bing_en;
|
|
if (caches[type][kd]) {
|
|
data = caches[type][kd];
|
|
} else {
|
|
data = await rank.remote(kd, type);
|
|
data.author = r.author;
|
|
if (cat == 'poet') {
|
|
data.title = r.title;
|
|
} else {
|
|
data.rhythmic = r.rhythmic;
|
|
}
|
|
caches[type][kd] = data;
|
|
}
|
|
let dl = delays[type];
|
|
delay = dl.base + dl.delay * Math.random();
|
|
if (data[type] > old) {
|
|
r[type] = data[type];
|
|
} else if (type != 'bing' ||
|
|
data.bing_en <= r.bing_en) {
|
|
Zeros[type]++;
|
|
}
|
|
if (type == 'bing' &&
|
|
(data.bing_en > r.bing_en ||
|
|
bingSpecialNumbers[r.bing_en] === 1)) {
|
|
r.bing_en = data.bing_en;
|
|
}
|
|
if (writeCtrl % 3 === 0) {
|
|
rank.write(file, JSON.stringify(list, null, 4));
|
|
rank.write('./s.' + type + '.index.cache', [taskIndex[type][0], start]);
|
|
writeCtrl = 0;
|
|
}
|
|
let tail = '(' + old + '->' + r[type] + '[' + data[type] + ']';
|
|
if (type == 'bing') {
|
|
tail += ',' + old_en + '->' + r.bing_en + '[' + data.bing_en + ']';
|
|
}
|
|
tail += ')';
|
|
console.log(format(type, start, list.length, oldCount + '=>' + Zeros[type], tail));
|
|
start++;
|
|
setTimeout(task, delay);
|
|
} else {
|
|
start++;
|
|
task();
|
|
}
|
|
} else {
|
|
resolve(list);
|
|
}
|
|
};
|
|
task();
|
|
});
|
|
};
|
|
let finised = {};
|
|
let check = () => {
|
|
let all = true;
|
|
for (let c of cList) {
|
|
if (finised[c] !== true) {
|
|
all = false;
|
|
}
|
|
}
|
|
if (all) {
|
|
resolve();
|
|
}
|
|
};
|
|
let work = async type => {
|
|
let list = taskList[type];
|
|
let index = taskIndex[type];
|
|
let one = list[index[0]];
|
|
let start = index[1];
|
|
if (one) {
|
|
console.log(one);
|
|
console.log(`${type} current ${index[0]} ,total:${list.length}`);
|
|
Zeros[type] = 0;
|
|
let ranks = await singleWork(one, type, start);
|
|
rank.write(one, JSON.stringify(ranks, null, 4));
|
|
//caches[type] = Object.create(null);//文件写入后,清理缓存
|
|
taskIndex[type][0]++;
|
|
taskIndex[type][1] = 0;
|
|
rank.write('./s.' + type + '.index.cache', [taskIndex[type][0], 0]);
|
|
caches[type] = Object.create(null);
|
|
setTimeout(() => {
|
|
work(type);
|
|
}, 1000);
|
|
} else {
|
|
finised[type] = true;
|
|
rank.write('./s.' + type + '.index.cache', [taskIndex[type][0], 0]);
|
|
console.log(`${type} finished`);
|
|
check();
|
|
}
|
|
};
|
|
for (let c of cList) {
|
|
work(c);
|
|
}
|
|
check();
|
|
});
|
|
};
|
|
let split = () => {
|
|
let keys = ['baidu', 'so360', 'bing', 'google'];
|
|
rank.list('./poet', f => {
|
|
let b = path.basename(f);
|
|
let d = JSON.parse(rank.read(f));
|
|
for (let key of keys) {
|
|
let aim = b.replace(/poet\.(song|tang).rank/, 'poet.$1.rank.' + key);
|
|
let newList = [];
|
|
for (let e of d) {
|
|
newList.push({
|
|
author: e.author,
|
|
title: e.title,
|
|
[key]: e[key]
|
|
});
|
|
}
|
|
rank.write('./poet2/' + aim, JSON.stringify(newList, null, 4));
|
|
}
|
|
});
|
|
};
|
|
let mergeCi = () => {
|
|
let readList = {};
|
|
let cList = ['baidu', 'so360', 'bing', 'google'];
|
|
rank.list('../ci', f => {
|
|
if (ciReg.test(f)) {
|
|
readList[path.resolve(f)] = 1;
|
|
}
|
|
});
|
|
let loadList = Object.keys(readList);
|
|
for (let ll of loadList) {
|
|
let f = path.basename(ll);
|
|
let aim = f.replace('ci.song', 'ci.song.rank');
|
|
let zero = f.replace('ci.song', 'ci.song.zero');
|
|
let newList = [],
|
|
zeros = [],
|
|
canMerge = true;
|
|
for (let c of cList) {
|
|
let src = f.replace('ci.song', 'ci.song.rank.' + c);
|
|
if (fs.existsSync('./ci_temp/' + src)) {
|
|
let d = rank.read('./ci_temp/' + src);
|
|
let j = JSON.parse(d);
|
|
for (let i = j.length; i--;) {
|
|
newList[i] = Object.assign(newList[i] || {}, j[i]);
|
|
}
|
|
} else {
|
|
canMerge = false;
|
|
}
|
|
}
|
|
if (canMerge) {
|
|
rank.write('./ci/' + aim, JSON.stringify(newList, null, 4));
|
|
for (let e of newList) {
|
|
let hasZero = false;
|
|
for (let p in e) {
|
|
if (e[p] === 0) {
|
|
hasZero = true;
|
|
}
|
|
}
|
|
if (hasZero) {
|
|
zeros.push(e);
|
|
}
|
|
}
|
|
if (zeros.length) {
|
|
rank.write('./ci/' + zero, JSON.stringify(zeros, null, 4));
|
|
}
|
|
}
|
|
}
|
|
};
|
|
let mergePoet = () => {
|
|
let readList = {};
|
|
let cList = ['baidu', 'so360', 'bing', 'google'];
|
|
rank.list('../json', f => {
|
|
if (poetReg.test(f)) {
|
|
readList[path.resolve(f)] = 1;
|
|
}
|
|
});
|
|
let loadList = Object.keys(readList);
|
|
for (let ll of loadList) {
|
|
let f = path.basename(ll);
|
|
let aim = f.replace(/poet\.(song|tang)/, 'poet.$1.rank');
|
|
let zero = f.replace(/poet\.(song|tang)/, 'poet.$1.zero');
|
|
let newList = [],
|
|
canMerge = true,
|
|
zeros = [];
|
|
for (let c of cList) {
|
|
let src = f.replace(/poet\.(song|tang)/, 'poet.$1.rank.' + c);
|
|
if (fs.existsSync('./poet_temp/' + src)) {
|
|
let d = rank.read('./poet_temp/' + src);
|
|
let j = JSON.parse(d);
|
|
for (let i = j.length; i--;) {
|
|
if (newList[i]) {
|
|
if (newList[i].author != j[i].author ||
|
|
newList[i].title != j[i].title) {
|
|
throw new Error(`bad info ${src} at ${i}`);
|
|
}
|
|
}
|
|
newList[i] = Object.assign(newList[i] || {}, j[i]);
|
|
}
|
|
} else {
|
|
canMerge = false;
|
|
}
|
|
}
|
|
if (canMerge) {
|
|
rank.write('./poet/' + aim, JSON.stringify(newList, null, 4));
|
|
// for (let e of newList) {
|
|
// let hasZero = false;
|
|
// for (let p in e) {
|
|
// if (e[p] === 0) {
|
|
// hasZero = true;
|
|
// }
|
|
// }
|
|
// if (hasZero) {
|
|
// zeros.push(e);
|
|
// }
|
|
// }
|
|
// if (zeros.length) {
|
|
// rank.write('./poet/' + zero, JSON.stringify(zeros, null, 4));
|
|
// }
|
|
}
|
|
}
|
|
};
|
|
|
|
let outputBingNumbers = () => {
|
|
let numbers = {};
|
|
let result = [];
|
|
rank.list('./poet_temp', f => {
|
|
if (f.includes('rank.bing')) {
|
|
let list = JSON.parse(rank.read(f));
|
|
for (let e of list) {
|
|
if (!numbers[e.bing]) {
|
|
numbers[e.bing] = 0;
|
|
}
|
|
numbers[e.bing]++;
|
|
if (!numbers[e.bing_en]) {
|
|
numbers[e.bing_en] = 0;
|
|
}
|
|
numbers[e.bing_en]++;
|
|
}
|
|
}
|
|
});
|
|
for (let num in numbers) {
|
|
if (numbers[num] > 1 && num > 10000) {
|
|
result.push({
|
|
num,
|
|
count: numbers[num]
|
|
});
|
|
}
|
|
}
|
|
result = result.sort((b, a) => a.count - b.count);
|
|
console.log(result);
|
|
};
|
|
|
|
let splitStrains = () => {
|
|
rank.list('../json', f => {
|
|
if (poetReg.test(f)) {
|
|
let file = path.basename(f);
|
|
let list = JSON.parse(rank.read(f));
|
|
let aims = JSON.parse(rank.read(`../strains/json/${file}`));
|
|
let index = 0;
|
|
for (let e of list) {
|
|
let strain = aims[index];
|
|
aims[index] = {
|
|
strains: strain,
|
|
id: e.id
|
|
};
|
|
index++;
|
|
//delete e.strains;
|
|
}
|
|
//rank.write(`../json/${file}`, JSON.stringify(list, null, 4));
|
|
rank.write(`../strains/json/${file}`, JSON.stringify(aims, null, 4));
|
|
}
|
|
});
|
|
};
|
|
(async () => {
|
|
splitStrains();
|
|
//mergeCi();
|
|
//merge();
|
|
//await ciTask();
|
|
//mergeCi();
|
|
//outputBingNumbers();
|
|
//await headless.before();
|
|
//console.log('check google.com');
|
|
//await checkGoogle();
|
|
//console.log('google.com', canVisitGoogle);
|
|
//await ciTask();
|
|
//await Promise.all([ciTask(), runLessNumberTask('ci')]);
|
|
//await runLessNumberTask('ci');
|
|
//await runLessNumberTask();
|
|
//headless.after();
|
|
//mergePoet();
|
|
//mergePoet();
|
|
})(); |