35
rank/README.md
Normal file
35
rank/README.md
Normal file
@ -0,0 +1,35 @@
|
||||
诗词搜索结果
|
||||
-----
|
||||
|
||||
根据 issue [关于诗词的知名度](https://github.com/chinese-poetry/chinese-poetry/issues/115) 整理的数据:以“作者+作品名”作为搜索关键字,通过搜索引擎搜索,并把搜索结果数量作为排行的一个维度制作而成。目前收集了“百度、必应、360搜索和谷歌”等`4`个引擎的数据。
|
||||
|
||||
一般来讲,搜索结果越多,表示该诗词越知名。
|
||||
|
||||
## 说明
|
||||
|
||||
该目录下收集了`ci`和`json`两个文件夹下的诗词搜索数据,且该文件夹下的文件与`ci`和`json`中的文件一一对应,比如`./ci/ci.song.rank.8000.json`对应`../ci/ci.song.8000.json`。
|
||||
|
||||
不但文件一一对应,文件中的内容也是一一对应的,即`./ci/ci.song.rank.8000.json`中数组中的第`n`条和`../ci/ci.song.8000.json`数组中的第`n`条对应。
|
||||
|
||||
Q: 为什么没有把该结果和诗词数据放在一起?
|
||||
A: 为了保持诗词数据的纯洁,并非所有数据都适合塞进去。对于这种并非所有人都需要的数据,通过一一对应关系,可以很方便地给原诗词扩展数据,同时也保证了原诗词的纯洁度。所以当需要根据知名度对诗词排序时,把该数据附加到原有诗词数据上即可。
|
||||
|
||||
|
||||
## 数据形式
|
||||
|
||||
每个 JSON 文件通常包含 1000 条记录(最后一个文件不足 1000 条)。为了举例,下面只保留 1 条,省略了其余记录。
|
||||
|
||||
```js
|
||||
[
|
||||
{
|
||||
"author": "石孝友",
|
||||
"rhythmic": "玉楼春",
|
||||
"baidu": 77500, //百度搜索结果条数
|
||||
"so360": 1060,//360搜索结果条数
|
||||
"google": 991000,//谷歌搜索结果条数
|
||||
"bing": 20//必应搜索结果条数
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
搜索引擎的结果数据仅供参考,不同时间搜出来的数据未必一致
|
||||
8002
rank/ci/ci.song.rank.0.json
Normal file
8002
rank/ci/ci.song.rank.0.json
Normal file
File diff suppressed because it is too large
Load Diff
8002
rank/ci/ci.song.rank.1000.json
Normal file
8002
rank/ci/ci.song.rank.1000.json
Normal file
File diff suppressed because it is too large
Load Diff
8002
rank/ci/ci.song.rank.10000.json
Normal file
8002
rank/ci/ci.song.rank.10000.json
Normal file
File diff suppressed because it is too large
Load Diff
8002
rank/ci/ci.song.rank.11000.json
Normal file
8002
rank/ci/ci.song.rank.11000.json
Normal file
File diff suppressed because it is too large
Load Diff
8010
rank/ci/ci.song.rank.12000.json
Normal file
8010
rank/ci/ci.song.rank.12000.json
Normal file
File diff suppressed because it is too large
Load Diff
7986
rank/ci/ci.song.rank.13000.json
Normal file
7986
rank/ci/ci.song.rank.13000.json
Normal file
File diff suppressed because it is too large
Load Diff
8002
rank/ci/ci.song.rank.14000.json
Normal file
8002
rank/ci/ci.song.rank.14000.json
Normal file
File diff suppressed because it is too large
Load Diff
8002
rank/ci/ci.song.rank.15000.json
Normal file
8002
rank/ci/ci.song.rank.15000.json
Normal file
File diff suppressed because it is too large
Load Diff
7986
rank/ci/ci.song.rank.16000.json
Normal file
7986
rank/ci/ci.song.rank.16000.json
Normal file
File diff suppressed because it is too large
Load Diff
7904
rank/ci/ci.song.rank.17000.json
Normal file
7904
rank/ci/ci.song.rank.17000.json
Normal file
File diff suppressed because it is too large
Load Diff
8001
rank/ci/ci.song.rank.18000.json
Normal file
8001
rank/ci/ci.song.rank.18000.json
Normal file
File diff suppressed because it is too large
Load Diff
8026
rank/ci/ci.song.rank.19000.json
Normal file
8026
rank/ci/ci.song.rank.19000.json
Normal file
File diff suppressed because it is too large
Load Diff
7999
rank/ci/ci.song.rank.2000.json
Normal file
7999
rank/ci/ci.song.rank.2000.json
Normal file
File diff suppressed because it is too large
Load Diff
8002
rank/ci/ci.song.rank.20000.json
Normal file
8002
rank/ci/ci.song.rank.20000.json
Normal file
File diff suppressed because it is too large
Load Diff
402
rank/ci/ci.song.rank.21000.json
Normal file
402
rank/ci/ci.song.rank.21000.json
Normal file
@ -0,0 +1,402 @@
|
||||
[
|
||||
{
|
||||
"baidu": 1050,
|
||||
"so360": 40,
|
||||
"google": 19500,
|
||||
"bing": 510,
|
||||
"author": "臧馀庆",
|
||||
"rhythmic": "感皇恩"
|
||||
},
|
||||
{
|
||||
"baidu": 1050,
|
||||
"so360": 40,
|
||||
"google": 19500,
|
||||
"bing": 510,
|
||||
"author": "臧馀庆",
|
||||
"rhythmic": "感皇恩"
|
||||
},
|
||||
{
|
||||
"baidu": 1050,
|
||||
"so360": 40,
|
||||
"google": 19500,
|
||||
"bing": 510,
|
||||
"author": "臧馀庆",
|
||||
"rhythmic": "感皇恩"
|
||||
},
|
||||
{
|
||||
"baidu": 382000,
|
||||
"so360": 67,
|
||||
"google": 1210000,
|
||||
"bing": 51900,
|
||||
"author": "胡于",
|
||||
"rhythmic": "鹧鸪天"
|
||||
},
|
||||
{
|
||||
"baidu": 382000,
|
||||
"so360": 67,
|
||||
"google": 1210000,
|
||||
"bing": 51900,
|
||||
"author": "胡于",
|
||||
"rhythmic": "鹧鸪天"
|
||||
},
|
||||
{
|
||||
"baidu": 382000,
|
||||
"so360": 67,
|
||||
"google": 1210000,
|
||||
"bing": 51900,
|
||||
"author": "胡于",
|
||||
"rhythmic": "鹧鸪天"
|
||||
},
|
||||
{
|
||||
"baidu": 382000,
|
||||
"so360": 67,
|
||||
"google": 1210000,
|
||||
"bing": 51900,
|
||||
"author": "胡于",
|
||||
"rhythmic": "鹧鸪天"
|
||||
},
|
||||
{
|
||||
"baidu": 59,
|
||||
"so360": 30,
|
||||
"google": 8,
|
||||
"bing": 886,
|
||||
"author": "申国章",
|
||||
"rhythmic": "鹧鸪天"
|
||||
},
|
||||
{
|
||||
"baidu": 1050,
|
||||
"so360": 45,
|
||||
"google": 27300,
|
||||
"bing": 592,
|
||||
"author": "陈日章",
|
||||
"rhythmic": "鹧鸪天"
|
||||
},
|
||||
{
|
||||
"baidu": 46,
|
||||
"so360": 52,
|
||||
"google": 28100,
|
||||
"bing": 4980,
|
||||
"author": "李景良",
|
||||
"rhythmic": "鹧鸪天"
|
||||
},
|
||||
{
|
||||
"baidu": 90,
|
||||
"so360": 48,
|
||||
"google": 27200,
|
||||
"bing": 525000,
|
||||
"author": "张思济",
|
||||
"rhythmic": "鹧鸪天"
|
||||
},
|
||||
{
|
||||
"baidu": 31300,
|
||||
"so360": 546,
|
||||
"google": 1660000,
|
||||
"bing": 135000,
|
||||
"author": "李夫人",
|
||||
"rhythmic": "减字木兰花"
|
||||
},
|
||||
{
|
||||
"baidu": 75100,
|
||||
"so360": 2370,
|
||||
"google": 5720000,
|
||||
"bing": 27600,
|
||||
"author": "李夫人",
|
||||
"rhythmic": "蝶恋花"
|
||||
},
|
||||
{
|
||||
"baidu": 57,
|
||||
"so360": 68,
|
||||
"google": 230000,
|
||||
"bing": 2170000,
|
||||
"author": "李夫人",
|
||||
"rhythmic": "瑞鹧鸪"
|
||||
},
|
||||
{
|
||||
"baidu": 60,
|
||||
"so360": 55,
|
||||
"google": 116000,
|
||||
"bing": 575,
|
||||
"author": "张藻",
|
||||
"rhythmic": "望海潮"
|
||||
},
|
||||
{
|
||||
"baidu": 4560,
|
||||
"so360": 149,
|
||||
"google": 1400,
|
||||
"bing": 3730,
|
||||
"author": "胡文卿",
|
||||
"rhythmic": "虞美人"
|
||||
},
|
||||
{
|
||||
"baidu": 4560,
|
||||
"so360": 149,
|
||||
"google": 1400,
|
||||
"bing": 3730,
|
||||
"author": "胡文卿",
|
||||
"rhythmic": "虞美人"
|
||||
},
|
||||
{
|
||||
"baidu": 4560,
|
||||
"so360": 149,
|
||||
"google": 1400,
|
||||
"bing": 3730,
|
||||
"author": "胡文卿",
|
||||
"rhythmic": "虞美人"
|
||||
},
|
||||
{
|
||||
"baidu": 4560,
|
||||
"so360": 149,
|
||||
"google": 1400,
|
||||
"bing": 3730,
|
||||
"author": "胡文卿",
|
||||
"rhythmic": "虞美人"
|
||||
},
|
||||
{
|
||||
"baidu": 4560,
|
||||
"so360": 149,
|
||||
"google": 1400,
|
||||
"bing": 3730,
|
||||
"author": "胡文卿",
|
||||
"rhythmic": "虞美人"
|
||||
},
|
||||
{
|
||||
"baidu": 4180,
|
||||
"so360": 77,
|
||||
"google": 2250,
|
||||
"bing": 13300,
|
||||
"author": "胡文卿",
|
||||
"rhythmic": "阮郎归"
|
||||
},
|
||||
{
|
||||
"baidu": 4180,
|
||||
"so360": 77,
|
||||
"google": 2250,
|
||||
"bing": 13300,
|
||||
"author": "胡文卿",
|
||||
"rhythmic": "阮郎归"
|
||||
},
|
||||
{
|
||||
"baidu": 4180,
|
||||
"so360": 77,
|
||||
"google": 2250,
|
||||
"bing": 13300,
|
||||
"author": "胡文卿",
|
||||
"rhythmic": "阮郎归"
|
||||
},
|
||||
{
|
||||
"baidu": 92,
|
||||
"so360": 44,
|
||||
"google": 27100,
|
||||
"bing": 1320,
|
||||
"author": "杨道居",
|
||||
"rhythmic": "蝶恋花"
|
||||
},
|
||||
{
|
||||
"baidu": 3220,
|
||||
"so360": 55,
|
||||
"google": 2660,
|
||||
"bing": 1140,
|
||||
"author": "史佐尧",
|
||||
"rhythmic": "苏幕遮"
|
||||
},
|
||||
{
|
||||
"baidu": 1070,
|
||||
"so360": 36,
|
||||
"google": 3310000,
|
||||
"bing": 41600,
|
||||
"author": "徐去非",
|
||||
"rhythmic": "满庭芳"
|
||||
},
|
||||
{
|
||||
"baidu": 528,
|
||||
"so360": 50,
|
||||
"google": 5260000,
|
||||
"bing": 2460000,
|
||||
"author": "徐去非",
|
||||
"rhythmic": "锦被堆"
|
||||
},
|
||||
{
|
||||
"baidu": 59,
|
||||
"so360": 12,
|
||||
"google": 2390000,
|
||||
"bing": 26700,
|
||||
"author": "徐去非",
|
||||
"rhythmic": "卷珠帘・蝶恋花"
|
||||
},
|
||||
{
|
||||
"baidu": 8,
|
||||
"so360": 491,
|
||||
"google": 6,
|
||||
"bing": 26,
|
||||
"author": "舒大成",
|
||||
"rhythmic": "点绛唇"
|
||||
},
|
||||
{
|
||||
"baidu": 3660,
|
||||
"so360": 35,
|
||||
"google": 12700,
|
||||
"bing": 572,
|
||||
"author": "贾少卿",
|
||||
"rhythmic": "临江仙"
|
||||
},
|
||||
{
|
||||
"baidu": 2640,
|
||||
"so360": 77,
|
||||
"google": 1350,
|
||||
"bing": 844000,
|
||||
"author": "陆汉广",
|
||||
"rhythmic": "江城子"
|
||||
},
|
||||
{
|
||||
"baidu": 95,
|
||||
"so360": 39,
|
||||
"google": 1880000,
|
||||
"bing": 581000,
|
||||
"author": "王阜民",
|
||||
"rhythmic": "临江仙"
|
||||
},
|
||||
{
|
||||
"baidu": 61,
|
||||
"so360": 28,
|
||||
"google": 6240000,
|
||||
"bing": 678000,
|
||||
"author": "霍安人",
|
||||
"rhythmic": "感皇恩"
|
||||
},
|
||||
{
|
||||
"baidu": 73,
|
||||
"so360": 57,
|
||||
"google": 2270000,
|
||||
"bing": 16500,
|
||||
"author": "霍安人",
|
||||
"rhythmic": "醉蓬莱"
|
||||
},
|
||||
{
|
||||
"baidu": 86,
|
||||
"so360": 33,
|
||||
"google": 2750000,
|
||||
"bing": 13200,
|
||||
"author": "霍安人",
|
||||
"rhythmic": "满庭芳"
|
||||
},
|
||||
{
|
||||
"baidu": 64,
|
||||
"so360": 30,
|
||||
"google": 481000,
|
||||
"bing": 7550000,
|
||||
"author": "希叟",
|
||||
"rhythmic": "瑞鹤仙"
|
||||
},
|
||||
{
|
||||
"baidu": 2040,
|
||||
"so360": 4040000,
|
||||
"google": 27300,
|
||||
"bing": 4930000,
|
||||
"author": "沈元实",
|
||||
"rhythmic": "水调歌头"
|
||||
},
|
||||
{
|
||||
"baidu": 79000,
|
||||
"so360": 66,
|
||||
"google": 86400,
|
||||
"bing": 14500,
|
||||
"author": "游子蒙",
|
||||
"rhythmic": "满江红"
|
||||
},
|
||||
{
|
||||
"baidu": 79000,
|
||||
"so360": 66,
|
||||
"google": 86400,
|
||||
"bing": 14500,
|
||||
"author": "游子蒙",
|
||||
"rhythmic": "满江红"
|
||||
},
|
||||
{
|
||||
"baidu": 71,
|
||||
"so360": 31,
|
||||
"google": 66100,
|
||||
"bing": 402,
|
||||
"author": "贾逋",
|
||||
"rhythmic": "清平乐"
|
||||
},
|
||||
{
|
||||
"baidu": 97,
|
||||
"so360": 258,
|
||||
"google": 27700,
|
||||
"bing": 2750,
|
||||
"author": "吴文若",
|
||||
"rhythmic": "蝶恋花"
|
||||
},
|
||||
{
|
||||
"baidu": 290000,
|
||||
"so360": 508,
|
||||
"google": 7010000,
|
||||
"bing": 25400,
|
||||
"author": "去非",
|
||||
"rhythmic": "满庭芳"
|
||||
},
|
||||
{
|
||||
"baidu": 853,
|
||||
"so360": 33,
|
||||
"google": 6870000,
|
||||
"bing": 621,
|
||||
"author": "潘熊飞",
|
||||
"rhythmic": "南乡子"
|
||||
},
|
||||
{
|
||||
"baidu": 33200,
|
||||
"so360": 73,
|
||||
"google": 26900,
|
||||
"bing": 2350,
|
||||
"author": "黄庭佐",
|
||||
"rhythmic": "水调歌头"
|
||||
},
|
||||
{
|
||||
"baidu": 7100,
|
||||
"so360": 6330,
|
||||
"google": 238000,
|
||||
"bing": 63,
|
||||
"author": "赵□",
|
||||
"rhythmic": "失调名"
|
||||
},
|
||||
{
|
||||
"baidu": 11,
|
||||
"so360": 32,
|
||||
"google": 455000,
|
||||
"bing": 19400,
|
||||
"author": "吴氏3",
|
||||
"rhythmic": "失调名"
|
||||
},
|
||||
{
|
||||
"baidu": 90,
|
||||
"so360": 529,
|
||||
"google": 5410000,
|
||||
"bing": 13700,
|
||||
"author": "吴氏3",
|
||||
"rhythmic": "南乡子"
|
||||
},
|
||||
{
|
||||
"baidu": 90,
|
||||
"so360": 529,
|
||||
"google": 5410000,
|
||||
"bing": 13700,
|
||||
"author": "吴氏3",
|
||||
"rhythmic": "南乡子"
|
||||
},
|
||||
{
|
||||
"baidu": 176000,
|
||||
"so360": 157,
|
||||
"google": 10500000,
|
||||
"bing": 8820000,
|
||||
"author": "吴氏3",
|
||||
"rhythmic": "多丽"
|
||||
},
|
||||
{
|
||||
"baidu": 88,
|
||||
"so360": 604,
|
||||
"google": 344000,
|
||||
"bing": 55200,
|
||||
"author": "吴氏3",
|
||||
"rhythmic": "渔家傲"
|
||||
}
|
||||
]
|
||||
8002
rank/ci/ci.song.rank.3000.json
Normal file
8002
rank/ci/ci.song.rank.3000.json
Normal file
File diff suppressed because it is too large
Load Diff
8002
rank/ci/ci.song.rank.4000.json
Normal file
8002
rank/ci/ci.song.rank.4000.json
Normal file
File diff suppressed because it is too large
Load Diff
8000
rank/ci/ci.song.rank.5000.json
Normal file
8000
rank/ci/ci.song.rank.5000.json
Normal file
File diff suppressed because it is too large
Load Diff
8010
rank/ci/ci.song.rank.6000.json
Normal file
8010
rank/ci/ci.song.rank.6000.json
Normal file
File diff suppressed because it is too large
Load Diff
8002
rank/ci/ci.song.rank.7000.json
Normal file
8002
rank/ci/ci.song.rank.7000.json
Normal file
File diff suppressed because it is too large
Load Diff
8002
rank/ci/ci.song.rank.8000.json
Normal file
8002
rank/ci/ci.song.rank.8000.json
Normal file
File diff suppressed because it is too large
Load Diff
8002
rank/ci/ci.song.rank.9000.json
Normal file
8002
rank/ci/ci.song.rank.9000.json
Normal file
File diff suppressed because it is too large
Load Diff
410
rank/server.js
Normal file
410
rank/server.js
Normal file
@ -0,0 +1,410 @@
|
||||
let puppeteer = require('puppeteer');
|
||||
let fs = require('fs');
|
||||
let path = require('path');
|
||||
let sep = path.sep;
|
||||
let delayBaseTime = 3 * 1000;// base delay in milliseconds applied after every lookup that was not served from the cache
|
||||
let delayMaxTime = 2 * 1000;// upper bound in milliseconds of the random extra delay added on top of delayBaseTime
|
||||
// Query cache: avoids sending the same keyword to the search engines over and over, which risks getting blocked
|
||||
let cache = Object.create(null);// prototype-less map: URL-encoded keyword -> rank record; reassigned after each output file is written
|
||||
// let todayProxies = [
|
||||
// '218.60.8.99:3129',
|
||||
// '212.64.51.13:8888',
|
||||
// '125.62.27.53:3128'
|
||||
// ];
|
||||
// Returns a promise that resolves (with no value) once `second` seconds have passed.
let wait = function (second) {
    let milliseconds = second * 1000;
    return new Promise(function (resolve) {
        setTimeout(resolve, milliseconds);
    });
};
|
||||
// Fetches pages through a single shared headless Chrome instance (puppeteer).
let headless = {
    /**
     * Launches the shared browser on first use and stores it on `this.$local`.
     * Later calls are no-ops while the browser is still registered.
     */
    async before() {
        if (!this.$local) {
            this.$local = await puppeteer.launch({
                headless: true,
                defaultViewport: {
                    width: 1440,
                    height: 900
                }
            });
        }
    },
    /**
     * Opens one tab per URL, navigates all of them in parallel and returns the
     * response bodies in the same order as `urls`.
     *
     * The whole batch is attempted up to 3 times. Every tab opened by an
     * attempt is closed exactly once when that attempt ends, whether it
     * succeeded or failed.
     *
     * @param {...string} urls pages to load
     * @returns {Promise<string[]>} response bodies, or an empty array when all
     *     attempts failed
     */
    async toUrl(...urls) {
        let browser = this.$local;
        // Closing is best effort: a tab that is already gone must not mask the
        // real result of the attempt.
        let closeAll = pages => Promise.all(pages.map(async page => {
            try {
                await page.close();
            } catch (err) {
                console.log('close page failed', err && err.message);
            }
        }));
        for (let retry = 3; retry > 0; retry--) {
            let pages = [];
            try {
                // Let every newPage() settle before acting on a failure, so
                // that tabs which did open are known and can be closed.
                let opened = await Promise.all(urls.map(() => browser.newPage().then(
                    page => ({ page }),
                    error => ({ error })
                )));
                pages = opened.filter(o => o.page).map(o => o.page);
                let failed = opened.find(o => !o.page);
                if (failed) {
                    throw failed.error;
                }
                let responses = await Promise.all(pages.map((page, i) => page.goto(urls[i], {
                    waitUntil: 'networkidle0'
                })));
                // Reading the bodies stays inside the try block so that a
                // failure here is retried too instead of leaking the tabs.
                return await Promise.all(responses.map((response, i) => {
                    if (!response) {
                        throw new Error('no response for ' + urls[i]);
                    }
                    return response.text();
                }));
            } catch (err) {
                console.log('retry', retry, err && err.message);
            } finally {
                await closeAll(pages);
            }
        }
        return [];
    },
    /**
     * Closes the shared browser, if any, and forgets it so that `before()`
     * can launch a fresh one.
     *
     * @returns {Promise<void>} settles once the browser has been closed
     */
    async after() {
        if (this.$local) {
            let browser = this.$local;
            delete this.$local;
            await browser.close();
        }
    }
};
|
||||
// Search-result scraping plus the small synchronous file helpers used by the tasks.
let rank = {
    /**
     * Asks Baidu, 360 Search, Google and Bing for a keyword and extracts the
     * reported number of results from each result page.
     *
     * @param {string} kd keyword, inserted into the query strings as-is
     *     (the callers in this file pass it through encodeURIComponent first)
     * @returns {Promise<{baidu: number, so360: number, google: number, bing: number}>}
     *     a count is 0 when its page is missing or the count pattern does not match
     */
    async remote(kd) {
        await headless.before();
        let texts = await headless.toUrl(
            `https://www.baidu.com/s?wd=${kd}&rsv_spt=1`,
            `https://www.so.com/s?q=${kd}`,
            `https://www.google.com/search?q=${kd}`,
            `https://cn.bing.com/search?q=${kd}&FORM=BESBTB`
        );
        // One pattern per engine, in the same order as the URLs above.
        let regs = [
            /百度为您找到相关结果约([\d,]+)个/,
            /找到相关结果约([\d,]+)个/,
            /(?:找到约|About)\s*([\d,]+)\s*(?:条结果|results)/,
            /<span\s+class="sb_count">([\d,]+)\s+(?:results|条结果)<\/span>/
        ];
        let [baidu, so360, google, bing] = regs.map((reg, i) => {
            let text = texts[i];
            if (typeof text !== 'string') {
                return 0;
            }
            // Drop inline scripts and styles so the pattern only sees markup and text.
            let visible = text.replace(/<(script|style)[^>]*>[\s\S]*?<\/\1>/g, '');
            let found = reg.exec(visible);
            return found ? parseInt(found[1].replace(/,/g, ''), 10) : 0;
        });
        return { baidu, so360, google, bing };
    },
    /**
     * Reads a file synchronously and returns its content as a string.
     *
     * @param {string} file path of the file
     * @returns {string} file content
     */
    read(file) {
        return fs.readFileSync(file).toString();
    },
    /**
     * Walks `folder` recursively and calls `callback` with the path of every
     * entry that is not a directory. A missing folder is silently skipped.
     *
     * @param {string} folder directory to walk
     * @param {(file: string) => void} callback receives `folder + separator + name`
     */
    list(folder, callback) {
        if (!fs.existsSync(folder)) {
            return;
        }
        for (let file of fs.readdirSync(folder)) {
            let p = folder + path.sep + file;
            if (fs.lstatSync(p).isDirectory()) {
                // The original called an undefined `walk` here.
                rank.list(p, callback);
            } else {
                callback(p);
            }
        }
    },
    /**
     * Writes `content` to `to`, creating missing parent directories first.
     *
     * @param {string} to destination path
     * @param {string} content data to write
     */
    write(to, content) {
        fs.mkdirSync(path.dirname(to), { recursive: true });
        fs.writeFileSync(to, content);
    }
};
|
||||
// True when any value of a rank record is exactly 0, i.e. a search engine reported no count.
let hasZeroCount = record => Object.values(record).some(value => value === 0);

// Merges ./s.cache into `cache`, skipping entries that contain a zero count so
// that those keywords are looked up again. An unreadable cache file is ignored.
let loadSearchCache = () => {
    if (!fs.existsSync('./s.cache')) {
        return;
    }
    let saved;
    try {
        saved = JSON.parse(rank.read('./s.cache'));
    } catch (err) {
        // The cache is only an optimisation; a truncated file must not abort the run.
        console.log('ignore unreadable ./s.cache', err.message);
        return;
    }
    if (!saved || typeof saved !== 'object') {
        return;
    }
    for (let kd of Object.keys(saved)) {
        let entry = saved[kd];
        if (entry && !hasZeroCount(entry)) {
            cache[kd] = entry;
        }
    }
};

// Looks up every poem of one source file, in order, and returns
// [ranks, zeros]: `ranks` has one record per poem (same index as the source),
// `zeros` holds the records that still contain a zero count.
let collectRanks = async (file, options) => {
    let { titleKey, needsRetry, retryWait, retryLabel } = options;
    let poems = JSON.parse(rank.read(file));
    let ranks = [];
    let zeros = [];
    for (let index = 0; index < poems.length; index++) {
        let poem = poems[index];
        let kd = encodeURIComponent(`${poem.author} ${poem[titleKey]}`);
        let data = cache[kd];
        let delay = 0;
        if (!data) {
            // Up to 3 lookups; the remaining count is logged before each pause.
            for (let retriesLeft = 3; retriesLeft > 0; retriesLeft--) {
                data = await rank.remote(kd);
                if (!needsRetry(data)) {
                    break;
                }
                console.log(retryLabel, retriesLeft);
                await wait(retryWait());
            }
            data.author = poem.author;
            data[titleKey] = poem[titleKey];
            cache[kd] = data;
            // Only real lookups are throttled; cache hits continue immediately.
            delay = delayBaseTime + delayMaxTime * Math.random();
        }
        if (hasZeroCount(data)) {
            zeros.push(data);
        }
        // Persist the cache every 5 poems so an interrupted run can resume.
        if (index && ((index % 5) === 0)) {
            rank.write('./s.cache', JSON.stringify(cache));
        }
        ranks.push(data);
        console.log(index + '/' + poems.length);
        if (delay > 0) {
            await wait(delay / 1000);// wait() takes seconds
        }
    }
    rank.write('./s.cache', JSON.stringify(cache));
    return [ranks, zeros];
};

// Shared driver for ciTask and poetTask: finds the source files that have no
// rank file yet and processes them one at a time, last listed file first.
// Any failure rejects the returned promise instead of leaving it pending.
let runRankTask = async options => {
    let done = new Set();
    rank.list(options.rankDir, f => {
        if (options.rankReg.test(f)) {
            done.add(options.toSourceName(path.basename(f)));
        }
    });
    let todo = new Set();
    rank.list(options.sourceDir, f => {
        if (options.sourceReg.test(f) && !done.has(path.basename(f))) {
            todo.add(path.resolve(f));
        }
    });
    loadSearchCache();
    let pending = [...todo];
    for (; ;) {
        let one = pending.pop();
        console.log('remain ', pending.length);
        if (!one) {
            return;
        }
        let base = path.basename(one);
        let [ranks, zeros] = await collectRanks(one, options);
        rank.write(options.rankDir + '/' + options.toRankName(base), JSON.stringify(ranks, null, 4));
        if (zeros.length) {
            rank.write(options.rankDir + '/' + options.toZeroName(base), JSON.stringify(zeros, null, 4));
        }
        cache = Object.create(null);// the output file is written, start the next file with an empty cache
    }
};

/**
 * Builds ./ci/ci.song.rank.N.json (plus ci.song.zero.N.json for records with
 * a zero count) for every ../ci/ci.song.N.json without a rank file.
 * The search keyword is "author rhythmic"; a lookup is retried while any
 * engine reports 0.
 *
 * @returns {Promise<void>} resolves when every pending file is written
 */
let ciTask = async () => {
    return runRankTask({
        sourceDir: '../ci',
        rankDir: './ci',
        sourceReg: /ci\.song\.\d+\.json$/,
        rankReg: /ci\.song\.rank\.\d+\.json$/,
        toSourceName: name => name.replace('ci.song.rank', 'ci.song'),
        toRankName: name => name.replace('ci.song', 'ci.song.rank'),
        toZeroName: name => name.replace('ci.song', 'ci.song.zero'),
        titleKey: 'rhythmic',
        needsRetry: hasZeroCount,
        retryWait: () => 3,
        retryLabel: 'has zero retry'
    });
};

/**
 * Builds ./poet/poet.(song|tang).rank.N.json (plus the matching .zero. file)
 * for every ../json/poet.(song|tang).N.json without a rank file.
 * The search keyword is "author title"; a lookup is retried only while Baidu
 * reports 0, because Baidu sometimes fails to answer.
 *
 * @returns {Promise<void>} resolves when every pending file is written
 */
let poetTask = async () => {
    return runRankTask({
        sourceDir: '../json',
        rankDir: './poet',
        sourceReg: /poet\.(?:song|tang)\.\d+\.json$/,
        rankReg: /poet\.(?:song|tang)\.rank\.\d+\.json$/,
        toSourceName: name => name.replace(/poet\.(song|tang)\.rank/, 'poet.$1'),
        toRankName: name => name.replace(/poet\.(song|tang)/, 'poet.$1.rank'),
        toZeroName: name => name.replace(/poet\.(song|tang)/, 'poet.$1.zero'),
        titleKey: 'title',
        needsRetry: data => data.baidu === 0,
        retryWait: () => 5 + Math.random() * 10,
        retryLabel: 'baidu zero result, retry'
    });
};
|
||||
// Entry point: run the ci task, always close the browser, then remove the
// lookup cache once everything has completed.
(async () => {
    try {
        await ciTask();
        //await poetTask();
    } finally {
        // Close the browser even when the task failed, otherwise the Chrome
        // process keeps running.
        await headless.after();
    }
    // fs.unlink without a callback throws on current Node versions; the
    // cache is only removed after a successful run so a failed run can resume.
    if (fs.existsSync('./s.cache')) {
        fs.unlinkSync('./s.cache');
    }
    console.log('complete');
})().catch(err => {
    console.error(err);
    process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user