Update extractor script

This commit is contained in:
Angelos Chalaris
2019-08-13 11:39:11 +03:00
parent 233815cc22
commit 7c84eea7cb
11 changed files with 5160 additions and 2954 deletions

View File

@ -7,88 +7,151 @@ const fs = require('fs-extra');
const path = require('path');
const { green } = require('kleur');
const util = require('./util');
const config = require('../config');
// Paths
const SNIPPETS_PATH = './snippets';
const SNIPPETS_ARCHIVE_PATH = './snippets_archive';
const OUTPUT_PATH = './snippet_data';
// Check if running on Travis - only build for cron jobs and custom builds
const SNIPPETS_PATH = `./${config.snippetPath}`;
const SNIPPETS_ARCHIVE_PATH = `./${config.snippetArchivePath}`;
const GLOSSARY_PATH = `./${config.glossaryPath}`;
const OUTPUT_PATH = `./${config.snippetDataPath}`;
// Check if running on Travis, only build for cron jobs and custom builds
if (
util.isTravisCI() &&
process.env['TRAVIS_EVENT_TYPE'] !== 'cron' &&
process.env['TRAVIS_EVENT_TYPE'] !== 'api'
) {
console.log(`${green('NOBUILD')} snippet extraction terminated, not a cron or api build!`);
console.log(
`${green(
'NOBUILD',
)} snippet extraction terminated, not a cron or api build!`,
);
process.exit(0);
}
// Read data
let snippets = {},
archivedSnippets = {},
tagDbData = {};
console.time('Extractor');
snippets = util.readSnippets(SNIPPETS_PATH);
archivedSnippets = util.readSnippets(SNIPPETS_ARCHIVE_PATH);
tagDbData = util.readTags();
// Extract snippet data
let snippetData = Object.keys(snippets).map(key => {
return {
id: key.slice(0, -3),
type: 'snippet',
attributes: {
fileName: key,
text: util.getTextualContent(snippets[key]).trim(),
codeBlocks: util.getCodeBlocks(snippets[key]),
tags: tagDbData[key.slice(0, -3)]
},
meta: {
archived: false,
hash: util.hashData(snippets[key])
}
};
});
// Extract archived snippet data
let snippetArchiveData = Object.keys(archivedSnippets).map(key => {
return {
id: key.slice(0, -3),
type: 'snippet',
attributes: {
fileName: key,
text: util.getTextualContent(archivedSnippets[key]).trim(),
codeBlocks: util.getCodeBlocks(archivedSnippets[key]),
tags: []
},
meta: {
archived: true,
hash: util.hashData(archivedSnippets[key])
}
};
});
const completeData = {
data: [...snippetData, ...snippetArchiveData],
meta: {
specification: 'http://jsonapi.org/format/'
}
};
let listingData = {
data:
completeData.data.map(v => ({
id: v.id,
type: 'snippetListing',
attributes: {
tags: v.attributes.tags,
archived: v.meta.archived
},
meta: {
hash: v.meta.hash
}
})),
// Start the timer, then synchronously load every snippet from the snippets,
// snippets_archive and glossary folders. Each folder is kept twice: as the
// object returned by util.readSnippets and as a flat array of that object's
// values (in the same order as its keys).
console.time('Extractor');
const snippets = util.readSnippets(SNIPPETS_PATH);
const snippetsArray = Object.values(snippets);
const archivedSnippets = util.readSnippets(SNIPPETS_ARCHIVE_PATH);
const archivedSnippetsArray = Object.values(archivedSnippets);
const glossarySnippets = util.readSnippets(GLOSSARY_PATH);
const glossarySnippetsArray = Object.values(glossarySnippets);
const completeData = {
data: [...snippetsArray],
meta: {
specification: 'http://jsonapi.org/format/'
specification: 'http://jsonapi.org/format/',
type: 'snippetArray',
},
};
// Lightweight listing of the regular snippets: each entry keeps only the id,
// title, text, tags and hash of the corresponding completeData record.
const listingData = {
  data: completeData.data.map(
    ({ id, title, attributes: { text, tags }, meta: { hash } }) => ({
      id,
      type: 'snippetListing',
      title,
      attributes: { text, tags },
      meta: { hash },
    }),
  ),
  meta: {
    specification: 'http://jsonapi.org/format/',
    type: 'snippetListingArray',
  },
};
// Full data for the archived snippets (a shallow copy of the loaded array).
const archiveCompleteData = {
  meta: {
    specification: 'http://jsonapi.org/format/',
    type: 'snippetArray',
  },
  data: [...archivedSnippetsArray],
};
// Lightweight listing of the archived snippets: each entry keeps only the id,
// title, text, tags and hash of the corresponding archiveCompleteData record.
const archiveListingData = {
  data: archiveCompleteData.data.map(
    ({ id, title, attributes: { text, tags }, meta: { hash } }) => ({
      id,
      type: 'snippetListing',
      title,
      attributes: { text, tags },
      meta: { hash },
    }),
  ),
  meta: {
    specification: 'http://jsonapi.org/format/',
    type: 'snippetListingArray',
  },
};
// Glossary output: one 'glossaryTerm' entry per loaded glossary file, keeping
// only its id, title, text, tags and hash.
const glossaryData = {
  data: glossarySnippetsArray.map(
    ({ id, title, attributes: { text, tags }, meta: { hash } }) => ({
      id,
      type: 'glossaryTerm',
      title,
      attributes: { text, tags },
      meta: { hash },
    }),
  ),
  meta: {
    specification: 'http://jsonapi.org/format/',
    type: 'glossaryTermArray',
  },
};
// Write files
fs.writeFileSync(path.join(OUTPUT_PATH, 'snippets.json'), JSON.stringify(completeData, null, 2));
fs.writeFileSync(path.join(OUTPUT_PATH, 'snippetList.json'), JSON.stringify(listingData, null, 2));
// Each output file name is paired with the data it serializes. Files are
// written synchronously, in this order, as JSON indented with two spaces.
const outputFiles = [
  ['snippets.json', completeData],
  ['snippetList.json', listingData],
  ['archivedSnippets.json', archiveCompleteData],
  ['archivedSnippetList.json', archiveListingData],
  ['glossaryTerms.json', glossaryData],
];
for (const [fileName, payload] of outputFiles) {
  fs.writeFileSync(
    path.join(OUTPUT_PATH, fileName),
    JSON.stringify(payload, null, 2),
  );
}
// Display messages and time
console.log(`${green('SUCCESS!')} snippets.json and snippetList.json files generated!`);
console.log(`${green('SUCCESS!')} JSON data files generated!`);
console.timeEnd('Extractor');

View File

@ -0,0 +1,12 @@
// Checks if the current environment is Travis CI: true only when both the
// TRAVIS and CI environment variables are present, whatever their values.
const isTravisCI = () =>
  ['TRAVIS', 'CI'].every(variable => variable in process.env);
// True when the TRAVIS_EVENT_TYPE environment variable is exactly 'cron' or
// 'api'; false for any other value or when the variable is not set.
const isTravisCronOrAPI = () =>
  ['cron', 'api'].includes(process.env['TRAVIS_EVENT_TYPE']);
// Logical negation of isTravisCronOrAPI.
const isNotTravisCronOrAPI = () => {
  const cronOrApi = isTravisCronOrAPI();
  return !cronOrApi;
};
module.exports = {
isTravisCI,
isTravisCronOrAPI,
isNotTravisCronOrAPI,
};

60
scripts/util/helpers.js Normal file
View File

@ -0,0 +1,60 @@
const config = require('../../config');
// Builds a markdown anchor from a paragraph title: trims and lowercases it,
// drops every character that is not a word character, hyphen or space, turns
// each remaining whitespace character into a hyphen and strips trailing hyphens.
const getMarkDownAnchor = paragraphTitle => {
  const normalized = paragraphTitle.trim().toLowerCase();
  const withoutSymbols = normalized.replace(/[^\w\- ]+/g, '');
  const hyphenated = withoutSymbols.replace(/\s/g, '-');
  return hyphenated.replace(/\-+$/, '');
};
// Creates an object from an array of [key, value] pairs. When a key appears
// more than once, the value of the last pair wins.
const objectFromPairs = arr => {
  const result = {};
  arr.forEach(pair => {
    result[pair[0]] = pair[1];
  });
  return result;
};
// Optimizes nodes in an HTML document by applying `replacer` to every match of
// `regexp` over and over until no match is left.
//   data     - the string to process
//   regexp   - pattern to look for (global or not)
//   replacer - replacement string or function, as accepted by String#replace
// Returns the processed string.
const optimizeNodes = (data, regexp, replacer) => {
  let output = data;
  let previous;
  do {
    previous = output;
    output = output.replace(regexp, replacer);
    // String#search always scans from the start and leaves lastIndex alone, so
    // this check is safe for non-global patterns (an exec() counting loop would
    // never finish there). Stopping when a pass changes nothing prevents an
    // endless loop when the replacement still matches the pattern.
  } while (output !== previous && output.search(regexp) !== -1);
  return output;
};
// Capitalizes the first letter of a string. When lowerRest is true the
// remainder of the string is lowercased, otherwise it is left as-is.
const capitalize = (str, lowerRest = false) => {
  const head = str.slice(0, 1).toUpperCase();
  const tail = str.slice(1);
  return `${head}${lowerRest ? tail.toLowerCase() : tail}`;
};
// Collects the first tag of every entry in tagDbData, removes duplicates and
// falsy values, and sorts the result with localeCompare. Any tag that
// capitalize(tag, true) turns into 'Uncategorized' is pushed to the end.
const prepTaggedData = tagDbData => {
  const isUncategorized = tag => capitalize(tag, true) === 'Uncategorized';
  const primaryTags = Object.values(tagDbData).map(tags => tags[0]);
  return [...new Set(primaryTags)].filter(Boolean).sort((a, b) => {
    if (isUncategorized(a)) return 1;
    if (isUncategorized(b)) return -1;
    return a.localeCompare(b);
  });
};
// Wraps the last code block of a snippet in a collapsible 'Examples' section
// and appends a "Back to top" link.
// The text before the last opening fence for config.language is trimmed and
// kept as-is; everything from that fence to the end of the string is passed to
// misc.collapsible.
// NOTE(review): `misc` is not defined or imported anywhere in this module, so
// calling this function looks like it would throw a ReferenceError - confirm
// where `misc` is supposed to come from.
const makeExamples = data => {
  const openingFence = `\`\`\`${config.language}`;
  const description = data.slice(0, data.lastIndexOf(openingFence)).trim();
  const examples =
    data.slice(data.lastIndexOf(openingFence), data.lastIndexOf('```')) +
    data.slice(data.lastIndexOf('```'));
  const body = description + misc.collapsible('Examples', examples);
  const backToTop = misc.link('⬆ Back to top', misc.anchor('Contents'));
  return `${body}\n<br>${backToTop}\n\n`;
};
module.exports = {
getMarkDownAnchor,
objectFromPairs,
optimizeNodes,
capitalize,
prepTaggedData,
makeExamples,
};

37
scripts/util/index.js Normal file
View File

@ -0,0 +1,37 @@
const {
isTravisCI,
isTravisCronOrAPI,
isNotTravisCronOrAPI,
} = require('./environmentCheck');
const {
getMarkDownAnchor,
objectFromPairs,
optimizeNodes,
capitalize,
prepTaggedData,
makeExamples,
} = require('./helpers');
const {
getFilesInDir,
hashData,
getCodeBlocks,
getTextualContent,
readSnippets,
} = require('./snippetParser');
module.exports = {
isTravisCI,
isTravisCronOrAPI,
isNotTravisCronOrAPI,
getMarkDownAnchor,
objectFromPairs,
optimizeNodes,
capitalize,
prepTaggedData,
makeExamples,
getFilesInDir,
hashData,
getCodeBlocks,
getTextualContent,
readSnippets,
};

View File

@ -0,0 +1,121 @@
const fs = require('fs-extra'),
path = require('path'),
{ red } = require('kleur'),
crypto = require('crypto'),
frontmatter = require('front-matter'),
babel = require('@babel/core');
const config = require('../../config');
// Reads all file names in a directory, sorted case-insensitively.
//   directoryPath - directory to list
//   withPath      - when truthy, every returned entry is `${directoryPath}/${fileName}`
//                   and names listed in `exclude` are skipped
//   exclude       - optional array of file names to skip (only used with withPath)
// When withPath is falsy the bare file names are returned, minus 'README.md',
// and `exclude` is ignored.
// On any error the message is logged and the process exits with code 1.
const getFilesInDir = (directoryPath, withPath, exclude = null) => {
  try {
    const byLowerCaseName = (first, second) => {
      const left = first.toLowerCase();
      const right = second.toLowerCase();
      if (left < right) return -1;
      return left > right ? 1 : 0;
    };
    const directoryFilenames = fs
      .readdirSync(directoryPath)
      .sort(byLowerCaseName);
    if (!withPath) {
      return directoryFilenames.filter(fileName => fileName !== 'README.md');
    }
    const isExcluded = fileName =>
      exclude != null && exclude.some(toExclude => fileName === toExclude);
    return directoryFilenames
      .filter(fileName => !isExcluded(fileName))
      .map(fileName => `${directoryPath}/${fileName}`);
  } catch (err) {
    console.log(`${red('ERROR!')} During snippet loading: ${err}`);
    process.exit(1);
  }
};
// Creates a hash for a value using the SHA-256 algorithm and returns it as a
// hex-encoded string.
const hashData = val => {
  const hasher = crypto.createHash('sha256');
  hasher.update(val);
  return hasher.digest('hex');
};
// Gets the code blocks for a snippet file.
// Every ```-fenced block in `str` is collected; fences opened with
// config.language are stripped and the content trimmed (blocks fenced any
// other way keep their fence markers). Returns:
//   es6     - the first block
//   es5     - the first block run through Babel's preset-env, with the first
//             '"use strict";\n\n' removed from the output
//   example - the second block (undefined when there is none)
const getCodeBlocks = str => {
  const fencedBlocks = String(str).match(/```[.\S\s]*?```/g) || [];
  const languageFence = new RegExp(
    `\`\`\`${config.language}([\\s\\S]*?)\`\`\``,
    'g',
  );
  const [code, example] = fencedBlocks.map(block =>
    block.replace(languageFence, '$1').trim(),
  );
  const transpiled = babel.transformSync(code, {
    presets: ['@babel/preset-env'],
  }).code;
  return {
    es6: code,
    es5: transpiled.replace('"use strict";\n\n', ''),
    example,
  };
};
// Gets the textual content for a snippet file: everything before the first
// ``` fence, or the whole string when there is no fence. Windows line endings
// (\r\n) are converted to \n in the returned text.
const getTextualContent = str => {
  const firstFence = /([\s\S]*?)```/.exec(str);
  const text = firstFence === null ? str : firstFence[1];
  return text.replace(/\r\n/g, '\n');
};
// Synchronously reads every snippet file in snippetsPath (as listed by
// getFilesInDir, i.e. sorted case-insensitively and without README.md) and
// returns an object keyed by file name. Each file is parsed with front-matter:
// `title` and the comma-separated `tags` come from its attributes, while the
// text, code blocks and hash are derived from its body.
// Any error while reading or parsing is logged and ends the process with code 1.
const readSnippets = snippetsPath => {
  const snippetFilenames = getFilesInDir(snippetsPath, false);
  const snippets = {};
  try {
    snippetFilenames.forEach(fileName => {
      const rawContent = fs.readFileSync(
        path.join(snippetsPath, fileName),
        'utf8',
      );
      const data = frontmatter(rawContent);
      snippets[fileName] = {
        // File name without its last three characters (the '.md' extension)
        id: fileName.slice(0, -3),
        title: data.attributes.title,
        type: 'snippet',
        attributes: {
          fileName,
          text: getTextualContent(data.body),
          codeBlocks: getCodeBlocks(data.body),
          tags: data.attributes.tags.split(',').map(tag => tag.trim()),
        },
        meta: {
          hash: hashData(data.body),
        },
      };
    });
  } catch (err) {
    console.log(`${red('ERROR!')} During snippet loading: ${err}`);
    process.exit(1);
  }
  return snippets;
};
module.exports = {
getFilesInDir,
hashData,
getCodeBlocks,
getTextualContent,
readSnippets,
};