Update extractor script

2019-08-13 11:39:11 +03:00
parent 233815cc22
commit 7c84eea7cb
11 changed files with 5160 additions and 2954 deletions
--- a/scripts/extract.js
+++ b/scripts/extract.js
@ -7,88 +7,151 @@ const fs = require('fs-extra');
 const path = require('path');
 const { green } = require('kleur');
 const util = require('./util');
+const config = require('../config');
+
 // Paths
-const SNIPPETS_PATH = './snippets';
-const SNIPPETS_ARCHIVE_PATH = './snippets_archive';
-const OUTPUT_PATH = './snippet_data';
-// Check if running on Travis - only build for cron jobs and custom builds
+const SNIPPETS_PATH = `./${config.snippetPath}`; // every path below is a relative './' path built from a value in ../config
+const SNIPPETS_ARCHIVE_PATH = `./${config.snippetArchivePath}`;
+const GLOSSARY_PATH = `./${config.glossaryPath}`;
+const OUTPUT_PATH = `./${config.snippetDataPath}`; // directory the generated JSON files are written to
+
+// When running on Travis CI, only build for cron jobs and API-triggered (custom) builds; any other event type exits early
 if (
  util.isTravisCI() &&
  process.env['TRAVIS_EVENT_TYPE'] !== 'cron' &&
  process.env['TRAVIS_EVENT_TYPE'] !== 'api'
 ) {
-  console.log(`${green('NOBUILD')} snippet extraction terminated, not a cron or api build!`);
+  console.log(
+    `${green(
+      'NOBUILD',
+    )} snippet extraction terminated, not a cron or api build!`,
+  );
  process.exit(0); // exit code 0: skipping the extraction is not reported as a failure
 }
-// Read data
-let snippets = {},
-  archivedSnippets = {},
-  tagDbData = {};
-console.time('Extractor');
-snippets = util.readSnippets(SNIPPETS_PATH);
-archivedSnippets = util.readSnippets(SNIPPETS_ARCHIVE_PATH);
-tagDbData = util.readTags();
-// Extract snippet data
-let snippetData = Object.keys(snippets).map(key => {
-  return {
-    id: key.slice(0, -3),
-    type: 'snippet',
-    attributes: {
-      fileName: key,
-      text: util.getTextualContent(snippets[key]).trim(),
-      codeBlocks: util.getCodeBlocks(snippets[key]),
-      tags: tagDbData[key.slice(0, -3)]
-    },
-    meta: {
-      archived: false,
-      hash: util.hashData(snippets[key])
-    }
-  };
-});
-// Extract archived snippet data
-let snippetArchiveData = Object.keys(archivedSnippets).map(key => {
-  return {
-    id: key.slice(0, -3),
-    type: 'snippet',
-    attributes: {
-      fileName: key,
-      text: util.getTextualContent(archivedSnippets[key]).trim(),
-      codeBlocks: util.getCodeBlocks(archivedSnippets[key]),
-      tags: []
-    },
-    meta: {
-      archived: true,
-      hash: util.hashData(archivedSnippets[key])
-    }
-  };
-});
-const completeData = {
-  data: [...snippetData, ...snippetArchiveData],
-  meta: {
-    specification: 'http://jsonapi.org/format/'
-  }
-};
-let listingData = {
-  data:
-    completeData.data.map(v => ({
-      id: v.id,
-      type: 'snippetListing',
-      attributes: {
-        tags: v.attributes.tags,
-        archived: v.meta.archived
-      },
-      meta: {
-        hash: v.meta.hash
-      }
-    })),

+// Declare the snippet maps and their array forms, then start the 'Extractor' timer
+let snippets = {},
+  snippetsArray = [],
+  archivedSnippets = {},
+  archivedSnippetsArray = [],
+  glossarySnippets = {},
+  glossarySnippetsArray = [];
+console.time('Extractor');
+
+// Synchronously read all snippets from the snippets, snippets_archive and glossary folders via util.readSnippets, then flatten each resulting object into an array of its values
+snippets = util.readSnippets(SNIPPETS_PATH);
+snippetsArray = Object.keys(snippets).reduce((acc, key) => { // collect the values in Object.keys order
+  acc.push(snippets[key]);
+  return acc;
+}, []);
+
+archivedSnippets = util.readSnippets(SNIPPETS_ARCHIVE_PATH);
+archivedSnippetsArray = Object.keys(archivedSnippets).reduce((acc, key) => { // same flattening for archived snippets
+  acc.push(archivedSnippets[key]);
+  return acc;
+}, []);
+
+glossarySnippets = util.readSnippets(GLOSSARY_PATH);
+glossarySnippetsArray = Object.keys(glossarySnippets).reduce((acc, key) => { // same flattening for glossary terms
+  acc.push(glossarySnippets[key]);
+  return acc;
+}, []);
+
+const completeData = { // full snippet objects exactly as returned by util.readSnippets, plus a meta section
+  data: [...snippetsArray],
  meta: {
-    specification: 'http://jsonapi.org/format/'
+    specification: 'http://jsonapi.org/format/',
+    type: 'snippetArray',
+  },
+};
+const listingData = { // lighter listing derived from completeData: keeps id, title, text, tags and hash; omits fileName and codeBlocks
+  data: completeData.data.map(v => ({
+    id: v.id,
+    type: 'snippetListing',
+    title: v.title,
+    attributes: {
+      text: v.attributes.text,
+      tags: v.attributes.tags,
+    },
+    meta: {
+      hash: v.meta.hash,
+    },
+  })),
+  meta: {
+    specification: 'http://jsonapi.org/format/',
+    type: 'snippetListingArray',
+  },
+};
+
+const archiveCompleteData = { // full archived snippet objects, same shape as completeData
+  data: [...archivedSnippetsArray],
+  meta: {
+    specification: 'http://jsonapi.org/format/',
+    type: 'snippetArray',
  }
 };
+const archiveListingData = { // lighter listing of archived snippets, same shape as listingData
+  data: archiveCompleteData.data.map(v => ({
+    id: v.id,
+    type: 'snippetListing',
+    title: v.title,
+    attributes: {
+      text: v.attributes.text,
+      tags: v.attributes.tags,
+    },
+    meta: {
+      hash: v.meta.hash,
+    },
+  })),
+  meta: {
+    specification: 'http://jsonapi.org/format/',
+    type: 'snippetListingArray',
+  },
+};
+
+const glossaryData = { // glossary terms in listing shape (id, title, text, tags, hash) with type 'glossaryTerm'; no full-data variant is built for the glossary
+  data: glossarySnippetsArray.map(v => ({
+    id: v.id,
+    type: 'glossaryTerm',
+    title: v.title,
+    attributes: {
+      text: v.attributes.text,
+      tags: v.attributes.tags,
+    },
+    meta: {
+      hash: v.meta.hash,
+    },
+  })),
+  meta: {
+    specification: 'http://jsonapi.org/format/',
+    type: 'glossaryTermArray',
+  },
+};
+
 // Write files
-fs.writeFileSync(path.join(OUTPUT_PATH, 'snippets.json'), JSON.stringify(completeData, null, 2));
-fs.writeFileSync(path.join(OUTPUT_PATH, 'snippetList.json'), JSON.stringify(listingData, null, 2));
+fs.writeFileSync( // current snippets: full data, pretty-printed with 2-space indentation
+  path.join(OUTPUT_PATH, 'snippets.json'),
+  JSON.stringify(completeData, null, 2),
+);
+fs.writeFileSync( // current snippets: listing data
+  path.join(OUTPUT_PATH, 'snippetList.json'),
+  JSON.stringify(listingData, null, 2),
+);
+
+fs.writeFileSync( // archived snippets: full data
+  path.join(OUTPUT_PATH, 'archivedSnippets.json'),
+  JSON.stringify(archiveCompleteData, null, 2),
+);
+fs.writeFileSync( // archived snippets: listing data
+  path.join(OUTPUT_PATH, 'archivedSnippetList.json'),
+  JSON.stringify(archiveListingData, null, 2),
+);
+
+fs.writeFileSync( // glossary terms
+  path.join(OUTPUT_PATH, 'glossaryTerms.json'),
+  JSON.stringify(glossaryData, null, 2),
+);
+
 // Display messages and time
-console.log(`${green('SUCCESS!')} snippets.json and snippetList.json files generated!`);
+console.log(`${green('SUCCESS!')} JSON data files generated!`);
 console.timeEnd('Extractor');
--- a/scripts/util/environmentCheck.js
+++ b/scripts/util/environmentCheck.js
@ -0,0 +1,12 @@
+// Environment checks: isTravisCI is true when both the TRAVIS and CI keys are present in process.env (their values are not inspected)
+const isTravisCI = () => 'TRAVIS' in process.env && 'CI' in process.env;
+const isTravisCronOrAPI = () => // true when TRAVIS_EVENT_TYPE is exactly 'cron' or 'api'
+  process.env['TRAVIS_EVENT_TYPE'] === 'cron' ||
+  process.env['TRAVIS_EVENT_TYPE'] === 'api';
+const isNotTravisCronOrAPI = () => !isTravisCronOrAPI(); // plain negation; also true when TRAVIS_EVENT_TYPE is unset
+
+module.exports = {
+  isTravisCI,
+  isTravisCronOrAPI,
+  isNotTravisCronOrAPI,
+};
--- a/scripts/util/helpers.js
+++ b/scripts/util/helpers.js
@ -0,0 +1,60 @@
+const config = require('../../config');
+
+const getMarkDownAnchor = paragraphTitle => // Turns a paragraph title into an anchor-style string: trimmed, lowercased, hyphen-separated
+  paragraphTitle
+    .trim()
+    .toLowerCase()
+    .replace(/[^\w\- ]+/g, '') // drop everything except word characters, hyphens and spaces
+    .replace(/\s/g, '-') // each remaining whitespace character becomes one hyphen (runs are not collapsed)
+    .replace(/\-+$/, ''); // strip trailing hyphens
+// Creates an object from an array of [key, value] pairs; a later pair overwrites an earlier one that has the same key
+const objectFromPairs = arr => arr.reduce((a, v) => ((a[v[0]] = v[1]), a), {});
+// Optimizes nodes in an HTML document by applying data.replace(regexp, replacer) repeatedly until regexp no longer matches the output
+const optimizeNodes = (data, regexp, replacer) => {
+  let count = 0;
+  let output = data;
+  do {
+    output = output.replace(regexp, replacer);
+    count = 0;
+    while (regexp.exec(output) !== null) ++count; // NOTE(review): relies on regexp having the g flag so exec advances lastIndex; a non-global regexp that still matches would loop forever here
+  } while (count > 0); // another pass is needed while any match remains
+  return output;
+};
+// Capitalizes the first letter of a string; when lowerRest is true the rest is lowercased, otherwise it is left unchanged
+const capitalize = (str, lowerRest = false) =>
+  str.slice(0, 1).toUpperCase() +
+  (lowerRest ? str.slice(1).toLowerCase() : str.slice(1));
+const prepTaggedData = tagDbData => // Returns the unique first elements of tagDbData's values, without falsy entries, sorted with 'Uncategorized' last
+  [...new Set(Object.entries(tagDbData).map(t => t[1][0]))]
+    .filter(v => v)
+    .sort((a, b) =>
+      capitalize(a, true) === 'Uncategorized' // case-insensitive match: any casing of 'uncategorized' sorts after everything else
+        ? 1
+        : capitalize(b, true) === 'Uncategorized'
+        ? -1
+        : a.localeCompare(b),
+    );
+const makeExamples = data => { // Wraps everything from the last ```${config.language} fence onward in a collapsible 'Examples' section and appends a back-to-top link. NOTE(review): `misc` is never defined or required in this file, so calling this as written would throw a ReferenceError; confirm where misc should come from
+  data =
+    data.slice(0, data.lastIndexOf(`\`\`\`${config.language}`)).trim() + // text before the last language fence
+    misc.collapsible(
+      'Examples',
+      data.slice(
+        data.lastIndexOf(`\`\`\`${config.language}`),
+        data.lastIndexOf('```'),
+      ) + data.slice(data.lastIndexOf('```')), // the two slices together span from the last language fence to the end of data
+    );
+  return `${data}\n<br>${misc.link(
+    '⬆ Back to top',
+    misc.anchor('Contents'),
+  )}\n\n`;
+};
+
+module.exports = {
+  getMarkDownAnchor,
+  objectFromPairs,
+  optimizeNodes,
+  capitalize,
+  prepTaggedData,
+  makeExamples,
+};
--- a/scripts/util/index.js
+++ b/scripts/util/index.js
@ -0,0 +1,37 @@
+const {
+  isTravisCI,
+  isTravisCronOrAPI,
+  isNotTravisCronOrAPI,
+} = require('./environmentCheck');
+const {
+  getMarkDownAnchor,
+  objectFromPairs,
+  optimizeNodes,
+  capitalize,
+  prepTaggedData,
+  makeExamples,
+} = require('./helpers');
+const {
+  getFilesInDir,
+  hashData,
+  getCodeBlocks,
+  getTextualContent,
+  readSnippets,
+} = require('./snippetParser');
+
+module.exports = {
+  isTravisCI,
+  isTravisCronOrAPI,
+  isNotTravisCronOrAPI,
+  getMarkDownAnchor,
+  objectFromPairs,
+  optimizeNodes,
+  capitalize,
+  prepTaggedData,
+  makeExamples,
+  getFilesInDir,
+  hashData,
+  getCodeBlocks,
+  getTextualContent,
+  readSnippets,
+};
--- a/scripts/util/snippetParser.js
+++ b/scripts/util/snippetParser.js
@ -0,0 +1,121 @@
+const fs = require('fs-extra'),
+  path = require('path'),
+  { red } = require('kleur'),
+  crypto = require('crypto'),
+  frontmatter = require('front-matter'),
+  babel = require('@babel/core');
+const config = require('../../config');
+
+// Reads all file names in a directory, sorted case-insensitively. With withPath truthy, names are prefixed with directoryPath and those listed in exclude are skipped; otherwise bare names are returned, only 'README.md' is filtered out and exclude is ignored
+const getFilesInDir = (directoryPath, withPath, exclude = null) => {
+  try {
+    let directoryFilenames = fs.readdirSync(directoryPath);
+    directoryFilenames.sort((a, b) => {
+      a = a.toLowerCase();
+      b = b.toLowerCase();
+      if (a < b) return -1;
+      if (a > b) return 1;
+      return 0;
+    });
+
+    if (withPath) {
+      // reduce acts as a combined filter + map: skip excluded names and prefix the rest with the directory path
+      return directoryFilenames.reduce((fileNames, fileName) => {
+        if (
+          exclude == null ||
+          !exclude.some(toExclude => fileName === toExclude)
+        )
+          fileNames.push(`${directoryPath}/${fileName}`);
+        return fileNames;
+      }, []);
+    }
+    return directoryFilenames.filter(v => v !== 'README.md');
+  } catch (err) {
+    console.log(`${red('ERROR!')} During snippet loading: ${err}`);
+    process.exit(1); // any error is logged and terminates the process instead of propagating to the caller
+  }
+};
+// Creates a hash for a value using the SHA-256 algorithm and returns it as a hex string.
+const hashData = val =>
+  crypto
+    .createHash('sha256')
+    .update(val)
+    .digest('hex');
+// Gets the code blocks for a snippet file: the first fenced block is returned as es6 (and, transpiled with @babel/preset-env, as es5), the second as example.
+const getCodeBlocks = str => {
+  const regex = /```[.\S\s]*?```/g; // lazily matches each ```...``` fenced block, including newlines
+  let results = [];
+  let m = null;
+  while ((m = regex.exec(str)) !== null) {
+    if (m.index === regex.lastIndex) regex.lastIndex += 1; // guard against getting stuck on a zero-length match
+
+    m.forEach((match, groupIndex) => {
+      results.push(match);
+    });
+  }
+  const replacer = new RegExp(
+    `\`\`\`${config.language}([\\s\\S]*?)\`\`\``,
+    'g',
+  );
+  results = results.map(v => v.replace(replacer, '$1').trim()); // strip the ```<config.language> ... ``` fence markers, keeping only the inner code
+  return {
+    es6: results[0],
+    es5: babel.transformSync(results[0], { presets: ['@babel/preset-env'] }).code.replace('"use strict";\n\n', ''), // NOTE(review): results[0] is undefined when str has no fenced block; confirm how babel.transformSync behaves in that case
+    example: results[1],
+  };
+};
+// Gets the textual content for a snippet file: everything before the first ``` fence (or the whole string when there is no fence), with CRLF line endings converted to LF.
+const getTextualContent = str => {
+  const regex = /([\s\S]*?)```/g;
+  const results = [];
+  let m = null;
+  while ((m = regex.exec(str)) !== null) {
+    if (m.index === regex.lastIndex) regex.lastIndex += 1; // guard against getting stuck on a zero-length match
+
+    m.forEach((match, groupIndex) => {
+      results.push(match);
+    });
+  }
+  if (!results.length) return str.replace(/\r\n/g, '\n');
+  return results[1].replace(/\r\n/g, '\n'); // results[0] is the first full match, results[1] its capture group (the text before the first fence)
+};
+
+// Synchronously reads every file returned by getFilesInDir(snippetsPath, false) (sorted case-insensitively), parses its front matter and returns an object keyed by file name; any error is logged and the process exits with code 1
+const readSnippets = snippetsPath => {
+  const snippetFilenames = getFilesInDir(snippetsPath, false);
+
+  let snippets = {};
+  try {
+    for (let snippet of snippetFilenames) {
+      let data = frontmatter(
+        fs.readFileSync(path.join(snippetsPath, snippet), 'utf8'),
+      );
+      snippets[snippet] = {
+        id: snippet.slice(0, -3), // file name without its last three characters (presumably the '.md' extension)
+        title: data.attributes.title,
+        type: 'snippet',
+        attributes: {
+          fileName: snippet,
+          text: getTextualContent(data.body),
+          codeBlocks: getCodeBlocks(data.body),
+          tags: data.attributes.tags.split(',').map(t => t.trim()), // comma-separated front-matter value turned into a trimmed array; a missing tags attribute throws here and is handled by the catch below
+        },
+        meta: {
+          hash: hashData(data.body), // hash covers the body only, not the front matter
+        },
+      };
+    }
+  } catch (err) {
+    console.log(`${red('ERROR!')} During snippet loading: ${err}`);
+    process.exit(1);
+  }
+  return snippets;
+};
+
+module.exports = {
+  getFilesInDir,
+  hashData,
+  getCodeBlocks,
+  getTextualContent,
+  readSnippets,
+};