WIP - add extractor, generate snippet_data

This commit is contained in:
Stefan Fejes
2019-08-20 15:52:05 +02:00
parent 88084d3d30
commit cc8f1d8a7a
37396 changed files with 4588842 additions and 133 deletions

30
node_modules/parse-entities/decode-entity.browser.js generated vendored Normal file
View File

@ -0,0 +1,30 @@
'use strict'
/* eslint-env browser */
// Element used by `decodeEntity` to decode entities: created on first use and
// then reused for every later call.
var el
// Character code compared against the last character of the decoded text.
var semicolon = 59 // ';'
module.exports = decodeEntity
// Decode a named entity by letting the DOM do the work.
//
// `characters` is the entity name without the leading `&` and trailing `;`.
// Returns the decoded text, or `false` when the name is not a known entity.
function decodeEntity(characters) {
  var source = '&' + characters + ';'
  var decoded
  var endsWithSemicolon

  // Create the scratch element once, reuse it afterwards.
  if (!el) {
    el = document.createElement('i')
  }

  el.innerHTML = source
  decoded = el.textContent

  // Certain entities are valid without their closing semicolon (`&not`, for
  // example), so `&notit;` is decoded as `¬it;`: a shorter entity matched and
  // the remainder, including the semicolon, was left as text.
  // A decoded value that still ends in a semicolon therefore means the full
  // name did not match, unless the entity being decoded is the semicolon
  // itself (`semi`).
  endsWithSemicolon = decoded.charCodeAt(decoded.length - 1) === semicolon

  if (endsWithSemicolon && characters !== 'semi') {
    return false
  }

  // Text that comes back unchanged was not an entity at all.
  if (decoded === source) {
    return false
  }

  return decoded
}

13
node_modules/parse-entities/decode-entity.js generated vendored Normal file
View File

@ -0,0 +1,13 @@
'use strict'
var characterEntities = require('character-entities')
module.exports = decodeEntity
var own = {}.hasOwnProperty
// Look up a named entity in the `character-entities` map.
//
// `characters` is the entity name without `&` and `;`.
// Returns the mapped value, or `false` when the name is not an own key of the
// map (so inherited names such as `toString` are rejected).
function decodeEntity(characters) {
  if (!own.call(characterEntities, characters)) {
    return false
  }

  return characterEntities[characters]
}

450
node_modules/parse-entities/index.js generated vendored Normal file
View File

@ -0,0 +1,450 @@
'use strict'
var legacy = require('character-entities-legacy')
var invalid = require('character-reference-invalid')
var decimal = require('is-decimal')
var hexadecimal = require('is-hexadecimal')
var alphanumerical = require('is-alphanumerical')
var decodeEntity = require('./decode-entity')
module.exports = parseEntities
// Shortcut for own-property checks (used on the `legacy` map in `parse`).
var own = {}.hasOwnProperty
var fromCharCode = String.fromCharCode
// Callable placeholder used by `parse` as the warning function when no
// `warning` handler is given.
var noop = Function.prototype
// Default settings.
// `parseEntities` copies every key listed here into the settings it hands to
// `parse`, using the value below when the caller's option is `null` or
// `undefined`.
var defaults = {
warning: null,
reference: null,
text: null,
warningContext: null,
referenceContext: null,
textContext: null,
position: {},
additional: null,
attribute: false,
nonTerminated: true
}
// Characters.
// Character codes, compared against the results of `charCodeAt`.
var tab = 9 // '\t'
var lineFeed = 10 // '\n'
var formFeed = 12 // '\f'
var space = 32 // ' '
var ampersand = 38 // '&'
var semicolon = 59 // ';'
var lessThan = 60 // '<'
var equalsTo = 61 // '='
var numberSign = 35 // '#'
var uppercaseX = 88 // 'X'
var lowercaseX = 120 // 'x'
var replacementCharacter = 65533 // U+FFFD REPLACEMENT CHARACTER
// Reference types.
var name = 'named'
var hexa = 'hexadecimal'
var deci = 'decimal'
// Map of bases.
// Radix handed to `parseInt` for each numeric reference type.
var bases = {}
bases[hexa] = 16
bases[deci] = 10
// Map of types to tests.
// Each type of character reference accepts different characters.
// This test is used to detect whether a reference has ended (as the semicolon
// is not strictly needed).
var tests = {}
tests[name] = alphanumerical
tests[deci] = decimal
tests[hexa] = hexadecimal
// Warning types.
// These codes are passed to the `warning` handler as its third argument.
var namedNotTerminated = 1
var numericNotTerminated = 2
var namedEmpty = 3
var numericEmpty = 4
var namedUnknown = 5
var numericDisallowed = 6
var numericProhibited = 7
// Warning messages.
// Keyed by warning code; passed to the `warning` handler as its first
// argument.
var messages = {}
messages[namedNotTerminated] =
'Named character references must be terminated by a semicolon'
messages[numericNotTerminated] =
'Numeric character references must be terminated by a semicolon'
messages[namedEmpty] = 'Named character references cannot be empty'
messages[numericEmpty] = 'Numeric character references cannot be empty'
messages[namedUnknown] = 'Named character references must be known'
messages[numericDisallowed] =
'Numeric character references cannot be disallowed'
messages[numericProhibited] =
'Numeric character references cannot be outside the permissible Unicode range'
// Wrap to ensure clean parameters are given to `parse`.
//
// Parameters:
// - `value`: string to decode.
// - `options`: optional object; only keys present in `defaults` are read, and
//   a key whose value is `null` or `undefined` falls back to its default.
//
// `options.position` may be a point (`{line, column, offset}`) or a location
// (`{start, indent}`); a location is unpacked into `settings.position` (its
// start point) and `settings.indent`.
//
// Returns whatever `parse` returns for the cleaned settings.
function parseEntities(value, options) {
  var settings = {}
  var position
  var option
  var key

  if (!options) {
    options = {}
  }

  for (key in defaults) {
    option = options[key]
    settings[key] =
      option === null || option === undefined ? defaults[key] : option
  }

  position = settings.position

  if (position.indent || position.start) {
    settings.indent = position.indent || []
    // A location that carries `indent` but no `start` used to leave
    // `settings.position` undefined, which made `parse` throw when reading
    // `pos.column`. Fall back to an empty point so the defaults in `parse`
    // (line 1, column 1, offset 0) apply instead.
    settings.position = position.start || {}
  }

  return parse(value, settings)
}
// Parse entities.
//
// Walks `value` one UTF-16 code unit at a time, plus one extra step past the
// end (where `charCodeAt` yields `NaN`) so the final text is flushed.
// Plain text is collected in `queue`; every ampersand is inspected to see
// whether it starts a named, decimal, or hexadecimal character reference.
//
// Parameters:
// - `value`: string to decode.
// - `settings`: cleaned options as built by `parseEntities`. Fields read:
//   `additional`, `nonTerminated`, `attribute`, `text`, `reference`,
//   `warning`, `textContext`, `referenceContext`, `warningContext`,
//   `position` (a point), and `indent`.
//
// Returns the decoded string (all text and reference chunks joined).
// eslint-disable-next-line complexity
function parse(value, settings) {
  var additional = settings.additional
  var nonTerminated = settings.nonTerminated
  var handleText = settings.text
  var handleReference = settings.reference
  var handleWarning = settings.warning
  var textContext = settings.textContext
  var referenceContext = settings.referenceContext
  var warningContext = settings.warningContext
  var pos = settings.position
  var indent = settings.indent || []
  var length = value.length
  var index = 0
  var lines = -1
  var column = pos.column || 1
  var line = pos.line || 1
  var queue = ''
  var result = []
  var entityCharacters
  var namedEntity
  var terminated
  var characters
  var character
  var reference
  var following
  var warning
  var reason
  var output
  var entity
  var begin
  var start
  var type
  var test
  var prev
  var next
  var diff
  var end

  // `additional` is compared against character codes below.
  if (typeof additional === 'string') {
    additional = additional.charCodeAt(0)
  }

  // Cache the current point.
  prev = now()

  // Wrap `handleWarning`.
  warning = handleWarning ? parseError : noop

  // Ensure the algorithm walks over the first character and the end (inclusive).
  index--
  length++

  while (++index < length) {
    // If the previous character was a newline.
    if (character === lineFeed) {
      column = indent[lines] || 1
    }

    character = value.charCodeAt(index)

    if (character === ampersand) {
      following = value.charCodeAt(index + 1)

      // The behaviour depends on the identity of the next character.
      if (
        following === tab ||
        following === lineFeed ||
        following === formFeed ||
        following === space ||
        following === ampersand ||
        following === lessThan ||
        // `NaN` (end of input) is the only value not equal to itself.
        following !== following ||
        (additional && following === additional)
      ) {
        // Not a character reference.
        // No characters are consumed, and nothing is returned.
        // This is not an error, either.
        queue += fromCharCode(character)
        column++
        continue
      }

      // `start` is the index right after the ampersand; `begin` is where the
      // reference's own characters start (after `#` / `#x` for numerics).
      start = index + 1
      begin = start
      end = start

      if (following === numberSign) {
        // Numerical entity.
        end = ++begin

        // The behaviour further depends on the next character.
        following = value.charCodeAt(end)

        if (following === uppercaseX || following === lowercaseX) {
          // ASCII hex digits.
          type = hexa
          end = ++begin
        } else {
          // ASCII digits.
          type = deci
        }
      } else {
        // Named entity.
        type = name
      }

      entityCharacters = ''
      entity = ''
      characters = ''
      // Reset per ampersand: the "empty" branches below assign nothing, and a
      // value left over from an earlier reference would otherwise be emitted
      // again in place of the source text.
      reference = ''
      test = tests[type]
      end--

      while (++end < length) {
        following = value.charCodeAt(end)

        if (!test(following)) {
          break
        }

        characters += fromCharCode(following)

        // Check if we can match a legacy named reference.
        // If so, we cache that as the last viable named reference.
        // This ensures we do not need to walk backwards later.
        if (type === name && own.call(legacy, characters)) {
          entityCharacters = characters
          entity = legacy[characters]
        }
      }

      terminated = value.charCodeAt(end) === semicolon

      if (terminated) {
        end++

        namedEntity = type === name ? decodeEntity(characters) : false

        if (namedEntity) {
          entityCharacters = characters
          entity = namedEntity
        }
      }

      diff = 1 + end - start

      if (!terminated && !nonTerminated) {
        // Empty.
      } else if (!characters) {
        // An empty (possible) entity is valid, unless its numeric (thus an
        // ampersand followed by an octothorp).
        if (type !== name) {
          warning(numericEmpty, diff)
        }
      } else if (type === name) {
        // An ampersand followed by anything unknown, and not terminated, is
        // invalid.
        if (terminated && !entity) {
          warning(namedUnknown, 1)
        } else {
          // If theres something after an entity name which is not known, cap
          // the reference.
          if (entityCharacters !== characters) {
            end = begin + entityCharacters.length
            diff = 1 + end - begin
            terminated = false
          }

          // If the reference is not terminated, warn.
          if (!terminated) {
            reason = entityCharacters ? namedNotTerminated : namedEmpty

            if (settings.attribute) {
              following = value.charCodeAt(end)

              if (following === equalsTo) {
                warning(reason, diff)
                entity = null
              } else if (alphanumerical(following)) {
                entity = null
              } else {
                warning(reason, diff)
              }
            } else {
              warning(reason, diff)
            }
          }
        }

        reference = entity
      } else {
        if (!terminated) {
          // All non-terminated numeric entities are not rendered, and trigger a
          // warning.
          warning(numericNotTerminated, diff)
        }

        // When terminated and number, parse as either hexadecimal or decimal.
        reference = parseInt(characters, bases[type])

        // Trigger a warning when the parsed number is prohibited, and replace
        // with replacement character.
        if (prohibited(reference)) {
          warning(numericProhibited, diff)
          reference = fromCharCode(replacementCharacter)
        } else if (reference in invalid) {
          // Trigger a warning when the parsed number is disallowed, and replace
          // by an alternative.
          warning(numericDisallowed, diff)
          reference = invalid[reference]
        } else {
          // Parse the number.
          output = ''

          // Trigger a warning when the parsed number should not be used.
          if (disallowed(reference)) {
            warning(numericDisallowed, diff)
          }

          // Stringify the number.
          // Code points above 0xffff are written as a surrogate pair: the
          // high surrogate goes into `output`, the low one replaces
          // `reference`.
          if (reference > 0xffff) {
            reference -= 0x10000
            output += fromCharCode((reference >>> 10) | 0xd800)
            reference = 0xdc00 | (reference & 0x3ff)
          }

          reference = output + fromCharCode(reference)
        }
      }

      // Found it!
      // First eat the queued characters as normal text, then eat an entity.
      if (reference) {
        flush()

        prev = now()
        index = end - 1
        column += end - start + 1
        result.push(reference)
        next = now()
        next.offset++

        if (handleReference) {
          handleReference.call(
            referenceContext,
            reference,
            {start: prev, end: next},
            value.slice(start - 1, end)
          )
        }

        prev = next
      } else {
        // If we could not find a reference, queue the checked characters (as
        // normal characters), and move the pointer to their end.
        // This is possible because we can be certain neither newlines nor
        // ampersands are included.
        characters = value.slice(start - 1, end)
        queue += characters
        column += characters.length
        index = end - 1
      }
    } else {
      // Handle anything other than an ampersand, including newlines and EOF.
      if (character === lineFeed) {
        line++
        lines++
        column = 0
      }

      // `NaN` marks the extra step past the end of `value`: flush what is
      // left instead of queueing a character.
      if (character === character) {
        queue += fromCharCode(character)
        column++
      } else {
        flush()
      }
    }
  }

  // Return the reduced nodes, and any possible warnings.
  return result.join('')

  // Get current position.
  function now() {
    return {
      line: line,
      column: column,
      offset: index + (pos.offset || 0)
    }
  }

  // “Throw” a parse-error: a warning.
  // `offset` is added to both the column and the offset of the current point.
  function parseError(code, offset) {
    var position = now()

    position.column += offset
    position.offset += offset

    handleWarning.call(warningContext, messages[code], position, code)
  }

  // Flush `queue` (normal text).
  // Macro invoked before each entity and at the end of `value`.
  // Does nothing when `queue` is empty.
  function flush() {
    if (queue) {
      result.push(queue)

      if (handleText) {
        handleText.call(textContext, queue, {start: prev, end: now()})
      }

      queue = ''
    }
  }
}
// Check whether `code` cannot be used as a numeric character reference
// because it is a surrogate (0xd800–0xdfff) or lies above 0x10ffff.
function prohibited(code) {
  var surrogate = code >= 0xd800 && code <= 0xdfff
  var tooLarge = code > 0x10ffff

  return surrogate || tooLarge
}
// Check whether `code` falls in one of the disallowed ranges: 0x01–0x08,
// 0x0b, 0x0d–0x1f, 0x7f–0x9f, 0xfdd0–0xfdef, or any value whose low 16 bits
// are 0xfffe or 0xffff.
function disallowed(code) {
  var low = code & 0xffff

  if (code >= 0x0001 && code <= 0x0008) {
    return true
  }

  if (code === 0x000b) {
    return true
  }

  if (code >= 0x000d && code <= 0x001f) {
    return true
  }

  if (code >= 0x007f && code <= 0x009f) {
    return true
  }

  if (code >= 0xfdd0 && code <= 0xfdef) {
    return true
  }

  return low === 0xffff || low === 0xfffe
}

22
node_modules/parse-entities/license generated vendored Normal file
View File

@ -0,0 +1,22 @@
(The MIT License)
Copyright (c) 2015 Titus Wormer <mailto:tituswormer@gmail.com>
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
'Software'), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

132
node_modules/parse-entities/package.json generated vendored Normal file
View File

@ -0,0 +1,132 @@
{
"_from": "parse-entities@^1.1.0",
"_id": "parse-entities@1.2.2",
"_inBundle": false,
"_integrity": "sha512-NzfpbxW/NPrzZ/yYSoQxyqUZMZXIdCfE0OIN4ESsnptHJECoUk3FZktxNuzQf4tjt5UEopnxpYJbvYuxIFDdsg==",
"_location": "/parse-entities",
"_phantomChildren": {},
"_requested": {
"type": "range",
"registry": true,
"raw": "parse-entities@^1.1.0",
"name": "parse-entities",
"escapedName": "parse-entities",
"rawSpec": "^1.1.0",
"saveSpec": null,
"fetchSpec": "^1.1.0"
},
"_requiredBy": [
"/remark-parse",
"/remark-stringify",
"/remark/remark-stringify"
],
"_resolved": "https://registry.npmjs.org/parse-entities/-/parse-entities-1.2.2.tgz",
"_shasum": "c31bf0f653b6661354f8973559cb86dd1d5edf50",
"_spec": "parse-entities@^1.1.0",
"_where": "/Users/stefanfejes/Projects/30-seconds-of-python-code/node_modules/remark-parse",
"author": {
"name": "Titus Wormer",
"email": "tituswormer@gmail.com",
"url": "https://wooorm.com"
},
"browser": {
"./decode-entity.js": "./decode-entity.browser.js"
},
"bugs": {
"url": "https://github.com/wooorm/parse-entities/issues"
},
"bundleDependencies": false,
"contributors": [
{
"name": "Titus Wormer",
"email": "tituswormer@gmail.com",
"url": "https://wooorm.com"
}
],
"dependencies": {
"character-entities": "^1.0.0",
"character-entities-legacy": "^1.0.0",
"character-reference-invalid": "^1.0.0",
"is-alphanumerical": "^1.0.0",
"is-decimal": "^1.0.0",
"is-hexadecimal": "^1.0.0"
},
"deprecated": false,
"description": "Parse HTML character references: fast, spec-compliant, positional information",
"devDependencies": {
"browserify": "^16.0.0",
"nyc": "^14.0.0",
"prettier": "^1.12.1",
"remark-cli": "^6.0.0",
"remark-preset-wooorm": "^4.0.0",
"tape": "^4.2.0",
"tape-run": "^6.0.0",
"tinyify": "^2.4.3",
"xo": "^0.24.0"
},
"files": [
"index.js",
"decode-entity.js",
"decode-entity.browser.js"
],
"homepage": "https://github.com/wooorm/parse-entities#readme",
"keywords": [
"parse",
"html",
"character",
"reference",
"entity",
"entities"
],
"license": "MIT",
"name": "parse-entities",
"nyc": {
"check-coverage": true,
"lines": 100,
"functions": 100,
"branches": 100
},
"prettier": {
"tabWidth": 2,
"useTabs": false,
"singleQuote": true,
"bracketSpacing": false,
"semi": false,
"trailingComma": "none"
},
"react-native": {
"./decode-entity.js": "./decode-entity.js"
},
"remarkConfig": {
"plugins": [
"preset-wooorm"
]
},
"repository": {
"type": "git",
"url": "git+https://github.com/wooorm/parse-entities.git"
},
"scripts": {
"build": "npm run build-bundle && npm run build-mangle",
"build-bundle": "browserify . -s parseEntities > parse-entities.js",
"build-mangle": "browserify . -s parseEntities -p tinyify > parse-entities.min.js",
"format": "remark . -qfo && prettier --write \"**/*.js\" && xo --fix",
"test": "npm run format && npm run build && npm run test-coverage && npm run test-browser",
"test-api": "node test",
"test-browser": "browserify test.js | tape-run",
"test-coverage": "nyc --reporter lcov tape test.js"
},
"version": "1.2.2",
"xo": {
"prettier": true,
"esnext": false,
"rules": {
"no-self-compare": "off",
"guard-for-in": "off",
"max-depth": "off"
},
"ignores": [
"parse-entities.js"
]
}
}

217
node_modules/parse-entities/readme.md generated vendored Normal file
View File

@ -0,0 +1,217 @@
# parse-entities
[![Build][build-badge]][build]
[![Coverage][coverage-badge]][coverage]
[![Downloads][downloads-badge]][downloads]
[![Size][size-badge]][size]
Parse HTML character references: fast, spec-compliant, positional
information.
## Installation
[npm][]:
```bash
npm install parse-entities
```
## Usage
```js
var decode = require('parse-entities')
decode('alpha &amp bravo')
// => alpha & bravo
decode('charlie &copycat; delta')
// => charlie ©cat; delta
decode('echo &copy; foxtrot &#8800; golf &#x1D306; hotel')
// => echo © foxtrot ≠ golf 𝌆 hotel
```
## API
## `parseEntities(value[, options])`
##### `options`
###### `options.additional`
Additional character to accept (`string?`, default: `''`).
This allows other characters, without error, when following an ampersand.
###### `options.attribute`
Whether to parse `value` as an attribute value (`boolean?`, default:
`false`).
###### `options.nonTerminated`
Whether to allow non-terminated entities (`boolean`, default: `true`).
For example, `&copycat` for `©cat`. This behaviour is spec-compliant but
can lead to unexpected results.
###### `options.warning`
Error handler ([`Function?`][warning]).
###### `options.text`
Text handler ([`Function?`][text]).
###### `options.reference`
Reference handler ([`Function?`][reference]).
###### `options.warningContext`
Context used when invoking `warning` (`'*'`, optional).
###### `options.textContext`
Context used when invoking `text` (`'*'`, optional).
###### `options.referenceContext`
Context used when invoking `reference` (`'*'`, optional).
###### `options.position`
Starting `position` of `value` (`Location` or `Position`, optional). Useful
when dealing with values nested in some sort of syntax tree. The default is:
```js
{
start: {line: 1, column: 1, offset: 0},
indent: []
}
```
##### Returns
`string` — Decoded `value`.
### `function warning(reason, position, code)`
Error handler.
##### Context
`this` refers to `warningContext` when given to `parseEntities`.
##### Parameters
###### `reason`
Human-readable reason for triggering a parse error (`string`).
###### `position`
Place at which the parse error occurred (`Position`).
###### `code`
Identifier of reason for triggering a parse error (`number`).
The following codes are used:
| Code | Example | Note |
| ---- | ------------------ | --------------------------------------------- |
| `1` | `foo &amp bar` | Missing semicolon (named) |
| `2` | `foo &#123 bar` | Missing semicolon (numeric) |
| `3` | `Foo &bar baz` | Ampersand did not start a reference |
| `4` | `Foo &#` | Empty reference |
| `5` | `Foo &bar; baz` | Unknown entity |
| `6` | `Foo &#128; baz` | [Disallowed reference][invalid] |
| `7` | `Foo &#xD800; baz` | Prohibited: outside permissible unicode range |
### `function text(value, location)`
Text handler.
##### Context
`this` refers to `textContext` when given to `parseEntities`.
##### Parameters
###### `value`
String of content (`string`).
###### `location`
Location at which `value` starts and ends (`Location`).
### `function reference(value, location, source)`
Character reference handler.
##### Context
`this` refers to `referenceContext` when given to `parseEntities`.
##### Parameters
###### `value`
Decoded character reference (`string`).
###### `location`
Location at which `value` starts and ends (`Location`).
###### `source`
Raw source of the character reference, as it appeared in `value` (`string`).
## Related
* [`stringify-entities`](https://github.com/wooorm/stringify-entities)
— Encode HTML character references
* [`character-entities`](https://github.com/wooorm/character-entities)
— Info on character entities
* [`character-entities-html4`](https://github.com/wooorm/character-entities-html4)
— Info on HTML4 character entities
* [`character-entities-legacy`](https://github.com/wooorm/character-entities-legacy)
— Info on legacy character entities
* [`character-reference-invalid`](https://github.com/wooorm/character-reference-invalid)
— Info on invalid numeric character references
## License
[MIT][license] © [Titus Wormer][author]
<!-- Definitions -->
[build-badge]: https://img.shields.io/travis/wooorm/parse-entities.svg
[build]: https://travis-ci.org/wooorm/parse-entities
[coverage-badge]: https://img.shields.io/codecov/c/github/wooorm/parse-entities.svg
[coverage]: https://codecov.io/github/wooorm/parse-entities
[downloads-badge]: https://img.shields.io/npm/dm/parse-entities.svg
[downloads]: https://www.npmjs.com/package/parse-entities
[size-badge]: https://img.shields.io/bundlephobia/minzip/parse-entities.svg
[size]: https://bundlephobia.com/result?p=parse-entities
[npm]: https://docs.npmjs.com/cli/install
[license]: license
[author]: https://wooorm.com
[warning]: #function-warningreason-position-code
[text]: #function-textvalue-location
[reference]: #function-referencevalue-location-source
[invalid]: https://github.com/wooorm/character-reference-invalid