WIP - add extractor, generate snippet_data
This commit is contained in:
65
node_modules/parse-latin/lib/plugin/break-implicit-sentences.js
generated
vendored
Normal file
65
node_modules/parse-latin/lib/plugin/break-implicit-sentences.js
generated
vendored
Normal file
@ -0,0 +1,65 @@
|
||||
'use strict'
|
||||
|
||||
var toString = require('nlcst-to-string')
|
||||
var modifyChildren = require('unist-util-modify-children')
|
||||
var expressions = require('../expressions')
|
||||
|
||||
module.exports = modifyChildren(breakImplicitSentences)
|
||||
|
||||
// Two or more new line characters.
|
||||
var multiNewLine = expressions.newLineMulti
|
||||
|
||||
// Break a sentence if a white space with more than one new-line is found.
|
||||
/**
 * Split a sentence in two at the first inner white space node that matches
 * `multiNewLine`.
 *
 * The white space node and a new `SentenceNode` holding the remaining
 * children are inserted into `parent` directly after `child`.
 *
 * @param {Object} child - Node being visited; only `SentenceNode`s are handled.
 * @param {number} index - Position of `child` in `parent.children`.
 * @param {Object} parent - Node whose children are being modified.
 * @returns {number|undefined} `index + 1` after a split, otherwise nothing.
 */
function breakImplicitSentences(child, index, parent) {
  var children
  var position
  var length
  var tail
  var head
  var end
  var insertion
  var node

  if (child.type !== 'SentenceNode') {
    return
  }

  children = child.children

  // Ignore first and last child.
  length = children.length - 1
  position = 0

  while (++position < length) {
    node = children[position]

    if (node.type !== 'WhiteSpaceNode' || !multiNewLine.test(toString(node))) {
      continue
    }

    // Everything before the white space stays in `child`; everything after
    // it moves into a new sentence.
    child.children = children.slice(0, position)

    insertion = {
      type: 'SentenceNode',
      children: children.slice(position + 1)
    }

    tail = children[position - 1]
    head = children[position + 1]

    parent.children.splice(index + 1, 0, node, insertion)

    if (child.position && tail.position && head.position) {
      // Remember the old end before shrinking `child`; the new sentence
      // inherits it.
      end = child.position.end

      child.position.end = tail.position.end

      insertion.position = {
        start: head.position.start,
        end: end
      }
    }

    return index + 1
  }
}
|
||||
28
node_modules/parse-latin/lib/plugin/make-final-white-space-siblings.js
generated
vendored
Normal file
28
node_modules/parse-latin/lib/plugin/make-final-white-space-siblings.js
generated
vendored
Normal file
@ -0,0 +1,28 @@
|
||||
'use strict'
|
||||
|
||||
var modifyChildren = require('unist-util-modify-children')
|
||||
|
||||
module.exports = modifyChildren(makeFinalWhiteSpaceSiblings)
|
||||
|
||||
// Move white space ending a paragraph up, so they are the siblings of
|
||||
// paragraphs.
|
||||
/**
 * Move a trailing `WhiteSpaceNode` out of `child` and insert it into `parent`
 * directly after `child`.
 *
 * @param {Object} child - Node being visited.
 * @param {number} index - Position of `child` in `parent.children`.
 * @param {Object} parent - Node whose children are being modified.
 * @returns {number|undefined} `index` when a node was moved (so `child` is
 *   visited again), otherwise nothing.
 */
function makeFinalWhiteSpaceSiblings(child, index, parent) {
  var children = child.children
  var prev

  if (
    children &&
    children.length !== 0 &&
    children[children.length - 1].type === 'WhiteSpaceNode'
  ) {
    parent.children.splice(index + 1, 0, child.children.pop())
    prev = children[children.length - 1]

    // `child` now ends where its new last child ends.
    if (prev && prev.position && child.position) {
      child.position.end = prev.position.end
    }

    // Next, iterate over the current node again.
    return index
  }
}
|
||||
25
node_modules/parse-latin/lib/plugin/make-initial-white-space-siblings.js
generated
vendored
Normal file
25
node_modules/parse-latin/lib/plugin/make-initial-white-space-siblings.js
generated
vendored
Normal file
@ -0,0 +1,25 @@
|
||||
'use strict'
|
||||
|
||||
var visitChildren = require('unist-util-visit-children')
|
||||
|
||||
module.exports = visitChildren(makeInitialWhiteSpaceSiblings)
|
||||
|
||||
// Move white space starting a sentence up, so they are the siblings of
|
||||
// sentences.
|
||||
/**
 * Move a leading `WhiteSpaceNode` out of `child` and insert it into `parent`
 * directly before `child`.
 *
 * @param {Object} child - Node being visited.
 * @param {number} index - Position of `child` in `parent.children`.
 * @param {Object} parent - Node whose children are being modified.
 * @returns {undefined}
 */
function makeInitialWhiteSpaceSiblings(child, index, parent) {
  var children = child.children
  var next

  if (
    children &&
    children.length !== 0 &&
    children[0].type === 'WhiteSpaceNode'
  ) {
    parent.children.splice(index, 0, children.shift())
    next = children[0]

    // `child` now starts where its new first child starts.
    if (next && next.position && child.position) {
      child.position.start = next.position.start
    }
  }
}
|
||||
52
node_modules/parse-latin/lib/plugin/merge-affix-exceptions.js
generated
vendored
Normal file
52
node_modules/parse-latin/lib/plugin/merge-affix-exceptions.js
generated
vendored
Normal file
@ -0,0 +1,52 @@
|
||||
'use strict'
|
||||
|
||||
var toString = require('nlcst-to-string')
|
||||
var modifyChildren = require('unist-util-modify-children')
|
||||
|
||||
module.exports = modifyChildren(mergeAffixExceptions)
|
||||
|
||||
// Merge a sentence into its previous sentence, when the sentence starts with a
|
||||
// comma.
|
||||
/**
 * Merge `child` into its previous sibling when the first symbol or
 * punctuation node in `child` (found before any word) is `,` or `;`.
 *
 * @param {Object} child - Node being visited.
 * @param {number} index - Position of `child` in `parent.children`.
 * @param {Object} parent - Node whose children are being modified.
 * @returns {number|undefined} `index` after a merge, otherwise nothing.
 */
function mergeAffixExceptions(child, index, parent) {
  var children = child.children
  var node
  var position
  var value
  var previousChild

  if (!children || children.length === 0 || index === 0) {
    return
  }

  position = -1

  while (children[++position]) {
    node = children[position]

    // A word before any punctuation: nothing to merge.
    if (node.type === 'WordNode') {
      return
    }

    if (node.type === 'SymbolNode' || node.type === 'PunctuationNode') {
      value = toString(node)

      if (value !== ',' && value !== ';') {
        return
      }

      previousChild = parent.children[index - 1]

      previousChild.children = previousChild.children.concat(children)

      // Update position.
      if (previousChild.position && child.position) {
        previousChild.position.end = child.position.end
      }

      parent.children.splice(index, 1)

      // Next, iterate over the node *now* at the current position.
      return index
    }
  }
}
|
||||
46
node_modules/parse-latin/lib/plugin/merge-affix-symbol.js
generated
vendored
Normal file
46
node_modules/parse-latin/lib/plugin/merge-affix-symbol.js
generated
vendored
Normal file
@ -0,0 +1,46 @@
|
||||
'use strict'
|
||||
|
||||
var toString = require('nlcst-to-string')
|
||||
var modifyChildren = require('unist-util-modify-children')
|
||||
var expressions = require('../expressions')
|
||||
|
||||
module.exports = modifyChildren(mergeAffixSymbol)
|
||||
|
||||
// Closing or final punctuation, or terminal markers that should still be
|
||||
// included in the previous sentence, even though they follow the sentence’s
|
||||
// terminal marker.
|
||||
var affixSymbol = expressions.affixSymbol
|
||||
|
||||
// Move certain punctuation following a terminal marker (thus in the next
|
||||
// sentence) to the previous sentence.
|
||||
/**
 * Move the first child of `child` to the end of the previous sibling when it
 * is a symbol or punctuation node matching `affixSymbol`.
 *
 * @param {Object} child - Node being visited.
 * @param {number} index - Position of `child` in `parent.children`.
 * @param {Object} parent - Node whose children are being modified.
 * @returns {number|undefined} `index - 1` after a move, otherwise nothing.
 */
function mergeAffixSymbol(child, index, parent) {
  var children = child.children
  var first
  var second
  var prev

  if (children && children.length !== 0 && index !== 0) {
    first = children[0]
    second = children[1]
    prev = parent.children[index - 1]

    if (
      (first.type === 'SymbolNode' || first.type === 'PunctuationNode') &&
      affixSymbol.test(toString(first))
    ) {
      prev.children.push(children.shift())

      // Update position.
      if (first.position && prev.position) {
        prev.position.end = first.position.end
      }

      if (second && second.position && child.position) {
        child.position.start = second.position.start
      }

      // Next, iterate over the previous node again.
      return index - 1
    }
  }
}
|
||||
44
node_modules/parse-latin/lib/plugin/merge-final-word-symbol.js
generated
vendored
Normal file
44
node_modules/parse-latin/lib/plugin/merge-final-word-symbol.js
generated
vendored
Normal file
@ -0,0 +1,44 @@
|
||||
'use strict'
|
||||
|
||||
var toString = require('nlcst-to-string')
|
||||
var modifyChildren = require('unist-util-modify-children')
|
||||
|
||||
module.exports = modifyChildren(mergeFinalWordSymbol)
|
||||
|
||||
// Merge certain punctuation marks into their preceding words.
|
||||
/**
 * Merge a `-` symbol or punctuation node into the preceding word when it is
 * not followed by a word.
 *
 * @param {Object} child - Node being visited.
 * @param {number} index - Position of `child` in `parent.children`.
 * @param {Object} parent - Node whose children are being modified.
 * @returns {number|undefined} `index` after a merge, otherwise nothing.
 */
function mergeFinalWordSymbol(child, index, parent) {
  var children
  var prev
  var next

  if (
    index !== 0 &&
    (child.type === 'SymbolNode' || child.type === 'PunctuationNode') &&
    toString(child) === '-'
  ) {
    children = parent.children

    prev = children[index - 1]
    next = children[index + 1]

    if (
      (!next || next.type !== 'WordNode') &&
      (prev && prev.type === 'WordNode')
    ) {
      // Remove `child` from parent.
      children.splice(index, 1)

      // Add the punctuation mark at the end of the previous node.
      prev.children.push(child)

      // Update position.
      if (prev.position && child.position) {
        prev.position.end = child.position.end
      }

      // Next, iterate over the node *now* at the current position (which was
      // the next node).
      return index
    }
  }
}
|
||||
32
node_modules/parse-latin/lib/plugin/merge-initial-digit-sentences.js
generated
vendored
Normal file
32
node_modules/parse-latin/lib/plugin/merge-initial-digit-sentences.js
generated
vendored
Normal file
@ -0,0 +1,32 @@
|
||||
'use strict'
|
||||
|
||||
var toString = require('nlcst-to-string')
|
||||
var modifyChildren = require('unist-util-modify-children')
|
||||
var expressions = require('../expressions')
|
||||
|
||||
module.exports = modifyChildren(mergeInitialDigitSentences)
|
||||
|
||||
// Initial digit (see `expressions.digitStart`).
|
||||
var digit = expressions.digitStart
|
||||
|
||||
/**
 * Merge a sentence into its previous sentence, when the sentence's first
 * child is a word matching `digit` (`expressions.digitStart`).
 *
 * @param {Object} child - Node being visited.
 * @param {number} index - Position of `child` in `parent.children`.
 * @param {Object} parent - Node whose children are being modified.
 * @returns {number|undefined} `index` after a merge, otherwise nothing.
 */
function mergeInitialDigitSentences(child, index, parent) {
  var children = child.children
  var siblings = parent.children
  var prev = siblings[index - 1]
  var head = children[0]

  if (prev && head && head.type === 'WordNode' && digit.test(toString(head))) {
    prev.children = prev.children.concat(children)
    siblings.splice(index, 1)

    // Update position.
    if (prev.position && child.position) {
      prev.position.end = child.position.end
    }

    // Next, iterate over the node *now* at the current position.
    return index
  }
}
|
||||
54
node_modules/parse-latin/lib/plugin/merge-initial-lower-case-letter-sentences.js
generated
vendored
Normal file
54
node_modules/parse-latin/lib/plugin/merge-initial-lower-case-letter-sentences.js
generated
vendored
Normal file
@ -0,0 +1,54 @@
|
||||
'use strict'
|
||||
|
||||
var toString = require('nlcst-to-string')
|
||||
var modifyChildren = require('unist-util-modify-children')
|
||||
var expressions = require('../expressions')
|
||||
|
||||
module.exports = modifyChildren(mergeInitialLowerCaseLetterSentences)
|
||||
|
||||
// Initial lowercase letter.
|
||||
var lowerInitial = expressions.lowerInitial
|
||||
|
||||
// Merge a sentence into its previous sentence, when the sentence starts with a
|
||||
// lower case letter.
|
||||
/**
 * Merge `child` into its previous sibling when the first word in `child`
 * matches `lowerInitial` and no symbol or punctuation node precedes it.
 *
 * @param {Object} child - Node being visited.
 * @param {number} index - Position of `child` in `parent.children`.
 * @param {Object} parent - Node whose children are being modified.
 * @returns {number|undefined} `index` after a merge, otherwise nothing.
 */
function mergeInitialLowerCaseLetterSentences(child, index, parent) {
  var children = child.children
  var position
  var node
  var siblings
  var prev

  if (children && children.length !== 0 && index !== 0) {
    position = -1

    while (children[++position]) {
      node = children[position]

      if (node.type === 'WordNode') {
        if (!lowerInitial.test(toString(node))) {
          return
        }

        siblings = parent.children

        prev = siblings[index - 1]

        prev.children = prev.children.concat(children)

        siblings.splice(index, 1)

        // Update position.
        if (prev.position && child.position) {
          prev.position.end = child.position.end
        }

        // Next, iterate over the node *now* at the current position.
        return index
      }

      // Symbols or punctuation before the first word: leave the sentence.
      if (node.type === 'SymbolNode' || node.type === 'PunctuationNode') {
        return
      }
    }
  }
}
|
||||
46
node_modules/parse-latin/lib/plugin/merge-initial-word-symbol.js
generated
vendored
Normal file
46
node_modules/parse-latin/lib/plugin/merge-initial-word-symbol.js
generated
vendored
Normal file
@ -0,0 +1,46 @@
|
||||
'use strict'
|
||||
|
||||
var toString = require('nlcst-to-string')
|
||||
var modifyChildren = require('unist-util-modify-children')
|
||||
|
||||
module.exports = modifyChildren(mergeInitialWordSymbol)
|
||||
|
||||
// Merge certain punctuation marks into their following words.
|
||||
/**
 * Merge an `&` symbol or punctuation node into the following word, unless it
 * is directly preceded by a word.
 *
 * @param {Object} child - Node being visited.
 * @param {number} index - Position of `child` in `parent.children`.
 * @param {Object} parent - Node whose children are being modified.
 * @returns {number|undefined} `index - 1` after a merge, otherwise nothing.
 */
function mergeInitialWordSymbol(child, index, parent) {
  var children
  var next

  if (
    (child.type !== 'SymbolNode' && child.type !== 'PunctuationNode') ||
    toString(child) !== '&'
  ) {
    return
  }

  children = parent.children

  next = children[index + 1]

  // If either a previous word, or no following word, exists, exit early.
  if (
    (index !== 0 && children[index - 1].type === 'WordNode') ||
    !(next && next.type === 'WordNode')
  ) {
    return
  }

  // Remove `child` from parent.
  children.splice(index, 1)

  // Add the punctuation mark at the start of the next node.
  next.children.unshift(child)

  // Update position.
  if (next.position && child.position) {
    next.position.start = child.position.start
  }

  // Next, iterate over the node at the previous position, as it's now adjacent
  // to a following word.
  return index - 1
}
|
||||
75
node_modules/parse-latin/lib/plugin/merge-initialisms.js
generated
vendored
Normal file
75
node_modules/parse-latin/lib/plugin/merge-initialisms.js
generated
vendored
Normal file
@ -0,0 +1,75 @@
|
||||
'use strict'
|
||||
|
||||
var toString = require('nlcst-to-string')
|
||||
var modifyChildren = require('unist-util-modify-children')
|
||||
var expressions = require('../expressions')
|
||||
|
||||
module.exports = modifyChildren(mergeInitialisms)
|
||||
|
||||
var numerical = expressions.numerical
|
||||
|
||||
// Merge initialisms.
|
||||
/**
 * Merge a `.` into the preceding word when that word's children alternate
 * single-character values and `.` nodes and are not all matched by
 * `numerical`.
 *
 * @param {Object} child - Node being visited.
 * @param {number} index - Position of `child` in `parent.children`.
 * @param {Object} parent - Node whose children are being modified.
 * @returns {number|undefined} `index` after a merge, otherwise nothing.
 */
function mergeInitialisms(child, index, parent) {
  var siblings
  var prev
  var children
  var length
  var position
  var otherChild
  var isAllDigits
  var value

  if (index !== 0 && toString(child) === '.') {
    siblings = parent.children

    prev = siblings[index - 1]
    children = prev.children

    length = children && children.length

    // Only words with an odd number (3 or more) of children can have the
    // shape `x . y`.
    if (prev.type === 'WordNode' && length !== 1 && length % 2 !== 0) {
      position = length

      isAllDigits = true

      // Walk the word's children from last to first.
      while (children[--position]) {
        otherChild = children[position]

        value = toString(otherChild)

        if (position % 2 === 0) {
          // Initialisms consist of one character values.
          if (value.length > 1) {
            return
          }

          if (!numerical.test(value)) {
            isAllDigits = false
          }
        } else if (value !== '.') {
          if (position < length - 2) {
            break
          } else {
            return
          }
        }
      }

      if (!isAllDigits) {
        // Remove `child` from parent.
        siblings.splice(index, 1)

        // Add child to the previous children.
        children.push(child)

        // Update position.
        if (prev.position && child.position) {
          prev.position.end = child.position.end
        }

        // Next, iterate over the node *now* at the current position.
        return index
      }
    }
  }
}
|
||||
57
node_modules/parse-latin/lib/plugin/merge-inner-word-slash.js
generated
vendored
Normal file
57
node_modules/parse-latin/lib/plugin/merge-inner-word-slash.js
generated
vendored
Normal file
@ -0,0 +1,57 @@
|
||||
'use strict'
|
||||
|
||||
var toString = require('nlcst-to-string')
|
||||
var modifyChildren = require('unist-util-modify-children')
|
||||
|
||||
module.exports = modifyChildren(mergeInnerWordSlash)
|
||||
|
||||
var slash = '/'
|
||||
|
||||
// Merge words joined by certain punctuation marks.
|
||||
/**
 * Merge a slash, and the word following it (if any), into the preceding word
 * when the surrounding words are each shorter than three characters.
 *
 * @param {Object} child - Node being visited.
 * @param {number} index - Position of `child` in `parent.children`.
 * @param {Object} parent - Node whose children are being modified.
 * @returns {number|undefined} `index` after a merge, otherwise nothing.
 */
function mergeInnerWordSlash(child, index, parent) {
  var siblings = parent.children
  var prev
  var next
  var prevValue
  var nextValue
  var queue
  var tail
  var count

  prev = siblings[index - 1]
  next = siblings[index + 1]

  if (
    prev &&
    prev.type === 'WordNode' &&
    (child.type === 'SymbolNode' || child.type === 'PunctuationNode') &&
    toString(child) === slash
  ) {
    prevValue = toString(prev)
    tail = child
    queue = [child]
    count = 1

    // A following word is merged too; its children are queued after the
    // slash.
    if (next && next.type === 'WordNode') {
      nextValue = toString(next)
      tail = next
      queue = queue.concat(next.children)
      count++
    }

    if (prevValue.length < 3 && (!nextValue || nextValue.length < 3)) {
      // Add all found tokens to `prev`s children.
      prev.children = prev.children.concat(queue)

      siblings.splice(index, count)

      // Update position.
      if (prev.position && tail.position) {
        prev.position.end = tail.position.end
      }

      // Next, iterate over the node *now* at the current position.
      return index
    }
  }
}
|
||||
82
node_modules/parse-latin/lib/plugin/merge-inner-word-symbol.js
generated
vendored
Normal file
82
node_modules/parse-latin/lib/plugin/merge-inner-word-symbol.js
generated
vendored
Normal file
@ -0,0 +1,82 @@
|
||||
'use strict'
|
||||
|
||||
var toString = require('nlcst-to-string')
|
||||
var modifyChildren = require('unist-util-modify-children')
|
||||
var expressions = require('../expressions')
|
||||
|
||||
module.exports = modifyChildren(mergeInnerWordSymbol)
|
||||
|
||||
// Symbols part of surrounding words.
|
||||
var wordSymbolInner = expressions.wordSymbolInner
|
||||
|
||||
// Merge words joined by certain punctuation marks.
|
||||
/**
 * Merge a run of inner-word symbols and the words they join into the word
 * preceding `child`.
 *
 * @param {Object} child - Node being visited; only symbol and punctuation
 *   nodes are handled.
 * @param {number} index - Position of `child` in `parent.children`.
 * @param {Object} parent - Node whose children are being modified.
 * @returns {number|undefined} `index` after a merge, otherwise nothing.
 */
function mergeInnerWordSymbol(child, index, parent) {
  var siblings
  var sibling
  var prev
  var last
  var position
  var tokens
  var queue

  if (
    index === 0 ||
    (child.type !== 'SymbolNode' && child.type !== 'PunctuationNode')
  ) {
    return
  }

  siblings = parent.children
  prev = siblings[index - 1]

  if (!prev || prev.type !== 'WordNode') {
    return
  }

  position = index - 1

  tokens = []
  queue = []

  // - If a token which is neither word nor inner word symbol is found,
  //   the loop is broken
  // - If an inner word symbol is found, it’s queued
  // - If a word is found, it’s queued (and the queue stored and emptied)
  while (siblings[++position]) {
    sibling = siblings[position]

    if (sibling.type === 'WordNode') {
      tokens = tokens.concat(queue, sibling.children)

      queue = []
    } else if (
      (sibling.type === 'SymbolNode' ||
        sibling.type === 'PunctuationNode') &&
      wordSymbolInner.test(toString(sibling))
    ) {
      queue.push(sibling)
    } else {
      break
    }
  }

  if (tokens.length === 0) {
    return
  }

  // Symbols still queued were not followed by a word, so they stay in
  // `parent`: step `position` back over them.
  position -= queue.length

  // Remove every (one or more) inner-word punctuation marks and children
  // of words.
  siblings.splice(index, position - index)

  // Add all found tokens to `prev`s children.
  prev.children = prev.children.concat(tokens)

  last = tokens[tokens.length - 1]

  // Update position.
  if (prev.position && last.position) {
    prev.position.end = last.position.end
  }

  // Next, iterate over the node *now* at the current position.
  return index
}
|
||||
52
node_modules/parse-latin/lib/plugin/merge-non-word-sentences.js
generated
vendored
Normal file
52
node_modules/parse-latin/lib/plugin/merge-non-word-sentences.js
generated
vendored
Normal file
@ -0,0 +1,52 @@
|
||||
'use strict'
|
||||
|
||||
var modifyChildren = require('unist-util-modify-children')
|
||||
|
||||
module.exports = modifyChildren(mergeNonWordSentences)
|
||||
|
||||
// Merge a sentence into the following sentence, when the sentence does not
|
||||
// contain word tokens.
|
||||
/**
 * Merge a sentence that has no `WordNode` children into the previous
 * sibling, or, when there is no previous sibling, into the next one.
 *
 * @param {Object} child - Node being visited; must have `children`.
 * @param {number} index - Position of `child` in `parent.children`.
 * @param {Object} parent - Node whose children are being modified.
 * @returns {number|undefined} `index` after merging into the previous
 *   sibling, otherwise nothing.
 */
function mergeNonWordSentences(child, index, parent) {
  var children = child.children
  var position = -1
  var prev
  var next

  // Any word means this is a regular sentence: leave it alone.
  while (children[++position]) {
    if (children[position].type === 'WordNode') {
      return
    }
  }

  prev = parent.children[index - 1]

  if (prev) {
    prev.children = prev.children.concat(children)

    // Remove the child.
    parent.children.splice(index, 1)

    // Patch position.
    if (prev.position && child.position) {
      prev.position.end = child.position.end
    }

    // Next, iterate over the node *now* at the current position (which was the
    // next node).
    return index
  }

  next = parent.children[index + 1]

  if (next) {
    next.children = children.concat(next.children)

    // Patch position.
    if (next.position && child.position) {
      next.position.start = child.position.start
    }

    // Remove the child.
    parent.children.splice(index, 1)
  }
}
|
||||
75
node_modules/parse-latin/lib/plugin/merge-prefix-exceptions.js
generated
vendored
Normal file
75
node_modules/parse-latin/lib/plugin/merge-prefix-exceptions.js
generated
vendored
Normal file
@ -0,0 +1,75 @@
|
||||
'use strict'
|
||||
|
||||
var toString = require('nlcst-to-string')
|
||||
var modifyChildren = require('unist-util-modify-children')
|
||||
|
||||
module.exports = modifyChildren(mergePrefixExceptions)
|
||||
|
||||
// Blacklist of full stop characters that should not be treated as terminal
|
||||
// sentence markers: A case-insensitive abbreviation.
|
||||
var abbreviationPrefix = new RegExp(
  '^(' +
    // One to three digits.
    '[0-9]{1,3}|' +
    // A single (lowercase) letter.
    '[a-z]|' +
    // Common Latin Abbreviations:
    // Based on: <https://en.wikipedia.org/wiki/List_of_Latin_abbreviations>.
    // Where only the abbreviations written without joining full stops,
    // but with a final full stop, were extracted.
    //
    // circa, capitulus, confer, compare, centum weight, eadem, (et) alii,
    // et cetera, floruit, foliis, ibidem, idem, nemine && contradicente,
    // opere && citato, (per) cent, (per) procurationem, (pro) tempore,
    // sic erat scriptum, (et) sequentia, statim, videlicet.
    'al|ca|cap|cca|cent|cf|cit|con|cp|cwt|ead|etc|ff|' +
    'fl|ibid|id|nem|op|pro|seq|sic|stat|tem|viz' +
    ')$'
)
|
||||
|
||||
// Merge a sentence into its next sentence, when the sentence ends with a
|
||||
// certain word.
|
||||
/**
 * When a sentence ends in a word matching `abbreviationPrefix` followed by a
 * `.`, merge the full stop into that word and merge the next sibling's
 * children into this sentence.
 *
 * @param {Object} child - Node being visited.
 * @param {number} index - Position of `child` in `parent.children`.
 * @param {Object} parent - Node whose children are being modified.
 * @returns {number|undefined} `index - 1` when a next sibling was merged,
 *   otherwise nothing.
 */
function mergePrefixExceptions(child, index, parent) {
  var children = child.children
  var period
  var node
  var next

  if (children && children.length > 1) {
    period = children[children.length - 1]

    if (period && toString(period) === '.') {
      node = children[children.length - 2]

      if (
        node &&
        node.type === 'WordNode' &&
        abbreviationPrefix.test(toString(node).toLowerCase())
      ) {
        // Merge period into abbreviation.
        node.children.push(period)
        children.pop()

        // Update position.
        if (period.position && node.position) {
          node.position.end = period.position.end
        }

        // Merge sentences.
        next = parent.children[index + 1]

        if (next) {
          child.children = children.concat(next.children)

          parent.children.splice(index + 1, 1)

          // Update position.
          if (next.position && child.position) {
            child.position.end = next.position.end
          }

          // NOTE(review): the original comment here read "iterate over the
          // current node again", but sibling plugins describe `index - 1` as
          // revisiting the *previous* node — confirm which is intended.
          return index - 1
        }
      }
    }
  }
}
|
||||
99
node_modules/parse-latin/lib/plugin/merge-remaining-full-stops.js
generated
vendored
Normal file
99
node_modules/parse-latin/lib/plugin/merge-remaining-full-stops.js
generated
vendored
Normal file
@ -0,0 +1,99 @@
|
||||
'use strict'
|
||||
|
||||
var toString = require('nlcst-to-string')
|
||||
var visitChildren = require('unist-util-visit-children')
|
||||
var expressions = require('../expressions')
|
||||
|
||||
module.exports = visitChildren(mergeRemainingFullStops)
|
||||
|
||||
// Expression used to recognise terminal markers (see `../expressions`).
|
||||
var terminalMarker = expressions.terminalMarker
|
||||
|
||||
// Merge non-terminal-marker full stops into the previous word (if available),
|
||||
// or the next word (if available).
|
||||
/**
 * Merge non-terminal-marker full stops into the previous word (if available),
 * or the next word (if available).
 *
 * Children are walked from last to first; the first terminal marker found
 * (or the first word, if it comes earlier) is left in place.
 *
 * @param {Object} child - Node being visited; must have `children`.
 * @returns {undefined}
 */
function mergeRemainingFullStops(child) {
  var children = child.children
  var position = children.length
  var hasFoundDelimiter = false
  var grandchild
  var prev
  var next
  var nextNext

  while (children[--position]) {
    grandchild = children[position]

    if (
      grandchild.type !== 'SymbolNode' &&
      grandchild.type !== 'PunctuationNode'
    ) {
      // This is a sentence without terminal marker, so we 'fool' the code to
      // make it think we have found one.
      if (grandchild.type === 'WordNode') {
        hasFoundDelimiter = true
      }

      continue
    }

    // Skip this token when it is not a terminal marker.
    if (!terminalMarker.test(toString(grandchild))) {
      continue
    }

    // Ignore the first terminal marker found (starting at the end), as it
    // should not be merged.
    if (!hasFoundDelimiter) {
      hasFoundDelimiter = true

      continue
    }

    // Only merge a single full stop.
    if (toString(grandchild) !== '.') {
      continue
    }

    prev = children[position - 1]
    next = children[position + 1]

    if (prev && prev.type === 'WordNode') {
      nextNext = children[position + 2]

      // Continue when the full stop is followed by a space and another full
      // stop, such as: `{.} .`
      if (
        next &&
        nextNext &&
        next.type === 'WhiteSpaceNode' &&
        toString(nextNext) === '.'
      ) {
        continue
      }

      // Remove the full stop from the sentence.
      children.splice(position, 1)

      // Add the punctuation mark at the end of the previous node.
      prev.children.push(grandchild)

      // Update position.
      if (grandchild.position && prev.position) {
        prev.position.end = grandchild.position.end
      }

      // `prev` (now holding the full stop) is skipped by the next step.
      position--
    } else if (next && next.type === 'WordNode') {
      // Remove the full stop from the sentence.
      children.splice(position, 1)

      // Add the punctuation mark at the start of the next node.
      next.children.unshift(grandchild)

      if (grandchild.position && next.position) {
        next.position.start = grandchild.position.start
      }
    }
  }
}
|
||||
33
node_modules/parse-latin/lib/plugin/merge-words.js
generated
vendored
Normal file
33
node_modules/parse-latin/lib/plugin/merge-words.js
generated
vendored
Normal file
@ -0,0 +1,33 @@
|
||||
'use strict'
|
||||
|
||||
var modifyChildren = require('unist-util-modify-children')
|
||||
|
||||
module.exports = modifyChildren(mergeFinalWordSymbol)
|
||||
|
||||
// Merge multiple words. This merges the children of adjacent words, something
|
||||
// which should not occur naturally by parse-latin, but might happen when custom
|
||||
// tokens were passed in.
|
||||
/**
 * Merge the children of a word directly following `child` into `child`, when
 * both are `WordNode`s.
 *
 * NOTE(review): the function name looks copied from the
 * merge-final-word-symbol plugin; it is kept unchanged here.
 *
 * @param {Object} child - Node being visited.
 * @param {number} index - Position of `child` in `parent.children`.
 * @param {Object} parent - Node whose children are being modified.
 * @returns {number|undefined} `index` after a merge, otherwise nothing.
 */
function mergeFinalWordSymbol(child, index, parent) {
  var siblings = parent.children
  var next

  if (child.type === 'WordNode') {
    next = siblings[index + 1]

    if (next && next.type === 'WordNode') {
      // Remove `next` from parent.
      siblings.splice(index + 1, 1)

      // Append the children of `next` to the current word.
      child.children = child.children.concat(next.children)

      // Update position.
      if (next.position && child.position) {
        child.position.end = next.position.end
      }

      // Next, re-iterate the current node.
      return index
    }
  }
}
|
||||
34
node_modules/parse-latin/lib/plugin/patch-position.js
generated
vendored
Normal file
34
node_modules/parse-latin/lib/plugin/patch-position.js
generated
vendored
Normal file
@ -0,0 +1,34 @@
|
||||
'use strict'
|
||||
|
||||
var visitChildren = require('unist-util-visit-children')
|
||||
|
||||
module.exports = visitChildren(patchPosition)
|
||||
|
||||
// Patch the position on a parent node based on its first and last child.
|
||||
/**
 * Patch the position on a parent node based on its first and last child.
 *
 * `node.position.start` is only set while visiting the first child, and
 * `node.position.end` only while visiting the last; values already present
 * on `node` are kept.
 *
 * @param {Object} child - Child being visited.
 * @param {number} index - Position of `child` in `node.children`.
 * @param {Object} node - Parent whose position is patched.
 * @returns {undefined}
 */
function patchPosition(child, index, node) {
  var siblings = node.children

  if (!child.position) {
    return
  }

  if (
    index === 0 &&
    (!node.position || /* istanbul ignore next */ !node.position.start)
  ) {
    patch(node)
    node.position.start = child.position.start
  }

  if (index === siblings.length - 1 && (!node.position || !node.position.end)) {
    patch(node)
    node.position.end = child.position.end
  }
}

/**
 * Add a `position` object when it does not yet exist on `node`.
 *
 * @param {Object} node - Node to patch.
 * @returns {undefined}
 */
function patch(node) {
  if (!node.position) {
    node.position = {}
  }
}
|
||||
16
node_modules/parse-latin/lib/plugin/remove-empty-nodes.js
generated
vendored
Normal file
16
node_modules/parse-latin/lib/plugin/remove-empty-nodes.js
generated
vendored
Normal file
@ -0,0 +1,16 @@
|
||||
'use strict'
|
||||
|
||||
var modifyChildren = require('unist-util-modify-children')
|
||||
|
||||
module.exports = modifyChildren(removeEmptyNodes)
|
||||
|
||||
// Remove empty children.
|
||||
/**
 * Remove `child` from `parent` when it has a `children` list that is empty.
 *
 * @param {Object} child - Node being visited.
 * @param {number} index - Position of `child` in `parent.children`.
 * @param {Object} parent - Node whose children are being modified.
 * @returns {number|undefined} `index` after a removal, otherwise nothing.
 */
function removeEmptyNodes(child, index, parent) {
  if ('children' in child && child.children.length === 0) {
    parent.children.splice(index, 1)

    // Next, iterate over the node *now* at the current position (which was the
    // next node).
    return index
  }
}
|
||||
Reference in New Issue
Block a user