WIP - add extractor, generate snippet_data

2019-08-20 15:52:05 +02:00
parent 88084d3d30
commit cc8f1d8a7a
37396 changed files with 4588842 additions and 133 deletions
--- a/node_modules/parse-latin/lib/plugin/break-implicit-sentences.js
+++ b/node_modules/parse-latin/lib/plugin/break-implicit-sentences.js
@ -0,0 +1,65 @@
+'use strict'
+
+var toString = require('nlcst-to-string')
+var modifyChildren = require('unist-util-modify-children')
+var expressions = require('../expressions')
+
+module.exports = modifyChildren(breakImplicitSentences)
+
+// Two or more new line characters.
+var multiNewLine = expressions.newLineMulti
+
+// Break a sentence if a white space with more than one new-line is found.
+function breakImplicitSentences(child, index, parent) {
+  var children
+  var position
+  var length
+  var tail
+  var head
+  var end
+  var insertion
+  var node
+
+  if (child.type !== 'SentenceNode') {
+    return
+  }
+
+  children = child.children
+
+  // Ignore first and last child.
+  length = children.length - 1
+  position = 0
+
+  while (++position < length) {
+    node = children[position]
+
+    if (node.type !== 'WhiteSpaceNode' || !multiNewLine.test(toString(node))) {
+      continue
+    }
+
+    child.children = children.slice(0, position)
+
+    insertion = {
+      type: 'SentenceNode',
+      children: children.slice(position + 1)
+    }
+
+    tail = children[position - 1]
+    head = children[position + 1]
+
+    parent.children.splice(index + 1, 0, node, insertion)
+
+    if (child.position && tail.position && head.position) {
+      end = child.position.end
+
+      child.position.end = tail.position.end
+
+      insertion.position = {
+        start: head.position.start,
+        end: end
+      }
+    }
+
+    return index + 1
+  }
+}
--- a/node_modules/parse-latin/lib/plugin/make-final-white-space-siblings.js
+++ b/node_modules/parse-latin/lib/plugin/make-final-white-space-siblings.js
@ -0,0 +1,28 @@
+'use strict'
+
+var modifyChildren = require('unist-util-modify-children')
+
+module.exports = modifyChildren(makeFinalWhiteSpaceSiblings)
+
+// Move white space ending a paragraph up, so they are the siblings of
+// paragraphs.
+function makeFinalWhiteSpaceSiblings(child, index, parent) {
+  var children = child.children
+  var prev
+
+  if (
+    children &&
+    children.length !== 0 &&
+    children[children.length - 1].type === 'WhiteSpaceNode'
+  ) {
+    parent.children.splice(index + 1, 0, child.children.pop())
+    prev = children[children.length - 1]
+
+    if (prev && prev.position && child.position) {
+      child.position.end = prev.position.end
+    }
+
+    // Next, iterate over the current node again.
+    return index
+  }
+}
--- a/node_modules/parse-latin/lib/plugin/make-initial-white-space-siblings.js
+++ b/node_modules/parse-latin/lib/plugin/make-initial-white-space-siblings.js
@ -0,0 +1,25 @@
+'use strict'
+
+var visitChildren = require('unist-util-visit-children')
+
+module.exports = visitChildren(makeInitialWhiteSpaceSiblings)
+
+// Move white space starting a sentence up, so they are the siblings of
+// sentences.
+function makeInitialWhiteSpaceSiblings(child, index, parent) {
+  var children = child.children
+  var next
+
+  if (
+    children &&
+    children.length !== 0 &&
+    children[0].type === 'WhiteSpaceNode'
+  ) {
+    parent.children.splice(index, 0, children.shift())
+    next = children[0]
+
+    if (next && next.position && child.position) {
+      child.position.start = next.position.start
+    }
+  }
+}
--- a/node_modules/parse-latin/lib/plugin/merge-affix-exceptions.js
+++ b/node_modules/parse-latin/lib/plugin/merge-affix-exceptions.js
@ -0,0 +1,52 @@
+'use strict'
+
+var toString = require('nlcst-to-string')
+var modifyChildren = require('unist-util-modify-children')
+
+module.exports = modifyChildren(mergeAffixExceptions)
+
+// Merge a sentence into its previous sentence, when the sentence starts with a
+// comma.
+function mergeAffixExceptions(child, index, parent) {
+  var children = child.children
+  var node
+  var position
+  var value
+  var previousChild
+
+  if (!children || children.length === 0 || index === 0) {
+    return
+  }
+
+  position = -1
+
+  while (children[++position]) {
+    node = children[position]
+
+    if (node.type === 'WordNode') {
+      return
+    }
+
+    if (node.type === 'SymbolNode' || node.type === 'PunctuationNode') {
+      value = toString(node)
+
+      if (value !== ',' && value !== ';') {
+        return
+      }
+
+      previousChild = parent.children[index - 1]
+
+      previousChild.children = previousChild.children.concat(children)
+
+      // Update position.
+      if (previousChild.position && child.position) {
+        previousChild.position.end = child.position.end
+      }
+
+      parent.children.splice(index, 1)
+
+      // Next, iterate over the node *now* at the current position.
+      return index
+    }
+  }
+}
--- a/node_modules/parse-latin/lib/plugin/merge-affix-symbol.js
+++ b/node_modules/parse-latin/lib/plugin/merge-affix-symbol.js
@ -0,0 +1,46 @@
+'use strict'
+
+var toString = require('nlcst-to-string')
+var modifyChildren = require('unist-util-modify-children')
+var expressions = require('../expressions')
+
+module.exports = modifyChildren(mergeAffixSymbol)
+
+// Closing or final punctuation, or terminal markers that should still be
+// included in the previous sentence, even though they follow the sentence’s
+// terminal marker.
+var affixSymbol = expressions.affixSymbol
+
+// Move certain punctuation following a terminal marker (thus in the next
+// sentence) to the previous sentence.
+function mergeAffixSymbol(child, index, parent) {
+  var children = child.children
+  var first
+  var second
+  var prev
+
+  if (children && children.length !== 0 && index !== 0) {
+    first = children[0]
+    second = children[1]
+    prev = parent.children[index - 1]
+
+    if (
+      (first.type === 'SymbolNode' || first.type === 'PunctuationNode') &&
+      affixSymbol.test(toString(first))
+    ) {
+      prev.children.push(children.shift())
+
+      // Update position.
+      if (first.position && prev.position) {
+        prev.position.end = first.position.end
+      }
+
+      if (second && second.position && child.position) {
+        child.position.start = second.position.start
+      }
+
+      // Next, iterate over the previous node again.
+      return index - 1
+    }
+  }
+}
--- a/node_modules/parse-latin/lib/plugin/merge-final-word-symbol.js
+++ b/node_modules/parse-latin/lib/plugin/merge-final-word-symbol.js
@ -0,0 +1,44 @@
+'use strict'
+
+var toString = require('nlcst-to-string')
+var modifyChildren = require('unist-util-modify-children')
+
+module.exports = modifyChildren(mergeFinalWordSymbol)
+
+// Merge certain punctuation marks into their preceding words.
+function mergeFinalWordSymbol(child, index, parent) {
+  var children
+  var prev
+  var next
+
+  if (
+    index !== 0 &&
+    (child.type === 'SymbolNode' || child.type === 'PunctuationNode') &&
+    toString(child) === '-'
+  ) {
+    children = parent.children
+
+    prev = children[index - 1]
+    next = children[index + 1]
+
+    if (
+      (!next || next.type !== 'WordNode') &&
+      (prev && prev.type === 'WordNode')
+    ) {
+      // Remove `child` from parent.
+      children.splice(index, 1)
+
+      // Add the punctuation mark at the end of the previous node.
+      prev.children.push(child)
+
+      // Update position.
+      if (prev.position && child.position) {
+        prev.position.end = child.position.end
+      }
+
+      // Next, iterate over the node *now* at the current position (which was
+      // the next node).
+      return index
+    }
+  }
+}
--- a/node_modules/parse-latin/lib/plugin/merge-initial-digit-sentences.js
+++ b/node_modules/parse-latin/lib/plugin/merge-initial-digit-sentences.js
@ -0,0 +1,32 @@
+'use strict'
+
+var toString = require('nlcst-to-string')
+var modifyChildren = require('unist-util-modify-children')
+var expressions = require('../expressions')
+
+module.exports = modifyChildren(mergeInitialDigitSentences)
+
+// Initial digit.
+var digit = expressions.digitStart
+
+// Merge a sentence into its previous sentence, when the sentence starts with a
+// lower case letter.
+function mergeInitialDigitSentences(child, index, parent) {
+  var children = child.children
+  var siblings = parent.children
+  var prev = siblings[index - 1]
+  var head = children[0]
+
+  if (prev && head && head.type === 'WordNode' && digit.test(toString(head))) {
+    prev.children = prev.children.concat(children)
+    siblings.splice(index, 1)
+
+    // Update position.
+    if (prev.position && child.position) {
+      prev.position.end = child.position.end
+    }
+
+    // Next, iterate over the node *now* at the current position.
+    return index
+  }
+}
--- a/node_modules/parse-latin/lib/plugin/merge-initial-lower-case-letter-sentences.js
+++ b/node_modules/parse-latin/lib/plugin/merge-initial-lower-case-letter-sentences.js
@ -0,0 +1,54 @@
+'use strict'
+
+var toString = require('nlcst-to-string')
+var modifyChildren = require('unist-util-modify-children')
+var expressions = require('../expressions')
+
+module.exports = modifyChildren(mergeInitialLowerCaseLetterSentences)
+
+// Initial lowercase letter.
+var lowerInitial = expressions.lowerInitial
+
+// Merge a sentence into its previous sentence, when the sentence starts with a
+// lower case letter.
+function mergeInitialLowerCaseLetterSentences(child, index, parent) {
+  var children = child.children
+  var position
+  var node
+  var siblings
+  var prev
+
+  if (children && children.length !== 0 && index !== 0) {
+    position = -1
+
+    while (children[++position]) {
+      node = children[position]
+
+      if (node.type === 'WordNode') {
+        if (!lowerInitial.test(toString(node))) {
+          return
+        }
+
+        siblings = parent.children
+
+        prev = siblings[index - 1]
+
+        prev.children = prev.children.concat(children)
+
+        siblings.splice(index, 1)
+
+        // Update position.
+        if (prev.position && child.position) {
+          prev.position.end = child.position.end
+        }
+
+        // Next, iterate over the node *now* at the current position.
+        return index
+      }
+
+      if (node.type === 'SymbolNode' || node.type === 'PunctuationNode') {
+        return
+      }
+    }
+  }
+}
--- a/node_modules/parse-latin/lib/plugin/merge-initial-word-symbol.js
+++ b/node_modules/parse-latin/lib/plugin/merge-initial-word-symbol.js
@ -0,0 +1,46 @@
+'use strict'
+
+var toString = require('nlcst-to-string')
+var modifyChildren = require('unist-util-modify-children')
+
+module.exports = modifyChildren(mergeInitialWordSymbol)
+
+// Merge certain punctuation marks into their following words.
+function mergeInitialWordSymbol(child, index, parent) {
+  var children
+  var next
+
+  if (
+    (child.type !== 'SymbolNode' && child.type !== 'PunctuationNode') ||
+    toString(child) !== '&'
+  ) {
+    return
+  }
+
+  children = parent.children
+
+  next = children[index + 1]
+
+  // If either a previous word, or no following word, exists, exit early.
+  if (
+    (index !== 0 && children[index - 1].type === 'WordNode') ||
+    !(next && next.type === 'WordNode')
+  ) {
+    return
+  }
+
+  // Remove `child` from parent.
+  children.splice(index, 1)
+
+  // Add the punctuation mark at the start of the next node.
+  next.children.unshift(child)
+
+  // Update position.
+  if (next.position && child.position) {
+    next.position.start = child.position.start
+  }
+
+  // Next, iterate over the node at the previous position, as it's now adjacent
+  // to a following word.
+  return index - 1
+}
--- a/node_modules/parse-latin/lib/plugin/merge-initialisms.js
+++ b/node_modules/parse-latin/lib/plugin/merge-initialisms.js
@ -0,0 +1,75 @@
+'use strict'
+
+var toString = require('nlcst-to-string')
+var modifyChildren = require('unist-util-modify-children')
+var expressions = require('../expressions')
+
+module.exports = modifyChildren(mergeInitialisms)
+
+var numerical = expressions.numerical
+
+// Merge initialisms.
+function mergeInitialisms(child, index, parent) {
+  var siblings
+  var prev
+  var children
+  var length
+  var position
+  var otherChild
+  var isAllDigits
+  var value
+
+  if (index !== 0 && toString(child) === '.') {
+    siblings = parent.children
+
+    prev = siblings[index - 1]
+    children = prev.children
+
+    length = children && children.length
+
+    if (prev.type === 'WordNode' && length !== 1 && length % 2 !== 0) {
+      position = length
+
+      isAllDigits = true
+
+      while (children[--position]) {
+        otherChild = children[position]
+
+        value = toString(otherChild)
+
+        if (position % 2 === 0) {
+          // Initialisms consist of one character values.
+          if (value.length > 1) {
+            return
+          }
+
+          if (!numerical.test(value)) {
+            isAllDigits = false
+          }
+        } else if (value !== '.') {
+          if (position < length - 2) {
+            break
+          } else {
+            return
+          }
+        }
+      }
+
+      if (!isAllDigits) {
+        // Remove `child` from parent.
+        siblings.splice(index, 1)
+
+        // Add child to the previous children.
+        children.push(child)
+
+        // Update position.
+        if (prev.position && child.position) {
+          prev.position.end = child.position.end
+        }
+
+        // Next, iterate over the node *now* at the current position.
+        return index
+      }
+    }
+  }
+}
--- a/node_modules/parse-latin/lib/plugin/merge-inner-word-slash.js
+++ b/node_modules/parse-latin/lib/plugin/merge-inner-word-slash.js
@ -0,0 +1,57 @@
+'use strict'
+
+var toString = require('nlcst-to-string')
+var modifyChildren = require('unist-util-modify-children')
+
+module.exports = modifyChildren(mergeInnerWordSlash)
+
+var slash = '/'
+
+// Merge words joined by certain punctuation marks.
+function mergeInnerWordSlash(child, index, parent) {
+  var siblings = parent.children
+  var prev
+  var next
+  var prevValue
+  var nextValue
+  var queue
+  var tail
+  var count
+
+  prev = siblings[index - 1]
+  next = siblings[index + 1]
+
+  if (
+    prev &&
+    prev.type === 'WordNode' &&
+    (child.type === 'SymbolNode' || child.type === 'PunctuationNode') &&
+    toString(child) === slash
+  ) {
+    prevValue = toString(prev)
+    tail = child
+    queue = [child]
+    count = 1
+
+    if (next && next.type === 'WordNode') {
+      nextValue = toString(next)
+      tail = next
+      queue = queue.concat(next.children)
+      count++
+    }
+
+    if (prevValue.length < 3 && (!nextValue || nextValue.length < 3)) {
+      // Add all found tokens to `prev`s children.
+      prev.children = prev.children.concat(queue)
+
+      siblings.splice(index, count)
+
+      // Update position.
+      if (prev.position && tail.position) {
+        prev.position.end = tail.position.end
+      }
+
+      // Next, iterate over the node *now* at the current position.
+      return index
+    }
+  }
+}
--- a/node_modules/parse-latin/lib/plugin/merge-inner-word-symbol.js
+++ b/node_modules/parse-latin/lib/plugin/merge-inner-word-symbol.js
@ -0,0 +1,82 @@
+'use strict'
+
+var toString = require('nlcst-to-string')
+var modifyChildren = require('unist-util-modify-children')
+var expressions = require('../expressions')
+
+module.exports = modifyChildren(mergeInnerWordSymbol)
+
+// Symbols part of surrounding words.
+var wordSymbolInner = expressions.wordSymbolInner
+
+// Merge words joined by certain punctuation marks.
+function mergeInnerWordSymbol(child, index, parent) {
+  var siblings
+  var sibling
+  var prev
+  var last
+  var position
+  var tokens
+  var queue
+
+  if (
+    index !== 0 &&
+    (child.type === 'SymbolNode' || child.type === 'PunctuationNode')
+  ) {
+    siblings = parent.children
+    prev = siblings[index - 1]
+
+    if (prev && prev.type === 'WordNode') {
+      position = index - 1
+
+      tokens = []
+      queue = []
+
+      // -   If a token which is neither word nor inner word symbol is found,
+      //     the loop is broken
+      // -   If an inner word symbol is found,  it’s queued
+      // -   If a word is found, it’s queued (and the queue stored and emptied)
+      while (siblings[++position]) {
+        sibling = siblings[position]
+
+        if (sibling.type === 'WordNode') {
+          tokens = tokens.concat(queue, sibling.children)
+
+          queue = []
+        } else if (
+          (sibling.type === 'SymbolNode' ||
+            sibling.type === 'PunctuationNode') &&
+          wordSymbolInner.test(toString(sibling))
+        ) {
+          queue.push(sibling)
+        } else {
+          break
+        }
+      }
+
+      if (tokens.length !== 0) {
+        // If there is a queue, remove its length from `position`.
+        if (queue.length !== 0) {
+          position -= queue.length
+        }
+
+        // Remove every (one or more) inner-word punctuation marks and children
+        // of words.
+        siblings.splice(index, position - index)
+
+        // Add all found tokens to `prev`s children.
+        prev.children = prev.children.concat(tokens)
+
+        last = tokens[tokens.length - 1]
+
+        // Update position.
+        if (prev.position && last.position) {
+          prev.position.end = last.position.end
+        }
+
+        // Next, iterate over the node *now* at the current position.
+        return index
+      }
+    }
+  }
+}
--- a/node_modules/parse-latin/lib/plugin/merge-non-word-sentences.js
+++ b/node_modules/parse-latin/lib/plugin/merge-non-word-sentences.js
@ -0,0 +1,52 @@
+'use strict'
+
+var modifyChildren = require('unist-util-modify-children')
+
+module.exports = modifyChildren(mergeNonWordSentences)
+
+// Merge a sentence into the following sentence, when the sentence does not
+// contain word tokens.
+function mergeNonWordSentences(child, index, parent) {
+  var children = child.children
+  var position = -1
+  var prev
+  var next
+
+  while (children[++position]) {
+    if (children[position].type === 'WordNode') {
+      return
+    }
+  }
+
+  prev = parent.children[index - 1]
+
+  if (prev) {
+    prev.children = prev.children.concat(children)
+
+    // Remove the child.
+    parent.children.splice(index, 1)
+
+    // Patch position.
+    if (prev.position && child.position) {
+      prev.position.end = child.position.end
+    }
+
+    // Next, iterate over the node *now* at the current position (which was the
+    // next node).
+    return index
+  }
+
+  next = parent.children[index + 1]
+
+  if (next) {
+    next.children = children.concat(next.children)
+
+    // Patch position.
+    if (next.position && child.position) {
+      next.position.start = child.position.start
+    }
+
+    // Remove the child.
+    parent.children.splice(index, 1)
+  }
+}
--- a/node_modules/parse-latin/lib/plugin/merge-prefix-exceptions.js
+++ b/node_modules/parse-latin/lib/plugin/merge-prefix-exceptions.js
@ -0,0 +1,75 @@
+'use strict'
+
+var toString = require('nlcst-to-string')
+var modifyChildren = require('unist-util-modify-children')
+
+module.exports = modifyChildren(mergePrefixExceptions)
+
+// Blacklist of full stop characters that should not be treated as terminal
+// sentence markers: A case-insensitive abbreviation.
+var abbreviationPrefix = new RegExp(
+  '^(' +
+    '[0-9]{1,3}|' +
+    '[a-z]|' +
+    // Common Latin Abbreviations:
+    // Based on: <https://en.wikipedia.org/wiki/List_of_Latin_abbreviations>.
+    // Where only the abbreviations written without joining full stops,
+    // but with a final full stop, were extracted.
+    //
+    // circa, capitulus, confer, compare, centum weight, eadem, (et) alii,
+    // et cetera, floruit, foliis, ibidem, idem, nemine && contradicente,
+    // opere && citato, (per) cent, (per) procurationem, (pro) tempore,
+    // sic erat scriptum, (et) sequentia, statim, videlicet.
+    'al|ca|cap|cca|cent|cf|cit|con|cp|cwt|ead|etc|ff|' +
+    'fl|ibid|id|nem|op|pro|seq|sic|stat|tem|viz' +
+    ')$'
+)
+
+// Merge a sentence into its next sentence, when the sentence ends with a
+// certain word.
+function mergePrefixExceptions(child, index, parent) {
+  var children = child.children
+  var period
+  var node
+  var next
+
+  if (children && children.length > 1) {
+    period = children[children.length - 1]
+
+    if (period && toString(period) === '.') {
+      node = children[children.length - 2]
+
+      if (
+        node &&
+        node.type === 'WordNode' &&
+        abbreviationPrefix.test(toString(node).toLowerCase())
+      ) {
+        // Merge period into abbreviation.
+        node.children.push(period)
+        children.pop()
+
+        // Update position.
+        if (period.position && node.position) {
+          node.position.end = period.position.end
+        }
+
+        // Merge sentences.
+        next = parent.children[index + 1]
+
+        if (next) {
+          child.children = children.concat(next.children)
+
+          parent.children.splice(index + 1, 1)
+
+          // Update position.
+          if (next.position && child.position) {
+            child.position.end = next.position.end
+          }
+
+          // Next, iterate over the current node again.
+          return index - 1
+        }
+      }
+    }
+  }
+}
--- a/node_modules/parse-latin/lib/plugin/merge-remaining-full-stops.js
+++ b/node_modules/parse-latin/lib/plugin/merge-remaining-full-stops.js
@ -0,0 +1,99 @@
+'use strict'
+
+var toString = require('nlcst-to-string')
+var visitChildren = require('unist-util-visit-children')
+var expressions = require('../expressions')
+
+module.exports = visitChildren(mergeRemainingFullStops)
+
+// Terminal (sentence-ending) markers.
+var terminalMarker = expressions.terminalMarker
+
+// Merge non-terminal-marker full stops into the previous word (if available),
+// or the next word (if available).
+function mergeRemainingFullStops(child) {
+  var children = child.children
+  var position = children.length
+  var hasFoundDelimiter = false
+  var grandchild
+  var prev
+  var next
+  var nextNext
+
+  while (children[--position]) {
+    grandchild = children[position]
+
+    if (
+      grandchild.type !== 'SymbolNode' &&
+      grandchild.type !== 'PunctuationNode'
+    ) {
+      // This is a sentence without terminal marker, so we 'fool' the code to
+      // make it think we have found one.
+      if (grandchild.type === 'WordNode') {
+        hasFoundDelimiter = true
+      }
+
+      continue
+    }
+
+    // Exit when this token is not a terminal marker.
+    if (!terminalMarker.test(toString(grandchild))) {
+      continue
+    }
+
+    // Ignore the first terminal marker found (starting at the end), as it
+    // should not be merged.
+    if (!hasFoundDelimiter) {
+      hasFoundDelimiter = true
+
+      continue
+    }
+
+    // Only merge a single full stop.
+    if (toString(grandchild) !== '.') {
+      continue
+    }
+
+    prev = children[position - 1]
+    next = children[position + 1]
+
+    if (prev && prev.type === 'WordNode') {
+      nextNext = children[position + 2]
+
+      // Continue when the full stop is followed by a space and another full
+      // stop, such as: `{.} .`
+      if (
+        next &&
+        nextNext &&
+        next.type === 'WhiteSpaceNode' &&
+        toString(nextNext) === '.'
+      ) {
+        continue
+      }
+
+      // Remove `child` from parent.
+      children.splice(position, 1)
+
+      // Add the punctuation mark at the end of the previous node.
+      prev.children.push(grandchild)
+
+      // Update position.
+      if (grandchild.position && prev.position) {
+        prev.position.end = grandchild.position.end
+      }
+
+      position--
+    } else if (next && next.type === 'WordNode') {
+      // Remove `child` from parent.
+      children.splice(position, 1)
+
+      // Add the punctuation mark at the start of the next node.
+      next.children.unshift(grandchild)
+
+      if (grandchild.position && next.position) {
+        next.position.start = grandchild.position.start
+      }
+    }
+  }
+}
--- a/node_modules/parse-latin/lib/plugin/merge-words.js
+++ b/node_modules/parse-latin/lib/plugin/merge-words.js
@ -0,0 +1,33 @@
+'use strict'
+
+var modifyChildren = require('unist-util-modify-children')
+
+module.exports = modifyChildren(mergeFinalWordSymbol)
+
+// Merge multiple words. This merges the children of adjacent words, something
+// which should not occur naturally by parse-latin, but might happen when custom
+// tokens were passed in.
+function mergeFinalWordSymbol(child, index, parent) {
+  var siblings = parent.children
+  var next
+
+  if (child.type === 'WordNode') {
+    next = siblings[index + 1]
+
+    if (next && next.type === 'WordNode') {
+      // Remove `next` from parent.
+      siblings.splice(index + 1, 1)
+
+      // Add the punctuation mark at the end of the previous node.
+      child.children = child.children.concat(next.children)
+
+      // Update position.
+      if (next.position && child.position) {
+        child.position.end = next.position.end
+      }
+
+      // Next, re-iterate the current node.
+      return index
+    }
+  }
+}
--- a/node_modules/parse-latin/lib/plugin/patch-position.js
+++ b/node_modules/parse-latin/lib/plugin/patch-position.js
@ -0,0 +1,34 @@
+'use strict'
+
+var visitChildren = require('unist-util-visit-children')
+
+module.exports = visitChildren(patchPosition)
+
+// Patch the position on a parent node based on its first and last child.
+function patchPosition(child, index, node) {
+  var siblings = node.children
+
+  if (!child.position) {
+    return
+  }
+
+  if (
+    index === 0 &&
+    (!node.position || /* istanbul ignore next */ !node.position.start)
+  ) {
+    patch(node)
+    node.position.start = child.position.start
+  }
+
+  if (index === siblings.length - 1 && (!node.position || !node.position.end)) {
+    patch(node)
+    node.position.end = child.position.end
+  }
+}
+
+// Add a `position` object when it does not yet exist on `node`.
+function patch(node) {
+  if (!node.position) {
+    node.position = {}
+  }
+}
--- a/node_modules/parse-latin/lib/plugin/remove-empty-nodes.js
+++ b/node_modules/parse-latin/lib/plugin/remove-empty-nodes.js
@ -0,0 +1,16 @@
+'use strict'
+
+var modifyChildren = require('unist-util-modify-children')
+
+module.exports = modifyChildren(removeEmptyNodes)
+
+// Remove empty children.
+function removeEmptyNodes(child, index, parent) {
+  if ('children' in child && child.children.length === 0) {
+    parent.children.splice(index, 1)
+
+    // Next, iterate over the node *now* at the current position (which was the
+    // next node).
+    return index
+  }
+}