WIP - add extractor, generate snippet_data

2019-08-20 15:52:05 +02:00
parent 88084d3d30
commit cc8f1d8a7a
37396 changed files with 4588842 additions and 133 deletions
--- a/node_modules/string-similarity/README.md
+++ b/node_modules/string-similarity/README.md
@ -0,0 +1,93 @@
+string-similarity
+=================
+
+Finds degree of similarity between two strings, based on [Dice's Coefficient](http://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient), which is mostly better than [Levenshtein distance](http://en.wikipedia.org/wiki/Levenshtein_distance).
+
+## Usage
+Install using:
+
+```shell
+npm install string-similarity --save
+```
+
+In your code:
+
+```javascript
+var stringSimilarity = require('string-similarity');
+
+var similarity = stringSimilarity.compareTwoStrings('healed', 'sealed'); 
+
+var matches = stringSimilarity.findBestMatch('healed', ['edward', 'sealed', 'theatre']);
+```
+## API
+
+Requiring the module gives an object with two methods:
+
+### compareTwoStrings(string1, string2)
+
+Returns a fraction between 0 and 1, which indicates the degree of similarity between the two strings. 0 indicates completely different strings, 1 indicates identical strings. The comparison is case-insensitive.
+
+##### Arguments
+  
+1. string1 (string): The first string
+2. string2 (string): The second string
+  
+Order does not make a difference.
+  
+##### Returns
+  
+(number): A fraction from 0 to 1, both inclusive. Higher number indicates more similarity.
+
+##### Examples
+  
+```javascript
+stringSimilarity.compareTwoStrings('healed', 'sealed');
+// → 0.8
+
+stringSimilarity.compareTwoStrings('Olive-green table for sale, in extremely good condition.', 
+  'For sale: table in very good  condition, olive green in colour.');
+// → 0.7073170731707317
+
+stringSimilarity.compareTwoStrings('Olive-green table for sale, in extremely good condition.', 
+  'For sale: green Subaru Impreza, 210,000 miles');
+// → 0.3013698630136986
+
+stringSimilarity.compareTwoStrings('Olive-green table for sale, in extremely good condition.', 
+  'Wanted: mountain bike with at least 21 gears.');
+// → 0.11267605633802817
+```
+
+### findBestMatch(mainString, targetStrings)
+
+Compares `mainString` against each string in `targetStrings`.
+
+##### Arguments
+
+1. mainString (string): The string to match each target string against.
+2. targetStrings (Array): Each string in this array will be matched against the main string.
+
+##### Returns
+(Object): An object with a `ratings` property, which gives a similarity rating for each target string, and a `bestMatch` property, which specifies which target string was most similar to the main string.
+
+##### Examples
+```javascript
+stringSimilarity.findBestMatch('Olive-green table for sale, in extremely good condition.', [
+  'For sale: green Subaru Impreza, 210,000 miles', 
+  'For sale: table in very good condition, olive green in colour.', 
+  'Wanted: mountain bike with at least 21 gears.'
+]);
+// → 
+{ ratings:
+   [ { target: 'For sale: green Subaru Impreza, 210,000 miles',
+       rating: 0.3013698630136986 },
+     { target: 'For sale: table in very good condition, olive green in colour.',
+       rating: 0.7073170731707317 },
+     { target: 'Wanted: mountain bike with at least 21 gears.',
+       rating: 0.11267605633802817 } ],
+  bestMatch:
+   { target: 'For sale: table in very good condition, olive green in colour.',
+     rating: 0.7073170731707317 } }
+```
+
+![Build status](https://codeship.com/projects/2aa453d0-0959-0134-8a76-4abcb29fe9b4/status?branch=master)
+[![Known Vulnerabilities](https://snyk.io/test/github/aceakash/string-similarity/badge.svg)](https://snyk.io/test/github/aceakash/string-similarity)
--- a/node_modules/string-similarity/compare-strings.js
+++ b/node_modules/string-similarity/compare-strings.js
@ -0,0 +1,113 @@
+var _forEach = require('lodash.foreach');
+var _map = require('lodash.map');
+var _every = require('lodash.every');
+var _maxBy = require('lodash.maxby');
+var _flattenDeep = require('lodash.flattendeep');
+
+exports.compareTwoStrings = compareTwoStrings;
+exports.findBestMatch = findBestMatch;
+
+function compareTwoStrings(str1, str2) {
+  var result = null;
+  result = calculateResultIfIdentical(str1, str2);
+  if (result != null) {
+    return result;
+  }
+  result = calculateResultIfEitherIsEmpty(str1, str2);
+  if (result != null) {
+    return result;
+  }
+  result = calculateResultIfBothAreSingleCharacter(str1, str2);
+  if (result != null) {
+    return result;
+  }
+
+  var pairs1 = wordLetterPairs(str1.toUpperCase());
+  var pairs2 = wordLetterPairs(str2.toUpperCase());
+  var intersection = 0;
+  var union = pairs1.length + pairs2.length;
+
+  _forEach(pairs1, function (pair1) {
+    for(var i = 0; i < pairs2.length; i++) {
+      var pair2 = pairs2[i];
+      if (pair1 === pair2) {
+        intersection++;
+        pairs2.splice(i, 1);
+        break;
+      }
+    }
+  });
+
+  return (2.0 * intersection) / union;
+
+  // private functions ---------------------------
+  function letterPairs(str) {
+    var numPairs = str.length - 1;
+    var pairs = [];
+    for(var i = 0; i < numPairs; i++) {
+      pairs[i] = str.substring(i, i + 2);
+    }
+    return pairs;
+  }
+
+  function wordLetterPairs(str) {
+    return _flattenDeep(_map(str.split(' '), letterPairs));
+  }
+
+  function calculateResultIfIdentical(str1, str2) {
+    if (str1.toUpperCase() == str2.toUpperCase()) {
+      return 1;
+    }
+    return null;
+  }
+
+  function calculateResultIfBothAreSingleCharacter(str1, str2) {
+    if (str1.length == 1 && str2.length == 1) {
+      return 0;
+    }
+  }
+
+  function calculateResultIfEitherIsEmpty(str1, str2) {
+    // if both are empty strings
+    if (str1.length == 0 && str2.length == 0) {
+      return 1;
+    }
+
+    // if only one is empty string
+    if ((str1.length + str2.length) > 0 && (str1.length * str2.length) == 0) {
+      return 0;
+    }
+    return null;
+  }
+}
+
+
+function findBestMatch(mainString, targetStrings) {
+  if (!areArgsValid(mainString, targetStrings)) {
+    throw new Error('Bad arguments: First argument should be a string, second should be an array of strings');
+  }
+  var ratings = _map(targetStrings, function (targetString) {
+    return {
+      target: targetString,
+      rating: compareTwoStrings(mainString, targetString)
+    };
+  });
+
+  return {
+    ratings: ratings,
+    bestMatch: _maxBy(ratings, 'rating')
+  };
+
+  // private functions ---------------------------
+  function areArgsValid(mainString, targetStrings) {
+    var mainStringIsAString = (typeof mainString === 'string');
+
+    var targetStringsIsAnArrayOfStrings = Array.isArray(targetStrings) &&
+      targetStrings.length > 0 &&
+      _every(targetStrings, function (targetString) {
+        return (typeof targetString === 'string');
+      });
+
+    return mainStringIsAString && targetStringsIsAnArrayOfStrings;
+  }
+}
--- a/node_modules/string-similarity/package.json
+++ b/node_modules/string-similarity/package.json
@ -0,0 +1,74 @@
+{
+  "_from": "string-similarity@^1.2.0",
+  "_id": "string-similarity@1.2.2",
+  "_inBundle": false,
+  "_integrity": "sha512-IoHUjcw3Srl8nsPlW04U3qwWPk3oG2ffLM0tN853d/E/JlIvcmZmDY2Kz5HzKp4lEi2T7QD7Zuvjq/1rDw+XcQ==",
+  "_location": "/string-similarity",
+  "_phantomChildren": {},
+  "_requested": {
+    "type": "range",
+    "registry": true,
+    "raw": "string-similarity@^1.2.0",
+    "name": "string-similarity",
+    "escapedName": "string-similarity",
+    "rawSpec": "^1.2.0",
+    "saveSpec": null,
+    "fetchSpec": "^1.2.0"
+  },
+  "_requiredBy": [
+    "/gatsby"
+  ],
+  "_resolved": "https://registry.npmjs.org/string-similarity/-/string-similarity-1.2.2.tgz",
+  "_shasum": "99b2c20a3c9bbb3903964eae1d89856db3d8db9b",
+  "_spec": "string-similarity@^1.2.0",
+  "_where": "/Users/stefanfejes/Projects/30-seconds-of-python-code/node_modules/gatsby",
+  "author": {
+    "name": "Akash Kurdekar",
+    "email": "npm@kurdekar.com",
+    "url": "http://untilfalse.com/"
+  },
+  "bugs": {
+    "url": "https://github.com/aceakash/string-similarity/issues"
+  },
+  "bundleDependencies": false,
+  "dependencies": {
+    "lodash.every": "^4.6.0",
+    "lodash.flattendeep": "^4.4.0",
+    "lodash.foreach": "^4.5.0",
+    "lodash.map": "^4.6.0",
+    "lodash.maxby": "^4.6.0"
+  },
+  "deprecated": false,
+  "description": "Finds degree of similarity between strings, based on Dice's Coefficient, which is mostly better than Levenshtein distance.",
+  "devDependencies": {
+    "jasmine": "^3.2.0"
+  },
+  "files": [
+    "compare-strings.js"
+  ],
+  "homepage": "https://github.com/aceakash/string-similarity#readme",
+  "keywords": [
+    "strings",
+    "similar",
+    "difference",
+    "similarity",
+    "compare",
+    "comparison",
+    "degree",
+    "match",
+    "matching",
+    "dice",
+    "levenshtein"
+  ],
+  "license": "ISC",
+  "main": "compare-strings.js",
+  "name": "string-similarity",
+  "repository": {
+    "type": "git",
+    "url": "git://github.com/aceakash/string-similarity.git"
+  },
+  "scripts": {
+    "test": "jasmine"
+  },
+  "version": "1.2.2"
+}