WIP - add extractor, generate snippet_data

This commit is contained in:
Stefan Fejes
2019-08-20 15:52:05 +02:00
parent 88084d3d30
commit cc8f1d8a7a
37396 changed files with 4588842 additions and 133 deletions

93
node_modules/string-similarity/README.md generated vendored Normal file
View File

@ -0,0 +1,93 @@
string-similarity
=================
Finds degree of similarity between two strings, based on [Dice's Coefficient](http://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient), which is mostly better than [Levenshtein distance](http://en.wikipedia.org/wiki/Levenshtein_distance).
## Usage
Install using:
```shell
npm install string-similarity --save
```
In your code:
```javascript
var stringSimilarity = require('string-similarity');
var similarity = stringSimilarity.compareTwoStrings('healed', 'sealed');
var matches = stringSimilarity.findBestMatch('healed', ['edward', 'sealed', 'theatre']);
```
## API
Requiring the module gives an object with two methods:
### compareTwoStrings(string1, string2)
Returns a fraction between 0 and 1, which indicates the degree of similarity between the two strings. 0 indicates completely different strings, 1 indicates identical strings. The comparison is case-insensitive.
##### Arguments
1. string1 (string): The first string
2. string2 (string): The second string
Order does not make a difference.
##### Returns
(number): A fraction from 0 to 1, both inclusive. Higher number indicates more similarity.
##### Examples
```javascript
stringSimilarity.compareTwoStrings('healed', 'sealed');
// → 0.8
stringSimilarity.compareTwoStrings('Olive-green table for sale, in extremely good condition.',
'For sale: table in very good condition, olive green in colour.');
// → 0.7073170731707317
stringSimilarity.compareTwoStrings('Olive-green table for sale, in extremely good condition.',
'For sale: green Subaru Impreza, 210,000 miles');
// → 0.3013698630136986
stringSimilarity.compareTwoStrings('Olive-green table for sale, in extremely good condition.',
'Wanted: mountain bike with at least 21 gears.');
// → 0.11267605633802817
```
### findBestMatch(mainString, targetStrings)
Compares `mainString` against each string in `targetStrings`.
##### Arguments
1. mainString (string): The string to match each target string against.
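2. targetStrings (Array): A non-empty array of strings. Each string in this array will be matched against the main string. An error is thrown if `mainString` is not a string, or if `targetStrings` is not an array, is empty, or contains a non-string element.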
##### Returns
(Object): An object with a `ratings` property, which gives a similarity rating for each target string, and a `bestMatch` property, which specifies which target string was most similar to the main string.
##### Examples
```javascript
stringSimilarity.findBestMatch('Olive-green table for sale, in extremely good condition.', [
'For sale: green Subaru Impreza, 210,000 miles',
'For sale: table in very good condition, olive green in colour.',
'Wanted: mountain bike with at least 21 gears.'
]);
// →
{ ratings:
[ { target: 'For sale: green Subaru Impreza, 210,000 miles',
rating: 0.3013698630136986 },
{ target: 'For sale: table in very good condition, olive green in colour.',
rating: 0.7073170731707317 },
{ target: 'Wanted: mountain bike with at least 21 gears.',
rating: 0.11267605633802817 } ],
bestMatch:
{ target: 'For sale: table in very good condition, olive green in colour.',
rating: 0.7073170731707317 } }
```
![Build status](https://codeship.com/projects/2aa453d0-0959-0134-8a76-4abcb29fe9b4/status?branch=master)
[![Known Vulnerabilities](https://snyk.io/test/github/aceakash/string-similarity/badge.svg)](https://snyk.io/test/github/aceakash/string-similarity)

113
node_modules/string-similarity/compare-strings.js generated vendored Normal file
View File

@ -0,0 +1,113 @@
// Per-method lodash packages; each require pulls in a single helper.
var _forEach = require('lodash.foreach');
var _map = require('lodash.map');
var _every = require('lodash.every');
var _maxBy = require('lodash.maxby');
var _flattenDeep = require('lodash.flattendeep');
// Public API. Both functions are function declarations further down in this
// file, so they are hoisted and can be assigned here before their definitions.
exports.compareTwoStrings = compareTwoStrings;
exports.findBestMatch = findBestMatch;
/**
 * Computes the Sørensen–Dice similarity of two strings, based on the
 * adjacent-letter pairs (bigrams) of each space-separated word.
 * The comparison is case-insensitive and symmetric in its arguments.
 *
 * @param {string} str1 - The first string.
 * @param {string} str2 - The second string.
 * @returns {number} A value from 0 (nothing in common) to 1 (identical
 *   ignoring case), both inclusive. Never NaN.
 * @throws {TypeError} If either argument has no `toUpperCase` method
 *   (for example, is not a string).
 */
function compareTwoStrings(str1, str2) {
  var upper1 = str1.toUpperCase();
  var upper2 = str2.toUpperCase();

  // Identical ignoring case; this also covers the "both empty" case.
  if (upper1 === upper2) {
    return 1;
  }
  // Exactly one side is empty (both-empty was handled above).
  if (str1.length === 0 || str2.length === 0) {
    return 0;
  }
  // Two different single characters have no letter pairs to compare.
  if (str1.length === 1 && str2.length === 1) {
    return 0;
  }

  var pairs1 = wordLetterPairs(upper1);
  var pairs2 = wordLetterPairs(upper2);
  var union = pairs1.length + pairs2.length;

  // Inputs made up only of one-letter words (e.g. 'a b' vs 'c d') produce no
  // pairs at all; without this guard the result would be 0 / 0 = NaN.
  if (union === 0) {
    return 0;
  }

  // Count how many times each pair occurs in the second string. A
  // prototype-less object is used so pair text can never hit inherited keys.
  var remaining = Object.create(null);
  pairs2.forEach(function (pair) {
    remaining[pair] = (remaining[pair] || 0) + 1;
  });

  // Multiset intersection: each pair of the second string can be matched by
  // at most one pair of the first string.
  var intersection = 0;
  pairs1.forEach(function (pair) {
    if (remaining[pair] > 0) {
      remaining[pair]--;
      intersection++;
    }
  });

  return (2.0 * intersection) / union;

  // private functions ---------------------------

  // Returns the adjacent two-character pairs of every space-separated word
  // in `str`, in order of appearance. Words shorter than two characters
  // contribute nothing.
  function wordLetterPairs(str) {
    var pairs = [];
    str.split(' ').forEach(function (word) {
      for (var i = 0; i < word.length - 1; i++) {
        pairs.push(word.substring(i, i + 2));
      }
    });
    return pairs;
  }
}
/**
 * Rates every string in `targetStrings` against `mainString` and reports
 * which one scored highest.
 *
 * @param {string} mainString - The string each target is compared with.
 * @param {string[]} targetStrings - Non-empty array of candidate strings.
 * @returns {{ratings: Array<{target: string, rating: number}>,
 *            bestMatch: {target: string, rating: number}}}
 *   `ratings` holds one entry per target, in input order; `bestMatch` is the
 *   entry selected by `_maxBy` on the `rating` property.
 * @throws {Error} If `mainString` is not a string, or `targetStrings` is not
 *   a non-empty array containing only strings.
 */
function findBestMatch(mainString, targetStrings) {
  var mainIsString = typeof mainString === 'string';
  var targetsAreStrings = isNonEmptyStringArray(targetStrings);
  if (!(mainIsString && targetsAreStrings)) {
    throw new Error('Bad arguments: First argument should be a string, second should be an array of strings');
  }

  var scored = _map(targetStrings, rate);
  var winner = _maxBy(scored, 'rating');

  return {
    ratings: scored,
    bestMatch: winner
  };

  // private functions ---------------------------

  // Builds the rating entry for a single candidate string.
  function rate(candidate) {
    var entry = {
      target: candidate,
      rating: compareTwoStrings(mainString, candidate)
    };
    return entry;
  }

  // True only for primitive string values.
  function isString(value) {
    return typeof value === 'string';
  }

  // True when `list` is an array with at least one element, all of them strings.
  function isNonEmptyStringArray(list) {
    if (!Array.isArray(list)) {
      return false;
    }
    if (!(list.length > 0)) {
      return false;
    }
    return _every(list, isString);
  }
}

74
node_modules/string-similarity/package.json generated vendored Normal file
View File

@ -0,0 +1,74 @@
{
"_from": "string-similarity@^1.2.0",
"_id": "string-similarity@1.2.2",
"_inBundle": false,
"_integrity": "sha512-IoHUjcw3Srl8nsPlW04U3qwWPk3oG2ffLM0tN853d/E/JlIvcmZmDY2Kz5HzKp4lEi2T7QD7Zuvjq/1rDw+XcQ==",
"_location": "/string-similarity",
"_phantomChildren": {},
"_requested": {
"type": "range",
"registry": true,
"raw": "string-similarity@^1.2.0",
"name": "string-similarity",
"escapedName": "string-similarity",
"rawSpec": "^1.2.0",
"saveSpec": null,
"fetchSpec": "^1.2.0"
},
"_requiredBy": [
"/gatsby"
],
"_resolved": "https://registry.npmjs.org/string-similarity/-/string-similarity-1.2.2.tgz",
"_shasum": "99b2c20a3c9bbb3903964eae1d89856db3d8db9b",
"_spec": "string-similarity@^1.2.0",
"_where": "/Users/stefanfejes/Projects/30-seconds-of-python-code/node_modules/gatsby",
"author": {
"name": "Akash Kurdekar",
"email": "npm@kurdekar.com",
"url": "http://untilfalse.com/"
},
"bugs": {
"url": "https://github.com/aceakash/string-similarity/issues"
},
"bundleDependencies": false,
"dependencies": {
"lodash.every": "^4.6.0",
"lodash.flattendeep": "^4.4.0",
"lodash.foreach": "^4.5.0",
"lodash.map": "^4.6.0",
"lodash.maxby": "^4.6.0"
},
"deprecated": false,
"description": "Finds degree of similarity between strings, based on Dice's Coefficient, which is mostly better than Levenshtein distance.",
"devDependencies": {
"jasmine": "^3.2.0"
},
"files": [
"compare-strings.js"
],
"homepage": "https://github.com/aceakash/string-similarity#readme",
"keywords": [
"strings",
"similar",
"difference",
"similarity",
"compare",
"comparison",
"degree",
"match",
"matching",
"dice",
"levenshtein"
],
"license": "ISC",
"main": "compare-strings.js",
"name": "string-similarity",
"repository": {
"type": "git",
"url": "git://github.com/aceakash/string-similarity.git"
},
"scripts": {
"test": "jasmine"
},
"version": "1.2.2"
}