Files
30-seconds-of-code/node_modules/gatsby-source-filesystem/create-remote-file-node.js
2019-08-20 15:52:05 +02:00

376 lines
8.7 KiB
JavaScript

"use strict";
const fs = require(`fs-extra`);
const got = require(`got`);
const {
createContentDigest
} = require(`gatsby-core-utils`);
const path = require(`path`);
const {
isWebUri
} = require(`valid-url`);
const Queue = require(`better-queue`);
const readChunk = require(`read-chunk`);
const fileType = require(`file-type`);
const {
createProgress
} = require(`./utils`);
const {
createFileNode
} = require(`./create-file-node`);
const {
getRemoteFileExtension,
getRemoteFileName,
createFilePath
} = require(`./utils`);
// Build the key under which a url's response headers are stored in the Gatsby cache.
const cacheId = url => {
  return `create-remote-file-node-${url}`;
};
// Progress bar for the current batch of downloads. It is (re)created by the
// exported function whenever a job is pushed while `totalJobs` is 0, and is
// marked done by the queue's `drain` handler.
let bar;
// Keep track of the total number of jobs we push in the queue.
// Reset to 0 by the queue's `drain` handler.
let totalJobs = 0;
/********************
* Type Definitions *
********************/
/**
* @typedef {Redux}
* @see [Redux Docs]{@link https://redux.js.org/api-reference}
*/
/**
* @typedef {GatsbyCache}
* @see gatsby/packages/gatsby/utils/cache.js
*/
/**
* @typedef {Reporter}
* @see gatsby/packages/gatsby-cli/lib/reporter.js
*/
/**
* @typedef {Auth}
* @type {Object}
* @property {String} htaccess_pass
* @property {String} htaccess_user
*/
/**
* @typedef {CreateRemoteFileNodePayload}
* @type {Object}
* @description Create Remote File Node Payload
*
* @property {String} url
* @property {Redux} store
* @property {GatsbyCache} cache
* @property {Function} createNode
* @property {Auth} [auth]
* @property {Reporter} [reporter]
*/
// Path segments joined onto the program directory by processRemoteNode to
// locate the directory where downloaded files are stored.
const CACHE_DIR = `.cache`;
const FS_PLUGIN_DIR = `gatsby-source-filesystem`;
/********************
* Queue Management *
********************/
// Number of simultaneous downloads used when GATSBY_CONCURRENT_DOWNLOAD is
// unset or is not a positive integer.
const DEFAULT_CONCURRENT_DOWNLOADS = 200;
// Environment variables are always strings: parse explicitly so the queue
// receives a real number, and so garbage values (`abc`, `0`, `-3`) fall back
// to the default instead of stalling the queue.
const requestedConcurrentDownloads = Number.parseInt(process.env.GATSBY_CONCURRENT_DOWNLOAD, 10);
/**
 * Queue
 * Use the task's url as the id
 * When pushing a task with a similar id, prefer the original task
 * as it's already in the processing cache
 *
 * NOTE(review): better-queue documents the merge callback as
 * `cb(error, mergedTask)`; here the old task is passed as the first
 * argument. Left unchanged -- confirm against the better-queue version in use.
 */
const queue = new Queue(pushToQueue, {
  id: `url`,
  merge: (old, _, cb) => cb(old),
  concurrent: requestedConcurrentDownloads > 0 ? requestedConcurrentDownloads : DEFAULT_CONCURRENT_DOWNLOADS
});
// when the queue is empty we stop the progressbar
queue.on(`drain`, () => {
  if (bar) {
    bar.done();
  }
  // Start counting from scratch for the next batch of downloads.
  totalJobs = 0;
});
/**
* @callback {Queue~queueCallback}
* @param {*} error
* @param {*} result
*/
/**
 * pushToQueue
 * --
 * Worker function handed to the Queue: processes one task and reports the
 * outcome through the node-style callback.
 *
 * Any error thrown while processing (or by the success callback itself) is
 * forwarded to `cb` as its first argument.
 *
 * @param {CreateRemoteFileNodePayload} task
 * @param {Queue~queueCallback} cb
 * @return {Promise<*>} Resolves with whatever `cb` returned
 */
async function pushToQueue(task, cb) {
  try {
    const fileNode = await processRemoteNode(task);
    return cb(null, fileNode);
  } catch (processingError) {
    return cb(processingError);
  }
}
/******************
* Core Functions *
******************/
/**
 * requestRemoteNode
 * --
 * Download the requested file, streaming the response body into `tmpFilename`.
 *
 * @param {String} url
 * @param {Headers} headers  Request headers to send
 * @param {String} tmpFilename  Path of the file the body is written to
 * @param {Object} httpOpts  Extra options merged over the defaults (`timeout: 30000`, `retries: 5`) and passed to `got.stream`
 * @return {Promise<Object>} Resolves with the object emitted by the stream's `response` event once the file has been completely written; rejects on download or write errors
 */
const requestRemoteNode = (url, headers, tmpFilename, httpOpts) => new Promise((resolve, reject) => {
  const opts = Object.assign({}, {
    timeout: 30000,
    retries: 5
  }, httpOpts);
  const responseStream = got.stream(url, Object.assign({
    headers
  }, opts));
  const fsWriteStream = fs.createWriteStream(tmpFilename);
  responseStream.pipe(fsWriteStream);

  // If there's a 400/500 response or other error.
  responseStream.on(`error`, error => {
    // pipe() does not end the destination when the source errors, so release
    // the file descriptor ourselves before discarding the partial download.
    fsWriteStream.destroy();
    fs.removeSync(tmpFilename);
    reject(error);
  });
  fsWriteStream.on(`error`, error => {
    reject(error);
  });
  responseStream.on(`response`, response => {
    // Only resolve once everything has been flushed to disk, so callers can
    // safely read or move the temp file.
    fsWriteStream.on(`finish`, () => {
      resolve(response);
    });
  });
});
/**
 * processRemoteNode
 * --
 * Request the remote file and return the fileNode.
 *
 * Steps: ensure the plugin cache directory exists, send the request (with an
 * `If-None-Match` header when an etag was cached for this url), store the
 * downloaded file under `<cache dir>/<digest of url>/<name><ext>`, then create
 * a File node for it via `createNode`.
 *
 * @param {CreateRemoteFileNodePayload} options
 * @return {Promise<Object>} Resolves with the fileNode
 */
async function processRemoteNode({
  url,
  store,
  cache,
  createNode,
  parentNodeId,
  auth = {},
  httpHeaders = {},
  createNodeId,
  ext,
  name
}) {
  // Ensure our cache directory exists.
  const pluginCacheDir = path.join(store.getState().program.directory, CACHE_DIR, FS_PLUGIN_DIR);
  await fs.ensureDir(pluginCacheDir);

  // See if there's response headers for this url
  // from a previous request.
  const cachedHeaders = await cache.get(cacheId(url));
  const headers = Object.assign({}, httpHeaders);
  if (cachedHeaders && cachedHeaders.etag) {
    headers[`If-None-Match`] = cachedHeaders.etag;
  }

  // Add htaccess authentication if passed in. This isn't particularly
  // extensible. We should define a proper API that we validate.
  const httpOpts = {};
  if (auth && (auth.htaccess_pass || auth.htaccess_user)) {
    // Either half may be omitted; send it as an empty string rather than
    // the literal text "undefined".
    httpOpts.auth = `${auth.htaccess_user || ``}:${auth.htaccess_pass || ``}`;
  }

  // Create the temp and permanent file names for the url.
  const digest = createContentDigest(url);
  const fileName = name || getRemoteFileName(url);
  let fileExt = ext || getRemoteFileExtension(url);
  const tmpFilename = createFilePath(pluginCacheDir, `tmp-${digest}`, fileExt);

  // Fetch the file.
  const response = await requestRemoteNode(url, headers, tmpFilename, httpOpts);
  const isFreshDownload = response.statusCode === 200;
  if (isFreshDownload) {
    // Save the response headers for future requests.
    await cache.set(cacheId(url), response.headers);
  }

  // If the user did not provide an extension and we couldn't get one from remote file, try and guess one
  if (fileExt === ``) {
    const buffer = readChunk.sync(tmpFilename, 0, fileType.minimumBytes);
    const filetype = fileType(buffer);
    if (filetype) {
      fileExt = `.${filetype.ext}`;
    }
  }
  const filename = createFilePath(path.join(pluginCacheDir, digest), fileName, fileExt);

  if (isFreshDownload) {
    // If the status code is 200, move the piped temp file to the real name.
    await fs.move(tmpFilename, filename, {
      overwrite: true
    });
  } else {
    // Else if 304, remove the empty response.
    await fs.remove(tmpFilename);
  }

  // Create the file node.
  const fileNode = await createFileNode(filename, createNodeId, {});
  fileNode.internal.description = `File "${url}"`;
  fileNode.url = url;
  fileNode.parent = parentNodeId;

  // Override the default plugin as gatsby-source-filesystem needs to
  // be the owner of File nodes or there'll be conflicts if any other
  // File nodes are created through normal usages of
  // gatsby-source-filesystem.
  await createNode(fileNode, {
    name: `gatsby-source-filesystem`
  });
  return fileNode;
}
/**
 * Index of promises resolving to File node from remote url
 *
 * Created without a prototype so that a lookup by arbitrary url string
 * (e.g. `constructor`, `toString`) can never hit an inherited
 * Object.prototype member and be mistaken for a cached promise.
 */
const processingCache = Object.create(null);
/**
 * pushTask
 * --
 * Hands a task to the Queue and exposes its outcome as a promise.
 *
 * The promise resolves with the value delivered by the ticket's `finish`
 * event and rejects with a descriptive string when the ticket emits `failed`.
 *
 * @param {CreateRemoteFileNodePayload} task
 * @return {Promise<Object>}
 */
const pushTask = task => new Promise((resolve, reject) => {
  const ticket = queue.push(task);
  ticket.on(`finish`, fileNode => {
    resolve(fileNode);
  });
  ticket.on(`failed`, err => {
    reject(`failed to process ${task.url}\n${err}`);
  });
});
/***************
* Entry Point *
***************/
/**
* createRemoteFileNode
* --
*
* Download a remote file
* First checks cache to ensure duplicate requests aren't processed
* Then pushes to a queue
*
* @param {CreateRemoteFileNodePayload} options
* @return {Promise<Object>} Returns the created node
*/
module.exports = ({
url,
store,
cache,
createNode,
parentNodeId = null,
auth = {},
httpHeaders = {},
createNodeId,
ext = null,
name = null,
reporter
}) => {
// validation of the input
// without this it's notoriously easy to pass in the wrong `createNodeId`
// see gatsbyjs/gatsby#6643
if (typeof createNodeId !== `function`) {
throw new Error(`createNodeId must be a function, was ${typeof createNodeId}`);
}
if (typeof createNode !== `function`) {
throw new Error(`createNode must be a function, was ${typeof createNode}`);
}
if (typeof store !== `object`) {
throw new Error(`store must be the redux store, was ${typeof store}`);
}
if (typeof cache !== `object`) {
throw new Error(`cache must be the Gatsby cache, was ${typeof cache}`);
} // Check if we already requested node for this remote file
// and return stored promise if we did.
if (processingCache[url]) {
return processingCache[url];
}
if (!url || isWebUri(url) === undefined) {
return Promise.reject(`wrong url: ${url}`);
}
if (totalJobs === 0) {
bar = createProgress(`Downloading remote files`, reporter);
bar.start();
}
totalJobs += 1;
bar.total = totalJobs;
const fileDownloadPromise = pushTask({
url,
store,
cache,
createNode,
parentNodeId,
createNodeId,
auth,
httpHeaders,
ext,
name
});
processingCache[url] = fileDownloadPromise.then(node => {
bar.tick();
return node;
});
return processingCache[url];
};