"use strict"; const fs = require(`fs-extra`); const got = require(`got`); const { createContentDigest } = require(`gatsby-core-utils`); const path = require(`path`); const { isWebUri } = require(`valid-url`); const Queue = require(`better-queue`); const readChunk = require(`read-chunk`); const fileType = require(`file-type`); const { createProgress } = require(`./utils`); const { createFileNode } = require(`./create-file-node`); const { getRemoteFileExtension, getRemoteFileName, createFilePath } = require(`./utils`); const cacheId = url => `create-remote-file-node-${url}`; let bar; // Keep track of the total number of jobs we push in the queue let totalJobs = 0; /******************** * Type Definitions * ********************/ /** * @typedef {Redux} * @see [Redux Docs]{@link https://redux.js.org/api-reference} */ /** * @typedef {GatsbyCache} * @see gatsby/packages/gatsby/utils/cache.js */ /** * @typedef {Reporter} * @see gatsby/packages/gatsby-cli/lib/reporter.js */ /** * @typedef {Auth} * @type {Object} * @property {String} htaccess_pass * @property {String} htaccess_user */ /** * @typedef {CreateRemoteFileNodePayload} * @typedef {Object} * @description Create Remote File Node Payload * * @param {String} options.url * @param {Redux} options.store * @param {GatsbyCache} options.cache * @param {Function} options.createNode * @param {Auth} [options.auth] * @param {Reporter} [options.reporter] */ const CACHE_DIR = `.cache`; const FS_PLUGIN_DIR = `gatsby-source-filesystem`; /******************** * Queue Management * ********************/ /** * Queue * Use the task's url as the id * When pushing a task with a similar id, prefer the original task * as it's already in the processing cache */ const queue = new Queue(pushToQueue, { id: `url`, merge: (old, _, cb) => cb(old), concurrent: process.env.GATSBY_CONCURRENT_DOWNLOAD || 200 }); // when the queue is empty we stop the progressbar queue.on(`drain`, () => { if (bar) { bar.done(); } totalJobs = 0; }); /** * @callback {Queue~queueCallback} * @param {*} error * @param {*} result */ /** * pushToQueue * -- * Handle tasks that are pushed in to the Queue * * * @param {CreateRemoteFileNodePayload} task * @param {Queue~queueCallback} cb * @return {Promise} */ async function pushToQueue(task, cb) { try { const node = await processRemoteNode(task); return cb(null, node); } catch (e) { return cb(e); } } /****************** * Core Functions * ******************/ /** * requestRemoteNode * -- * Download the requested file * * @param {String} url * @param {Headers} headers * @param {String} tmpFilename * @param {Object} httpOpts * @return {Promise} Resolves with the [http Result Object]{@link https://nodejs.org/api/http.html#http_class_http_serverresponse} */ const requestRemoteNode = (url, headers, tmpFilename, httpOpts) => new Promise((resolve, reject) => { const opts = Object.assign({}, { timeout: 30000, retries: 5 }, httpOpts); const responseStream = got.stream(url, Object.assign({ headers }, opts)); const fsWriteStream = fs.createWriteStream(tmpFilename); responseStream.pipe(fsWriteStream); responseStream.on(`downloadProgress`, pro => console.log(pro)); // If there's a 400/500 response or other error. responseStream.on(`error`, (error, body, response) => { fs.removeSync(tmpFilename); reject(error); }); fsWriteStream.on(`error`, error => { reject(error); }); responseStream.on(`response`, response => { fsWriteStream.on(`finish`, () => { resolve(response); }); }); }); /** * processRemoteNode * -- * Request the remote file and return the fileNode * * @param {CreateRemoteFileNodePayload} options * @return {Promise} Resolves with the fileNode */ async function processRemoteNode({ url, store, cache, createNode, parentNodeId, auth = {}, httpHeaders = {}, createNodeId, ext, name }) { // Ensure our cache directory exists. const pluginCacheDir = path.join(store.getState().program.directory, CACHE_DIR, FS_PLUGIN_DIR); await fs.ensureDir(pluginCacheDir); // See if there's response headers for this url // from a previous request. const cachedHeaders = await cache.get(cacheId(url)); const headers = Object.assign({}, httpHeaders); if (cachedHeaders && cachedHeaders.etag) { headers[`If-None-Match`] = cachedHeaders.etag; } // Add htaccess authentication if passed in. This isn't particularly // extensible. We should define a proper API that we validate. const httpOpts = {}; if (auth && (auth.htaccess_pass || auth.htaccess_user)) { httpOpts.auth = `${auth.htaccess_user}:${auth.htaccess_pass}`; } // Create the temp and permanent file names for the url. const digest = createContentDigest(url); if (!name) { name = getRemoteFileName(url); } if (!ext) { ext = getRemoteFileExtension(url); } const tmpFilename = createFilePath(pluginCacheDir, `tmp-${digest}`, ext); // Fetch the file. const response = await requestRemoteNode(url, headers, tmpFilename, httpOpts); if (response.statusCode == 200) { // Save the response headers for future requests. await cache.set(cacheId(url), response.headers); } // If the user did not provide an extension and we couldn't get one from remote file, try and guess one if (ext === ``) { const buffer = readChunk.sync(tmpFilename, 0, fileType.minimumBytes); const filetype = fileType(buffer); if (filetype) { ext = `.${filetype.ext}`; } } const filename = createFilePath(path.join(pluginCacheDir, digest), name, ext); // If the status code is 200, move the piped temp file to the real name. if (response.statusCode === 200) { await fs.move(tmpFilename, filename, { overwrite: true }); // Else if 304, remove the empty response. } else { await fs.remove(tmpFilename); } // Create the file node. const fileNode = await createFileNode(filename, createNodeId, {}); fileNode.internal.description = `File "${url}"`; fileNode.url = url; fileNode.parent = parentNodeId; // Override the default plugin as gatsby-source-filesystem needs to // be the owner of File nodes or there'll be conflicts if any other // File nodes are created through normal usages of // gatsby-source-filesystem. await createNode(fileNode, { name: `gatsby-source-filesystem` }); return fileNode; } /** * Index of promises resolving to File node from remote url */ const processingCache = {}; /** * pushTask * -- * pushes a task in to the Queue and the processing cache * * Promisfy a task in queue * @param {CreateRemoteFileNodePayload} task * @return {Promise} */ const pushTask = task => new Promise((resolve, reject) => { queue.push(task).on(`finish`, task => { resolve(task); }).on(`failed`, err => { reject(`failed to process ${task.url}\n${err}`); }); }); /*************** * Entry Point * ***************/ /** * createRemoteFileNode * -- * * Download a remote file * First checks cache to ensure duplicate requests aren't processed * Then pushes to a queue * * @param {CreateRemoteFileNodePayload} options * @return {Promise} Returns the created node */ module.exports = ({ url, store, cache, createNode, parentNodeId = null, auth = {}, httpHeaders = {}, createNodeId, ext = null, name = null, reporter }) => { // validation of the input // without this it's notoriously easy to pass in the wrong `createNodeId` // see gatsbyjs/gatsby#6643 if (typeof createNodeId !== `function`) { throw new Error(`createNodeId must be a function, was ${typeof createNodeId}`); } if (typeof createNode !== `function`) { throw new Error(`createNode must be a function, was ${typeof createNode}`); } if (typeof store !== `object`) { throw new Error(`store must be the redux store, was ${typeof store}`); } if (typeof cache !== `object`) { throw new Error(`cache must be the Gatsby cache, was ${typeof cache}`); } // Check if we already requested node for this remote file // and return stored promise if we did. if (processingCache[url]) { return processingCache[url]; } if (!url || isWebUri(url) === undefined) { return Promise.reject(`wrong url: ${url}`); } if (totalJobs === 0) { bar = createProgress(`Downloading remote files`, reporter); bar.start(); } totalJobs += 1; bar.total = totalJobs; const fileDownloadPromise = pushTask({ url, store, cache, createNode, parentNodeId, createNodeId, auth, httpHeaders, ext, name }); processingCache[url] = fileDownloadPromise.then(node => { bar.tick(); return node; }); return processingCache[url]; };