|
|
'use strict';
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs
// MIME type that `normalizeDataURL` leaves out of its output when nothing else
// (attributes or the `base64` marker) has to follow it.
const DATA_URL_DEFAULT_MIME_TYPE = 'text/plain';

// `charset` value that `normalizeDataURL` drops from the attribute list.
const DATA_URL_DEFAULT_CHARSET = 'us-ascii';
/**
 * Check whether `name` is matched by at least one entry of `filters`.
 *
 * @param {string} name - Value to test (a query parameter key or a path component).
 * @param {Array<string|RegExp>} filters - Strings are compared with `===`;
 *   regular expressions are tested against `name`.
 * @returns {boolean} `true` if any filter matches, otherwise `false`.
 */
const testParameter = (name, filters) => {
	return filters.some(filter => {
		if (!(filter instanceof RegExp)) {
			return filter === name;
		}

		// `RegExp#test` resumes from `lastIndex` on global/sticky expressions, so a
		// filter reused across calls would otherwise miss every other match.
		if (filter.global || filter.sticky) {
			filter.lastIndex = 0;
		}

		return filter.test(name);
	});
};
/**
 * Normalize a `data:` URL.
 *
 * The MIME type is lowercased, attribute keys/values are trimmed, a `charset`
 * value is lowercased and dropped when it equals the default charset, and the
 * MIME type itself is omitted when it is empty or the default and nothing else
 * has to follow it. For base64 payloads the data is trimmed.
 *
 * @param {string} urlString - The data URL; must start with a lowercase `data:`.
 * @param {{stripHash: boolean}} options - When `stripHash` is truthy the `#…` fragment is removed.
 * @returns {string} The normalized data URL.
 * @throws {Error} If `urlString` does not have the `data:<type>,<data>` shape.
 */
const normalizeDataURL = (urlString, {stripHash}) => {
	const parsed = /^data:(?<type>[^,]*?),(?<data>[^#]*?)(?:#(?<hash>.*))?$/.exec(urlString);

	if (parsed === null) {
		throw new Error(`Invalid URL: ${urlString}`);
	}

	const {type, data} = parsed.groups;
	const fragment = stripHash ? '' : parsed.groups.hash;

	// `<mime>;<attr>;…;base64` — the trailing `base64` marker is optional.
	const segments = type.split(';');
	const isBase64 = segments[segments.length - 1] === 'base64';
	if (isBase64) {
		segments.pop();
	}

	const [rawMimeType, ...rawAttributes] = segments;
	const mimeType = (rawMimeType || '').toLowerCase();

	const mediaTypeParts = [];
	for (const attribute of rawAttributes) {
		const [key, rawValue = ''] = attribute.split('=').map(piece => piece.trim());
		const isCharset = key === 'charset';
		const value = isCharset ? rawValue.toLowerCase() : rawValue;

		// The default charset carries no information, so it is left out.
		if (isCharset && value === DATA_URL_DEFAULT_CHARSET) {
			continue;
		}

		const rendered = value ? `${key}=${value}` : key;
		if (rendered) {
			mediaTypeParts.push(rendered);
		}
	}

	if (isBase64) {
		mediaTypeParts.push('base64');
	}

	// The MIME type has to be written whenever something follows it; otherwise
	// only when it is non-empty and not the default.
	const needsMimeType = mediaTypeParts.length > 0 || (mimeType !== '' && mimeType !== DATA_URL_DEFAULT_MIME_TYPE);
	const mediaType = needsMimeType ? [mimeType, ...mediaTypeParts].join(';') : '';

	const payload = isBase64 ? data.trim() : data;
	const suffix = fragment ? `#${fragment}` : '';

	return `data:${mediaType},${payload}${suffix}`;
};
/**
 * Normalize a URL string.
 *
 * `data:` URLs are delegated to `normalizeDataURL`; everything else is parsed
 * with the WHATWG `URL` class and rewritten according to `options`.
 *
 * @param {string} urlString - URL to normalize. Surrounding whitespace is trimmed.
 * @param {object} [options] - Overrides for the defaults below.
 * @param {string} [options.defaultProtocol='http:'] - Protocol prepended to URLs that
 *   have none or are protocol-relative. A bare scheme name such as `'https'` is
 *   accepted and treated as `'https:'`.
 * @param {boolean} [options.normalizeProtocol=true] - When `false`, a protocol-relative
 *   input whose result starts with `http://` is returned protocol-relative again.
 * @param {boolean} [options.forceHttp=false] - Rewrite `https:` to `http:`.
 * @param {boolean} [options.forceHttps=false] - Rewrite `http:` to `https:`.
 * @param {boolean} [options.stripAuthentication=true] - Clear username and password.
 * @param {boolean} [options.stripHash=false] - Remove the hash.
 * @param {boolean} [options.stripTextFragment=true] - Remove a `:~:text…` fragment
 *   from the hash (only consulted when `stripHash` is falsy).
 * @param {boolean} [options.stripWWW=true] - Remove a leading `www.` from the hostname.
 * @param {Array<string|RegExp>|boolean} [options.removeQueryParameters=[/^utm_\w+/i]] -
 *   Filters for query keys to delete; `true` removes the whole query string.
 * @param {boolean} [options.removeTrailingSlash=true] - Remove a trailing `/` from the path.
 * @param {boolean} [options.removeSingleSlash=true] - Remove the lone `/` after the host.
 * @param {boolean|Array<string|RegExp>} [options.removeDirectoryIndex=false] - Filters for
 *   a final path component to drop; `true` means `[/^index\.[a-z]+$/]`.
 * @param {boolean} [options.sortQueryParameters=true] - Sort the query parameters.
 * @param {boolean} [options.stripProtocol] - Remove `http://`, `https://` or `//` from the result.
 * @returns {string} The normalized URL.
 * @throws {Error} For `view-source:` URLs, or when `forceHttp` and `forceHttps` are both set.
 * @throws {TypeError} From the `URL` constructor when the input cannot be parsed.
 */
const normalizeUrl = (urlString, options) => {
	options = {
		defaultProtocol: 'http:',
		normalizeProtocol: true,
		forceHttp: false,
		forceHttps: false,
		stripAuthentication: true,
		stripHash: false,
		stripTextFragment: true,
		stripWWW: true,
		removeQueryParameters: [/^utm_\w+/i],
		removeTrailingSlash: true,
		removeSingleSlash: true,
		removeDirectoryIndex: false,
		sortQueryParameters: true,
		...options
	};

	// A bare scheme name (`'https'`) would be glued straight onto the host and
	// produce an unparsable string, so give it its `:`. Values that already carry
	// a colon (`'https:'`, `'https://'`) are left exactly as passed.
	if (typeof options.defaultProtocol === 'string' && /^[a-z][a-z\d+\-.]*$/i.test(options.defaultProtocol)) {
		options.defaultProtocol = `${options.defaultProtocol}:`;
	}

	urlString = urlString.trim();

	// Data URL
	if (/^data:/i.test(urlString)) {
		return normalizeDataURL(urlString, options);
	}

	if (/^view-source:/i.test(urlString)) {
		throw new Error('`view-source:` is not supported as it is a non-standard protocol');
	}

	const hasRelativeProtocol = urlString.startsWith('//');
	const isRelativeUrl = !hasRelativeProtocol && /^\.*\//.test(urlString);

	// Prepend protocol: either in front of a URL that has no `scheme://` prefix,
	// or in place of the leading `//` of a protocol-relative URL.
	if (!isRelativeUrl) {
		urlString = urlString.replace(/^(?!(?:\w+:)?\/\/)|^\/\//, options.defaultProtocol);
	}

	const urlObj = new URL(urlString);

	if (options.forceHttp && options.forceHttps) {
		throw new Error('The `forceHttp` and `forceHttps` options cannot be used together');
	}

	if (options.forceHttp && urlObj.protocol === 'https:') {
		urlObj.protocol = 'http:';
	}

	if (options.forceHttps && urlObj.protocol === 'http:') {
		urlObj.protocol = 'https:';
	}

	// Remove auth
	if (options.stripAuthentication) {
		urlObj.username = '';
		urlObj.password = '';
	}

	// Remove hash
	if (options.stripHash) {
		urlObj.hash = '';
	} else if (options.stripTextFragment) {
		urlObj.hash = urlObj.hash.replace(/#?:~:text.*?$/i, '');
	}

	// Remove duplicate slashes if not preceded by a protocol
	if (urlObj.pathname) {
		urlObj.pathname = urlObj.pathname.replace(/(?<!\b(?:[a-z][a-z\d+\-.]{1,50}:))\/{2,}/g, '/');
	}

	// Decode URI octets
	if (urlObj.pathname) {
		try {
			urlObj.pathname = decodeURI(urlObj.pathname);
		} catch (_) {
			// Best effort: a malformed escape sequence leaves the path as it was.
		}
	}

	// Remove directory index
	if (options.removeDirectoryIndex === true) {
		options.removeDirectoryIndex = [/^index\.[a-z]+$/];
	}

	if (Array.isArray(options.removeDirectoryIndex) && options.removeDirectoryIndex.length > 0) {
		let pathComponents = urlObj.pathname.split('/');
		const lastComponent = pathComponents[pathComponents.length - 1];

		if (testParameter(lastComponent, options.removeDirectoryIndex)) {
			pathComponents = pathComponents.slice(0, pathComponents.length - 1);
			urlObj.pathname = pathComponents.slice(1).join('/') + '/';
		}
	}

	if (urlObj.hostname) {
		// Remove trailing dot
		urlObj.hostname = urlObj.hostname.replace(/\.$/, '');

		// Remove `www.`
		if (options.stripWWW && /^www\.(?!www\.)(?:[a-z\-\d]{1,63})\.(?:[a-z.\-\d]{2,63})$/.test(urlObj.hostname)) {
			// Each label should be max 63 at length (min: 1).
			// Source: https://en.wikipedia.org/wiki/Hostname#Restrictions_on_valid_host_names
			// Each TLD should be up to 63 characters long (min: 2).
			// It is technically possible to have a single character TLD, but none currently exist.
			urlObj.hostname = urlObj.hostname.replace(/^www\./, '');
		}
	}

	// Remove unwanted query parameters. The keys are copied first because
	// entries are deleted while walking them.
	if (Array.isArray(options.removeQueryParameters)) {
		for (const key of [...urlObj.searchParams.keys()]) {
			if (testParameter(key, options.removeQueryParameters)) {
				urlObj.searchParams.delete(key);
			}
		}
	}

	if (options.removeQueryParameters === true) {
		urlObj.search = '';
	}

	// Sort query parameters
	if (options.sortQueryParameters) {
		urlObj.searchParams.sort();
	}

	if (options.removeTrailingSlash) {
		urlObj.pathname = urlObj.pathname.replace(/\/$/, '');
	}

	const oldUrlString = urlString;

	// Take advantage of many of the Node `url` normalizations
	urlString = urlObj.toString();

	// The serializer adds a `/` after a bare host; take it off again when the
	// input did not end with one and single slashes are meant to be kept as given.
	if (!options.removeSingleSlash && urlObj.pathname === '/' && !oldUrlString.endsWith('/') && urlObj.hash === '') {
		urlString = urlString.replace(/\/$/, '');
	}

	// Remove ending `/` unless removeSingleSlash is false
	if ((options.removeTrailingSlash || urlObj.pathname === '/') && urlObj.hash === '' && options.removeSingleSlash) {
		urlString = urlString.replace(/\/$/, '');
	}

	// Restore relative protocol, if applicable (only an `http://` prefix is turned back into `//`)
	if (hasRelativeProtocol && !options.normalizeProtocol) {
		urlString = urlString.replace(/^http:\/\//, '//');
	}

	// Remove http/https
	if (options.stripProtocol) {
		urlString = urlString.replace(/^(?:https?:)?\/\//, '');
	}

	return urlString;
};
// CommonJS export: the module's value is the `normalizeUrl` function itself.
module.exports = normalizeUrl;
|