import functools
import ipaddress
import re
from typing import Any, Callable
from urllib.parse import urljoin, urlparse

try:
    # On Python 3, requests.compat.urljoin is the stdlib function re-exported,
    # so this import is kept for compatibility but must not be a hard
    # requirement for loading this module.
    from requests.compat import urljoin  # noqa: F811
except ImportError:
    # Fall back to the urllib.parse.urljoin imported above.
    pass

# Upper bound on accepted URL length, enforced by validate_url.
_MAX_URL_LENGTH = 2000

# URL prefixes that designate the local machine. Matching is done with
# str.startswith on the lower-cased URL, so each entry also covers the same
# prefix followed by "/", ":port", a path, etc.
_LOCAL_URL_PREFIXES = (
    "file:///",
    "file://localhost",
    "http://localhost",
    "https://localhost",
    # 2130706433 is 127.0.0.1 written as a single decimal integer.
    "http://2130706433",
    "https://2130706433",
    "http://127.0.0.1",
    "https://127.0.0.1",
    "http://0.0.0.0",
    "https://0.0.0.0",
    "http://0000",
    "https://0000",
)


def validate_url(func: Callable[..., Any]) -> Callable[..., Any]:
    """Decorate a command so that its first argument, a URL, is validated.

    The wrapped callable must take the URL as its first positional argument.
    When every check passes, ``func`` receives the sanitized URL (see
    ``sanitize_url``) followed by the remaining arguments unchanged.

    Args:
        func: The callable to protect.

    Returns:
        A wrapper with the same metadata as ``func`` that validates the URL
        before delegating to it.
    """

    @functools.wraps(func)
    def wrapper(url: str, *args: Any, **kwargs: Any) -> Any:
        """Validate ``url`` and call the wrapped function.

        Args:
            url: The URL to check.

        Returns:
            The result of the wrapped function.

        Raises:
            ValueError: If the URL fails any of the validation tests.
        """
        # Most basic check: only absolute http(s) URLs are accepted.
        if not re.match(r"^https?://", url):
            raise ValueError("Invalid URL format")
        if not is_valid_url(url):
            raise ValueError("Missing Scheme or Network location")
        # Restrict access to local files and loopback hosts.
        if check_local_file_access(url):
            raise ValueError("Access to local files is restricted")
        if len(url) > _MAX_URL_LENGTH:
            raise ValueError("URL is too long")

        return func(sanitize_url(url), *args, **kwargs)

    return wrapper


def is_valid_url(url: str) -> bool:
    """Check that the URL has both a scheme and a network location.

    Args:
        url: The URL to check.

    Returns:
        True if ``urlparse`` finds a non-empty scheme and netloc, False
        otherwise (including when parsing raises ``ValueError``).
    """
    try:
        parts = urlparse(url)
    except ValueError:
        return False
    return bool(parts.scheme and parts.netloc)


def sanitize_url(url: str) -> str:
    """Rebuild the URL from its path, parameters and query.

    The path, ``;params`` and ``?query`` are re-assembled and joined back
    onto the original URL, which supplies the scheme and network location.
    The fragment is not part of the rebuilt reference and is not carried over.

    Args:
        url: The URL to sanitize.

    Returns:
        The sanitized URL.
    """
    parts = urlparse(url)
    # Path parameters are introduced by ";"; omitting it would fuse them
    # into the last path segment.
    params = f";{parts.params}" if parts.params else ""
    reference = f"{parts.path}{params}?{parts.query}"
    return urljoin(url, reference)


def _is_loopback_host(url: str) -> bool:
    """Return True if the URL's host is localhost or a loopback/unspecified IP."""
    try:
        host = urlparse(url).hostname
    except ValueError:
        return False
    if not host:
        return False

    # A trailing dot only marks the name as fully qualified.
    host = host.rstrip(".")
    if host == "localhost":
        return True
    try:
        address = ipaddress.ip_address(host)
    except ValueError:
        # Not an IP literal; other host names are not resolved here.
        return False
    return address.is_loopback or address.is_unspecified


def check_local_file_access(url: str) -> bool:
    """Check whether the URL targets a local file or the local machine.

    The URL is matched case-insensitively against a list of known local
    prefixes. In addition, the parsed host is inspected so that spellings
    the prefix list cannot catch (user-info before the host, IPv6 loopback,
    other addresses in 127.0.0.0/8) are also recognised.

    Args:
        url: The URL to check.

    Returns:
        True if the URL points at a local file or loopback host, False
        otherwise.
    """
    if url.lower().startswith(_LOCAL_URL_PREFIXES):
        return True
    return _is_loopback_host(url)