From c9878b51c8c323667616e497293637da31904487 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20Hells=C3=A9n?= Date: Wed, 31 Jan 2024 00:03:17 +0100 Subject: [PATCH] Remove regexes from is_local() --- .vscode/settings.json | 1 + feeds/tests.py | 50 +++++++++++++++++++++++++++++++++++++++++++ feeds/validator.py | 46 +++++++++++++++++++-------------------- 3 files changed, 73 insertions(+), 24 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index f933597..b02bb58 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -7,6 +7,7 @@ "feedburner", "feedparser", "feedvault", + "gaierror", "leftright", "levelname", "PGHOST", diff --git a/feeds/tests.py b/feeds/tests.py index 9be4eef..1e75f1e 100644 --- a/feeds/tests.py +++ b/feeds/tests.py @@ -1 +1,51 @@ """https://docs.djangoproject.com/en/5.0/topics/testing/.""" + +from __future__ import annotations + +import random +from typing import TYPE_CHECKING + +from django.test import Client, TestCase + +from feeds.validator import is_ip, validate_scheme + +if TYPE_CHECKING: + from django.http import HttpResponse + + +class TestHomePage(TestCase): + """Test case for the home page view.""" + + def setUp(self: TestHomePage) -> None: + """Set up the test client for the test case.""" + self.client = Client() + + def test_home_page(self: TestHomePage) -> None: + """Test that a GET request to the home page returns a 200 status code.""" + response: HttpResponse = self.client.get("/") + assert response.status_code == 200 + + +class TestValidator(TestCase): + """Test case for the validator.""" + + def setUp(self: TestValidator) -> None: + """Set up the test client for the test case.""" + self.client = Client() + + def test_is_ip(self: TestValidator) -> None: + """Test that is_ip() returns True for a valid IP address.""" + # Test random IP address + random_ip: str = ".".join(str(random.randint(0, 255)) for _ in range(4)) # noqa: S311 + assert is_ip(feed_url=random_ip) + + # Test domain name + assert not is_ip(feed_url="https://example.com") + + def test_validate_scheme(self: TestValidator) -> None: + """Test that validate_scheme() returns True for a valid scheme.""" + assert validate_scheme(feed_url="https://example.com") + assert validate_scheme(feed_url="http://example.com") + assert not validate_scheme(feed_url="ftp://example.com") + assert not validate_scheme(feed_url="example.com") + assert not validate_scheme(feed_url="127.0.0.1") diff --git a/feeds/validator.py b/feeds/validator.py index 6a1a8da..1bc2721 100644 --- a/feeds/validator.py +++ b/feeds/validator.py @@ -4,7 +4,7 @@ from __future__ import annotations import ipaddress import logging -import re +import socket from urllib.parse import urlparse import requests @@ -45,7 +45,7 @@ def is_ip(feed_url: str) -> bool: try: ipaddress.ip_address(feed_url) except ValueError: - logger.info(f"{feed_url} is not an IP address") # noqa: G004 + logger.info(f"{feed_url} passed isn't either a v4 or a v6 address") # noqa: G004 return False else: logger.info(f"{feed_url} is an IP address") # noqa: G004 @@ -97,30 +97,28 @@ def update_blocklist() -> str: def is_local(feed_url: str) -> bool: """Check if feed is a local address.""" - # Regexes from https://github.com/gwarser/filter-lists - regexes: list[str] = [ - # 10.0.0.0 - 10.255.255.255 - r"^\w+:\/\/10\.(?:(?:[1-9]?\d|1\d\d|2(?:[0-4]\d|5[0-5]))\.){2}(?:[1-9]?\d|1\d\d|2(?:[0-4]\d|5[0-5]))[:/]", - # 172.16.0.0 - 172.31.255.255 - r"^\w+:\/\/172\.(?:1[6-9]|2\d|3[01])(?:\.(?:[1-9]?\d|1\d\d|2(?:[0-4]\d|5[0-5]))){2}[:/]", - # 192.168.0.0 - 192.168.255.255 - r"^\w+:\/\/192\.168(?:\.(?:[1-9]?\d|1\d\d|2(?:[0-4]\d|5[0-5]))){2}[:/]", - # https://en.wikipedia.org/wiki/Private_network#Link-local_addresses - r"^\w+:\/\/169\.254\.(?:[1-9]\d?|1\d{2}|2(?:[0-4]\d|5[0-4]))\.(?:[1-9]?\d|1\d{2}|2(?:[0-4]\d|5[0-5]))[:/]", - # https://en.wikipedia.org/wiki/IPv6_address#Transition_from_IPv4 - r"^\w+:\/\/\[::ffff:(?:7f[0-9a-f]{2}|a[0-9a-f]{2}|ac1[0-9a-f]|c0a8|a9fe):[0-9a-f]{1,4}\][:/]", - # localhost - r"^\w+:\/\/127\.(?:(?:[1-9]?\d|1\d\d|2(?:[0-4]\d|5[0-5]))\.){2}(?:[1-9]?\d|1\d\d|2(?:[0-4]\d|5[0-5]))[:/]", - ] + network_location: str = urlparse(url=feed_url).netloc - domain: str | None = urlparse(feed_url).hostname - if not domain: - return False + # Check if network location is an IP address + if is_ip(feed_url=network_location): + try: + ip: ipaddress.IPv4Address | ipaddress.IPv6Address = ipaddress.ip_address(address=network_location) + except ValueError: + return False + else: + return ip.is_private - if domain in {"localhost", "127.0.0.1", "::1", "0.0.0.0", "::", "local", "[::1]"}: # noqa: S104 + try: + ip_address: str = socket.gethostbyname(network_location) + is_private: bool = ipaddress.ip_address(address=ip_address).is_private + except socket.gaierror as e: + logger.info(f"{feed_url} failed to resolve: {e}") # noqa: G004 + return True + except ValueError as e: + logger.info(f"{feed_url} failed to resolve: {e}") # noqa: G004 return True - if domain.endswith((".local", ".home.arpa")): - return True + msg: str = f"{feed_url} is a local URL" if is_private else f"{feed_url} is not a local URL" + logger.info(msg) - return any(re.match(regex, feed_url) for regex in regexes) + return is_private