Validate URLs before adding
This commit is contained in:
parent
6f544db209
commit
c41780fca0
12 changed files with 386 additions and 16 deletions
|
|
@ -4,3 +4,66 @@ https://docs.djangoproject.com/en/5.0/ref/contrib/admin/
|
|||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, ClassVar
|
||||
|
||||
from django.contrib import admin
|
||||
|
||||
from feeds.models import (
|
||||
Author,
|
||||
Blocklist,
|
||||
Cloud,
|
||||
Contributor,
|
||||
Feed,
|
||||
Generator,
|
||||
Image,
|
||||
Info,
|
||||
Link,
|
||||
Publisher,
|
||||
Rights,
|
||||
Subtitle,
|
||||
Tags,
|
||||
TextInput,
|
||||
Title,
|
||||
)
|
||||
from feeds.validator import update_blocklist
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from django.db.models.query import QuerySet
|
||||
from django.http import HttpRequest
|
||||
|
||||
# These models use the default ModelAdmin, so they are registered together.
# ``register()`` accepts an iterable of model classes and registers each in order.
admin.site.register(
    [
        Author,
        Cloud,
        Contributor,
        Feed,
        Generator,
        Image,
        Info,
        Link,
        Publisher,
        Rights,
        Subtitle,
        Tags,
        TextInput,
        Title,
    ],
)
|
||||
|
||||
|
||||
# Add button to update blocklist on the admin page
@admin.register(Blocklist)
class BlocklistAdmin(admin.ModelAdmin):
    """Admin interface for blocklist.

    Shows each entry's ``url`` and ``active`` flag and adds two bulk actions:
    one that refreshes the blocklist via ``update_blocklist()`` and one that
    deletes every blocklist row.
    """

    actions: ClassVar[list[str]] = ["_update_blocklist", "delete_all_blocklist"]
    list_display: ClassVar[list[str]] = ["url", "active"]

    @admin.action(description="Update blocklist")
    def _update_blocklist(self: admin.ModelAdmin, request: HttpRequest, queryset: QuerySet) -> None:  # noqa: ARG002
        """Update blocklist.

        Args:
            request: The admin request; used to attach the result message.
            queryset: The rows selected in the change list. Ignored, the whole blocklist is refreshed.
        """
        # Imported here so the fix is self-contained. ``requests`` is already a
        # dependency of feeds.validator, which this module imports.
        import requests
        from django.contrib import messages

        try:
            msg: str = update_blocklist()
        except requests.RequestException as e:
            # A failed or timed-out download should show up as an admin message
            # instead of an unhandled server error.
            self.message_user(request=request, message=f"Failed to update blocklist: {e}", level=messages.ERROR)
            return

        self.message_user(request=request, message=msg)

    @admin.action(description="Delete all blocklists")
    def delete_all_blocklist(self: admin.ModelAdmin, request: HttpRequest, queryset: QuerySet) -> None:  # noqa: ARG002
        """Delete all blocklist from database.

        Args:
            request: The admin request; used to attach the result message.
            queryset: The rows selected in the change list. Ignored, every row is deleted.
        """
        Blocklist.objects.all().delete()
        self.message_user(request=request, message="Deleted all blocklists")
|
||||
|
|
|
|||
25
feeds/migrations/0002_blocklist.py
Normal file
25
feeds/migrations/0002_blocklist.py
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
# Generated by Django 5.0.1 on 2024-01-30 16:41
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Create the ``Blocklist`` model with an auto ``id`` and a unique ``url`` field."""

    dependencies = [("feeds", "0001_initial")]

    operations = [
        migrations.CreateModel(
            name="Blocklist",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "url",
                    models.URLField(help_text="The URL to block.", unique=True),
                ),
            ],
            options={
                "verbose_name": "Blocklist",
                "verbose_name_plural": "Blocklists",
                "db_table_comment": "A list of URLs to block.",
            },
        ),
    ]
|
||||
18
feeds/migrations/0003_blocklist_active.py
Normal file
18
feeds/migrations/0003_blocklist_active.py
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
# Generated by Django 5.0.1 on 2024-01-30 16:45
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Add the boolean ``active`` field (default ``True``) to ``Blocklist``."""

    dependencies = [("feeds", "0002_blocklist")]

    operations = [
        migrations.AddField(
            model_name="blocklist",
            name="active",
            field=models.BooleanField(
                default=True,
                help_text="Is this URL still blocked?",
            ),
        ),
    ]
|
||||
18
feeds/migrations/0004_alter_blocklist_url.py
Normal file
18
feeds/migrations/0004_alter_blocklist_url.py
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
# Generated by Django 5.0.1 on 2024-01-30 18:22
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Change ``Blocklist.url`` to a unique ``CharField`` with ``max_length=2000``."""

    dependencies = [("feeds", "0003_blocklist_active")]

    operations = [
        migrations.AlterField(
            model_name="blocklist",
            name="url",
            field=models.CharField(
                help_text="The URL to block.",
                max_length=2000,
                unique=True,
            ),
        ),
    ]
|
||||
|
|
@ -963,3 +963,21 @@ class Feed(models.Model):
|
|||
def __str__(self: Feed) -> str:
|
||||
"""Feed URL."""
|
||||
return f"{self.url}"
|
||||
|
||||
|
||||
class Blocklist(models.Model):
    """One blocked URL plus a flag recording whether the block is still in effect."""

    # Unique, so the same URL can only be listed once.
    url = models.CharField(
        unique=True,
        max_length=2000,
        help_text="The URL to block.",
    )
    # New rows are blocked by default.
    active = models.BooleanField(
        help_text="Is this URL still blocked?",
        default=True,
    )

    class Meta:
        """Human-readable names and the table comment for the blocklist."""

        verbose_name: typing.ClassVar[str] = "Blocklist"
        verbose_name_plural: typing.ClassVar[str] = "Blocklists"
        db_table_comment: typing.ClassVar[str] = "A list of URLs to block."

    def __str__(self: Blocklist) -> str:
        """Return the blocked URL as the display text."""
        return str(self.url)
|
||||
|
|
|
|||
126
feeds/validator.py
Normal file
126
feeds/validator.py
Normal file
|
|
@ -0,0 +1,126 @@
|
|||
"""Validate feeds before adding them to the database."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import ipaddress
|
||||
import logging
|
||||
import re
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
from django.core.exceptions import ValidationError
|
||||
from django.core.validators import URLValidator
|
||||
|
||||
from feeds.models import Blocklist
|
||||
|
||||
BLOCKLISTS: list[str] = [
|
||||
"https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-dnscrypt-blocked-names.txt",
|
||||
"https://malware-filter.gitlab.io/malware-filter/phishing-filter-dnscrypt-blocked-names.txt",
|
||||
]
|
||||
|
||||
logger: logging.Logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def validate_scheme(feed_url: str) -> bool:
    """Tell whether a URL passes Django's ``URLValidator`` restricted to http/https.

    Args:
        feed_url: The URL to validate.

    Returns:
        True if the validator accepts the URL, False if it raises ``ValidationError``.
    """
    # TODO(TheLovinator): Should we allow other schemes?  # noqa: TD003
    allowed_schemes: list[str] = ["http", "https"]
    check_url = URLValidator(schemes=allowed_schemes)

    try:
        check_url(feed_url)
    except ValidationError:
        return False
    return True
|
||||
|
||||
|
||||
def is_ip(feed_url: str) -> bool:
    """Check if feed is an IP address.

    Accepts either a full URL (``http://1.2.3.4:8080/feed``) or a bare address
    (``1.2.3.4``, ``::1``). For a URL, only the host part is examined.

    Args:
        feed_url: The feed URL, or a bare host string, to check.

    Returns:
        True if the host is a literal IPv4 or IPv6 address, False otherwise
        (including when the URL cannot be parsed).
    """
    try:
        # Callers pass the whole URL, which ipaddress can never parse, so pull
        # the host out first. ``hostname`` drops the port, userinfo and the
        # brackets around IPv6 literals. Bare input is checked as given.
        host: str | None = urlparse(feed_url).hostname if "://" in feed_url else None
        ipaddress.ip_address(host or feed_url)
    except ValueError:
        # Raised by ip_address for non-IP hosts and by urlparse for malformed
        # bracketed hosts; either way this is not an IP address.
        logger.info(f"{feed_url} is not an IP address")  # noqa: G004
        return False
    else:
        logger.info(f"{feed_url} is an IP address")  # noqa: G004
        return True
|
||||
|
||||
|
||||
def update_blocklist() -> str:
    """Download the blocklist and add to database.

    Every list in ``BLOCKLISTS`` is downloaded first. Only when all downloads
    succeed is the table touched: all existing rows are marked inactive, then
    every downloaded entry is inserted or re-activated.

    Returns:
        A human-readable summary of how many entries were written.

    Raises:
        requests.RequestException: If a list cannot be downloaded or the server
            answers with an error status. The database is left unchanged.
    """
    # Imported locally so this function does not depend on edits to the module's import section.
    from django.db import transaction

    # URLs found in the blocklist
    found_urls: set[str] = set()

    for _blocklist in BLOCKLISTS:
        with requests.get(url=_blocklist, timeout=10) as r:
            r.raise_for_status()

            logger.debug(f"Downloaded {_blocklist}")  # noqa: G004

            # Strip first, then drop blank lines and comments. Filtering before
            # stripping lets indented comments through and stores "" as an entry.
            stripped_lines = (line.strip() for line in r.text.splitlines())
            blocked_urls: set[str] = {line for line in stripped_lines if line and not line.startswith("#")}

            logger.debug(f"Found {len(blocked_urls)} URLs in {_blocklist}")  # noqa: G004

            # Add URLs to the found URLs set
            found_urls.update(blocked_urls)

    logger.debug(f"Found {len(found_urls)} URLs in total")  # noqa: G004

    # Run both steps in one transaction so a failure in the upsert cannot
    # leave every entry marked inactive.
    with transaction.atomic():
        # Mark all URLs as inactive
        Blocklist.objects.all().update(active=False)

        logger.debug("Marked all URLs as inactive")

        # Bulk create the blocklist; rows that already exist are re-activated
        Blocklist.objects.bulk_create(
            [Blocklist(url=url, active=True) for url in found_urls],
            update_conflicts=True,
            unique_fields=["url"],
            update_fields=["active"],
            batch_size=1000,
        )

    logger.debug(f"Added {len(found_urls)} URLs to the blocklist")  # noqa: G004
    return f"Added {len(found_urls)} URLs to the blocklist"
|
||||
|
||||
|
||||
def is_local(feed_url: str) -> bool:
    """Check if feed is a local address.

    A URL counts as local when its host is ``localhost``/``local``, ends in a
    local-only suffix, or is an IP literal in a private, loopback, link-local
    or unspecified range (IPv4-mapped IPv6 addresses are unwrapped first).

    Args:
        feed_url: The feed URL to check.

    Returns:
        True if the host is local, False otherwise or when the URL has no host.
    """
    domain: str | None = urlparse(feed_url).hostname
    if not domain:
        return False

    # "localhost." with a trailing root dot names the same host.
    domain = domain.rstrip(".")

    if domain in {"localhost", "local"}:
        return True

    if domain.endswith((".local", ".home.arpa", ".localhost")):
        return True

    # Classify on the parsed host rather than regex-matching the raw URL: the
    # old patterns required a ":" or "/" after the address, so a URL such as
    # "http://192.168.1.1" (no port, no path) was not detected.
    try:
        address = ipaddress.ip_address(domain)
    except ValueError:
        # Not an IP literal, and no local name matched above.
        return False

    # ::ffff:a.b.c.d is an IPv4 address in IPv6 clothing; judge the inner one.
    mapped = getattr(address, "ipv4_mapped", None)
    if mapped is not None:
        address = mapped

    return address.is_private or address.is_loopback or address.is_link_local or address.is_unspecified
|
||||
|
|
@ -6,18 +6,24 @@ FeedsView - /feeds
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import typing
|
||||
from urllib import parse
|
||||
|
||||
from django.contrib import messages
|
||||
from django.core.exceptions import ValidationError
|
||||
from django.db import connection
|
||||
from django.shortcuts import redirect
|
||||
from django.shortcuts import redirect, render
|
||||
from django.views.generic.base import TemplateView
|
||||
from django.views.generic.list import ListView
|
||||
|
||||
from feeds.models import Feed
|
||||
from feeds.models import Blocklist, Feed
|
||||
from feeds.validator import is_ip, is_local, validate_scheme
|
||||
|
||||
if typing.TYPE_CHECKING:
|
||||
from django.http import HttpRequest, HttpResponseRedirect
|
||||
from django.http import HttpRequest, HttpResponse
|
||||
|
||||
logger: logging.Logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_database_size() -> int:
|
||||
|
|
@ -52,6 +58,9 @@ class IndexView(TemplateView):
|
|||
context: dict = super().get_context_data(**kwargs)
|
||||
context["feed_count"] = Feed.objects.count()
|
||||
context["database_size"] = get_database_size()
|
||||
|
||||
logger.info(f"Found {context['feed_count']} feeds in the database") # noqa: G004
|
||||
logger.info(f"Database size is {context['database_size']} MB") # noqa: G004
|
||||
return context
|
||||
|
||||
|
||||
|
|
@ -72,33 +81,90 @@ class FeedsView(ListView):
|
|||
return context
|
||||
|
||||
|
||||
def add_feeds(request: HttpRequest) -> HttpResponse:
    """Add feeds to the database.

    Args:
        request: The request object. A POST is expected to carry a ``urls``
            field with one feed URL per line.

    Returns:
        The rendered index page. The outcome for each URL, and any error, is
        attached to the request as messages.
    """
    if request.method != "POST":
        msg: str = f"You must use a POST request. You used a {request.method} request. You can find out how to use this endpoint here: <a href=''>http://127.0.0.1:8000/</a>. If you think this is a mistake, please contact the administrator."  # noqa: E501
        messages.error(request, msg)
        return render(request, "index.html")

    urls: str | None = request.POST.get("urls")
    if not urls:
        messages.error(request, "No URLs provided")
        return render(request, "index.html")

    # NOTE(review): looks like a leftover debugging hook; kept so behavior is unchanged.
    if urls == "Test":
        messages.error(request, "Test test hello")
        return render(request, "index.html")

    # Drop blank lines and surrounding whitespace so an empty line in the form
    # is not reported as an invalid feed URL.
    feed_urls: list[str] = [line.strip() for line in urls.splitlines() if line.strip()]
    if not feed_urls:
        messages.error(request, "No URLs provided")
        return render(request, "index.html")

    # Log instead of print, and check the whole batch in one call.
    logger.info(f"Adding {len(feed_urls)} URLs to the database...")  # noqa: G004
    check_feeds(feed_urls=feed_urls, request=request)

    return render(request, "index.html")
|
||||
|
||||
|
||||
def check_feeds(feed_urls: list[str], request: HttpRequest) -> HttpResponse:
    """Check feeds before adding them to the database.

    Each URL is rejected, with an error message attached to the request, if it
    is already stored, is not http/https, points at an IP address, has a host
    on the active blocklist, or is a local address. Otherwise a ``Feed`` row is
    created and a success message is attached.

    Args:
        feed_urls: The feed URLs to check.
        request: The request object.

    Returns:
        A redirect to the feeds page.
    """
    # Imported locally so this function does not depend on edits to the module's import section.
    from django.utils.html import escape, format_html

    added: int = 0

    for url in feed_urls:
        scheme_ok: bool = validate_scheme(feed_url=url)

        # The URL comes straight from the form, so escape it before embedding
        # it in markup. Only link it when the scheme is http(s), so an invalid
        # entry such as "javascript:..." never becomes a clickable href.
        url_html = format_html("<a href='{}'>{}</a>", url, url) if scheme_ok else escape(url)

        if Feed.objects.filter(url=url).exists():
            messages.error(request, format_html("{} is already in the database.", url_html))
            continue

        # Only allow HTTP and HTTPS URLs
        if not scheme_ok:
            messages.error(request, format_html("{} is not a HTTP or HTTPS URL.", url_html))
            continue

        # Don't allow IP addresses
        if is_ip(feed_url=url):
            messages.error(request, format_html("{} is an IP address. IP addresses are not allowed.", url_html))
            continue

        # Check if in blocklist. Compare the bare hostname (netloc would include
        # port and credentials) and only honor entries that are still active.
        domain: str = parse.urlparse(url).hostname or ""
        if Blocklist.objects.filter(url=domain, active=True).exists():
            messages.error(request, format_html("{} is in the blocklist.", url_html))
            continue

        # Check if local URL
        if is_local(feed_url=url):
            messages.error(request, format_html("{} is a local URL.", url_html))
            continue

        # Create feed
        try:
            Feed.objects.create(url=url)
        except ValidationError:
            messages.error(request, format_html("{} is not a valid URL.", url_html))
        else:
            added += 1
            messages.success(request, format_html("{} was added to the database.", url_html))

    # TODO(TheLovinator): Return to search page with our new feeds  # noqa: TD003
    # Log how many feeds were actually created, not how many were submitted.
    logger.info(f"Added {added} feeds to the database")  # noqa: G004
    return redirect("feeds:feeds")
|
||||
|
||||
|
||||
class APIView(TemplateView):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue