Don't filter URLs

2024-02-08 20:09:49 +01:00 · 2024-02-08 20:09:49 +01:00 · b70b139c42
commit b70b139c42
parent c8ddde9c12
3 changed files with 1 additions and 104 deletions
--- a/validate.go
+++ b/validate.go
@@ -1,96 +1,13 @@
 package main

 import (
-	"bufio"
 	"errors"
-	"log"
 	"net"
 	"net/http"
 	"net/url"
 	"strings"
-	"time"
 )

-func scrapeBadURLs() {
-	// TODO: We should only scrape the bad URLs if the file has been updated
-	// TODO: Use brotli compression https://gitlab.com/malware-filter/urlhaus-filter#compressed-version
-	filterListURLs := []string{
-		"https://malware-filter.gitlab.io/malware-filter/phishing-filter-dnscrypt-blocked-names.txt",
-		"https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-dnscrypt-blocked-names-online.txt",
-	}
-
-	// Scrape the bad URLs
-	badURLs := []BadURLs{}
-	for _, url := range filterListURLs {
-		// Check if we have scraped the bad URLs in the last 24 hours
-		var meta BadURLsMeta
-		db.Where("url = ?", url).First(&meta)
-		if time.Since(meta.LastScraped).Hours() < 24 {
-			log.Printf("%s was last scraped %.1f hours ago\n", url, time.Since(meta.LastScraped).Hours())
-			continue
-		}
-
-		// Create the meta if it doesn't exist
-		if meta.ID == 0 {
-			meta = BadURLsMeta{URL: url}
-			db.Create(&meta)
-		}
-
-		// Update the last scraped time
-		db.Model(&meta).Update("last_scraped", time.Now())
-
-		// Get the filter list
-		resp, err := http.Get(url)
-		if err != nil {
-			log.Println("Failed to get filter list:", err)
-			continue
-		}
-		defer resp.Body.Close()
-
-		scanner := bufio.NewScanner(resp.Body)
-		for scanner.Scan() {
-			line := scanner.Text()
-			if strings.HasPrefix(line, "#") {
-				log.Println("Comment:", line)
-				continue
-			}
-
-			// Skip the URL if it already exists in the database
-			var count int64
-			db.Model(&BadURLs{}).Where("url = ?", line).Count(&count)
-			if count > 0 {
-				log.Println("URL already exists:", line)
-				continue
-			}
-
-			// Add the bad URL to the list
-			badURLs = append(badURLs, BadURLs{URL: line, Active: true})
-		}
-
-		if err := scanner.Err(); err != nil {
-			log.Println("Failed to scan filter list:", err)
-		}
-	}
-
-	if len(badURLs) == 0 {
-		log.Println("No new URLs found in", len(filterListURLs), "filter lists")
-		return
-	}
-
-	// Log how many bad URLs we found
-	log.Println("Found", len(badURLs), "bad URLs")
-
-	// Mark all the bad URLs as inactive if we have any in the database
-	var count int64
-	db.Model(&BadURLs{}).Count(&count)
-	if count > 0 {
-		db.Model(&BadURLs{}).Update("active", false)
-	}
-
-	// Save the bad URLs to the database
-	db.Create(&badURLs)
-}
-
 // Run some simple validation on the URL
 func validateURL(feed_url string) error {
 	// Check if URL starts with http or https
@@ -174,13 +91,6 @@ func validateURL(feed_url string) error {
 		}
 	}

-	// Check if the domain is in BadURLs
-	var count int64
-	db.Model(&BadURLs{}).Where("url = ?", domain).Count(&count)
-	if count > 0 {
-		return errors.New("URL is in the bad URLs list")
-	}
-
 	// Don't allow URLs that end with .local
 	if strings.HasSuffix(domain, ".local") {
 		return errors.New("URLs ending with .local are not allowed")