From b70b139c426113c06e649122d7b26cf852423be9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joakim=20Hells=C3=A9n?= Date: Thu, 8 Feb 2024 20:09:49 +0100 Subject: [PATCH] Don't filter URLs --- main.go | 3 +- models.go | 12 ------- validate.go | 90 ----------------------------------------------------- 3 files changed, 1 insertion(+), 104 deletions(-) diff --git a/main.go b/main.go index c3f7923..8969ca7 100644 --- a/main.go +++ b/main.go @@ -25,7 +25,7 @@ func init() { log.Println("Connected to database") // Migrate the schema - err = db.AutoMigrate(&BadURLsMeta{}, &BadURLs{}, &Feed{}, &Item{}, &Person{}, &Image{}, &Enclosure{}, &DublinCoreExtension{}, &ITunesFeedExtension{}, &ITunesItemExtension{}, &ITunesCategory{}, &ITunesOwner{}, &Extension{}) + err = db.AutoMigrate(&Feed{}, &Item{}, &Person{}, &Image{}, &Enclosure{}, &DublinCoreExtension{}, &ITunesFeedExtension{}, &ITunesItemExtension{}, &ITunesCategory{}, &ITunesOwner{}, &Extension{}) if err != nil { panic("Failed to migrate the database") } @@ -36,7 +36,6 @@ func main() { // Scrape the bad URLs in the background // TODO: Run this in a goroutine - scrapeBadURLs() // Create a new router r := chi.NewRouter() diff --git a/models.go b/models.go index e8dc6dc..dc1db16 100644 --- a/models.go +++ b/models.go @@ -178,15 +178,3 @@ type ParseResult struct { func (d *TemplateData) GetDatabaseSizeAndFeedCount() { d.DatabaseSize = GetDBSize() } - -type BadURLs struct { - gorm.Model - URL string `json:"url"` - Active bool `json:"active"` -} - -type BadURLsMeta struct { - gorm.Model - URL string `json:"url"` - LastScraped time.Time `json:"lastScraped"` -} diff --git a/validate.go b/validate.go index 663c6cb..722709b 100644 --- a/validate.go +++ b/validate.go @@ -1,96 +1,13 @@ package main import ( - "bufio" "errors" - "log" "net" "net/http" "net/url" "strings" - "time" ) -func scrapeBadURLs() { - // TODO: We should only scrape the bad URLs if the file has been updated - // TODO: Use brotli compression https://gitlab.com/malware-filter/urlhaus-filter#compressed-version - filterListURLs := []string{ - "https://malware-filter.gitlab.io/malware-filter/phishing-filter-dnscrypt-blocked-names.txt", - "https://malware-filter.gitlab.io/malware-filter/urlhaus-filter-dnscrypt-blocked-names-online.txt", - } - - // Scrape the bad URLs - badURLs := []BadURLs{} - for _, url := range filterListURLs { - // Check if we have scraped the bad URLs in the last 24 hours - var meta BadURLsMeta - db.Where("url = ?", url).First(&meta) - if time.Since(meta.LastScraped).Hours() < 24 { - log.Printf("%s was last scraped %.1f hours ago\n", url, time.Since(meta.LastScraped).Hours()) - continue - } - - // Create the meta if it doesn't exist - if meta.ID == 0 { - meta = BadURLsMeta{URL: url} - db.Create(&meta) - } - - // Update the last scraped time - db.Model(&meta).Update("last_scraped", time.Now()) - - // Get the filter list - resp, err := http.Get(url) - if err != nil { - log.Println("Failed to get filter list:", err) - continue - } - defer resp.Body.Close() - - scanner := bufio.NewScanner(resp.Body) - for scanner.Scan() { - line := scanner.Text() - if strings.HasPrefix(line, "#") { - log.Println("Comment:", line) - continue - } - - // Skip the URL if it already exists in the database - var count int64 - db.Model(&BadURLs{}).Where("url = ?", line).Count(&count) - if count > 0 { - log.Println("URL already exists:", line) - continue - } - - // Add the bad URL to the list - badURLs = append(badURLs, BadURLs{URL: line, Active: true}) - } - - if err := scanner.Err(); err != nil { - log.Println("Failed to scan filter list:", err) - } - } - - if len(badURLs) == 0 { - log.Println("No new URLs found in", len(filterListURLs), "filter lists") - return - } - - // Log how many bad URLs we found - log.Println("Found", len(badURLs), "bad URLs") - - // Mark all the bad URLs as inactive if we have any in the database - var count int64 - db.Model(&BadURLs{}).Count(&count) - if count > 0 { - db.Model(&BadURLs{}).Update("active", false) - } - - // Save the bad URLs to the database - db.Create(&badURLs) -} - // Run some simple validation on the URL func validateURL(feed_url string) error { // Check if URL starts with http or https @@ -174,13 +91,6 @@ func validateURL(feed_url string) error { } } - // Check if the domain is in BadURLs - var count int64 - db.Model(&BadURLs{}).Where("url = ?", domain).Count(&count) - if count > 0 { - return errors.New("URL is in the bad URLs list") - } - // Don't allow URLs that end with .local if strings.HasSuffix(domain, ".local") { return errors.New("URLs ending with .local are not allowed")