Improved crawler
This commit is contained in:
parent
13ad0b78da
commit
85d5d1bc36
@ -2,6 +2,7 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"os"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/animenotifier/arn"
|
"github.com/animenotifier/arn"
|
||||||
@ -10,13 +11,17 @@ import (
|
|||||||
"github.com/aerogo/crawler"
|
"github.com/aerogo/crawler"
|
||||||
)
|
)
|
||||||
|
|
||||||
const userAgent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.166 Safari/537.36"
|
const (
|
||||||
|
// The maximum age of files we accept until we force a refresh.
|
||||||
|
maxAge = 30 * 24 * time.Hour
|
||||||
|
userAgent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.166 Safari/537.36"
|
||||||
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
defer arn.Node.Close()
|
defer arn.Node.Close()
|
||||||
|
|
||||||
malCrawler := crawler.New(userAgent, 1*time.Second, 20000)
|
// Filter anime with MAL ID
|
||||||
count := 0
|
animes := []*arn.Anime{}
|
||||||
|
|
||||||
for anime := range arn.StreamAnime() {
|
for anime := range arn.StreamAnime() {
|
||||||
malID := anime.GetMapping("myanimelist/anime")
|
malID := anime.GetMapping("myanimelist/anime")
|
||||||
@ -25,9 +30,40 @@ func main() {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
animes = append(animes, anime)
|
||||||
|
}
|
||||||
|
|
||||||
|
color.Yellow("Found %d anime", len(animes))
|
||||||
|
|
||||||
|
// Create crawler
|
||||||
|
malCrawler := crawler.New(
|
||||||
|
map[string]string{
|
||||||
|
"User-Agent": userAgent,
|
||||||
|
},
|
||||||
|
1500*time.Millisecond,
|
||||||
|
len(animes),
|
||||||
|
)
|
||||||
|
|
||||||
|
// Sort so that we download the most important ones first
|
||||||
|
arn.SortAnimeByQuality(animes, "")
|
||||||
|
|
||||||
|
// Queue up URLs
|
||||||
|
count := 0
|
||||||
|
|
||||||
|
for _, anime := range animes {
|
||||||
|
malID := anime.GetMapping("myanimelist/anime")
|
||||||
|
url := "https://myanimelist.net/anime/" + malID
|
||||||
|
filePath := fmt.Sprintf("mal/anime-%s.html", malID)
|
||||||
|
fileInfo, err := os.Stat(filePath)
|
||||||
|
|
||||||
|
if err == nil && time.Since(fileInfo.ModTime()) <= maxAge {
|
||||||
|
// fmt.Println(color.YellowString(url), "skip")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
malCrawler.Queue(&crawler.Task{
|
malCrawler.Queue(&crawler.Task{
|
||||||
URL: "https://myanimelist.net/anime/" + malID,
|
URL: url,
|
||||||
Destination: fmt.Sprintf("mal/anime-%s.html", malID),
|
Destination: filePath,
|
||||||
})
|
})
|
||||||
|
|
||||||
count++
|
count++
|
||||||
|
Loading…
Reference in New Issue
Block a user