78 lines
1.6 KiB
Go
Raw Normal View History

2018-03-08 18:54:08 +00:00
package main
import (
2018-03-08 19:07:03 +00:00
"fmt"
2018-03-08 20:00:51 +00:00
"os"
2018-03-08 18:54:08 +00:00
"time"
2018-03-08 19:07:03 +00:00
"github.com/animenotifier/arn"
"github.com/fatih/color"
2018-03-08 18:54:08 +00:00
"github.com/aerogo/crawler"
)
2018-03-08 20:00:51 +00:00
const (
	// The maximum age of files we accept until we force a refresh.
	maxAge = 7 * 24 * time.Hour

	// delayBetweenRequests is the delay value handed to the crawler on creation.
	delayBetweenRequests = 1000 * time.Millisecond

	// userAgent is sent as the "User-Agent" header by the crawler.
	userAgent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.166 Safari/537.36"
)
2018-03-08 18:54:08 +00:00
func main() {
2018-03-08 20:00:51 +00:00
// Filter anime with MAL ID
animes := []*arn.Anime{}
2018-03-08 19:07:03 +00:00
for anime := range arn.StreamAnime() {
malID := anime.GetMapping("myanimelist/anime")
if malID == "" {
continue
}
2018-03-08 20:00:51 +00:00
animes = append(animes, anime)
}
color.Yellow("Found %d anime", len(animes))
2018-03-09 01:34:50 +00:00
// We don't need the database anymore
arn.Node.Close()
2018-03-08 20:00:51 +00:00
// Create crawler
malCrawler := crawler.New(
map[string]string{
2018-03-08 20:40:48 +00:00
"User-Agent": userAgent,
"Accept-Encoding": "gzip",
2018-03-08 20:00:51 +00:00
},
2018-03-08 23:22:36 +00:00
delayBetweenRequests,
2018-03-08 20:00:51 +00:00
len(animes),
)
// Sort so that we download the most important ones first
2018-03-14 00:02:41 +00:00
arn.SortAnimeByQuality(animes)
2018-03-08 20:00:51 +00:00
// Queue up URLs
count := 0
for _, anime := range animes {
malID := anime.GetMapping("myanimelist/anime")
url := "https://myanimelist.net/anime/" + malID
2018-03-08 23:07:48 +00:00
filePath := fmt.Sprintf("files/anime-%s.html", malID)
2018-03-08 20:00:51 +00:00
fileInfo, err := os.Stat(filePath)
if err == nil && time.Since(fileInfo.ModTime()) <= maxAge {
// fmt.Println(color.YellowString(url), "skip")
continue
}
2018-03-08 19:07:03 +00:00
malCrawler.Queue(&crawler.Task{
2018-03-08 20:00:51 +00:00
URL: url,
Destination: filePath,
2018-03-08 19:07:03 +00:00
})
2018-03-08 18:54:08 +00:00
2018-03-08 19:07:03 +00:00
count++
}
2018-03-08 18:54:08 +00:00
2018-03-08 19:07:03 +00:00
color.Yellow("Queued up %d links", count)
2018-03-08 18:54:08 +00:00
malCrawler.Wait()
}