2018-03-08 18:54:08 +00:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
2018-03-08 19:07:03 +00:00
|
|
|
"fmt"
|
2018-03-08 20:00:51 +00:00
|
|
|
"os"
|
2018-03-08 18:54:08 +00:00
|
|
|
"time"
|
|
|
|
|
2018-11-13 07:22:24 +00:00
|
|
|
"github.com/animenotifier/arn/osutils"
|
|
|
|
|
2019-04-23 05:45:17 +00:00
|
|
|
"github.com/akyoto/color"
|
2019-04-23 05:52:55 +00:00
|
|
|
"github.com/animenotifier/arn"
|
2018-03-08 19:07:03 +00:00
|
|
|
|
2018-03-08 18:54:08 +00:00
|
|
|
"github.com/aerogo/crawler"
|
|
|
|
)
|
|
|
|
|
2018-03-08 20:00:51 +00:00
|
|
|
// Timing settings for the crawl.
const (
	// maxAge is the maximum age of files we accept until we force a refresh.
	maxAge = 7 * 24 * time.Hour

	// delayBetweenRequests is the delay value handed to the crawler on construction.
	delayBetweenRequests = 1100 * time.Millisecond
)

// Request identification and on-disk layout.
const (
	// userAgent is the value used for the "User-Agent" request header.
	userAgent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.20 Safari/537.36"

	// animeDirectory is the directory that anime pages are written to.
	animeDirectory = "anime"

	// characterDirectory is the directory that character pages are written to.
	characterDirectory = "character"
)
|
2018-03-08 18:54:08 +00:00
|
|
|
|
2018-04-12 16:57:28 +00:00
|
|
|
var headers = map[string]string{
|
|
|
|
"User-Agent": userAgent,
|
|
|
|
"Accept-Encoding": "gzip",
|
|
|
|
}
|
|
|
|
|
2018-03-08 18:54:08 +00:00
|
|
|
func main() {
|
2018-04-12 16:57:28 +00:00
|
|
|
defer color.Green("Finished.")
|
|
|
|
|
2018-10-31 15:04:47 +00:00
|
|
|
// Create directories in case they're missing
|
|
|
|
os.Mkdir(animeDirectory, 0777)
|
|
|
|
os.Mkdir(characterDirectory, 0777)
|
|
|
|
|
2018-04-12 16:57:28 +00:00
|
|
|
// Called with arguments?
|
|
|
|
if InvokeShellArgs() {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2018-03-08 20:00:51 +00:00
|
|
|
// Filter anime with MAL ID
|
2018-10-30 00:35:30 +00:00
|
|
|
var animes []*arn.Anime
|
2018-03-08 19:07:03 +00:00
|
|
|
|
2018-10-30 00:35:30 +00:00
|
|
|
if objectType == "all" || objectType == "anime" {
|
|
|
|
animes = arn.FilterAnime(func(anime *arn.Anime) bool {
|
2018-11-13 07:22:24 +00:00
|
|
|
malID := anime.GetMapping("myanimelist/anime")
|
|
|
|
|
|
|
|
if malID == "" {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
return !newOnly || !osutils.Exists(animeFilePath(malID))
|
2018-10-30 00:35:30 +00:00
|
|
|
})
|
2018-03-08 19:07:03 +00:00
|
|
|
|
2018-10-30 00:35:30 +00:00
|
|
|
color.Yellow("Found %d anime", len(animes))
|
2018-03-08 19:07:03 +00:00
|
|
|
|
2018-10-30 00:35:30 +00:00
|
|
|
// Sort so that we download the most important ones first
|
|
|
|
arn.SortAnimeByQuality(animes)
|
2018-03-08 20:00:51 +00:00
|
|
|
}
|
|
|
|
|
2018-10-30 00:35:30 +00:00
|
|
|
// Filter characters with MAL ID
|
|
|
|
var characters []*arn.Character
|
|
|
|
|
|
|
|
if objectType == "all" || objectType == "character" {
|
|
|
|
characters = arn.FilterCharacters(func(character *arn.Character) bool {
|
2018-11-13 07:22:24 +00:00
|
|
|
malID := character.GetMapping("myanimelist/character")
|
|
|
|
|
|
|
|
if malID == "" {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
return !newOnly || !osutils.Exists(characterFilePath(malID))
|
2018-10-30 00:35:30 +00:00
|
|
|
})
|
|
|
|
|
|
|
|
color.Yellow("Found %d characters", len(characters))
|
|
|
|
|
|
|
|
// Sort so that we download the most important ones first
|
|
|
|
arn.SortCharactersByLikes(characters)
|
|
|
|
}
|
2018-03-08 20:00:51 +00:00
|
|
|
|
2018-03-09 01:34:50 +00:00
|
|
|
// We don't need the database anymore
|
|
|
|
arn.Node.Close()
|
|
|
|
|
2018-03-08 20:00:51 +00:00
|
|
|
// Create crawler
|
|
|
|
malCrawler := crawler.New(
|
2018-04-12 16:57:28 +00:00
|
|
|
headers,
|
2018-03-08 23:22:36 +00:00
|
|
|
delayBetweenRequests,
|
2018-10-30 00:35:30 +00:00
|
|
|
len(animes)+len(characters),
|
2018-03-08 20:00:51 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// Queue up URLs
|
|
|
|
count := 0
|
|
|
|
|
|
|
|
for _, anime := range animes {
|
2018-10-30 00:35:30 +00:00
|
|
|
queueAnime(anime, malCrawler)
|
|
|
|
count++
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, character := range characters {
|
|
|
|
queueCharacter(character, malCrawler)
|
2018-03-08 19:07:03 +00:00
|
|
|
count++
|
|
|
|
}
|
2018-03-08 18:54:08 +00:00
|
|
|
|
2018-03-28 22:26:19 +00:00
|
|
|
// Log number of links
|
2018-03-08 19:07:03 +00:00
|
|
|
color.Yellow("Queued up %d links", count)
|
2018-03-28 22:26:19 +00:00
|
|
|
|
|
|
|
// Wait for completion
|
2018-03-08 18:54:08 +00:00
|
|
|
malCrawler.Wait()
|
2018-04-12 16:57:28 +00:00
|
|
|
}
|
|
|
|
|
2018-11-13 07:22:24 +00:00
|
|
|
func animeFilePath(malID string) string {
|
|
|
|
return fmt.Sprintf("%s/%s.html.gz", animeDirectory, malID)
|
|
|
|
}
|
|
|
|
|
|
|
|
func characterFilePath(malID string) string {
|
|
|
|
return fmt.Sprintf("%s/%s.html.gz", characterDirectory, malID)
|
|
|
|
}
|
|
|
|
|
2018-10-30 00:35:30 +00:00
|
|
|
func queueAnime(anime *arn.Anime, malCrawler *crawler.Crawler) {
|
2018-04-12 16:57:28 +00:00
|
|
|
malID := anime.GetMapping("myanimelist/anime")
|
|
|
|
url := "https://myanimelist.net/anime/" + malID
|
2018-11-13 07:22:24 +00:00
|
|
|
filePath := animeFilePath(malID)
|
2018-04-12 16:57:28 +00:00
|
|
|
fileInfo, err := os.Stat(filePath)
|
|
|
|
|
|
|
|
if err == nil && time.Since(fileInfo.ModTime()) <= maxAge {
|
|
|
|
// fmt.Println(color.YellowString(url), "skip")
|
|
|
|
return
|
|
|
|
}
|
2018-03-28 22:26:19 +00:00
|
|
|
|
2018-04-12 16:57:28 +00:00
|
|
|
malCrawler.Queue(&crawler.Task{
|
|
|
|
URL: url,
|
|
|
|
Destination: filePath,
|
2018-10-29 23:59:35 +00:00
|
|
|
Raw: true,
|
2018-04-12 16:57:28 +00:00
|
|
|
})
|
2018-03-08 18:54:08 +00:00
|
|
|
}
|
2018-10-30 00:35:30 +00:00
|
|
|
|
|
|
|
func queueCharacter(character *arn.Character, malCrawler *crawler.Crawler) {
|
|
|
|
malID := character.GetMapping("myanimelist/character")
|
|
|
|
url := "https://myanimelist.net/character/" + malID
|
2018-11-13 07:22:24 +00:00
|
|
|
filePath := characterFilePath(malID)
|
2018-10-30 00:35:30 +00:00
|
|
|
fileInfo, err := os.Stat(filePath)
|
|
|
|
|
|
|
|
if err == nil && time.Since(fileInfo.ModTime()) <= maxAge {
|
|
|
|
// fmt.Println(color.YellowString(url), "skip")
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
malCrawler.Queue(&crawler.Task{
|
|
|
|
URL: url,
|
|
|
|
Destination: filePath,
|
|
|
|
Raw: true,
|
|
|
|
})
|
|
|
|
}
|