From 3ccf470cf0a6eb3fda99a6f1fed4cc9d4e6293c0 Mon Sep 17 00:00:00 2001 From: Eduard Urbach Date: Thu, 1 Nov 2018 02:21:32 +0900 Subject: [PATCH] Started working on improved character parsing --- jobs/mal-parse/mal-parse.go | 17 ++++++++++++--- jobs/mal-sync/character.go | 41 +++++++++++++++++++++++++++++++++++++ jobs/mal-sync/mal-sync.go | 35 +++++++++++++++---------------- 3 files changed, 72 insertions(+), 21 deletions(-) diff --git a/jobs/mal-parse/mal-parse.go b/jobs/mal-parse/mal-parse.go index 2a8e8540..aa6df8bd 100644 --- a/jobs/mal-parse/mal-parse.go +++ b/jobs/mal-parse/mal-parse.go @@ -3,7 +3,6 @@ package main import ( "compress/gzip" "errors" - "fmt" "os" "path" "path/filepath" @@ -37,9 +36,11 @@ func main() { } func readFiles(root string, onFile func(string) error) { + count := 0 + filepath.Walk(root, func(name string, info os.FileInfo, err error) error { if err != nil { - fmt.Println(err) + color.Red(err.Error()) return err } @@ -51,8 +52,18 @@ func readFiles(root string, onFile func(string) error) { return nil } - return onFile(name) + count++ + err = onFile(name) + + if err != nil { + color.Red(err.Error()) + } + + // Always continue traversing the directory + return nil }) + + color.Cyan("%d files found", count) } func readAnimeFile(name string) error { diff --git a/jobs/mal-sync/character.go b/jobs/mal-sync/character.go index 916153e0..a14fbe79 100644 --- a/jobs/mal-sync/character.go +++ b/jobs/mal-sync/character.go @@ -21,6 +21,47 @@ func parseCharacterDescription(input string) (output string, attributes []*arn.C for _, paragraph := range paragraphs { // Is paragraph full of attributes? if strings.Contains(paragraph, "\n") { + lines := strings.Split(paragraph, "\n") + var lastAttribute *arn.CharacterAttribute + + for _, line := range lines { + line = strings.Replace(line, " (\n)", "", -1) + + // Remove all kinds of starting and ending parentheses. 
+ if strings.HasPrefix(line, "(") { + line = strings.TrimPrefix(line, "(") + line = strings.TrimSuffix(line, ")") + } + + line = strings.TrimSuffix(line, " (") + line = strings.TrimPrefix(line, ")") + + parts := strings.Split(line, ":") + + if len(parts) < 2 { + // Add to previous attribute + if lastAttribute != nil { + lastAttribute.Value += ", " + line + } + + continue + } + + name := strings.TrimSpace(parts[0]) + value := strings.TrimSpace(parts[1]) + + if value == "" || value == `"` { + continue + } + + lastAttribute = &arn.CharacterAttribute{ + Name: name, + Value: value, + } + + attributes = append(attributes, lastAttribute) + } + continue } diff --git a/jobs/mal-sync/mal-sync.go b/jobs/mal-sync/mal-sync.go index 4049d3b6..509e0bb1 100644 --- a/jobs/mal-sync/mal-sync.go +++ b/jobs/mal-sync/mal-sync.go @@ -27,31 +27,27 @@ func main() { } // Sync the most important ones first - allAnime := arn.AllAnime() + allAnime := arn.FilterAnime(func(anime *arn.Anime) bool { + return anime.GetMapping("myanimelist/anime") != "" + }) + arn.SortAnimeByQuality(allAnime) + color.Yellow("%d anime found", len(allAnime)) for _, anime := range allAnime { - malID := anime.GetMapping("myanimelist/anime") - - if malID == "" { - continue - } - - syncAnime(anime, malID) + syncAnime(anime, anime.GetMapping("myanimelist/anime")) } // Sync the most important ones first - allCharacters := arn.AllCharacters() + allCharacters := arn.FilterCharacters(func(character *arn.Character) bool { + return character.GetMapping("myanimelist/character") != "" + }) + arn.SortCharactersByLikes(allCharacters) + color.Yellow("%d characters found", len(allCharacters)) for _, character := range allCharacters { - malID := character.GetMapping("myanimelist/character") - - if malID == "" { - continue - } - - syncCharacter(character, malID) + syncCharacter(character, character.GetMapping("myanimelist/character")) } } @@ -59,7 +55,6 @@ func syncAnime(anime *arn.Anime, malID string) { obj, err := malDB.Get("Anime", 
malID) if err != nil { - fmt.Println(err) return } @@ -91,7 +86,11 @@ func syncCharacter(character *arn.Character, malID string) { obj, err := malDB.Get("Character", malID) if err != nil { - fmt.Println(err) + return + } + + // Skip manually created characters + if character.CreatedBy != "" { return }