From b7541fbb03157d111d9056c439191ceee1d85a10 Mon Sep 17 00:00:00 2001 From: cxfksword <718792+cxfksword@users.noreply.github.com> Date: Sat, 6 Jan 2024 10:15:52 +0800 Subject: [PATCH] tweak(anitomy): update version v0.4.0 --- AnitomySharp/AnitomySharp.csproj | 16 +++--- AnitomySharp/Keyword.cs | 37 ++++++++----- AnitomySharp/Parser.cs | 35 ++++++++++-- AnitomySharp/ParserHelper.cs | 42 +++++++++++++- AnitomySharp/ParserNumber.cs | 95 ++++++++++++++++++++++++++++++-- AnitomySharp/StringHelper.cs | 11 ++++ 6 files changed, 203 insertions(+), 33 deletions(-) diff --git a/AnitomySharp/AnitomySharp.csproj b/AnitomySharp/AnitomySharp.csproj index 5f9aa8e..eb7f21b 100644 --- a/AnitomySharp/AnitomySharp.csproj +++ b/AnitomySharp/AnitomySharp.csproj @@ -4,23 +4,25 @@ net6.0 true AnitomySharp.NET6 - tabratton;senritsu + 0.4.0 + 0.4.0 + tabratton;senritsu;chu-shen AnitomySharp is a C# port of Anitomy by erengy, a library for parsing anime video filenames. All credit to erengy for the actual library and logic. + This fork of AnitomySharp is inspired by tabratton and senritsu, which adds more custom rules. https://github.com/chu-shen/AnitomySharp.git git - LICENSE + Anitomy Anime true - 0.3.0 - 0.3.0 - 0.3.0 - false + LICENSE + README.md AnitomySharp.xml + false - + diff --git a/AnitomySharp/Keyword.cs b/AnitomySharp/Keyword.cs index ecca119..a0adcae 100644 --- a/AnitomySharp/Keyword.cs +++ b/AnitomySharp/Keyword.cs @@ -24,13 +24,13 @@ namespace AnitomySharp public static class KeywordManager { /// - /// 包含所有关键词(大写)的内部关键词元素词典 + /// 包含所有关键词的内部关键词元素词典,比较器忽略大小写 /// - private static readonly Dictionary Keys = new Dictionary(); + private static readonly Dictionary Keys = new Dictionary(StringComparer.OrdinalIgnoreCase); /// - /// 文件扩展名,无值 + /// 文件扩展名,无值,比较器忽略大小写 /// - private static readonly Dictionary Extensions = new Dictionary(); + private static readonly Dictionary Extensions = new Dictionary(StringComparer.OrdinalIgnoreCase); /// /// ~~一眼真~~ @@ -64,18 +64,19 @@ namespace AnitomySharp "GEKIJOUBAN", "MOVIE", "OAD", "OAV", "ONA", "OVA", "TV", - "番外編", "總集編","映像特典","特典","特典アニメ", + "番外編", "總集編","DRAMA", + "映像特典","特典","特典アニメ", // 特典 Special 剩下的各种类型可以全部命名成 SP,对于较特殊意义的特典也可以自定义命名 - "SPECIAL", "SPECIALS", "SP", + "SPECIAL", "SPECIALS", "SP", "SPs", // 真人特典 Interview/Talk/Stage... 目前我们对于节目、采访、舞台活动、制作等三次元画面的长视频,一概怼成 IV。 "IV", // 音乐视频 Music Video "MV"}); - // add "SP" to ElementAnimeType with optionsUnidentifiable - // Add(Element.ElementCategory.ElementAnimeType, - // optionsUnidentifiableUnsearchable, - // new List {"SP"}); // e.g. "Yumeiro Patissiere SP Professional" + // add "SP" to ElementAnimeType with optionsUnidentifiable + // Add(Element.ElementCategory.ElementAnimeType, + // optionsUnidentifiableUnsearchable, + // new List { "SP" }); // e.g. "Yumeiro Patissiere SP Professional", but it is widely used to represent special Add(Element.ElementCategory.ElementAnimeType, optionsUnidentifiableInvalid, @@ -84,7 +85,7 @@ namespace AnitomySharp // 无字 OP/ED Non-Credit Opening/Ending "ED", "ENDING", "NCED", "NCOP", "OP", "OPENING", // 预告 Preview 预告下一话内容 注意编号表示其预告的是第几话的内容而不是跟在哪一话后面 - "PREVIEW", + "PREVIEW", "YOKOKU", // 菜单 Menu BD/DVD 播放选择菜单 "MENU", // 广告 Commercial Message 电视放送广告,时长一般在 7s/15s/30s/45s/... 左右 @@ -92,7 +93,7 @@ namespace AnitomySharp // 语音信息 "MESSAGE", // 宣传片/预告片 Promotion Video / Trailer 一般时长在 1~2min 命名参考原盘和 jsum - "PV", "Teaser","TRAILER", "DRAMA", + "PV", "Teaser","TRAILER", // 真人特典 Interview/Talk/Stage... 目前我们对于节目、采访、舞台活动、制作等三次元画面的长视频,一概怼成 IV。 "INTERVIEW", "EVENT", "TOKUTEN", "LOGO"}); @@ -150,7 +151,7 @@ namespace AnitomySharp Add(Element.ElementCategory.ElementOther, optionsDefault, - new List { "REMASTER", "REMASTERED", "UNCUT", "TS", "VFR", "WIDESCREEN", "WS", "SPURSENGINE" }); + new List { "REMASTER", "REMASTERED", "UNCUT", "TS", "VFR", "WIDESCREEN", "WS", "SPURSENGINE","DISC" }); Add(Element.ElementCategory.ElementReleaseGroup, optionsDefault, @@ -281,6 +282,16 @@ namespace AnitomySharp return false; } + /// + /// 判断预处理元素列表中是否包含给定的字符串() + /// + /// 元素类别 + /// 待判断的字符串 + /// `true`表示包含 + public static bool ContainsInPeekEntries(Element.ElementCategory category, string keyword) + { + return PeekEntries.Any(entry => entry.Item1 == category && entry.Item2.Contains(keyword, StringComparer.OrdinalIgnoreCase)); + } /// /// Finds a particular keyword. If found sets category and options to the found search result. diff --git a/AnitomySharp/Parser.cs b/AnitomySharp/Parser.cs index 5bd1b52..afb490c 100644 --- a/AnitomySharp/Parser.cs +++ b/AnitomySharp/Parser.cs @@ -179,6 +179,7 @@ namespace AnitomySharp private void SearchForEpisodeNumber() { var tokens = new List(); + var allTokens = new List(); for (var i = 0; i < Tokens.Count; i++) { var token = Tokens[i]; @@ -187,6 +188,7 @@ namespace AnitomySharp ParserHelper.IndexOfFirstDigit(token.Content) != -1) { tokens.Add(i); + allTokens.Add(i); } } @@ -228,6 +230,12 @@ namespace AnitomySharp // "e.g. "[12]", "(2006)" if (ParseNumber.SearchForIsolatedNumbers(tokens)) return; + // e.g. "OVA 3", "OtherToken[Hint05]", "[Web Preview 06]": maybe incorrect, so put the last + if (ParseNumber.SearchForSymbolWithEpisode(allTokens)) return; + + // e.g. [13(341)], [13 (341)] + if (ParseNumber.SearchForEquivalentNumbersWithBracket(allTokens)) return; + // Consider using the last number as a last resort ParseNumber.SearchForLastNumber(tokens); } @@ -235,7 +243,7 @@ namespace AnitomySharp /// /// Search for anime title /// - /// 搜索动画名 + /// 搜索动画名 /// private void SearchForAnimeTitle() { @@ -283,6 +291,13 @@ namespace AnitomySharp { tokenBegin = tokenBeginWithNoReleaseGroup; } + // 去除纯数字标题 + // skip token with only number + if (Regex.Match(Tokens[tokenBegin].Content, ParserNumber.RegexMatchOnlyStart + @"^[0-9]+$" + ParserNumber.RegexMatchOnlyEnd).Success) + { + tokenBegin = tokenBeginWithNoReleaseGroup; + } + skippedPreviousGroup = true; } while (Token.InListRange(tokenBegin, Tokens)); } @@ -398,7 +413,7 @@ namespace AnitomySharp { var token = Tokens[i]; /** 跳过括号标记类型的标记 */ - if (token.Category == Token.TokenCategory.Bracket) continue; + if (token.Category != Token.TokenCategory.Unknown) continue; var tokenContent = token.Content; // e.g. "2016-17" @@ -408,13 +423,21 @@ namespace AnitomySharp { tokenContent = tokenContent.Split(match.Groups[2].Value)[0]; } - // add newtype e.g. "2021 OVA" - if (token.Category != Token.TokenCategory.Unknown || !StringHelper.IsNumericString(tokenContent) || - !(ParseHelper.IsTokenContainAnimeType(i) ^ ParseHelper.IsTokenIsolated(i))) + + if (!StringHelper.IsNumericString(tokenContent)) { continue; } + // e.g. "[2021 OVA]" + if(ParseHelper.IsNextTokenContainAnimeType(i)&&!ParseHelper.IsTokenIsolated(i)){} + + // TODO may not be necessary + // if (!ParseHelper.IsTokenIsolated(i)) + // { + // continue; + // } + var number = StringHelper.StringToInt(tokenContent); // Anime year @@ -422,7 +445,7 @@ namespace AnitomySharp { if (Empty(Element.ElementCategory.ElementAnimeYear)) { - Elements.Add(new Element(Element.ElementCategory.ElementAnimeYear, token.Content)); + Elements.Add(new Element(Element.ElementCategory.ElementAnimeYear, tokenContent)); token.Category = Token.TokenCategory.Identifier; continue; } diff --git a/AnitomySharp/ParserHelper.cs b/AnitomySharp/ParserHelper.cs index a75accb..fc9d2c7 100644 --- a/AnitomySharp/ParserHelper.cs +++ b/AnitomySharp/ParserHelper.cs @@ -235,7 +235,7 @@ namespace AnitomySharp /// /// Returns whether or not a token at the current pos is isolated(surrounded by braces). /// - /// 判断当前位置标记(token)是否孤立,即是否被括号包裹 + /// 判断当前位置标记(token)是否孤立,是否被括号包裹 /// /// /// @@ -246,6 +246,20 @@ namespace AnitomySharp var nextToken = Token.FindNextToken(_parser.Tokens, pos, Token.TokenFlag.FlagNotDelimiter); return IsTokenCategory(nextToken, Token.TokenCategory.Bracket); } + /// + /// Returns whether or not a token at the current pos is isolated(surrounded by braces, delimiter). + /// + /// 判断当前位置标记(token)是否孤立,前面是否为分隔符,后面是否为括号包裹 + /// + /// + /// + public bool IsTokenIsolatedWithDelimiterAndBracket(int pos) + { + var prevToken = Token.FindPrevToken(_parser.Tokens, pos, Token.TokenFlag.FlagNone); + if (!IsTokenCategory(prevToken, Token.TokenCategory.Delimiter)) return false; + var nextToken = Token.FindNextToken(_parser.Tokens, pos, Token.TokenFlag.FlagNotDelimiter); + return IsTokenCategory(nextToken, Token.TokenCategory.Bracket); + } /// /// Returns whether or not a token at the current pos+1 is ElementAnimeType. @@ -254,13 +268,37 @@ namespace AnitomySharp /// /// /// - public bool IsTokenContainAnimeType(int pos) + public bool IsNextTokenContainAnimeType(int pos) { var prevToken = Token.FindPrevToken(_parser.Tokens, pos, Token.TokenFlag.FlagNotDelimiter); if (!IsTokenCategory(prevToken, Token.TokenCategory.Bracket)) return false; var nextToken = Token.FindNextToken(_parser.Tokens, pos, Token.TokenFlag.FlagNotDelimiter); return KeywordManager.Contains(Element.ElementCategory.ElementAnimeType, _parser.Tokens[nextToken].Content); } + /// + /// 判断当前标记(token)的上一个标记的类型是否为ElementAnimeType。如果是,则返回`true` + /// + /// + /// + public bool IsPrevTokenContainAnimeType(int pos) + { + var prevToken = Token.FindPrevToken(_parser.Tokens, pos, Token.TokenFlag.FlagNotDelimiter); + var nextToken = Token.FindNextToken(_parser.Tokens, pos, Token.TokenFlag.FlagNotDelimiter); + if (!IsTokenCategory(nextToken, Token.TokenCategory.Bracket)) return false; + return KeywordManager.Contains(Element.ElementCategory.ElementAnimeType, _parser.Tokens[prevToken].Content); + } + /// + /// 判断当前标记(token)的上一个标记的类型是否为ElementAnimeType(在 PeekEntries 中)。如果是,则返回`true` + /// + /// + /// + public bool IsPrevTokenContainAnimeTypeInPeekEntries(int pos) + { + var prevToken = Token.FindPrevToken(_parser.Tokens, pos, Token.TokenFlag.FlagNotDelimiter); + var nextToken = Token.FindNextToken(_parser.Tokens, pos, Token.TokenFlag.FlagNotDelimiter); + if (!IsTokenCategory(nextToken, Token.TokenCategory.Bracket)) return false; + return KeywordManager.ContainsInPeekEntries(Element.ElementCategory.ElementAnimeType, _parser.Tokens[prevToken].Content); + } /// /// Finds and sets the anime season keyword. diff --git a/AnitomySharp/ParserNumber.cs b/AnitomySharp/ParserNumber.cs index 4e10317..de3174f 100644 --- a/AnitomySharp/ParserNumber.cs +++ b/AnitomySharp/ParserNumber.cs @@ -412,7 +412,7 @@ namespace AnitomySharp _parser.Tokens.Insert(foundIdx, new Token(options.Identifiable ? Token.TokenCategory.Identifier : Token.TokenCategory.Unknown, token.Enclosed, prefix)); - return true; + return true; } @@ -698,6 +698,50 @@ namespace AnitomySharp return false; } + /// + /// 搜索同动画类型同时出现的集数 + /// + /// + /// + public bool SearchForSymbolWithEpisode(List tokens) + { + // Match from back to front + for (int i = tokens.Count - 1; i >= 0; i--) + { + var it = tokens[i]; + + // e.g. OVA 3, [Web Preview 06]: Web Preview in PeekEntries + if ((_parser.ParseHelper.IsPrevTokenContainAnimeType(it) || _parser.ParseHelper.IsPrevTokenContainAnimeTypeInPeekEntries(it)) && !_parser.ParseHelper.IsTokenIsolated(it)) + { + SetEpisodeNumber(_parser.Tokens[it].Content, _parser.Tokens[it], false); + return true; + } + // e.g. OtherToken[Hint05] + // it>1: makesure this token is not first one + if (it > 1 && _parser.Tokens[it].Enclosed && _parser.ParseHelper.IsTokenIsolated(it)) + { + var tokenContent = _parser.Tokens[it].Content; + var numberBegin = ParserHelper.IndexOfFirstDigit(tokenContent); + var prefix = StringHelper.SubstringWithCheck(tokenContent, 0, numberBegin); + var number = StringHelper.SubstringWithCheck(tokenContent, numberBegin, tokenContent.Length - numberBegin); + // token should be: alphaNumeric + if (prefix != "" && StringHelper.IsAlphaString(prefix) && StringHelper.IsNumericString(number)) + { + SetEpisodeNumber(number, _parser.Tokens[it], true); + return true; + } + } + // e.g. OtherToken[Disc 01] + if (it > 1 && _parser.Tokens[it].Enclosed && _parser.ParseHelper.IsTokenIsolatedWithDelimiterAndBracket(it) && StringHelper.IsNumericString(_parser.Tokens[it].Content)) + { + SetEpisodeNumber(_parser.Tokens[it].Content, _parser.Tokens[it], true); + return true; + } + } + + return false; + } + /// /// Searches for equivalent number in a list of tokens. e.g. 08(114) /// @@ -730,10 +774,7 @@ namespace AnitomySharp continue; } - var list = new List - { - _parser.Tokens[it], _parser.Tokens[nextToken] - }; + var list = new List { _parser.Tokens[it], _parser.Tokens[nextToken] }; list.Sort((o1, o2) => StringHelper.StringToInt(o1.Content) - StringHelper.StringToInt(o2.Content)); SetEpisodeNumber(list[0].Content, list[0], false); @@ -743,6 +784,50 @@ namespace AnitomySharp return false; } + /// + /// Searches for equivalent number in a list of tokens. e.g. 08(114) + /// + /// 匹配自带等效集数的数字,常见于分割放送,匹配括号包裹的数字 + /// + /// the list of tokens + /// true if an equivalent number was found + public bool SearchForEquivalentNumbersWithBracket(List tokens) + { + foreach (var it in tokens) + { + // Find the first enclosed, non-delimiter token + var nextToken = Token.FindNextToken(_parser.Tokens, it, Token.TokenFlag.FlagNotDelimiter); + if (!Token.InListRange(nextToken, _parser.Tokens) || !(_parser.Tokens[it].Content.Contains("(") || _parser.Tokens[nextToken].Content.Contains(")"))) + { + continue; + } + + // e.g. [13(341)] + if (it > 1 && _parser.Tokens[it].Enclosed && _parser.ParseHelper.IsTokenIsolated(it)) + { + string[] episodes = _parser.Tokens[it].Content.Split(new string[] { "(", ")" }, StringSplitOptions.RemoveEmptyEntries); + if (StringHelper.IsNumericString(episodes[0]) && StringHelper.IsNumericString(episodes[1])) + { + SetEpisodeNumber(episodes[0], _parser.Tokens[it], false); + SetAlternativeEpisodeNumber(episodes[1], _parser.Tokens[it]); + return true; + } + } + + // e.g. [13 (341)] + if (it > 1 && _parser.Tokens[nextToken].Enclosed && _parser.ParseHelper.IsTokenIsolatedWithDelimiterAndBracket(nextToken)) + { + string episode = _parser.Tokens[nextToken].Content.Replace("(", "").Replace(")", ""); + if (StringHelper.IsNumericString(_parser.Tokens[it].Content) && StringHelper.IsNumericString(episode)) + { + SetEpisodeNumber(_parser.Tokens[it].Content, _parser.Tokens[it], true); + SetAlternativeEpisodeNumber(episode, _parser.Tokens[nextToken]); + return true; + } + } + } + return false; + } /// /// Searches for the last number token in a list of tokens diff --git a/AnitomySharp/StringHelper.cs b/AnitomySharp/StringHelper.cs index 2f50953..8f4413b 100644 --- a/AnitomySharp/StringHelper.cs +++ b/AnitomySharp/StringHelper.cs @@ -120,6 +120,17 @@ namespace AnitomySharp { return str.All(char.IsDigit); } + /// + /// Returns whether or not the str is a alpha string. + /// + /// 判断字符串是否全字母 + /// + /// + /// + public static bool IsAlphaString(string str) + { + return str.All(char.IsLetter); + } /// /// Returns the int value of the str; 0 otherwise.