diff --git a/AnitomySharp/AnitomySharp.csproj b/AnitomySharp/AnitomySharp.csproj
index 5f9aa8e..eb7f21b 100644
--- a/AnitomySharp/AnitomySharp.csproj
+++ b/AnitomySharp/AnitomySharp.csproj
@@ -4,23 +4,25 @@
net6.0
true
AnitomySharp.NET6
- tabratton;senritsu
+ 0.4.0
+ 0.4.0
+ tabratton;senritsu;chu-shen
AnitomySharp is a C# port of Anitomy by erengy, a library for parsing anime video filenames. All credit to erengy for the actual library and logic.
+ This fork of AnitomySharp is inspired by tabratton and senritsu, which adds more custom rules.
https://github.com/chu-shen/AnitomySharp.git
git
- LICENSE
+ Anitomy Anime
true
- 0.3.0
- 0.3.0
- 0.3.0
- false
+ LICENSE
+ README.md
AnitomySharp.xml
+ false
-
+
diff --git a/AnitomySharp/Keyword.cs b/AnitomySharp/Keyword.cs
index ecca119..a0adcae 100644
--- a/AnitomySharp/Keyword.cs
+++ b/AnitomySharp/Keyword.cs
@@ -24,13 +24,13 @@ namespace AnitomySharp
public static class KeywordManager
{
///
- /// 包含所有关键词(大写)的内部关键词元素词典
+ /// 包含所有关键词的内部关键词元素词典,比较器忽略大小写
///
- private static readonly Dictionary Keys = new Dictionary();
+ private static readonly Dictionary Keys = new Dictionary(StringComparer.OrdinalIgnoreCase);
///
- /// 文件扩展名,无值
+ /// 文件扩展名,无值,比较器忽略大小写
///
- private static readonly Dictionary Extensions = new Dictionary();
+ private static readonly Dictionary Extensions = new Dictionary(StringComparer.OrdinalIgnoreCase);
///
/// ~~一眼真~~
@@ -64,18 +64,19 @@ namespace AnitomySharp
"GEKIJOUBAN", "MOVIE",
"OAD", "OAV", "ONA", "OVA",
"TV",
- "番外編", "總集編","映像特典","特典","特典アニメ",
+ "番外編", "總集編","DRAMA",
+ "映像特典","特典","特典アニメ",
// 特典 Special 剩下的各种类型可以全部命名成 SP,对于较特殊意义的特典也可以自定义命名
- "SPECIAL", "SPECIALS", "SP",
+ "SPECIAL", "SPECIALS", "SP", "SPs",
// 真人特典 Interview/Talk/Stage... 目前我们对于节目、采访、舞台活动、制作等三次元画面的长视频,一概怼成 IV。
"IV",
// 音乐视频 Music Video
"MV"});
- // add "SP" to ElementAnimeType with optionsUnidentifiable
- // Add(Element.ElementCategory.ElementAnimeType,
- // optionsUnidentifiableUnsearchable,
- // new List {"SP"}); // e.g. "Yumeiro Patissiere SP Professional"
+ // add "SP" to ElementAnimeType with optionsUnidentifiable
+ // Add(Element.ElementCategory.ElementAnimeType,
+ // optionsUnidentifiableUnsearchable,
+ // new List { "SP" }); // e.g. "Yumeiro Patissiere SP Professional", but it is widely used to represent special
Add(Element.ElementCategory.ElementAnimeType,
optionsUnidentifiableInvalid,
@@ -84,7 +85,7 @@ namespace AnitomySharp
// 无字 OP/ED Non-Credit Opening/Ending
"ED", "ENDING", "NCED", "NCOP", "OP", "OPENING",
// 预告 Preview 预告下一话内容 注意编号表示其预告的是第几话的内容而不是跟在哪一话后面
- "PREVIEW",
+ "PREVIEW", "YOKOKU",
// 菜单 Menu BD/DVD 播放选择菜单
"MENU",
// 广告 Commercial Message 电视放送广告,时长一般在 7s/15s/30s/45s/... 左右
@@ -92,7 +93,7 @@ namespace AnitomySharp
// 语音信息
"MESSAGE",
// 宣传片/预告片 Promotion Video / Trailer 一般时长在 1~2min 命名参考原盘和 jsum
- "PV", "Teaser","TRAILER", "DRAMA",
+ "PV", "Teaser","TRAILER",
// 真人特典 Interview/Talk/Stage... 目前我们对于节目、采访、舞台活动、制作等三次元画面的长视频,一概怼成 IV。
"INTERVIEW",
"EVENT", "TOKUTEN", "LOGO"});
@@ -150,7 +151,7 @@ namespace AnitomySharp
Add(Element.ElementCategory.ElementOther,
optionsDefault,
- new List { "REMASTER", "REMASTERED", "UNCUT", "TS", "VFR", "WIDESCREEN", "WS", "SPURSENGINE" });
+ new List { "REMASTER", "REMASTERED", "UNCUT", "TS", "VFR", "WIDESCREEN", "WS", "SPURSENGINE","DISC" });
Add(Element.ElementCategory.ElementReleaseGroup,
optionsDefault,
@@ -281,6 +282,16 @@ namespace AnitomySharp
return false;
}
+        /// <summary>
+        /// Checks whether the PeekEntries list has an entry for the given category whose keyword collection contains the given string (case-insensitive).
+        /// </summary>
+        /// <param name="category">Element category an entry must be registered under (compared with the entry's first item).</param>
+        /// <param name="keyword">String to look for in the entry's second item; elements are compared with StringComparer.OrdinalIgnoreCase.</param>
+        /// <returns><c>true</c> if at least one matching entry exists; otherwise <c>false</c>.</returns>
+        public static bool ContainsInPeekEntries(Element.ElementCategory category, string keyword)
+        {
+            return PeekEntries.Where(peek => peek.Item1 == category).Any(peek => peek.Item2.Contains(keyword, StringComparer.OrdinalIgnoreCase));
+        }
///
/// Finds a particular keyword. If found sets category and options to the found search result.
diff --git a/AnitomySharp/Parser.cs b/AnitomySharp/Parser.cs
index 5bd1b52..afb490c 100644
--- a/AnitomySharp/Parser.cs
+++ b/AnitomySharp/Parser.cs
@@ -179,6 +179,7 @@ namespace AnitomySharp
private void SearchForEpisodeNumber()
{
var tokens = new List();
+ var allTokens = new List();
for (var i = 0; i < Tokens.Count; i++)
{
var token = Tokens[i];
@@ -187,6 +188,7 @@ namespace AnitomySharp
ParserHelper.IndexOfFirstDigit(token.Content) != -1)
{
tokens.Add(i);
+ allTokens.Add(i);
}
}
@@ -228,6 +230,12 @@ namespace AnitomySharp
// "e.g. "[12]", "(2006)"
if (ParseNumber.SearchForIsolatedNumbers(tokens)) return;
+ // e.g. "OVA 3", "OtherToken[Hint05]", "[Web Preview 06]": maybe incorrect, so put the last
+ if (ParseNumber.SearchForSymbolWithEpisode(allTokens)) return;
+
+ // e.g. [13(341)], [13 (341)]
+ if (ParseNumber.SearchForEquivalentNumbersWithBracket(allTokens)) return;
+
// Consider using the last number as a last resort
ParseNumber.SearchForLastNumber(tokens);
}
@@ -235,7 +243,7 @@ namespace AnitomySharp
///
/// Search for anime title
///
- /// 搜索动画名
+ /// 搜索动画名
///
private void SearchForAnimeTitle()
{
@@ -283,6 +291,13 @@ namespace AnitomySharp
{
tokenBegin = tokenBeginWithNoReleaseGroup;
}
+ // 去除纯数字标题
+ // skip token with only number
+ if (Regex.Match(Tokens[tokenBegin].Content, ParserNumber.RegexMatchOnlyStart + @"^[0-9]+$" + ParserNumber.RegexMatchOnlyEnd).Success)
+ {
+ tokenBegin = tokenBeginWithNoReleaseGroup;
+ }
+
skippedPreviousGroup = true;
} while (Token.InListRange(tokenBegin, Tokens));
}
@@ -398,7 +413,7 @@ namespace AnitomySharp
{
var token = Tokens[i];
/** 跳过括号标记类型的标记 */
- if (token.Category == Token.TokenCategory.Bracket) continue;
+ if (token.Category != Token.TokenCategory.Unknown) continue;
var tokenContent = token.Content;
// e.g. "2016-17"
@@ -408,13 +423,21 @@ namespace AnitomySharp
{
tokenContent = tokenContent.Split(match.Groups[2].Value)[0];
}
- // add newtype e.g. "2021 OVA"
- if (token.Category != Token.TokenCategory.Unknown || !StringHelper.IsNumericString(tokenContent) ||
- !(ParseHelper.IsTokenContainAnimeType(i) ^ ParseHelper.IsTokenIsolated(i)))
+
+ if (!StringHelper.IsNumericString(tokenContent))
{
continue;
}
+ // e.g. "[2021 OVA]"
+ if(ParseHelper.IsNextTokenContainAnimeType(i)&&!ParseHelper.IsTokenIsolated(i)){}
+
+ // TODO may not be necessary
+ // if (!ParseHelper.IsTokenIsolated(i))
+ // {
+ // continue;
+ // }
+
var number = StringHelper.StringToInt(tokenContent);
// Anime year
@@ -422,7 +445,7 @@ namespace AnitomySharp
{
if (Empty(Element.ElementCategory.ElementAnimeYear))
{
- Elements.Add(new Element(Element.ElementCategory.ElementAnimeYear, token.Content));
+ Elements.Add(new Element(Element.ElementCategory.ElementAnimeYear, tokenContent));
token.Category = Token.TokenCategory.Identifier;
continue;
}
diff --git a/AnitomySharp/ParserHelper.cs b/AnitomySharp/ParserHelper.cs
index a75accb..fc9d2c7 100644
--- a/AnitomySharp/ParserHelper.cs
+++ b/AnitomySharp/ParserHelper.cs
@@ -235,7 +235,7 @@ namespace AnitomySharp
///
/// Returns whether or not a token at the current pos is isolated(surrounded by braces).
///
- /// 判断当前位置标记(token)是否孤立,即是否被括号包裹
+ /// 判断当前位置标记(token)是否孤立,是否被括号包裹
///
///
///
@@ -246,6 +246,20 @@ namespace AnitomySharp
var nextToken = Token.FindNextToken(_parser.Tokens, pos, Token.TokenFlag.FlagNotDelimiter);
return IsTokenCategory(nextToken, Token.TokenCategory.Bracket);
}
+        /// <summary>
+        /// Returns whether the token at <paramref name="pos"/> has a delimiter as its previous token
+        /// (looked up with FlagNone, presumably the immediately preceding token; confirm against Token.FindPrevToken)
+        /// and a bracket as its next non-delimiter token.
+        /// </summary>
+        /// <param name="pos">Index of the token in the parser's token list.</param>
+        /// <returns><c>true</c> when both neighbours match; otherwise <c>false</c>.</returns>
+        public bool IsTokenIsolatedWithDelimiterAndBracket(int pos)
+        {
+            var before = Token.FindPrevToken(_parser.Tokens, pos, Token.TokenFlag.FlagNone);
+            var after = Token.FindNextToken(_parser.Tokens, pos, Token.TokenFlag.FlagNotDelimiter);
+            // Delimiter on the left, bracket on the right (callers use this for cases such as the "01" in "[Disc 01]")
+            return IsTokenCategory(before, Token.TokenCategory.Delimiter) && IsTokenCategory(after, Token.TokenCategory.Bracket);
+        }
///
/// Returns whether or not a token at the current pos+1 is ElementAnimeType.
@@ -254,13 +268,37 @@ namespace AnitomySharp
///
///
///
- public bool IsTokenContainAnimeType(int pos)
+ public bool IsNextTokenContainAnimeType(int pos)
{
var prevToken = Token.FindPrevToken(_parser.Tokens, pos, Token.TokenFlag.FlagNotDelimiter);
if (!IsTokenCategory(prevToken, Token.TokenCategory.Bracket)) return false;
var nextToken = Token.FindNextToken(_parser.Tokens, pos, Token.TokenFlag.FlagNotDelimiter);
return KeywordManager.Contains(Element.ElementCategory.ElementAnimeType, _parser.Tokens[nextToken].Content);
}
+        /// <summary>Returns whether the token at <paramref name="pos"/> follows an ElementAnimeType keyword, e.g. the "3" in "[OVA 3]".</summary>
+        /// <remarks>The next non-delimiter token must be a bracket, and KeywordManager.Contains must accept the previous non-delimiter token's content for ElementAnimeType.</remarks>
+        /// <param name="pos">Index of the token in the parser's token list.</param>
+        /// <returns><c>true</c> if both conditions hold; <c>false</c> otherwise, including when no previous non-delimiter token exists.</returns>
+        public bool IsPrevTokenContainAnimeType(int pos)
+        {
+            var prevToken = Token.FindPrevToken(_parser.Tokens, pos, Token.TokenFlag.FlagNotDelimiter);
+            var nextToken = Token.FindNextToken(_parser.Tokens, pos, Token.TokenFlag.FlagNotDelimiter);
+            // The lookup may yield an index outside the list (e.g. pos is the first token); never index with it unchecked
+            if (!IsTokenCategory(nextToken, Token.TokenCategory.Bracket) || !Token.InListRange(prevToken, _parser.Tokens)) return false;
+            return KeywordManager.Contains(Element.ElementCategory.ElementAnimeType, _parser.Tokens[prevToken].Content);
+        }
+        /// <summary>Returns whether the token at <paramref name="pos"/> follows an ElementAnimeType keyword that is listed in PeekEntries.</summary>
+        /// <remarks>The next non-delimiter token must be a bracket, and KeywordManager.ContainsInPeekEntries must accept the previous non-delimiter token's content for ElementAnimeType.</remarks>
+        /// <param name="pos">Index of the token in the parser's token list.</param>
+        /// <returns><c>true</c> if both conditions hold; <c>false</c> otherwise, including when no previous non-delimiter token exists.</returns>
+        public bool IsPrevTokenContainAnimeTypeInPeekEntries(int pos)
+        {
+            var prevToken = Token.FindPrevToken(_parser.Tokens, pos, Token.TokenFlag.FlagNotDelimiter);
+            var nextToken = Token.FindNextToken(_parser.Tokens, pos, Token.TokenFlag.FlagNotDelimiter);
+            // The lookup may yield an index outside the list (e.g. pos is the first token); never index with it unchecked
+            if (!IsTokenCategory(nextToken, Token.TokenCategory.Bracket) || !Token.InListRange(prevToken, _parser.Tokens)) return false;
+            return KeywordManager.ContainsInPeekEntries(Element.ElementCategory.ElementAnimeType, _parser.Tokens[prevToken].Content);
+        }
///
/// Finds and sets the anime season keyword.
diff --git a/AnitomySharp/ParserNumber.cs b/AnitomySharp/ParserNumber.cs
index 4e10317..de3174f 100644
--- a/AnitomySharp/ParserNumber.cs
+++ b/AnitomySharp/ParserNumber.cs
@@ -412,7 +412,7 @@ namespace AnitomySharp
_parser.Tokens.Insert(foundIdx,
new Token(options.Identifiable ? Token.TokenCategory.Identifier : Token.TokenCategory.Unknown, token.Enclosed, prefix));
- return true;
+ return true;
}
@@ -698,6 +698,50 @@ namespace AnitomySharp
return false;
}
+        /// <summary>
+        /// Searches, from the last candidate to the first, for an episode number that accompanies an anime type keyword
+        /// or sits inside a bracketed group, e.g. "OVA 3", "[Web Preview 06]", "OtherToken[Hint05]", "OtherToken[Disc 01]".
+        /// </summary>
+        /// <remarks>NOTE(review): relies on SetEpisodeNumber returning false when validation rejects the number; confirm against its definition.</remarks>
+        /// <param name="tokens">Indices (into the parser's token list) of the candidate tokens.</param>
+        /// <returns><c>true</c> if an episode number was set; otherwise <c>false</c>.</returns>
+        public bool SearchForSymbolWithEpisode(List<int> tokens)
+        {
+            var helper = _parser.ParseHelper;
+            // Match from back to front
+            for (var i = tokens.Count - 1; i >= 0; i--)
+            {
+                var it = tokens[i];
+                var token = _parser.Tokens[it];
+
+                // e.g. "OVA 3", "[Web Preview 06]" ("Web Preview" is found through PeekEntries)
+                if ((helper.IsPrevTokenContainAnimeType(it) || helper.IsPrevTokenContainAnimeTypeInPeekEntries(it)) && !helper.IsTokenIsolated(it))
+                {
+                    SetEpisodeNumber(token.Content, token, false);
+                    return true;
+                }
+                // The remaining patterns need an enclosed token; it > 1 makes sure this token is not the first one
+                if (it <= 1 || !token.Enclosed) continue;
+
+                // e.g. OtherToken[Hint05]: an isolated token made of letters followed by digits
+                if (helper.IsTokenIsolated(it))
+                {
+                    var content = token.Content;
+                    var numberBegin = ParserHelper.IndexOfFirstDigit(content);
+                    var prefix = StringHelper.SubstringWithCheck(content, 0, numberBegin);
+                    var number = StringHelper.SubstringWithCheck(content, numberBegin, content.Length - numberBegin);
+                    // Report success only when the validated number was accepted, so the later fallbacks still run otherwise
+                    if (prefix != "" && StringHelper.IsAlphaString(prefix) && StringHelper.IsNumericString(number) && SetEpisodeNumber(number, token, true)) return true;
+                }
+
+                // e.g. OtherToken[Disc 01]: a purely numeric token after a delimiter and before a bracket;
+                // as above, a number rejected by validation must not count as a match
+                if (helper.IsTokenIsolatedWithDelimiterAndBracket(it) && StringHelper.IsNumericString(token.Content) && SetEpisodeNumber(token.Content, token, true)) return true;
+            }
+
+            return false;
+        }
+
///
/// Searches for equivalent number in a list of tokens. e.g. 08(114)
///
@@ -730,10 +774,7 @@ namespace AnitomySharp
continue;
}
- var list = new List
- {
- _parser.Tokens[it], _parser.Tokens[nextToken]
- };
+ var list = new List { _parser.Tokens[it], _parser.Tokens[nextToken] };
list.Sort((o1, o2) => StringHelper.StringToInt(o1.Content) - StringHelper.StringToInt(o2.Content));
SetEpisodeNumber(list[0].Content, list[0], false);
@@ -743,6 +784,50 @@ namespace AnitomySharp
return false;
}
+        /// <summary>
+        /// Searches for an episode number followed by an equivalent number in parentheses, both inside a bracketed group, e.g. [13(341)] or [13 (341)].
+        /// </summary>
+        /// <remarks>NOTE(review): relies on SetEpisodeNumber returning false when validation rejects the number; confirm against its definition.</remarks>
+        /// <param name="tokens">Indices (into the parser's token list) of the candidate tokens.</param>
+        /// <returns><c>true</c> if an episode number and its equivalent number were set; otherwise <c>false</c>.</returns>
+        public bool SearchForEquivalentNumbersWithBracket(List<int> tokens)
+        {
+            foreach (var it in tokens)
+            {
+                // Only consider a token that holds "(" itself, or whose next non-delimiter token holds ")"
+                var nextToken = Token.FindNextToken(_parser.Tokens, it, Token.TokenFlag.FlagNotDelimiter);
+                if (!Token.InListRange(nextToken, _parser.Tokens) || !(_parser.Tokens[it].Content.Contains("(") || _parser.Tokens[nextToken].Content.Contains(")")))
+                {
+                    continue;
+                }
+
+                // e.g. [13(341)]
+                if (it > 1 && _parser.Tokens[it].Enclosed && _parser.ParseHelper.IsTokenIsolated(it))
+                {
+                    string[] episodes = _parser.Tokens[it].Content.Split(new string[] { "(", ")" }, StringSplitOptions.RemoveEmptyEntries);
+                    // A token without both numbers (e.g. a plain "3000") splits into fewer than two parts; check before indexing
+                    if (episodes.Length >= 2 && StringHelper.IsNumericString(episodes[0]) && StringHelper.IsNumericString(episodes[1]))
+                    {
+                        SetEpisodeNumber(episodes[0], _parser.Tokens[it], false);
+                        SetAlternativeEpisodeNumber(episodes[1], _parser.Tokens[it]);
+                        return true;
+                    }
+                }
+
+                // e.g. [13 (341)]
+                if (it > 1 && _parser.Tokens[nextToken].Enclosed && _parser.ParseHelper.IsTokenIsolatedWithDelimiterAndBracket(nextToken))
+                {
+                    string episode = _parser.Tokens[nextToken].Content.Replace("(", "").Replace(")", "");
+                    // Record the equivalent number only when the validated episode number was accepted
+                    if (StringHelper.IsNumericString(_parser.Tokens[it].Content) && StringHelper.IsNumericString(episode) && SetEpisodeNumber(_parser.Tokens[it].Content, _parser.Tokens[it], true))
+                    {
+                        SetAlternativeEpisodeNumber(episode, _parser.Tokens[nextToken]);
+                        return true;
+                    }
+                }
+            }
+            return false;
+        }
///
/// Searches for the last number token in a list of tokens
diff --git a/AnitomySharp/StringHelper.cs b/AnitomySharp/StringHelper.cs
index 2f50953..8f4413b 100644
--- a/AnitomySharp/StringHelper.cs
+++ b/AnitomySharp/StringHelper.cs
@@ -120,6 +120,17 @@ namespace AnitomySharp
{
return str.All(char.IsDigit);
}
+        /// <summary>
+        /// Returns whether every character of <paramref name="str"/> is a letter as defined by char.IsLetter.
+        /// An empty string yields <c>true</c> because it has no character that could fail the check.
+        /// </summary>
+        /// <param name="str">String to inspect.</param>
+        /// <returns><c>true</c> if no non-letter character is present; otherwise <c>false</c>.</returns>
+        /// <remarks>Counterpart of IsNumericString, which applies the same all-characters test with char.IsDigit.</remarks>
+        public static bool IsAlphaString(string str)
+        {
+            return !str.Any(ch => !char.IsLetter(ch));
+        }
///
/// Returns the int value of the str; 0 otherwise.