tweak(anitomy): update version v0.4.0

This commit is contained in:
cxfksword 2024-01-06 10:15:52 +08:00
parent f222258080
commit b7541fbb03
6 changed files with 203 additions and 33 deletions

View File

@ -4,23 +4,25 @@
<TargetFramework>net6.0</TargetFramework>
<GeneratePackageOnBuild>true</GeneratePackageOnBuild>
<PackageId>AnitomySharp.NET6</PackageId>
<Authors>tabratton;senritsu</Authors>
<PackageVersion>0.4.0</PackageVersion>
<Version>0.4.0</Version>
<Authors>tabratton;senritsu;chu-shen</Authors>
<Description>AnitomySharp is a C# port of Anitomy by erengy, a library for parsing anime video filenames. All credit to erengy for the actual library and logic.
This fork of AnitomySharp is inspired by tabratton and senritsu, which adds more custom rules.
</Description>
<RepositoryUrl>https://github.com/chu-shen/AnitomySharp.git</RepositoryUrl>
<RepositoryType>git</RepositoryType>
<PackageLicenseFile>LICENSE</PackageLicenseFile>
<PackageTags>Anitomy Anime</PackageTags>
<PackageRequireLicenseAcceptance>true</PackageRequireLicenseAcceptance>
<AssemblyVersion>0.3.0</AssemblyVersion>
<FileVersion>0.3.0</FileVersion>
<Version>0.3.0</Version>
<GenerateDocumentationFile>false</GenerateDocumentationFile>
<PackageLicenseFile>LICENSE</PackageLicenseFile>
<PackageReadmeFile>README.md</PackageReadmeFile>
<DocumentationFile>AnitomySharp.xml</DocumentationFile>
<GenerateDocumentationFile>false</GenerateDocumentationFile>
</PropertyGroup>
<ItemGroup>
<None Include="..\LICENSE" Pack="true" Visible="false" PackagePath="" />
<PackageReference Include="Microsoft.DocAsCode.App" Version="2.60.0" />
<None Include="..\README.md" Pack="true" PackagePath=""/>
</ItemGroup>
</Project>

View File

@ -24,13 +24,13 @@ namespace AnitomySharp
public static class KeywordManager
{
/// <summary>
/// 包含所有关键词(大写)的内部关键词元素词典
/// 包含所有关键词的内部关键词元素词典,比较器忽略大小写
/// </summary>
private static readonly Dictionary<string, Keyword> Keys = new Dictionary<string, Keyword>();
private static readonly Dictionary<string, Keyword> Keys = new Dictionary<string, Keyword>(StringComparer.OrdinalIgnoreCase);
/// <summary>
/// 文件扩展名,无值
/// 文件扩展名,无值,比较器忽略大小写
/// </summary>
private static readonly Dictionary<string, Keyword> Extensions = new Dictionary<string, Keyword>();
private static readonly Dictionary<string, Keyword> Extensions = new Dictionary<string, Keyword>(StringComparer.OrdinalIgnoreCase);
/// <summary>
/// ~~一眼真~~
@ -64,18 +64,19 @@ namespace AnitomySharp
"GEKIJOUBAN", "MOVIE",
"OAD", "OAV", "ONA", "OVA",
"TV",
"番外編", "總集編","映像特典","特典","特典アニメ",
"番外編", "總集編","DRAMA",
"映像特典","特典","特典アニメ",
// 特典 Special 剩下的各种类型可以全部命名成 SP对于较特殊意义的特典也可以自定义命名
"SPECIAL", "SPECIALS", "SP",
"SPECIAL", "SPECIALS", "SP", "SPs",
// 真人特典 Interview/Talk/Stage... 目前我们对于节目、采访、舞台活动、制作等三次元画面的长视频,一概怼成 IV。
"IV",
// 音乐视频 Music Video
"MV"});
// add "SP" to ElementAnimeType with optionsUnidentifiable
// Add(Element.ElementCategory.ElementAnimeType,
// optionsUnidentifiableUnsearchable,
// new List<string> {"SP"}); // e.g. "Yumeiro Patissiere SP Professional"
// add "SP" to ElementAnimeType with optionsUnidentifiable
// Add(Element.ElementCategory.ElementAnimeType,
// optionsUnidentifiableUnsearchable,
// new List<string> { "SP" }); // e.g. "Yumeiro Patissiere SP Professional", but it is widely used to represent special
Add(Element.ElementCategory.ElementAnimeType,
optionsUnidentifiableInvalid,
@ -84,7 +85,7 @@ namespace AnitomySharp
// 无字 OP/ED Non-Credit Opening/Ending
"ED", "ENDING", "NCED", "NCOP", "OP", "OPENING",
// 预告 Preview 预告下一话内容 注意编号表示其预告的是第几话的内容而不是跟在哪一话后面
"PREVIEW",
"PREVIEW", "YOKOKU",
// 菜单 Menu BD/DVD 播放选择菜单
"MENU",
// 广告 Commercial Message 电视放送广告,时长一般在 7s/15s/30s/45s/... 左右
@ -92,7 +93,7 @@ namespace AnitomySharp
// 语音信息
"MESSAGE",
// 宣传片/预告片 Promotion Video / Trailer 一般时长在 1~2min 命名参考原盘和 jsum
"PV", "Teaser","TRAILER", "DRAMA",
"PV", "Teaser","TRAILER",
// 真人特典 Interview/Talk/Stage... 目前我们对于节目、采访、舞台活动、制作等三次元画面的长视频,一概怼成 IV。
"INTERVIEW",
"EVENT", "TOKUTEN", "LOGO"});
@ -150,7 +151,7 @@ namespace AnitomySharp
Add(Element.ElementCategory.ElementOther,
optionsDefault,
new List<string> { "REMASTER", "REMASTERED", "UNCUT", "TS", "VFR", "WIDESCREEN", "WS", "SPURSENGINE" });
new List<string> { "REMASTER", "REMASTERED", "UNCUT", "TS", "VFR", "WIDESCREEN", "WS", "SPURSENGINE","DISC" });
Add(Element.ElementCategory.ElementReleaseGroup,
optionsDefault,
@ -281,6 +282,16 @@ namespace AnitomySharp
return false;
}
/// <summary>
/// Determines whether the peek (pre-processing) entries hold the given <paramref name="keyword"/>
/// under the given <paramref name="category"/>. The keyword comparison ignores case (ordinal).
/// </summary>
/// <param name="category">element category the entry must belong to</param>
/// <param name="keyword">string to look for</param>
/// <returns><c>true</c> if a matching entry contains the keyword; otherwise <c>false</c></returns>
public static bool ContainsInPeekEntries(Element.ElementCategory category, string keyword)
{
    foreach (var peekEntry in PeekEntries)
    {
        // Item1 is the entry's category; entries of other categories are irrelevant.
        if (peekEntry.Item1 != category)
        {
            continue;
        }

        // Item2 is the entry's keyword collection.
        if (peekEntry.Item2.Contains(keyword, StringComparer.OrdinalIgnoreCase))
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Finds a particular <c>keyword</c>. If found sets <c>category</c> and <c>options</c> to the found search result.

View File

@ -179,6 +179,7 @@ namespace AnitomySharp
private void SearchForEpisodeNumber()
{
var tokens = new List<int>();
var allTokens = new List<int>();
for (var i = 0; i < Tokens.Count; i++)
{
var token = Tokens[i];
@ -187,6 +188,7 @@ namespace AnitomySharp
ParserHelper.IndexOfFirstDigit(token.Content) != -1)
{
tokens.Add(i);
allTokens.Add(i);
}
}
@ -228,6 +230,12 @@ namespace AnitomySharp
// "e.g. "[12]", "(2006)"
if (ParseNumber.SearchForIsolatedNumbers(tokens)) return;
// e.g. "OVA 3", "OtherToken[Hint05]", "[Web Preview 06]": maybe incorrect, so put the last
if (ParseNumber.SearchForSymbolWithEpisode(allTokens)) return;
// e.g. [13(341)], [13 (341)]
if (ParseNumber.SearchForEquivalentNumbersWithBracket(allTokens)) return;
// Consider using the last number as a last resort
ParseNumber.SearchForLastNumber(tokens);
}
@ -235,7 +243,7 @@ namespace AnitomySharp
/// <summary>
/// Search for anime title
///
/// 搜索动画名
/// 搜索动画名
/// </summary>
private void SearchForAnimeTitle()
{
@ -283,6 +291,13 @@ namespace AnitomySharp
{
tokenBegin = tokenBeginWithNoReleaseGroup;
}
// 去除纯数字标题
// skip token with only number
if (Regex.Match(Tokens[tokenBegin].Content, ParserNumber.RegexMatchOnlyStart + @"^[0-9]+$" + ParserNumber.RegexMatchOnlyEnd).Success)
{
tokenBegin = tokenBeginWithNoReleaseGroup;
}
skippedPreviousGroup = true;
} while (Token.InListRange(tokenBegin, Tokens));
}
@ -398,7 +413,7 @@ namespace AnitomySharp
{
var token = Tokens[i];
/** 跳过括号标记类型的标记 */
if (token.Category == Token.TokenCategory.Bracket) continue;
if (token.Category != Token.TokenCategory.Unknown) continue;
var tokenContent = token.Content;
// e.g. "2016-17"
@ -408,13 +423,21 @@ namespace AnitomySharp
{
tokenContent = tokenContent.Split(match.Groups[2].Value)[0];
}
// add newtype e.g. "2021 OVA"
if (token.Category != Token.TokenCategory.Unknown || !StringHelper.IsNumericString(tokenContent) ||
!(ParseHelper.IsTokenContainAnimeType(i) ^ ParseHelper.IsTokenIsolated(i)))
if (!StringHelper.IsNumericString(tokenContent))
{
continue;
}
// e.g. "[2021 OVA]"
if(ParseHelper.IsNextTokenContainAnimeType(i)&&!ParseHelper.IsTokenIsolated(i)){}
// TODO may not be necessary
// if (!ParseHelper.IsTokenIsolated(i))
// {
// continue;
// }
var number = StringHelper.StringToInt(tokenContent);
// Anime year
@ -422,7 +445,7 @@ namespace AnitomySharp
{
if (Empty(Element.ElementCategory.ElementAnimeYear))
{
Elements.Add(new Element(Element.ElementCategory.ElementAnimeYear, token.Content));
Elements.Add(new Element(Element.ElementCategory.ElementAnimeYear, tokenContent));
token.Category = Token.TokenCategory.Identifier;
continue;
}

View File

@ -235,7 +235,7 @@ namespace AnitomySharp
/// <summary>
/// Returns whether or not a token at the current <c>pos</c> is isolated(surrounded by braces).
///
/// 判断当前位置标记(token)是否孤立,是否被括号包裹
/// 判断当前位置标记(token)是否孤立,是否被括号包裹
/// </summary>
/// <param name="pos"></param>
/// <returns></returns>
@ -246,6 +246,20 @@ namespace AnitomySharp
var nextToken = Token.FindNextToken(_parser.Tokens, pos, Token.TokenFlag.FlagNotDelimiter);
return IsTokenCategory(nextToken, Token.TokenCategory.Bracket);
}
/// <summary>
/// Returns whether the token at <c>pos</c> is preceded by a delimiter token and followed
/// (ignoring delimiters) by a bracket token.
/// </summary>
/// <param name="pos">index of the token to inspect</param>
/// <returns><c>true</c> if both neighbours match; otherwise <c>false</c></returns>
public bool IsTokenIsolatedWithDelimiterAndBracket(int pos)
{
    // The preceding token is looked up with FlagNone and must itself be a delimiter.
    // NOTE(review): FlagNone presumably applies no filtering - confirm against Token.FindPrevToken.
    var before = Token.FindPrevToken(_parser.Tokens, pos, Token.TokenFlag.FlagNone);

    // The following token is looked up skipping delimiters and must be a bracket.
    // Short-circuiting keeps the second lookup from running when the first check fails.
    return IsTokenCategory(before, Token.TokenCategory.Delimiter)
        && IsTokenCategory(
            Token.FindNextToken(_parser.Tokens, pos, Token.TokenFlag.FlagNotDelimiter),
            Token.TokenCategory.Bracket);
}
/// <summary>
/// Returns whether or not a token at the current <c>pos+1</c> is ElementAnimeType.
@ -254,13 +268,37 @@ namespace AnitomySharp
/// </summary>
/// <param name="pos"></param>
/// <returns></returns>
public bool IsTokenContainAnimeType(int pos)
public bool IsNextTokenContainAnimeType(int pos)
{
var prevToken = Token.FindPrevToken(_parser.Tokens, pos, Token.TokenFlag.FlagNotDelimiter);
if (!IsTokenCategory(prevToken, Token.TokenCategory.Bracket)) return false;
var nextToken = Token.FindNextToken(_parser.Tokens, pos, Token.TokenFlag.FlagNotDelimiter);
return KeywordManager.Contains(Element.ElementCategory.ElementAnimeType, _parser.Tokens[nextToken].Content);
}
/// <summary>
/// Returns whether the non-delimiter token before <c>pos</c> is an
/// <see cref="Element.ElementCategory.ElementAnimeType"/> keyword, provided that the non-delimiter
/// token after <c>pos</c> is a bracket.
/// </summary>
/// <param name="pos">index of the token to inspect</param>
/// <returns>
/// <c>true</c> if the next token is a bracket and the previous token's content is a known anime-type
/// keyword; <c>false</c> otherwise, including when there is no previous token.
/// </returns>
public bool IsPrevTokenContainAnimeType(int pos)
{
    var prevToken = Token.FindPrevToken(_parser.Tokens, pos, Token.TokenFlag.FlagNotDelimiter);
    var nextToken = Token.FindNextToken(_parser.Tokens, pos, Token.TokenFlag.FlagNotDelimiter);
    if (!IsTokenCategory(nextToken, Token.TokenCategory.Bracket)) return false;

    // The lookup may not find a preceding token (e.g. pos is the first token); indexing the token
    // list with such a result would throw, so treat it as "no anime type before this token".
    if (!Token.InListRange(prevToken, _parser.Tokens)) return false;

    return KeywordManager.Contains(Element.ElementCategory.ElementAnimeType, _parser.Tokens[prevToken].Content);
}
/// <summary>
/// Returns whether the non-delimiter token before <c>pos</c> is listed as an
/// <see cref="Element.ElementCategory.ElementAnimeType"/> keyword in the keyword manager's peek
/// entries, provided that the non-delimiter token after <c>pos</c> is a bracket.
/// </summary>
/// <param name="pos">index of the token to inspect</param>
/// <returns>
/// <c>true</c> if the next token is a bracket and the previous token's content is found in the peek
/// entries; <c>false</c> otherwise, including when there is no previous token.
/// </returns>
public bool IsPrevTokenContainAnimeTypeInPeekEntries(int pos)
{
    var prevToken = Token.FindPrevToken(_parser.Tokens, pos, Token.TokenFlag.FlagNotDelimiter);
    var nextToken = Token.FindNextToken(_parser.Tokens, pos, Token.TokenFlag.FlagNotDelimiter);
    if (!IsTokenCategory(nextToken, Token.TokenCategory.Bracket)) return false;

    // The lookup may not find a preceding token (e.g. pos is the first token); indexing the token
    // list with such a result would throw, so treat it as "no anime type before this token".
    if (!Token.InListRange(prevToken, _parser.Tokens)) return false;

    return KeywordManager.ContainsInPeekEntries(Element.ElementCategory.ElementAnimeType, _parser.Tokens[prevToken].Content);
}
/// <summary>
/// Finds and sets the anime season keyword.

View File

@ -412,7 +412,7 @@ namespace AnitomySharp
_parser.Tokens.Insert(foundIdx,
new Token(options.Identifiable ? Token.TokenCategory.Identifier : Token.TokenCategory.Unknown, token.Enclosed, prefix));
return true;
return true;
}
@ -698,6 +698,50 @@ namespace AnitomySharp
return false;
}
/// <summary>
/// Searches the candidate <c>tokens</c>, from last to first, for an episode number that appears
/// together with an anime-type keyword or inside an enclosed token.
/// </summary>
/// <param name="tokens">indices into the parser's token list to examine</param>
/// <returns>true as soon as one candidate is accepted as the episode number; otherwise false</returns>
public bool SearchForSymbolWithEpisode(List<int> tokens)
{
    // Candidates are examined from back to front.
    var index = tokens.Count;
    while (--index >= 0)
    {
        var pos = tokens[index];
        var helper = _parser.ParseHelper;

        // e.g. "OVA 3", "[Web Preview 06]" ("Web Preview" lives in the peek entries):
        // the number follows an anime-type keyword and is not isolated on its own.
        var followsAnimeType = helper.IsPrevTokenContainAnimeType(pos)
            || helper.IsPrevTokenContainAnimeTypeInPeekEntries(pos);
        if (followsAnimeType && !helper.IsTokenIsolated(pos))
        {
            var matched = _parser.Tokens[pos];
            SetEpisodeNumber(matched.Content, matched, false);
            return true;
        }

        // Both remaining patterns require an enclosed token at an index greater than 1
        // (intended to make sure the token is not the leading one).
        if (pos <= 1 || !_parser.Tokens[pos].Enclosed)
        {
            continue;
        }

        var current = _parser.Tokens[pos];

        // e.g. "OtherToken[Hint05]": alphabetic prefix immediately followed by digits.
        if (helper.IsTokenIsolated(pos))
        {
            var content = current.Content;
            var digitStart = ParserHelper.IndexOfFirstDigit(content);
            var prefix = StringHelper.SubstringWithCheck(content, 0, digitStart);
            var number = StringHelper.SubstringWithCheck(content, digitStart, content.Length - digitStart);

            // The token must be letters followed by digits only.
            var isAlphaThenNumber = prefix != ""
                && StringHelper.IsAlphaString(prefix)
                && StringHelper.IsNumericString(number);
            if (isAlphaThenNumber)
            {
                SetEpisodeNumber(number, current, true);
                return true;
            }
        }

        // e.g. "OtherToken[Disc 01]": a purely numeric token after a delimiter and before a bracket.
        if (helper.IsTokenIsolatedWithDelimiterAndBracket(pos) && StringHelper.IsNumericString(current.Content))
        {
            SetEpisodeNumber(current.Content, current, true);
            return true;
        }
    }

    return false;
}
/// <summary>
/// Searches for equivalent number in a list of <c>tokens</c>. e.g. 08(114)
///
@ -730,10 +774,7 @@ namespace AnitomySharp
continue;
}
var list = new List<Token>
{
_parser.Tokens[it], _parser.Tokens[nextToken]
};
var list = new List<Token> { _parser.Tokens[it], _parser.Tokens[nextToken] };
list.Sort((o1, o2) => StringHelper.StringToInt(o1.Content) - StringHelper.StringToInt(o2.Content));
SetEpisodeNumber(list[0].Content, list[0], false);
@ -743,6 +784,50 @@ namespace AnitomySharp
return false;
}
/// <summary>
/// Searches a list of <c>tokens</c> for an episode number that carries an equivalent number in
/// parentheses, either inside the same token or in the following one. e.g. [13(341)], [13 (341)]
/// </summary>
/// <param name="tokens">the list of tokens</param>
/// <returns>true if an equivalent number was found</returns>
public bool SearchForEquivalentNumbersWithBracket(List<int> tokens)
{
    foreach (var it in tokens)
    {
        // Find the next non-delimiter token; skip this candidate unless it contains "(" or the
        // next token contains ")".
        var nextToken = Token.FindNextToken(_parser.Tokens, it, Token.TokenFlag.FlagNotDelimiter);
        if (!Token.InListRange(nextToken, _parser.Tokens) || !(_parser.Tokens[it].Content.Contains("(") || _parser.Tokens[nextToken].Content.Contains(")")))
        {
            continue;
        }

        // e.g. [13(341)]
        if (it > 1 && _parser.Tokens[it].Enclosed && _parser.ParseHelper.IsTokenIsolated(it))
        {
            string[] episodes = _parser.Tokens[it].Content.Split(new string[] { "(", ")" }, StringSplitOptions.RemoveEmptyEntries);

            // The split can yield fewer than two parts (e.g. the token is just "13" and only the
            // next token holds the parenthesis), so check the length before indexing.
            if (episodes.Length >= 2 && StringHelper.IsNumericString(episodes[0]) && StringHelper.IsNumericString(episodes[1]))
            {
                SetEpisodeNumber(episodes[0], _parser.Tokens[it], false);
                SetAlternativeEpisodeNumber(episodes[1], _parser.Tokens[it]);
                return true;
            }
        }

        // e.g. [13 (341)]
        if (it > 1 && _parser.Tokens[nextToken].Enclosed && _parser.ParseHelper.IsTokenIsolatedWithDelimiterAndBracket(nextToken))
        {
            // Strip the parentheses from the following token to obtain the equivalent number.
            string episode = _parser.Tokens[nextToken].Content.Replace("(", "").Replace(")", "");
            if (StringHelper.IsNumericString(_parser.Tokens[it].Content) && StringHelper.IsNumericString(episode))
            {
                SetEpisodeNumber(_parser.Tokens[it].Content, _parser.Tokens[it], true);
                SetAlternativeEpisodeNumber(episode, _parser.Tokens[nextToken]);
                return true;
            }
        }
    }

    return false;
}
/// <summary>
/// Searches for the last number token in a list of <c>tokens</c>

View File

@ -120,6 +120,17 @@ namespace AnitomySharp
{
return str.All(char.IsDigit);
}
/// <summary>
/// Returns whether every character of <c>str</c> is a letter (as defined by
/// <see cref="char.IsLetter(char)"/>). An empty string yields <c>true</c>.
/// </summary>
/// <param name="str">the string to check</param>
/// <returns><c>true</c> if no character is a non-letter; otherwise <c>false</c></returns>
public static bool IsAlphaString(string str)
{
    // "All are letters" expressed as "there is no non-letter".
    var hasNonLetter = str.Any(ch => !char.IsLetter(ch));
    return !hasNonLetter;
}
/// <summary>
/// Returns the int value of the <c>str</c>; 0 otherwise.