jellyfin-plugin-metashark/AnitomySharp/ParserNumber.cs

900 lines
37 KiB
C#
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* Copyright (c) 2014-2017, Eren Okka
* Copyright (c) 2016-2017, Paul Miller
* Copyright (c) 2017-2018, Tyler Bratton
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
namespace AnitomySharp
{
/// <summary>
/// A utility class to assist in number parsing.
/// </summary>
public class ParserNumber
{
/// <summary>
/// 动画最小计算年份
/// </summary>
public const int AnimeYearMin = 1900;
/// <summary>
/// 动画最大计算年份
/// </summary>
public const int AnimeYearMax = 2100;
/// <summary>
///
/// </summary>
private const int EpisodeNumberMax = 9999;
/// <summary>
/// 最大卷数
/// </summary>
private const int VolumeNumberMax = 99;
/// <summary>
/// 正则开头
/// </summary>
public const string RegexMatchOnlyStart = @"\A(?:";
/// <summary>
/// 正则结尾
/// </summary>
public const string RegexMatchOnlyEnd = @")\z";
/// <summary>
///
/// </summary>
private readonly Parser _parser;
/// <summary>
///
/// </summary>
/// <param name="parser"></param>
public ParserNumber(Parser parser)
{
_parser = parser;
}
/// <summary>
/// Returns whether or not the <c>number</c> is a volume number
///
/// 返验证卷数字符串是否有效
/// </summary>
private static bool IsValidVolumeNumber(string number)
{
return StringHelper.StringToInt(number) <= VolumeNumberMax;
}
/// <summary>
/// Returns whether or not the <c>number</c> is a valid episode number.
///
/// 验证集数字符串是否有效
/// </summary>
private static bool IsValidEpisodeNumber(string number)
{
// Eliminate non numeric portion of number, then parse as double.
var temp = "";
for (var i = 0; i < number.Length && char.IsDigit(number[i]); i++)
{
temp += number[i];
}
return !string.IsNullOrEmpty(temp) && double.Parse(temp) <= EpisodeNumberMax;
}
/// <summary>
/// Sets the alternative episode number.
/// </summary>
private bool SetAlternativeEpisodeNumber(string number, Token token)
{
_parser.Elements.Add(new Element(Element.ElementCategory.ElementEpisodeNumberAlt, number));
token.Category = Token.TokenCategory.Identifier;
return true;
}
/// <summary>
/// Sets the volume number.
///
/// 添加卷数元素
/// </summary>
/// <param name="number">the number</param>
/// <param name="token">the token which contains the volume number</param>
/// <param name="validate">true if we should check if it's a valid number, false to disable verification</param>
/// <returns>true if the volume number was set</returns>
public bool SetVolumeNumber(string number, Token token, bool validate)
{
if (validate && !IsValidVolumeNumber(number)) return false;
_parser.Elements.Add(new Element(Element.ElementCategory.ElementVolumeNumber, number));
token.Category = Token.TokenCategory.Identifier;
return true;
}
/// <summary>
/// Sets the anime episode number.
///
/// 添加集数元素
/// </summary>
/// <param name="number">the episode number</param>
/// <param name="token">the token which contains the volume number</param>
/// <param name="validate">true if we should check if it's a valid episode number; false to disable validation</param>
/// <returns>true if the episode number was set</returns>
public bool SetEpisodeNumber(string number, Token token, bool validate)
{
if (validate && !IsValidEpisodeNumber(number)) return false;
token.Category = Token.TokenCategory.Identifier;
var category = Element.ElementCategory.ElementEpisodeNumber;
/** Handle equivalent numbers */
if (_parser.IsEpisodeKeywordsFound)
{
foreach (var element in _parser.Elements)
{
if (element.Category != Element.ElementCategory.ElementEpisodeNumber) continue;
/** The larger number gets to be the alternative one */
var comparison = StringHelper.StringToInt(number) - StringHelper.StringToInt(element.Value);
if (comparison > 0)
{
category = Element.ElementCategory.ElementEpisodeNumberAlt;
}
else if (comparison < 0)
{
element.Category = Element.ElementCategory.ElementEpisodeNumberAlt;
}
else
{
return false; /** No need to add the same number twice */
}
break;
}
}
_parser.Elements.Add(new Element(category, number));
return true;
}
/// <summary>
/// Checks if a number follows the specified <c>token</c>
///
/// 确认此标记中是否包含给定元素类型的关键词,如果包含且其能满足匹配模式,则添加此元素
/// </summary>
/// <param name="category">the category to set if a number follows the <c>token</c></param>
/// <param name="token">the token</param>
/// <returns>true if a number follows the token; false otherwise</returns>
private bool NumberComesAfterPrefix(Element.ElementCategory category, Token token)
{
var numberBegin = ParserHelper.IndexOfFirstDigit(token.Content);
var prefix = StringHelper.SubstringWithCheck(token.Content, 0, numberBegin).ToUpperInvariant();
if (!KeywordManager.Contains(category, prefix)) return false;
var number = StringHelper.SubstringWithCheck(token.Content, numberBegin, token.Content.Length - numberBegin);
switch (category)
{
case Element.ElementCategory.ElementEpisodePrefix:
if (!MatchEpisodePatterns(number, token))
{
SetEpisodeNumber(number, token, false);
}
return true;
case Element.ElementCategory.ElementVolumePrefix:
if (!MatchVolumePatterns(number, token))
{
SetVolumeNumber(number, token, false);
}
return true;
default:
return false;
}
}
/// <summary>
/// Checks whether the number precedes the word "of"
/// </summary>
/// <param name="token">the token</param>
/// <param name="currentTokenIdx">the index of the token</param>
/// <returns>true if the token precedes the word "of"</returns>
private bool NumberComesBeforeAnotherNumber(Token token, int currentTokenIdx)
{
var separatorToken = Token.FindNextToken(_parser.Tokens, currentTokenIdx, Token.TokenFlag.FlagNotDelimiter);
if (!Token.InListRange(separatorToken, _parser.Tokens)) return false;
var separators = new List<Tuple<string, bool>>
{
Tuple.Create("&", true),
Tuple.Create("of", false)
};
foreach (var separator in separators)
{
if (_parser.Tokens[separatorToken].Content != separator.Item1) continue;
var otherToken = Token.FindNextToken(_parser.Tokens, separatorToken, Token.TokenFlag.FlagNotDelimiter);
if (!Token.InListRange(otherToken, _parser.Tokens)
|| !StringHelper.IsNumericString(_parser.Tokens[otherToken].Content)) continue;
SetEpisodeNumber(token.Content, token, false);
if (separator.Item2)
{
SetEpisodeNumber(_parser.Tokens[otherToken].Content, _parser.Tokens[otherToken], false);
}
_parser.Tokens[separatorToken].Category = Token.TokenCategory.Identifier;
_parser.Tokens[otherToken].Category = Token.TokenCategory.Identifier;
return true;
}
return false;
}
// EPISODE MATCHERS
/// <summary>
/// Attempts to find an episode/season inside a <c>word</c>
///
/// 在传入的字符串中共尝试匹配季/集
/// </summary>
/// <param name="word">the word</param>
/// <param name="token">the token</param>
/// <returns>true if the word was matched to an episode/season number</returns>
public bool MatchEpisodePatterns(string word, Token token)
{
if (StringHelper.IsNumericString(word)) return false;
word = word.Trim(" -".ToCharArray());
// 根据前后是否为数字进行分流处理
var numericFront = char.IsDigit(word[0]);
var numericBack = char.IsDigit(word[word.Length - 1]);
if (numericFront && numericBack)
{
// e.g. "01v2"
if (MatchSingleEpisodePattern(word, token))
{
return true;
}
// e.g. "01-02", "03-05v2"
if (MatchMultiEpisodePattern(word, token))
{
return true;
}
// e.g. "07.5"
if (MatchFractionalEpisodePattern(word, token))
{
return true;
}
}
if (numericBack)
{
// e.g. "2x01", "S01E03", "S01-02xE001-150"
if (MatchSeasonAndEpisodePattern(word, token))
{
return true;
}
// e.g. "#01", "#02-03v2"
if (MatchNumberSignPattern(word, token))
{
return true;
}
}
// e.g. "ED1", "OP4a", "OVA2"
if (!numericFront && MatchTypeAndEpisodePattern(word, token))
{
return true;
}
// e.g. "4a", "111C"
if (numericFront && !numericBack && MatchPartialEpisodePattern(word, token))
{
return true;
}
// e.g. "01-24Fin"
if (word.IndexOf("fin", StringComparison.OrdinalIgnoreCase) >= 0)
{
if (MatchMultiEpisodePattern(word, token))
{
return true;
}
}
// U+8A71 is used as counter for stories, episodes of TV series, etc.
return MatchJapaneseCounterPattern(word, token);
}
/// <summary>
/// Match a single episode pattern. e.g. "01v2".
/// </summary>
/// <param name="word">the word</param>
/// <param name="token">the token</param>
/// <returns>true if the token matched</returns>
private bool MatchSingleEpisodePattern(string word, Token token)
{
const string regexPattern = RegexMatchOnlyStart + @"(\d{1,4})[vV](\d)" + RegexMatchOnlyEnd;
var match = Regex.Match(word, regexPattern);
if (!match.Success) return false;
SetEpisodeNumber(match.Groups[1].Value, token, false);
_parser.Elements.Add(new Element(Element.ElementCategory.ElementReleaseVersion, match.Groups[2].Value));
return true;
}
/// <summary>
/// Match a multi episode pattern. e.g. "01-02", "03-05v2", "01-24Fin".
/// </summary>
/// <param name="word">the word</param>
/// <param name="token">the token</param>
/// <returns>true if the token matched</returns>
private bool MatchMultiEpisodePattern(string word, Token token)
{
const string regexPattern = RegexMatchOnlyStart + @"(\d{1,4})(?:[vV](\d))?[-~&+](\d{1,4})(?:[vV](\d))?(FIN|Fin|fin)?" + RegexMatchOnlyEnd;
var match = Regex.Match(word, regexPattern);
if (!match.Success) return false;
var lowerBound = match.Groups[1].Value;
var upperBound = match.Groups[3].Value;
/** Avoid matching expressions such as "009-1" or "5-2" */
if (StringHelper.StringToInt(lowerBound) >= StringHelper.StringToInt(upperBound)) return false;
if (!SetEpisodeNumber(lowerBound, token, true)) return false;
SetEpisodeNumber(upperBound, token, true);
if (!string.IsNullOrEmpty(match.Groups[2].Value))
{
_parser.Elements.Add(new Element(Element.ElementCategory.ElementReleaseVersion, match.Groups[2].Value));
}
if (!string.IsNullOrEmpty(match.Groups[4].Value))
{
_parser.Elements.Add(new Element(Element.ElementCategory.ElementReleaseVersion, match.Groups[4].Value));
}
if (!string.IsNullOrEmpty(match.Groups[5].Value))
{
_parser.Elements.Add(new Element(Element.ElementCategory.ElementReleaseInformation, match.Groups[5].Value));
}
return true;
}
/// <summary>
/// Match season and episode patterns. e.g. "2x01", "S01E03", "S01-02xE001-150", "S01E06v2".
/// </summary>
/// <param name="word">the word</param>
/// <param name="token">the token</param>
/// <returns>true if the token matched</returns>
private bool MatchSeasonAndEpisodePattern(string word, Token token)
{
const string regexPattern = RegexMatchOnlyStart + @"S?(\d{1,2})(?:-S?(\d{1,2}))?(?:x|[ ._-x]?EP?)(\d{1,4})(?:-E?P?(\d{1,4}))?(?:[vV](\d{1,2}))?" + RegexMatchOnlyEnd;
var match = Regex.Match(word, regexPattern);
if (!match.Success) return false;
_parser.Elements.Add(new Element(Element.ElementCategory.ElementAnimeSeason, match.Groups[1].Value));
if (!string.IsNullOrEmpty(match.Groups[2].Value))
{
_parser.Elements.Add(new Element(Element.ElementCategory.ElementAnimeSeason, match.Groups[2].Value));
}
SetEpisodeNumber(match.Groups[3].Value, token, false);
if (!string.IsNullOrEmpty(match.Groups[4].Value))
{
SetEpisodeNumber(match.Groups[4].Value, token, false);
}
return true;
}
/// <summary>
/// Match type and episode. e.g. "ED1", "OP4a", "OVA2".
/// </summary>
/// <param name="word">the word</param>
/// <param name="token">the token</param>
/// <returns>true if the token matched</returns>
private bool MatchTypeAndEpisodePattern(string word, Token token)
{
var numberBegin = ParserHelper.IndexOfFirstDigit(word);
var prefix = StringHelper.SubstringWithCheck(word, 0, numberBegin);
var category = Element.ElementCategory.ElementAnimeType;
var options = new KeywordOptions();
if (!KeywordManager.FindAndSet(KeywordManager.Normalize(prefix), ref category, ref options)) return false;
_parser.Elements.Add(new Element(Element.ElementCategory.ElementAnimeType, prefix));
var number = word.Substring(numberBegin);
if (!MatchEpisodePatterns(number, token) && !SetEpisodeNumber(number, token, true)) return false;
var foundIdx = _parser.Tokens.IndexOf(token);
if (foundIdx == -1) return true;
token.Content = number;
_parser.Tokens.Insert(foundIdx,
new Token(options.Identifiable ? Token.TokenCategory.Identifier : Token.TokenCategory.Unknown, token.Enclosed, prefix));
return true;
}
/// <summary>
/// Match fractional episodes. e.g. "07.5"
/// </summary>
/// <param name="word">the word</param>
/// <param name="token">the token</param>
/// <returns>true if the token matched</returns>
private bool MatchFractionalEpisodePattern(string word, Token token)
{
if (string.IsNullOrEmpty(word))
{
word = "";
}
const string regexPattern = RegexMatchOnlyStart + @"\d+\.5" + RegexMatchOnlyEnd;
var match = Regex.Match(word, regexPattern);
return match.Success && SetEpisodeNumber(word, token, true);
}
/// <summary>
/// Match partial episodes. e.g. "4a", "111C".
/// </summary>
/// <param name="word">the word</param>
/// <param name="token">the token</param>
/// <returns>true if the token matched</returns>
private bool MatchPartialEpisodePattern(string word, Token token)
{
if (string.IsNullOrEmpty(word)) return false;
var foundIdx = Enumerable.Range(0, word.Length)
.DefaultIfEmpty(word.Length)
.FirstOrDefault(value => !char.IsDigit(word[value]));
var suffixLength = word.Length - foundIdx;
bool IsValidSuffix(int c) => c >= 'A' && c <= 'C' || c >= 'a' && c <= 'c';
return suffixLength == 1 && IsValidSuffix(word[foundIdx]) && SetEpisodeNumber(word, token, true);
}
/// <summary>
/// Match episodes with number signs. e.g. "#01", "#02-03v2"
/// </summary>
/// <param name="word">the word</param>
/// <param name="token">the token</param>
/// <returns>true if the token matched</returns>
private bool MatchNumberSignPattern(string word, Token token)
{
if (string.IsNullOrEmpty(word) || word[0] != '#') word = "";
const string regexPattern = RegexMatchOnlyStart + @"#(\d{1,4})(?:[-~&+](\d{1,4}))?(?:[vV](\d))?" + RegexMatchOnlyEnd;
var match = Regex.Match(word, regexPattern);
if (!match.Success) return false;
if (!SetEpisodeNumber(match.Groups[1].Value, token, true)) return false;
if (!string.IsNullOrEmpty(match.Groups[2].Value))
{
SetEpisodeNumber(match.Groups[2].Value, token, false);
}
if (!string.IsNullOrEmpty(match.Groups[3].Value))
{
_parser.Elements.Add(new Element(Element.ElementCategory.ElementReleaseVersion, match.Groups[3].Value));
}
return true;
}
/// <summary>
/// Match Japanese patterns. e.g. U+8A71 is used as counter for stories, episodes of TV series, etc.
///
/// 匹配日文中常见顺序词
///
/// 符合这种匹配模式的,一般在集数后都紧跟本集标题 #TODO
/// </summary>
/// <param name="word">the word</param>
/// <param name="token">the token</param>
/// <returns>true if the token matched</returns>
public bool MatchJapaneseCounterPattern(string word, Token token)
{
if (string.IsNullOrEmpty(word)) return false;
// 1st|2nd|3rd| #TODO
string regexPattern = @"(|Ⅱ|Ⅲ)";
var match = Regex.Match(word, RegexMatchOnlyStart + regexPattern + RegexMatchOnlyEnd, RegexOptions.IgnoreCase);
if (match.Success)
{
var episodeNumber = ParserHelper.GetNumberFromOrdinal(match.Groups[1].Value);
SetEpisodeNumber(episodeNumber, token, false);
return true;
}
regexPattern = @"([上中下前後])([巻卷編编])";
match = Regex.Match(word, RegexMatchOnlyStart + regexPattern + RegexMatchOnlyEnd, RegexOptions.IgnoreCase);
if (match.Success)
{
var episodeNumber = ParserHelper.GetNumberFromOrdinal(match.Groups[1].Value);
SetEpisodeNumber(episodeNumber, token, false);
return true;
}
// 全角数字:\uFF10-\uFF19
regexPattern = @"([第全]?)([0-9一二三四五六七八九十壱弐参\uFF10-\uFF19]+)([回集話话幕夜発縛])";
match = Regex.Match(word, RegexMatchOnlyStart + regexPattern + RegexMatchOnlyEnd, RegexOptions.IgnoreCase);
if (match.Success)
{
var episodeNumber = match.Groups[2].Value;
if (!StringHelper.IsNumericString(episodeNumber))
{
episodeNumber = ParserHelper.GetNumberFromOrdinal(episodeNumber);
}
episodeNumber = ParserHelper.GetNumberFromFullWidth(episodeNumber);
SetEpisodeNumber(episodeNumber, token, false);
return true;
}
regexPattern = @"([第全]?)([0-9一二三四五六七八九十壱弐参\uFF10-\uFF19]+)([期章巻卷])";
match = Regex.Match(word, RegexMatchOnlyStart + regexPattern + RegexMatchOnlyEnd, RegexOptions.IgnoreCase);
if (match.Success)
{
var episodeNumber = match.Groups[2].Value;
if (!StringHelper.IsNumericString(episodeNumber))
{
episodeNumber = ParserHelper.GetNumberFromOrdinal(episodeNumber);
}
episodeNumber = ParserHelper.GetNumberFromFullWidth(episodeNumber);
SetEpisodeNumber(episodeNumber, token, false);
return true;
}
regexPattern = @"(EPISODE|ACT|scene|ep|screen|voice|case|menu|rail|round|game|page|collection|cage|office|doll|Princess)([ \.\-_])([0-9]+)";
match = Regex.Match(word, RegexMatchOnlyStart + regexPattern + RegexMatchOnlyEnd, RegexOptions.IgnoreCase);
if (match.Success)
{
var episodeNumber = match.Groups[3].Value;
SetEpisodeNumber(episodeNumber, token, false);
return true;
}
regexPattern = @"(vol|volume)([ \.\-_])([0-9]+)";
match = Regex.Match(word, RegexMatchOnlyStart + regexPattern + RegexMatchOnlyEnd, RegexOptions.IgnoreCase);
if (match.Success)
{
var episodeNumber = match.Groups[3].Value;
SetEpisodeNumber(episodeNumber, token, false);
return true;
}
else
{
return false;
}
}
// VOLUME MATCHES
/// <summary>
/// Attempts to find an episode/season inside a <c>word</c>
///
/// 在传入的字符串中共尝试匹配季/集
/// </summary>
/// <param name="word">the word</param>
/// <param name="token">the token</param>
/// <returns>true if the word was matched to an episode/season number</returns>
public bool MatchVolumePatterns(string word, Token token)
{
// All patterns contain at least one non-numeric character
if (StringHelper.IsNumericString(word)) return false;
word = word.Trim(" -".ToCharArray());
var numericFront = char.IsDigit(word[0]);
var numericBack = char.IsDigit(word[word.Length - 1]);
if (numericFront && numericBack)
{
// e.g. "01v2" e.g. "01-02", "03-05v2"
return MatchSingleVolumePattern(word, token) || MatchMultiVolumePattern(word, token);
}
return false;
}
/// <summary>
/// Match single volume. e.g. "01v2"
/// </summary>
/// <param name="word">the word</param>
/// <param name="token">the token</param>
/// <returns>true if the token matched</returns>
private bool MatchSingleVolumePattern(string word, Token token)
{
if (string.IsNullOrEmpty(word)) word = "";
const string regexPattern = RegexMatchOnlyStart + @"(\d{1,2})[vV](\d)" + RegexMatchOnlyEnd;
var match = Regex.Match(word, regexPattern);
if (!match.Success) return false;
SetVolumeNumber(match.Groups[1].Value, token, false);
_parser.Elements.Add(new Element(Element.ElementCategory.ElementReleaseVersion, match.Groups[2].Value));
return true;
}
/// <summary>
/// Match multi-volume. e.g. "01-02", "03-05v2".
/// </summary>
/// <param name="word">the word</param>
/// <param name="token">the token</param>
/// <returns>true if the token matched</returns>
private bool MatchMultiVolumePattern(string word, Token token)
{
if (string.IsNullOrEmpty(word)) word = "";
const string regexPattern = RegexMatchOnlyStart + @"(\d{1,2})[-~&+](\d{1,2})(?:[vV](\d))?" + RegexMatchOnlyEnd;
var match = Regex.Match(word, regexPattern);
if (!match.Success) return false;
var lowerBound = match.Groups[1].Value;
var upperBound = match.Groups[2].Value;
if (StringHelper.StringToInt(lowerBound) >= StringHelper.StringToInt(upperBound)) return false;
if (!SetVolumeNumber(lowerBound, token, true)) return false;
SetVolumeNumber(upperBound, token, false);
if (string.IsNullOrEmpty(match.Groups[3].Value))
{
_parser.Elements.Add(new Element(Element.ElementCategory.ElementReleaseVersion, match.Groups[3].Value));
}
return true;
}
// SEARCH
/// <summary>
/// Searches for isolated numbers in a list of <c>tokens</c>.
///
/// 搜索孤立数字
/// </summary>
/// <param name="tokens">the list of tokens</param>
/// <returns>true if an isolated number was found</returns>
public bool SearchForIsolatedNumbers(IEnumerable<int> tokens)
{
return tokens
.Where(it => _parser.Tokens[it].Enclosed && _parser.ParseHelper.IsTokenIsolated(it))
.Any(it => SetEpisodeNumber(_parser.Tokens[it].Content, _parser.Tokens[it], true));
}
/// <summary>
/// Searches for separated numbers in a list of <c>tokens</c>.
///
/// 搜索带分隔符的数字
/// </summary>
/// <param name="tokens">the list of tokens</param>
/// <returns>true fi a separated number was found</returns>
public bool SearchForSeparatedNumbers(List<int> tokens)
{
foreach (var it in tokens)
{
var previousToken = Token.FindPrevToken(_parser.Tokens, it, Token.TokenFlag.FlagNotDelimiter);
// See if the number has a preceding "-" separator
if (!_parser.ParseHelper.IsTokenCategory(previousToken, Token.TokenCategory.Unknown)
|| !ParserHelper.IsDashCharacter(_parser.Tokens[previousToken].Content[0])) continue;
if (!SetEpisodeNumber(_parser.Tokens[it].Content, _parser.Tokens[it], true)) continue;
_parser.Tokens[previousToken].Category = Token.TokenCategory.Identifier;
return true;
}
return false;
}
/// <summary>
/// Searches for episode patterns in a list of <c>tokens</c>.
///
/// 在标记列表中匹配集数模式
/// </summary>
/// <param name="tokens">the list of tokens</param>
/// <returns>true if an episode number was found</returns>
public bool SearchForEpisodePatterns(List<int> tokens)
{
foreach (var it in tokens)
{
var numericFront = _parser.Tokens[it].Content.Length > 0 && char.IsDigit(_parser.Tokens[it].Content[0]);
if (!numericFront)
{
// e.g. "EP.1", "Vol.1"
if (NumberComesAfterPrefix(Element.ElementCategory.ElementEpisodePrefix, _parser.Tokens[it]))
{
return true;
}
if (NumberComesAfterPrefix(Element.ElementCategory.ElementVolumePrefix, _parser.Tokens[it]))
{
continue;
}
}
else
{
// e.g. "8 & 10", "01 of 24"
if (NumberComesBeforeAnotherNumber(_parser.Tokens[it], it))
{
return true;
}
}
// Look for other patterns
if (MatchEpisodePatterns(_parser.Tokens[it].Content, _parser.Tokens[it]))
{
return true;
}
}
return false;
}
/// <summary>
/// 搜索同动画类型同时出现的集数
/// </summary>
/// <param name="tokens"></param>
/// <returns></returns>
public bool SearchForSymbolWithEpisode(List<int> tokens)
{
// Match from back to front
for (int i = tokens.Count - 1; i >= 0; i--)
{
var it = tokens[i];
// e.g. OVA 3, [Web Preview 06]: Web Preview in PeekEntries
if ((_parser.ParseHelper.IsPrevTokenContainAnimeType(it) || _parser.ParseHelper.IsPrevTokenContainAnimeTypeInPeekEntries(it)) && !_parser.ParseHelper.IsTokenIsolated(it))
{
SetEpisodeNumber(_parser.Tokens[it].Content, _parser.Tokens[it], false);
return true;
}
// e.g. OtherToken[Hint05]
// it>1: makesure this token is not first one
if (it > 1 && _parser.Tokens[it].Enclosed && _parser.ParseHelper.IsTokenIsolated(it))
{
var tokenContent = _parser.Tokens[it].Content;
var numberBegin = ParserHelper.IndexOfFirstDigit(tokenContent);
var prefix = StringHelper.SubstringWithCheck(tokenContent, 0, numberBegin);
var number = StringHelper.SubstringWithCheck(tokenContent, numberBegin, tokenContent.Length - numberBegin);
// token should be: alphaNumeric
if (prefix != "" && StringHelper.IsAlphaString(prefix) && StringHelper.IsNumericString(number))
{
SetEpisodeNumber(number, _parser.Tokens[it], true);
return true;
}
}
// e.g. OtherToken[Disc 01]
if (it > 1 && _parser.Tokens[it].Enclosed && _parser.ParseHelper.IsTokenIsolatedWithDelimiterAndBracket(it) && StringHelper.IsNumericString(_parser.Tokens[it].Content))
{
SetEpisodeNumber(_parser.Tokens[it].Content, _parser.Tokens[it], true);
return true;
}
}
return false;
}
/// <summary>
/// Searches for equivalent number in a list of <c>tokens</c>. e.g. 08(114)
///
/// 匹配自带等效集数的数字,常见于分割放送
/// </summary>
/// <param name="tokens">the list of tokens</param>
/// <returns>true if an equivalent number was found</returns>
public bool SearchForEquivalentNumbers(List<int> tokens)
{
foreach (var it in tokens)
{
// Find number must be isolated.
if (_parser.ParseHelper.IsTokenIsolated(it) || !IsValidEpisodeNumber(_parser.Tokens[it].Content))
{
continue;
}
// Find the first enclosed, non-delimiter token
var nextToken = Token.FindNextToken(_parser.Tokens, it, Token.TokenFlag.FlagNotDelimiter);
if (!_parser.ParseHelper.IsTokenCategory(nextToken, Token.TokenCategory.Bracket)) continue;
nextToken = Token.FindNextToken(_parser.Tokens, nextToken, Token.TokenFlag.FlagEnclosed,
Token.TokenFlag.FlagNotDelimiter);
if (!_parser.ParseHelper.IsTokenCategory(nextToken, Token.TokenCategory.Unknown)) continue;
// Check if it's an isolated number
if (!_parser.ParseHelper.IsTokenIsolated(nextToken)
|| !StringHelper.IsNumericString(_parser.Tokens[nextToken].Content)
|| !IsValidEpisodeNumber(_parser.Tokens[nextToken].Content))
{
continue;
}
var list = new List<Token> { _parser.Tokens[it], _parser.Tokens[nextToken] };
list.Sort((o1, o2) => StringHelper.StringToInt(o1.Content) - StringHelper.StringToInt(o2.Content));
SetEpisodeNumber(list[0].Content, list[0], false);
SetAlternativeEpisodeNumber(list[1].Content, list[1]);
return true;
}
return false;
}
/// <summary>
/// Searches for equivalent number in a list of <c>tokens</c>. e.g. 08(114)
///
/// 匹配自带等效集数的数字,常见于分割放送,匹配括号包裹的数字
/// </summary>
/// <param name="tokens">the list of tokens</param>
/// <returns>true if an equivalent number was found</returns>
public bool SearchForEquivalentNumbersWithBracket(List<int> tokens)
{
foreach (var it in tokens)
{
// Find the first enclosed, non-delimiter token
var nextToken = Token.FindNextToken(_parser.Tokens, it, Token.TokenFlag.FlagNotDelimiter);
if (!Token.InListRange(nextToken, _parser.Tokens) || !(_parser.Tokens[it].Content.Contains("(") || _parser.Tokens[nextToken].Content.Contains(")")))
{
continue;
}
// e.g. [13(341)]
if (it > 1 && _parser.Tokens[it].Enclosed && _parser.ParseHelper.IsTokenIsolated(it))
{
string[] episodes = _parser.Tokens[it].Content.Split(new string[] { "(", ")" }, StringSplitOptions.RemoveEmptyEntries);
if (StringHelper.IsNumericString(episodes[0]) && StringHelper.IsNumericString(episodes[1]))
{
SetEpisodeNumber(episodes[0], _parser.Tokens[it], false);
SetAlternativeEpisodeNumber(episodes[1], _parser.Tokens[it]);
return true;
}
}
// e.g. [13 (341)]
if (it > 1 && _parser.Tokens[nextToken].Enclosed && _parser.ParseHelper.IsTokenIsolatedWithDelimiterAndBracket(nextToken))
{
string episode = _parser.Tokens[nextToken].Content.Replace("(", "").Replace(")", "");
if (StringHelper.IsNumericString(_parser.Tokens[it].Content) && StringHelper.IsNumericString(episode))
{
SetEpisodeNumber(_parser.Tokens[it].Content, _parser.Tokens[it], true);
SetAlternativeEpisodeNumber(episode, _parser.Tokens[nextToken]);
return true;
}
}
}
return false;
}
/// <summary>
/// Searches for the last number token in a list of <c>tokens</c>
///
/// 搜索最后一个数字
/// </summary>
/// <param name="tokens">the list of tokens</param>
/// <returns>true if the last number token was found</returns>
public bool SearchForLastNumber(List<int> tokens)
{
for (var i = tokens.Count - 1; i >= 0; i--)
{
var it = tokens[i];
// Assuming that episode number always comes after the title,
// the first token cannot be what we're looking for
if (it == 0) continue;
if (_parser.Tokens[it].Enclosed) continue;
// Ignore if it's the first non-enclosed, non-delimiter token
if (_parser.Tokens.GetRange(0, it)
.All(r => r.Enclosed || r.Category == Token.TokenCategory.Delimiter))
{
continue;
}
var previousToken = Token.FindPrevToken(_parser.Tokens, it, Token.TokenFlag.FlagNotDelimiter);
if (_parser.ParseHelper.IsTokenCategory(previousToken, Token.TokenCategory.Unknown))
{
if (_parser.Tokens[previousToken].Content.Equals("Movie", StringComparison.InvariantCultureIgnoreCase)
|| _parser.Tokens[previousToken].Content.Equals("Part", StringComparison.InvariantCultureIgnoreCase))
{
continue;
}
}
// We'll use this number after all
if (SetEpisodeNumber(_parser.Tokens[it].Content, _parser.Tokens[it], true))
{
return true;
}
}
return false;
}
}
}