fix: actor lack of overview. close #80

This commit is contained in:
cxfksword 2024-05-18 16:04:05 +08:00
parent 408929fc03
commit 4ae795d503
3 changed files with 70 additions and 50 deletions

View File

@ -52,18 +52,10 @@ namespace Jellyfin.Plugin.MetaShark.Api
Regex regSubname = new Regex(@"又名: (.+?)\n", RegexOptions.Compiled); Regex regSubname = new Regex(@"又名: (.+?)\n", RegexOptions.Compiled);
Regex regImdb = new Regex(@"IMDb: (tt\d+)", RegexOptions.Compiled | RegexOptions.IgnoreCase); Regex regImdb = new Regex(@"IMDb: (tt\d+)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
Regex regSite = new Regex(@"官方网站: (.+?)\n", RegexOptions.Compiled); Regex regSite = new Regex(@"官方网站: (.+?)\n", RegexOptions.Compiled);
Regex regNameMath = new Regex(@"(.+第\w季|[\w\uff1a\uff01\uff0c\u00b7]+)\s*(.*)", RegexOptions.Compiled);
Regex regRole = new Regex(@"\([饰|配]?\s*?(.+?)\)", RegexOptions.Compiled); Regex regRole = new Regex(@"\([饰|配]?\s*?(.+?)\)", RegexOptions.Compiled);
Regex regBackgroundImage = new Regex(@"url\(([^)]+?)\)$", RegexOptions.Compiled); Regex regBackgroundImage = new Regex(@"url\(([^)]+?)\)$", RegexOptions.Compiled);
Regex regGender = new Regex(@"性别: \n(.+?)\n", RegexOptions.Compiled); Regex regLifedate = new Regex(@"(.+?) 至 (.+)", RegexOptions.Compiled);
Regex regConstellation = new Regex(@"星座: \n(.+?)\n", RegexOptions.Compiled); Regex regHtmlTag = new Regex(@"<.?>", RegexOptions.Compiled);
Regex regBirthdate = new Regex(@"出生日期: \n(.+?)\n", RegexOptions.Compiled);
Regex regLifedate = new Regex(@"生卒日期: \n(.+?) 至 (.+)", RegexOptions.Compiled);
Regex regBirthplace = new Regex(@"出生地: \n(.+?)\n", RegexOptions.Compiled);
Regex regCelebrityRole = new Regex(@"职业: \n(.+?)\n", RegexOptions.Compiled);
Regex regNickname = new Regex(@"更多外文名: \n(.+?)\n", RegexOptions.Compiled);
Regex regFamily = new Regex(@"家庭成员: \n(.+?)\n", RegexOptions.Compiled);
Regex regCelebrityImdb = new Regex(@"imdb编号:\s+?(nm\d+)", RegexOptions.Compiled);
Regex regImgHost = new Regex(@"\/\/(img\d+?)\.", RegexOptions.Compiled); Regex regImgHost = new Regex(@"\/\/(img\d+?)\.", RegexOptions.Compiled);
// 匹配除了换行符之外所有空白 // 匹配除了换行符之外所有空白
Regex regOverviewSpace = new Regex(@"\n[^\S\n]+", RegexOptions.Compiled); Regex regOverviewSpace = new Regex(@"\n[^\S\n]+", RegexOptions.Compiled);
@ -90,7 +82,7 @@ namespace Jellyfin.Plugin.MetaShark.Api
var handler = new HttpClientHandlerEx(); var handler = new HttpClientHandlerEx();
this._cookieContainer = handler.CookieContainer; this._cookieContainer = handler.CookieContainer;
httpClient = new HttpClient(handler); httpClient = new HttpClient(handler);
httpClient.Timeout = TimeSpan.FromSeconds(10); httpClient.Timeout = TimeSpan.FromSeconds(20);
httpClient.DefaultRequestHeaders.Add("User-Agent", HTTP_USER_AGENT); httpClient.DefaultRequestHeaders.Add("User-Agent", HTTP_USER_AGENT);
httpClient.DefaultRequestHeaders.Add("Origin", "https://movie.douban.com"); httpClient.DefaultRequestHeaders.Add("Origin", "https://movie.douban.com");
httpClient.DefaultRequestHeaders.Add("Referer", "https://movie.douban.com/"); httpClient.DefaultRequestHeaders.Add("Referer", "https://movie.douban.com/");
@ -519,48 +511,64 @@ namespace Jellyfin.Plugin.MetaShark.Api
var contentNode = doc.QuerySelector("#content"); var contentNode = doc.QuerySelector("#content");
if (contentNode != null) if (contentNode != null)
{ {
var img = contentNode.GetAttr("#headline .nbg img", "src") ?? string.Empty; celebrity.Img = contentNode.GetAttr("img.avatar", "src") ?? string.Empty;
var nameStr = contentNode.GetText("h1") ?? string.Empty; var nameStr = contentNode.GetText("h1.subject-name") ?? string.Empty;
var name = this.ParseCelebrityName(nameStr); celebrity.Name = this.ParseCelebrityName(nameStr);
var englishName = nameStr.Replace(name, "").Trim(); celebrity.EnglishName = nameStr.Replace(celebrity.Name, "").Trim();
var intro = contentNode.GetText("#intro span.all") ?? string.Empty;
if (string.IsNullOrEmpty(intro))
{
intro = contentNode.GetText("#intro div.bd") ?? string.Empty;
}
var info = contentNode.GetText("div.info") ?? string.Empty;
var gender = info.GetMatchGroup(this.regGender);
var constellation = info.GetMatchGroup(this.regConstellation);
var birthdate = info.GetMatchGroup(this.regBirthdate);
// 生卒日期 var family = string.Empty;
var enddate = string.Empty; var propertyNodes = contentNode.QuerySelectorAll("ul.subject-property>li");
var match = this.regLifedate.Match(info); foreach (var li in propertyNodes)
if (match.Success && match.Groups.Count > 2)
{ {
birthdate = match.Groups[1].Value.Trim(); var label = li.GetText("span.label") ?? string.Empty;
enddate = match.Groups[2].Value.Trim(); var value = li.GetText("span.value") ?? string.Empty;
switch (label)
{
case "性别:":
celebrity.Gender = value;
break;
case "星座:":
celebrity.Constellation = value;
break;
case "出生日期:":
celebrity.Birthdate = value;
break;
case "去世日期:":
celebrity.Enddate = value;
break;
case "生卒日期:":
var match = this.regLifedate.Match(value);
if (match.Success && match.Groups.Count > 2)
{
celebrity.Birthdate = match.Groups[1].Value.Trim();
celebrity.Enddate = match.Groups[2].Value.Trim();
}
break;
case "出生地:":
celebrity.Birthplace = value;
break;
case "职业:":
celebrity.Role = value;
break;
case "更多外文名:":
celebrity.NickName = value;
break;
case "家庭成员:":
family = value;
break;
case "IMDb编号:":
celebrity.Imdb = value;
break;
default:
break;
}
} }
var birthplace = info.GetMatchGroup(this.regBirthplace); // 保留段落关系,把段落替换为换行符
var role = info.GetMatchGroup(this.regCelebrityRole); var intro = contentNode.GetHtml("section.subject-intro div.content") ?? string.Empty;
var nickname = info.GetMatchGroup(this.regNickname); intro = regHtmlTag.Replace(intro.Replace("</p>", "\n"), "");
var family = info.GetMatchGroup(this.regFamily);
var imdb = info.GetMatchGroup(this.regCelebrityImdb);
celebrity.Img = img;
celebrity.Gender = gender;
celebrity.Birthdate = birthdate;
celebrity.Enddate = enddate;
celebrity.NickName = nickname;
celebrity.EnglishName = englishName;
celebrity.Imdb = imdb;
celebrity.Birthplace = birthplace;
celebrity.Name = name;
celebrity.Intro = formatOverview(intro); celebrity.Intro = formatOverview(intro);
celebrity.Constellation = constellation;
celebrity.Role = role;
_memoryCache.Set<DoubanCelebrity?>(cacheKey, celebrity, expiredOption); _memoryCache.Set<DoubanCelebrity?>(cacheKey, celebrity, expiredOption);
return celebrity; return celebrity;
} }

View File

@ -20,6 +20,17 @@ namespace Jellyfin.Plugin.MetaShark.Core
return null; return null;
} }
/// <summary>
/// Runs <c>QuerySelector</c> on <paramref name="el"/> with the given selector and returns
/// the string produced by the matched node's <c>Html()</c> call, trimmed of surrounding whitespace.
/// </summary>
/// <param name="el">Element on which the selector query is executed.</param>
/// <param name="css">CSS selector passed to <c>QuerySelector</c>.</param>
/// <returns>The trimmed HTML string, or <c>null</c> when the selector matches nothing.</returns>
public static string? GetHtml(this IElement el, string css)
{
    // The null-conditional short-circuits only on a missing node;
    // Html() and Trim() are evaluated solely when a match exists.
    return el.QuerySelector(css)?.Html().Trim();
}
public static string GetTextOrDefault(this IElement el, string css, string defaultVal = "") public static string GetTextOrDefault(this IElement el, string css, string defaultVal = "")
{ {
var node = el.QuerySelector(css); var node = el.QuerySelector(css);

View File

@ -120,8 +120,9 @@ namespace Jellyfin.Plugin.MetaShark.Providers
var findResult = await this._tmdbApi.FindByExternalIdAsync(c.Imdb, FindExternalSource.Imdb, info.MetadataLanguage, cancellationToken).ConfigureAwait(false); var findResult = await this._tmdbApi.FindByExternalIdAsync(c.Imdb, FindExternalSource.Imdb, info.MetadataLanguage, cancellationToken).ConfigureAwait(false);
if (findResult?.PersonResults != null && findResult.PersonResults.Count > 0) if (findResult?.PersonResults != null && findResult.PersonResults.Count > 0)
{ {
this.Log($"GetPersonMetadata of found tmdb [id]: {findResult.PersonResults[0].Id}"); var foundTmdbId = findResult.PersonResults.First().Id.ToString();
item.SetProviderId(MetadataProvider.Tmdb, $"{findResult.PersonResults[0].Id}"); this.Log($"GetPersonMetadata of found tmdb [id]: {foundTmdbId}");
item.SetProviderId(MetadataProvider.Tmdb, $"{foundTmdbId}");
} }
} }