1.豆瓣api增加原名以及描述的摘取

2.优化名称截取,增加通过词库过滤干扰字符
3.修改TMDB接口地址
This commit is contained in:
jlzhu 2022-12-12 16:43:07 +08:00
parent 2899054876
commit 07e2f98c07
9 changed files with 709 additions and 35 deletions

View File

@ -165,7 +165,7 @@ namespace Jellyfin.Plugin.MetaShark.Api
var cat = titleStr.GetMatchGroup(this.regCat);
var subjectStr = movieElement.GetText("div.rating-info>.subject-cast") ?? string.Empty;
var year = subjectStr.GetMatchGroup(this.regYear);
var desc = movieElement.GetText("div.content>p") ?? string.Empty;
if (cat != "电影" && cat != "电视剧")
{
continue;
@ -174,11 +174,13 @@ namespace Jellyfin.Plugin.MetaShark.Api
var movie = new DoubanSubject();
movie.Sid = sid;
movie.Name = name;
movie.OriginalName = subjectStr.Split("/").FirstOrDefault(a => a.Contains("原名:"),"").Replace("原名:","");
movie.Genre = cat;
movie.Category = cat;
movie.Img = img;
movie.Rating = rating.ToFloat();
movie.Year = year.ToInt();
movie.Intro = desc;
list.Add(movie);
}

View File

@ -44,6 +44,12 @@ namespace Jellyfin.Plugin.MetaShark.Core
return 0.0f;
}
/// <summary>
/// Reports whether the string contains at least one character in the range U+4E00–U+9FA5.
/// </summary>
/// <param name="s">Text to inspect. May be null or empty.</param>
/// <returns>
/// <c>true</c> when any character of <paramref name="s"/> falls in that range;
/// <c>false</c> otherwise, including for null or empty input.
/// </returns>
public static bool IsChinese(this string s)
{
    // Treat a missing value as "not Chinese" instead of throwing, so callers can
    // pass optional fields (e.g. an original title that was never filled in).
    if (string.IsNullOrEmpty(s))
    {
        return false;
    }

    // The static overload goes through the runtime's regex cache, which avoids
    // constructing a new compiled Regex on every call. Stripping spaces first is
    // unnecessary because IsMatch already searches the whole string.
    return Regex.IsMatch(s, @"[\u4e00-\u9fa5]");
}
public static double Distance(this string s1, string s2)
{
var jw = new JaroWinkler();

View File

@ -0,0 +1,90 @@
namespace Jellyfin.Plugin.MetaShark.Parser
{
using Microsoft.Extensions.Logging;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
/// <summary>
/// Pulls a searchable title plus year, resolution and season/episode markers
/// out of a torrent-style file name.
/// </summary>
public class BTNamePareser
{
    // The patterns are static so that each one is compiled once for the process
    // rather than once per parser instance.

    // Release-group tag such as "[ABC-DEF]". Not currently read by Match.
    private static readonly Regex btGroupReg = new Regex(@"\[[a-zA-Z\-]*\]", RegexOptions.Compiled);

    // Four-digit year 1800-2099. The previous character classes skipped 1950-1969
    // and accepted values such as "1080".
    private static readonly Regex yearReg = new Regex(@"(?:18|19|20)[0-9]{2}", RegexOptions.Compiled);

    private static readonly Regex resoReg = new Regex(@"([0-9]{3,4}[pP])|([0-9]{3,4}[iI])|([hH][dD])|(4[kK])|([sS][dD])", RegexOptions.Compiled);

    // Audio/video codec names. Not currently read by Match.
    // The pattern is assembled from separate literals: a verbatim string that spans
    // several source lines would embed the line breaks in the pattern, which makes
    // the alternatives next to each break unmatchable on a one-line file name.
    private static readonly Regex codeReg = new Regex(
        @"([hH]\.[0-9]{3})|([vV][cC]-?1)|([xX][vV][iI][dD])"
        + @"|([mM][pP][eE][Gg]-?\d)|([fF][lL][aA][Cc])|([aA][pP][eE])|([dD][tT][sS])|([aA][cC]-?\d)|([wW][aA][vV])"
        + @"|([mM][pP]\d)|([aA][lL][aA][cC])|([aA]{2}[cC])",
        RegexOptions.Compiled);

    private static readonly Regex chineseReg = new Regex(@"[\u4e00-\u9fa5]{1,}", RegexOptions.Compiled);
    private static readonly Regex serisReg = new Regex(@"([sS][0-9]{1,2})|([Ss][eE][rR][iI][sS][0-9]{1,2})", RegexOptions.Compiled);
    private static readonly Regex episodeReg = new Regex(@"([eE][0-9]{1,3})|([Ee][pP][iI][sS][oO][dD][eE][0-9]{1,3})", RegexOptions.Compiled);

    /// <summary>
    /// Fields recovered from a file name. A property is null when its pattern did not match.
    /// </summary>
    public class ResourceInfo
    {
        /// <summary>The name returned by the trimmer, before any field is removed.</summary>
        public string? Name { get; set; }

        /// <summary>First run of consecutive characters in U+4E00–U+9FA5.</summary>
        public string? ChineseName { get; set; }

        /// <summary>
        /// <see cref="Name"/> with every such run removed and the ends trimmed.
        /// Year, season and episode tokens are not removed from this value.
        /// </summary>
        public string? EnglishName { get; set; }

        /// <summary>First year-like token, as text.</summary>
        public string? Year { get; set; }

        /// <summary>First resolution token, e.g. "1080p" or "4K".</summary>
        public string? Resolution { get; set; }

        /// <summary>First season token, e.g. "S01".</summary>
        public string? Seris { get; set; }

        /// <summary>First episode token, e.g. "E03".</summary>
        public string? Episode { get; set; }

        /// <summary>
        /// Reports whether a season or an episode token was found.
        /// </summary>
        /// <returns><c>true</c> when <see cref="Seris"/> or <see cref="Episode"/> is set.</returns>
        public bool isSeris()
        {
            return Seris != null || Episode != null;
        }
    }

    /// <summary>
    /// Trims <paramref name="btFileName"/> with <c>NameTrimmer.trimName</c> and then
    /// extracts the individual fields from the trimmed text.
    /// </summary>
    /// <param name="btFileName">File name to analyse.</param>
    /// <param name="_logger">Logger handed through to the trimmer.</param>
    /// <returns>A populated <see cref="ResourceInfo"/>; unmatched fields are null.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="btFileName"/> is null.</exception>
    public ResourceInfo Match(string btFileName, ILogger _logger)
    {
        if (btFileName == null)
        {
            throw new ArgumentNullException(nameof(btFileName));
        }

        var trimmedName = new NameTrimmer().trimName(btFileName, _logger);

        return new ResourceInfo
        {
            Name = trimmedName,
            ChineseName = GetMatch(trimmedName, chineseReg)?.MatchContent,
            EnglishName = ReplaceMatch(trimmedName, "", chineseReg).Trim(),
            Year = GetMatch(trimmedName, yearReg)?.MatchContent,
            Resolution = GetMatch(trimmedName, resoReg)?.MatchContent,
            Seris = GetMatch(trimmedName, serisReg)?.MatchContent,
            Episode = GetMatch(trimmedName, episodeReg)?.MatchContent,
        };
    }

    // Position and trimmed text of a single regex hit.
    private class MatchResult
    {
        public int Index { get; set; }

        public string MatchContent { get; set; } = string.Empty;
    }

    // Returns the first hit of reg in text, or null when there is none.
    private static MatchResult? GetMatch(string text, Regex reg)
    {
        var match = reg.Match(text);
        if (!match.Success)
        {
            return null;
        }

        return new MatchResult
        {
            Index = match.Index,
            MatchContent = match.Value.Trim(),
        };
    }

    // Replaces every hit of reg in text with replacement.
    private static string ReplaceMatch(string text, string replacement, Regex reg)
    {
        return reg.Replace(text, replacement);
    }
}
}

View File

@ -0,0 +1,133 @@

namespace Jellyfin.Plugin.MetaShark.Parser
{
using Microsoft.Extensions.Logging;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Text;
using System.Threading.Tasks;
/// <summary>
/// Cleans a file name by turning punctuation into spaces and dropping every word
/// that appears in the noise-word dictionary shipped as <c>Parser/dict.txt</c>.
/// </summary>
class NameTrimmer
{
    // Characters that are replaced by a space before the name is split into words.
    private const string Punctuation = "!\"#$%&()*+,-./:;<=>?@[\\]^_{|}~";

    // Guards construction of the shared dictionary.
    private static readonly object dictLock = new object();

    // Dictionary trie shared by all instances; built on first use.
    private static TreeNode? node;

    /// <summary>
    /// Gets the directory that contains the executing assembly.
    /// </summary>
    public static string AssemblyDirectory
    {
        get
        {
            // Assembly.CodeBase is obsolete (SYSLIB0012); Location is the documented replacement.
            string location = Assembly.GetExecutingAssembly().Location;
            if (string.IsNullOrEmpty(location))
            {
                // Location is empty for assemblies that were not loaded from a file path.
                return AppContext.BaseDirectory;
            }

            return Path.GetDirectoryName(location) ?? string.Empty;
        }
    }

    /// <summary>
    /// Removes punctuation and dictionary words from <paramref name="name"/>.
    /// </summary>
    /// <param name="name">Raw file name.</param>
    /// <param name="_logger">Receives progress messages.</param>
    /// <returns>
    /// The surviving words in their original order and casing, each followed by a
    /// single space; an empty string when nothing survives.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
    public string trimName(string name, ILogger _logger)
    {
        if (name == null)
        {
            throw new ArgumentNullException(nameof(name));
        }

        TreeNode dictionary = GetDictionary(_logger);

        _logger.LogInformation("解析名称");
        foreach (char c in Punctuation)
        {
            name = name.Replace(c, ' ');
        }

        _logger.LogInformation("开始比对{Name}", name);
        var kept = new StringBuilder();
        foreach (string word in name.Split(' ', StringSplitOptions.RemoveEmptyEntries))
        {
            // Invariant lower-casing keeps the lookup independent of the server's culture
            // (a culture-sensitive ToLower maps 'I' differently under e.g. tr-TR).
            if (dictionary.search(word.ToLowerInvariant()))
            {
                continue;
            }

            kept.Append(word).Append(' ');
        }

        string result = kept.ToString();
        _logger.LogInformation("比对结果{Result}", result);
        return result;
    }

    // Returns the shared dictionary, loading it from disk on first use.
    // The trie is fully populated before it is stored in the static field, and the
    // whole check runs under a lock, so no caller can observe a half-built dictionary.
    private static TreeNode GetDictionary(ILogger logger)
    {
        lock (dictLock)
        {
            if (node == null)
            {
                logger.LogInformation("生成词库树");

                var words = new List<string>();
                string dictPath = Path.Combine(AssemblyDirectory, "Parser", "dict.txt");
                foreach (string line in File.ReadAllLines(dictPath))
                {
                    // Each entry is "<word> <count>"; only the word is used.
                    string[] fields = line.Split(' ', StringSplitOptions.RemoveEmptyEntries);
                    if (fields.Length == 2)
                    {
                        words.Add(fields[0]);
                    }
                }

                var root = new TreeNode();
                root.insertMany(words);
                node = root;

                logger.LogInformation("生成词库树完成");
            }

            return node;
        }
    }

    // Character trie used for exact whole-word membership tests.
    private class TreeNode
    {
        // True when the path from the root to this node spells a complete dictionary word.
        private bool IsLeaf = false;

        // Child nodes keyed by their character.
        private readonly Dictionary<char, TreeNode> nodes = new Dictionary<char, TreeNode>();

        // Adds word to the trie, creating nodes as needed.
        public void insert(string word)
        {
            var current = this;
            foreach (char c in word)
            {
                if (!current.nodes.TryGetValue(c, out var next))
                {
                    next = new TreeNode();
                    current.nodes.Add(c, next);
                }

                current = next;
            }

            current.IsLeaf = true;
        }

        // Adds every entry of words to the trie.
        public void insertMany(List<string> words)
        {
            foreach (var word in words)
            {
                this.insert(word);
            }
        }

        // Returns true only when word was inserted as a complete word (prefixes do not count).
        public bool search(string word)
        {
            var current = this;
            foreach (char c in word)
            {
                if (!current.nodes.TryGetValue(c, out var next))
                {
                    return false;
                }

                current = next;
            }

            return current.IsLeaf;
        }
    }
}
}

View File

@ -0,0 +1,419 @@
www 3268
org 685
mp4 16210
cc 620
the 10098
web 4943
dlrip 539
x264 7897
720p 7931
mkv 7680
tv 774
rip 252
flac 1199
sis001 1584
movie 298
1080p 14008
nf 320
dl 2478
x265 2381
10bit 867
hdr 350
ddp5 748
sexinsex 351
net 857
dark 217
of 3905
3d 389
real 251
x64 264
com 5469
rmvb 1159
no 1867
on 728
live 427
iso 542
by 1886
1280x720 487
hevc 1810
aac 3110
x 833
family 377
multi 247
subs 342
thz 610
la 1151
heyzo 510
xxx 7648
2160p 1209
ktr 2665
rarbg 4056
art 335
little 382
digital 421
portable 237
war 290
with 1084
bluray 3310
h264 2757
dts 1075
xyz 411
fc2ppv 632
ts 514
zip 3188
you 452
it 423
va 303
music 216
mp3 996
320kbps 236
hd 2964
cum 305
avi 5544
cd 251
webrip 2874
yts 427
mx 331
dvdrip 2255
for 1090
teen 377
and 3868
fuck 491
480p 1169
black 846
in 2108
vol 1066
internal 215
n1c 362
to 1537
7z 372
ass 434
b 435
xvid 2726
ac3 1482
a 2816
two 214
i 979
bdrip 1771
rose 250
me 1077
1pondo 311
dual 516
raws 574
girl 660
gb 571
pack 352
edition 428
avc 946
new 919
life 335
h265 409
hardcore 267
red 269
fc2 426
ppv 448
o 397
white 342
imageset 249
all 555
season 690
hdtv 2013
pl 417
pedo 597
baby 310
eztv 1192
re 730
fhd 574
bbqddq 216
man 380
tgx 708
dd5 348
h 798
rartv 368
tokyo 546
hot 961
xbay 208
my 1027
game 268
rar 1740
deep 264
ru 376
sex 986
stars 233
team 370
audio 378
repack 446
her 547
best 450
dvd 575
rus 816
lostfilm 246
e 346
brrip 461
fucked 240
theav 244
wa 324
is 491
english 984
m 248
ita 510
eng 1380
sub 398
ii 315
pthc 1820
lolita 324
xvx 475
t 225
69av 252
one 554
divx 207
house 259
hdrip 601
bd 657
american 349
ntb 353
tvboxnow 413
chs 754
v 391
pdf 1202
f 216
complete 524
collection 598
ukr 201
video 409
studio 268
ptsc 291
blue 229
angel 316
bbc 224
time 336
series 264
prt 400
olo 512
s 595
de 750
preteen 507
anal 863
r 409
jpg 284
wmv 1489
ni 255
last 240
amzn 597
part 521
full 417
remux 297
ma 466
w 212
fucking 250
aavv333 256
big 927
at 396
hjd2048 287
sd 733
fgt 240
child 237
uncensored 384
boy 284
aac2 252
4k 489
cock 382
ion10 268
pussy 321
tits 217
c 813
djvu 279
wild 220
club 411
high 258
d 487
brazzers 265
megusta 235
dead 277
gog 238
out 248
christmas 214
private 253
top 239
wife 222
3dmgame 292
green 352
french 391
extreme 231
mpg 788
gets 250
first 308
korean 236
star 622
japanese 267
av 293
torrenting 483
up 357
l 226
young 364
sky 273
story 241
chinese 256
hd1080p 344
kleenex 326
big5 479
1920x1080 340
carib 292
v1 441
hindi 208
exe 407
day 319
pro 307
atmos 291
sexy 279
love 927
final 282
epub 475
night 286
vs 296
next 421
msd 209
us 295
en 498
porn 289
k 233
wrb 262
girls 579
nike 255
blu 316
ray 400
j 207
from 357
alexis 207
mide 205
world 385
san 278
g 281
hard 278
good 217
p 397
blonde 206
castellano 278
rq 282
cap 205
mov 243
cht 234
your 301
raw 212
milf 240
snis 245
avistaz 1
cinemaz 1
exoticaz 1
privatehd 1
aidoru!online 1
bibliotik 1
bwt 1
concertos 1
xtr 1
space 1
torrents 1
teamhd 1
empornium 1
六维空间 1
skyeysnow 1
ab 1
btn 1
ggn 1
gfxpeers 1
jpopsuki 1
lztr 1
nebulance 1
ptp 1
alpharatio 1
anthelion 1
bemaniso 1
brks 1
dic 1
gpw 1
forever 1
oppaitime 1
ops 1
red 1
snakepop 1
sugoimusic 1
uhdbits 1
cgpeers 1
iptorrents 1
torrentstd 1
1ptbar 1
52pt 1
爱薇网 1
byrbt 1
chdbits 1
discfan 1
torrentccf 1
hdai 1
hdatmos 1
hdchina 1
hdcity 1
dolby 1
hdfans 1
hdhome 1
hdsky 1
hdtime 1
hdzone 1
海棠pt 1
hudbt 1
joyhd 1
mteam 1
lemonhd 1
南洋pt 1
nicept 1
npubits 1
opencd 1
ourbits 1
btschool 1
tlfbits 1
hd4fans 1
伊甸园 1
hdu 1
pt@keepfrds 1
ptmsg 1
麦田pt 1
葡萄 1
聆音club 1
溪涧草堂pt 1
pter 1
pthome 1
烧包 1
ssd 1
北洋园 1
u2 1
ultrahd 1
备胎 1
haidan 1
hdarea 1
百川pt 1
pttime 1
aither 1
asiancinema 1
beyondhd 1
blutopia 1
jptv 1
hdpost 1
animetorrents 1
bb 1
ccfbits 1
cinemageddon 1
filelist 1
hdb 1
hdroute 1
karagarga 1
pornbits 1
pussytorrents 1
sdbits 1
intheshadow 1
tg 1
ttg 1
cinematik 1
mtv 1
myanonamouse 1
torrentleech 1
torrentseeds 1
ptt 1

View File

@ -21,6 +21,7 @@ using System.Web;
using TMDbLib.Objects.General;
using Jellyfin.Plugin.MetaShark.Configuration;
using Jellyfin.Plugin.MetaShark.Core;
using Jellyfin.Plugin.MetaShark.Parser;
namespace Jellyfin.Plugin.MetaShark.Providers
{
@ -80,11 +81,13 @@ namespace Jellyfin.Plugin.MetaShark.Providers
{
// ParseName is required here.
// Caller provides the filename with extension stripped and NOT the parsed filename
var searchName = info.Name;
var parsedName = this._libraryManager.ParseName(info.Name);
if (parsedName != null)
BTNamePareser pareser = new BTNamePareser();
var search_info = pareser.Match(info.Name, this._logger);
string searchName = search_info.ChineseName != null ? search_info.ChineseName : search_info.EnglishName;
if (info.Year == null && search_info.Year != null)
{
searchName = parsedName.Name;
info.Year = int.Parse(search_info.Year);
}
this.Log($"GuessByDouban of [name]: {info.Name} year: {info.Year} search name: {searchName}");
@ -101,23 +104,25 @@ namespace Jellyfin.Plugin.MetaShark.Providers
{
continue;
}
if (jw.Similarity(searchName, item.Name) < 0.8)
{
continue;
}
if (parsedName == null || parsedName.Year == null || parsedName.Year == 0)
//英文关键词搜,结果是中文的情况,不适用相似匹配
if (jw.Similarity(searchName, item.Name) > 0.8
|| jw.Similarity(searchName, item.OriginalName) > 0.8)
{
this.Log($"GuessByDouban of [name] found Sid: {item.Sid}");
return item.Sid;
}
if (parsedName.Year == item.Year)
if (item.Name.Contains(searchName) && (info.Year != null && info.Year == item.Year))
{
this.Log($"GuessByDouban of [name] found Sid: {item.Sid}");
return item.Sid;
}
if (searchName.IsChinese() != item.Name.IsChinese()
&& searchName.IsChinese() != item.OriginalName.IsChinese())
{
this.Log($"GuessByDouban of [name] found tmdb id: \"{item.Sid}\"");
return item.Sid;
}
}
return null;
@ -139,19 +144,19 @@ namespace Jellyfin.Plugin.MetaShark.Providers
{
continue;
}
var score = jw.Similarity(name, item.Name);
// this.Log($"GuestDoubanSeasonByYear name: {name} douban_name: {item.Name} douban_sid: {item.Sid} douban_year: {item.Year} score: {score} ");
if (score < 0.8)
{
continue;
}
if (year == item.Year)
if (jw.Similarity(name, item.Name) > 0.8)
{
this.Log($"GuestDoubanSeasonByYear of [name] found Sid: {item.Sid}");
return item.Sid;
}
if ((name.IsChinese() != item.Name.IsChinese()
&& name.IsChinese() != item.OriginalName.IsChinese()) || year == item.Year)
{
this.Log($"GuestDoubanSeasonByYear of [name] found Sid: \"{item.Sid}\"");
return item.Sid;
}
}
return null;
@ -202,17 +207,32 @@ namespace Jellyfin.Plugin.MetaShark.Providers
{
// ParseName is required here.
// Caller provides the filename with extension stripped and NOT the parsed filename
var parsedName = this._libraryManager.ParseName(info.Name);
this.Log($"GuestByTmdb of [name]: {info.Name} search name: {parsedName.Name}");
BTNamePareser pareser = new BTNamePareser();
var search_info = pareser.Match(info.Name, this._logger);
string searchName = search_info.ChineseName != null ? search_info.ChineseName : search_info.EnglishName;
if (info.Year == null && search_info.Year != null)
{
info.Year = int.Parse(search_info.Year);
}
this.Log($"GuestByTmdb of [name]: {info.Name} search name: {searchName}");
var jw = new JaroWinkler();
switch (info)
{
case MovieInfo:
var movieResults = await this._tmdbApi.SearchMovieAsync(parsedName.Name, parsedName.Year ?? 0, info.MetadataLanguage, cancellationToken).ConfigureAwait(false);
var movieResults = await this._tmdbApi.SearchMovieAsync(searchName, info.Year ?? 0, info.MetadataLanguage, cancellationToken).ConfigureAwait(false);
foreach (var item in movieResults)
{
if (jw.Similarity(parsedName.Name, item.Title) > 0.8)
if (jw.Similarity(searchName, item.Title) > 0.8
|| jw.Similarity(searchName, item.OriginalTitle) > 0.8)
{
this.Log($"GuestByTmdb of [name] found tmdb id: \"{item.Id}\"");
return item.Id.ToString(CultureInfo.InvariantCulture);
}
if (searchName.IsChinese() != item.Title.IsChinese()
&& searchName.IsChinese() != item.OriginalTitle.IsChinese())
{
this.Log($"GuestByTmdb of [name] found tmdb id: \"{item.Id}\"");
return item.Id.ToString(CultureInfo.InvariantCulture);
@ -220,10 +240,17 @@ namespace Jellyfin.Plugin.MetaShark.Providers
}
break;
case SeriesInfo:
var seriesResults = await this._tmdbApi.SearchSeriesAsync(parsedName.Name, info.MetadataLanguage, cancellationToken).ConfigureAwait(false);
var seriesResults = await this._tmdbApi.SearchSeriesAsync(searchName, info.MetadataLanguage, cancellationToken).ConfigureAwait(false);
foreach (var item in seriesResults)
{
if (jw.Similarity(parsedName.Name, item.Name) > 0.8)
if (jw.Similarity(searchName, item.Name) > 0.8
|| jw.Similarity(searchName, item.OriginalName) > 0.8)
{
this.Log($"GuestByTmdb of [name] found tmdb id: \"{item.Id}\"");
return item.Id.ToString(CultureInfo.InvariantCulture);
}
if (searchName.IsChinese() != item.Name.IsChinese()
&& searchName.IsChinese() != item.OriginalName.IsChinese())
{
this.Log($"GuestByTmdb of [name] found tmdb id: \"{item.Id}\"");
return item.Id.ToString(CultureInfo.InvariantCulture);

View File

@ -48,7 +48,6 @@ namespace Jellyfin.Plugin.MetaShark.Providers
{
return result;
}
// 从douban搜索
var res = await this._doubanApi.SearchAsync(info.Name, cancellationToken).ConfigureAwait(false);
result.AddRange(res.Take(this.config.MaxSearchResult).Select(x =>
@ -141,7 +140,6 @@ namespace Jellyfin.Plugin.MetaShark.Providers
}
}
}
result.Item = movie;
result.QueriedById = true;
result.HasMetadata = true;
@ -158,7 +156,6 @@ namespace Jellyfin.Plugin.MetaShark.Providers
}
if (!string.IsNullOrEmpty(tmdbId))
{
this.Log($"GetMovieMetadata of tmdb [id]: \"{tmdbId}\"");

View File

@ -150,9 +150,9 @@ namespace Jellyfin.Plugin.MetaShark.Providers
result.HasMetadata = true;
subject.LimitDirectorCelebrities.Take(this.config.MaxCastMembers).ToList().ForEach(c => result.AddPerson(new PersonInfo
{
Name = c.Name,
Type = c.RoleType,
Role = c.Role,
Name = c.Name == "" ? "未知" : c.Name,
Type = c.RoleType == "" ? "未知" : c.Name,
Role = c.Role == "" ? "未知" : c.Name,
ImageUrl = c.Img,
ProviderIds = new Dictionary<string, string> { { DoubanProviderId, c.Id } },
}));

View File

@ -17,7 +17,7 @@ namespace TMDbLib.Client
public partial class TMDbClient : IDisposable
{
private const string ApiVersion = "3";
private const string ProductionUrl = "api.themoviedb.org";
private const string ProductionUrl = "api.tmdb.org";
private readonly ITMDbSerializer _serializer;
private RestClient _client;