From 48700c12403ea1fc77d4fa6bd84ce1a208b876b0 Mon Sep 17 00:00:00 2001
From: sinanmohd
Date: Sat, 7 Oct 2023 19:35:23 +0530
Subject: server/opensubtitles: init search_filesize

---
Review notes (changes relative to the patch as originally posted):
- lib/attr.lua: fix the '[M]m...' mpeg pattern, store audio codecs under
  'acodec' and score 'res'/'title' so the keys match what build() sets,
  stop build_atom() from clearing build_dlim() results, drop goto so the
  file also parses on Lua 5.1.
- server/opensubtitles.lua: guard against a missing file size, a page
  that ends mid-entry and nameless series hits; parse the page once.

 lib/attr.lua             | 265 +++++++++++++++++++++++++++++++++++++++++++++++
 lib/util.lua             |  11 +++
 main.lua                 |   8 +-
 server/opensubtitles.lua | 105 +++++++++++++++++-
 4 files changed, 383 insertions(+), 6 deletions(-)
 create mode 100644 lib/attr.lua

diff --git a/lib/attr.lua b/lib/attr.lua
new file mode 100644
index 0000000..8c1613a
--- /dev/null
+++ b/lib/attr.lua
@@ -0,0 +1,265 @@
+#!/usr/bin/env lua
+
+local util = require 'lib/util'
+
+-- Strip every pattern in `patterns` from `name`.
+-- Returns the stripped name and the first match found (nil when none hit).
+local extract = function (name, patterns)
+	local r
+
+	for _, p in ipairs(patterns) do
+		r = r or name:match(p)
+		name = name:gsub(p, '')
+	end
+
+	return name, r
+end
+
+-- extract() wrapper that records the match in attrs[key]. A value found by
+-- an earlier pass is kept, so build_atom cannot wipe what build_dlim found.
+local extract_into = function (name, attrs, key, patterns)
+	local r
+
+	name, r = extract(name, patterns)
+	attrs[key] = attrs[key] or r
+
+	return name
+end
+
+-- extract() wrapper for season/episode markers: the first number of the
+-- match is the season, the last one the episode.
+local extract_series = function (name, attrs, patterns)
+	local r
+
+	name, r = extract(name, patterns)
+	if r then
+		attrs.season = attrs.season or tonumber(r:match('%d+'))
+		attrs.episode = attrs.episode or tonumber(r:match('%d+$'))
+	end
+
+	return name
+end
+
+-- Attributes whose parts may be split by '-', '.' or whitespace.
+local build_dlim = function (name, attrs)
+	local dlim
+
+	attrs = attrs or {}
+	dlim = '[%-%.%s]?'
+	local vcodecs = {
+		'[Mm][Pp][Ee][Gg]' .. dlim .. '[1234]',
+	}
+	local acodecs = {
+		'[Dd][Tt][Ss]' .. dlim .. '[Hh][Dd]',
+		'[Dd][Dd]' .. dlim .. '[57]%.1',
+	}
+	local sources= {
+		"[Ww][Ee][Bb]" .. dlim .. "[Dd][Ll]",
+		"[Hh][Dd]" .. dlim .. "[Tt][Vv]",
+		"[Hh][Dd]" .. dlim .. "[Tt][Ss]",
+	}
+	local series = {
+		'[Ss]%d%d?' .. dlim .. '[Ee]%d%d?',
+	}
+	local sizes = {
+		'%d%d%d' .. dlim .. '[Mm][Bb]',
+		'%d%d?%.%d%d?' .. dlim .. '[Gg][Bb]',
+	}
+	local depths = {
+		'1[02]' .. dlim .. '[Bb][Ii][Tt]'
+	}
+
+	name = extract_into(name, attrs, 'vcodec', vcodecs)
+	name = extract_into(name, attrs, 'source', sources)
+	name = extract_into(name, attrs, 'acodec', acodecs)
+	name = extract_into(name, attrs, 'size', sizes)
+	name = extract_into(name, attrs, 'depth', depths)
+	name = extract_series(name, attrs, series)
+
+	return name, attrs
+end
+
+-- Attributes that are a single unbroken token.
+local build_atom = function (name, attrs)
+	local year, this_year
+
+	attrs = attrs or {}
+	local vcodecs = {
+		"[Aa][Vv]1",
+		"[xXHh]26[345]",
+		"[Aa][Vv][Cc]",
+		"[Hh][Ee][Vv][Cc]",
+		"[Xx][Vv][Ii][Dd]",
+	}
+	local acodecs = {
+		"[Oo][Pp][Uu][Ss]",
+		"[Aa][Aa][Cc]",
+		"[Ee]?[Aa][Cc]3",
+		"[Dd][Tt][Ss]",
+	}
+	local sources= {
+		"[Bb][Ll][Uu][Rr][Aa][Yy]",
+		"[Bb][Rr][Rr][Ii][Pp]",
+		"[Dd][Vv][Dd][Rr][Ii][Pp]",
+		"[Ww][Ee][Bb][Rr][Ii][Pp]",
+		"[Hh][Dd][Rr][Ii][Pp]",
+		"[Rr][Ee][Rr][Ii][Pp]",
+	}
+	local reses = {
+		"2160[Pp]",
+		"1440[Pp]",
+		"1080[Pp]",
+		"720[Pp]",
+		"480[Pp]",
+		"[Uu][Hh][Dd]",
+		"4[Kk]"
+	}
+	local series = {
+		'%d%d[Xx]%d%d',
+	}
+	local channels = {
+		'6[Cc][Hh]',
+		'[57]%.1',
+	}
+
+	name = extract_into(name, attrs, 'vcodec', vcodecs)
+	name = extract_into(name, attrs, 'source', sources)
+	name = extract_into(name, attrs, 'res', reses)
+	name = extract_into(name, attrs, 'acodec', acodecs)
+	name = extract_into(name, attrs, 'channel', channels)
+	name = extract_series(name, attrs, series)
+
+	-- the last plausible four digit number wins as the release year
+	this_year = tonumber(os.date('%Y'))
+	for y in name:gmatch('%d%d%d%d') do
+		year = tonumber(y)
+		if year > 1900 and year <= this_year then
+			attrs.year = y
+		end
+	end
+	if attrs.year then
+		name = name:gsub(tostring(attrs.year), '')
+	end
+
+	return name, attrs
+end
+
+-- Low-priority markers, collected as a list in attrs.low.
+local build_low = function (name, attrs)
+	local low_attr, lows
+
+	lows = { 'SDH' }
+
+	low_attr = {}
+	for _, low in ipairs(lows) do
+		low_attr[#low_attr + 1] = name:match(low)
+		name = name:gsub(low, '')
+	end
+
+	attrs = attrs or {}
+	if #low_attr > 0 then
+		attrs.low = low_attr
+	end
+
+	return name, attrs
+end
+
+-- Whatever words are left form the title; with more than one word the
+-- last one is stored as attrs.scene instead.
+local build_title = function (name, attrs)
+	attrs = attrs or {}
+	attrs.title = {}
+	for w in name:gmatch('%w+') do
+		attrs.title[#attrs.title + 1] = w
+	end
+
+	if #attrs.title > 1 then
+		attrs.scene = attrs.title[#attrs.title]
+		attrs.title[#attrs.title] = nil
+	end
+
+	return attrs
+end
+
+-- Parse a release name into a table of attributes.
+local build = function (name)
+	local attrs = {}
+
+	name = build_dlim(name, attrs)
+	name = build_atom(name, attrs)
+	name = build_low(name, attrs)
+	build_title(name, attrs)
+
+	return attrs
+end
+
+-- Number of values in array a1 that also occur in array a2.
+local array_common = function (a1, a2)
+	local n = 0
+
+	for _, v in pairs(a1) do
+		if util.array_search(a2, v) then
+			n = n + 1
+		end
+	end
+
+	return n
+end
+
+-- Score how well attribute table a2 agrees with a1. List attributes
+-- (title, low) earn their weight once per shared entry.
+local weigh = function (a1, a2)
+	local key_score, score, weight
+
+	key_score = {
+		title = 10,
+		season = 10,
+		episode = 10,
+		source = 7,
+		scene = 5,
+		vcodec = 3,
+		acodec = 3,
+		res = 2,
+		default = 1,
+	}
+
+	score = 0
+	-- no goto/labels here: they are a syntax error on Lua 5.1
+	for k, v in pairs(a1) do
+		weight = key_score[k] or key_score.default
+		if type(v) == 'table' and type(a2[k]) == 'table' then
+			score = score + weight * array_common(v, a2[k])
+		elseif v == a2[k] then
+			score = score + weight
+		end
+	end
+
+	return score
+end
+
+-- Return the value of tab whose key (a release name) scores best against
+-- name; an arbitrary entry if nothing scores above 0, nil if tab is empty.
+local fuzzy = function (name, tab)
+	local name_attr, high, score
+
+	high = {
+		score = 0,
+		name = next(tab)
+	}
+
+	name_attr = build(name)
+	for k in pairs(tab) do
+		score = weigh(name_attr, build(k))
+		if score > high.score then
+			high.score = score
+			high.name = k
+		end
+	end
+
+	return tab[high.name]
+end
+
+return {
+	build = build,
+	fuzzy = fuzzy,
+}
diff --git a/lib/util.lua b/lib/util.lua
index d52922d..6263507 100644
--- a/lib/util.lua
+++ b/lib/util.lua
@@ -70,6 +70,16 @@ local array_merge = function (a1, a2)
 	return a
 end
 
+local array_search = function (a, key)
+	for _, v in pairs(a) do
+		if v == key then
+			return true
+		end
+	end
+
+	return false
+end
+
 local table_print = function (t)
 	for k, v in pairs(t) do
 		print( '|'.. k .. '=' .. v .. '|')
@@ -186,6 +196,7 @@ return {
 	table_print = table_print,
 	table_match_or_any = table_match_or_any,
 	array_merge = array_merge,
+	array_search = array_search,
 	zip_ext_first = zip_ext_first,
 	string_vid_path_to_name = string_vid_path_to_name,
 	opensubtitles_hash = opensubtitles_hash,
diff --git a/main.lua b/main.lua
index f54c673..0f62464 100644
--- a/main.lua
+++ b/main.lua
@@ -42,7 +42,7 @@ local sub_needed = function ()
 end
 
 local sub_setup = function ()
-	local out, name, path, rc
+	local out, name, path, rc, filesize
 
 	mp.osd_message('fetching subtitle')
 
@@ -61,8 +61,12 @@ local sub_setup = function ()
 	if not util.file_exists(path) then
 		name = mp.get_property_native('media-title')
 	end
+	filesize = mp.get_property_native('file-size')
 
-	rc = opensubtitles.search(path, out, name)
+	rc = opensubtitles.search(path, out, {
+		name = name,
+		filesize = filesize
+	})
 	if not rc then
 		rc = subscene.search(path, out, name)
 	end
diff --git a/server/opensubtitles.lua b/server/opensubtitles.lua
index b797f44..f0c6195 100644
--- a/server/opensubtitles.lua
+++ b/server/opensubtitles.lua
@@ -2,6 +2,7 @@
 
 local curl = require 'lib/curl'
 local util = require 'lib/util'
+local attr = require 'lib/attr'
 
 -- [[ languages supported by opensubtitles ]] --
 local languages = {
@@ -134,14 +135,109 @@ local search_ohash = function (ohash)
 	end
 end
 
-local search = function (path, out, name)
-	local ohash, link
+-- Map release name -> subtitle id for every hit on a search result page.
+-- Hits without a usable name are stored under '0', '1', ...
+local ids_fetch = function (page)
+	local iter, no_name, line, id, name, tab
+
+	tab = {}
+	no_name = 0
+	iter = page:gmatch('[^\n\r]+')
+	while true do
+		line = iter()
+		if not line then
+			break
+		end
+
+		id = line:match('/en/subtitles/(%d+)')
+		if id then
+			-- iter() yields nil once the page is exhausted; '' keeps
+			-- the string calls below from raising on a truncated page
+			line = iter() or '' -- movie
+			if line:find('%.%.%.$') then
+				-- name cuts off...
+				name = line:gsub('"[^"]*$', '')
+				name = name:match('[^"]+$')
+			else
+				-- NOTE(review): these two patterns were mangled in the copy
+				-- of the patch under review; rebuilt to mirror the branch above
+				name = line:gsub('<[^<]*$', '')
+				name = name:match('[^>]+$')
+			end
+
+			if not name then
+				line = iter() or ''
+
+				if line:find('^%[S%d%dE%d%d%]$') then
+					-- it's a series
+					line = iter() or ''
+					if line:find('%.%.%.$') then
+						name = line:gsub('^.*title="', '')
+						name = name:match('[^"]+')
+					else
+						name = line:match('[^<]+')
+					end
+				end
+			end
+
+			if not name then
+				-- no name
+				name = tostring(no_name)
+				no_name = no_name + 1
+			end
+
+			tab[name] = id
+		end
+	end
+
+	return tab
+end
+
+-- Look up subtitles by the video's size in bytes and pick the hit whose
+-- release name is closest to name. Returns a download url or nil.
+local search_filesize = function (filesize, name)
+	local fetch, hcode, url, id, a
+
+	-- the caller may have no size or name to offer; .. on nil would raise
+	if not filesize or not name then
+		return nil
+	end
+
+	a = attr.build(name)
+
+	url = domain .. '/en' .. '/search/sublanguageid-' .. languages[language]
+	if a.season and a.episode then
+		url = url .. '/season-' .. a.season .. '/episode-' .. a.episode
+	end
+	url = url .. '/moviebytesize-' .. filesize
+
+	fetch, hcode = curl.get(url, nil, nil, tries)
+	if not hcode or not fetch then
+		return nil
+	end
+
+	id = attr.fuzzy(name, ids_fetch(fetch))
+	if id then
+		return domain .. '/en/subtitleserve/sub/' .. id
+	end
+end
+
+local search = function (path, out, info)
+	local ohash, link, name
+
+	-- older callers passed the media title itself as third argument
+	if type(info) ~= 'table' then
+		info = { name = info }
+	end
 
 	if util.file_exists(path) then
 		ohash = util.opensubtitles_hash(path)
 		link = search_ohash(ohash)
-	else
-		name = name or util.string_vid_path_to_name(path)
+	end
+
+	if not link then
+		name = info.name or util.string_vid_path_to_name(path)
+		link = search_filesize(info.filesize, name)
 	end
 
 	if link then
@@ -151,5 +247,6 @@ end
 
 return {
 	search_ohash = search_ohash,
+	search_filesize = search_filesize,
 	search = search
 }
-- 
cgit v1.2.3