aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author sinanmohd <sinan@sinanmohd.com> 2023-10-07 19:35:23 +0530
committer sinanmohd <sinan@sinanmohd.com> 2023-10-08 11:56:50 +0530
commit 48700c12403ea1fc77d4fa6bd84ce1a208b876b0 (patch)
tree ff5b4bd812e3eb0a6c0681cc5c34eb108ff32454
parent 5f5adabdeec7f2bf0897ef81fbc7e04dfed4b015 (diff)
server/opensubtitles: init search_filesize
-rw-r--r-- lib/attr.lua 232
-rw-r--r-- lib/util.lua 11
-rw-r--r-- main.lua 8
-rw-r--r-- server/opensubtitles.lua 90
4 files changed, 335 insertions, 6 deletions
diff --git a/lib/attr.lua b/lib/attr.lua
new file mode 100644
index 0000000..8c1613a
--- /dev/null
+++ b/lib/attr.lua
@@ -0,0 +1,232 @@
+#!/usr/bin/env lua
+
+local util = require 'lib/util'
+
+--- Strip every pattern in `patterns` from `name`, remembering the first hit.
+-- @param name     string to scan
+-- @param patterns sequence of Lua patterns, highest priority first
+-- @return `name` with every match of every pattern removed
+-- @return text matched by the first pattern that hit, or nil if none did
+local extract = function (name, patterns)
+    local found
+
+    -- ipairs, not pairs: the first matching pattern wins, so the list
+    -- must be walked in its written order
+    for _, p in ipairs(patterns) do
+        found = found or name:match(p)
+        -- the parentheses drop gsub's second result (replacement count)
+        name = (name:gsub(p, ''))
+    end
+
+    return name, found
+end
+
+--- Pull out the attributes whose spelling may contain one delimiter
+-- ("MPEG-4", "DTS.HD", "WEB-DL", "S01E02", "1.4 GB", "10 bit").
+-- @param name  release name to scan
+-- @param attrs table to fill in; a new one is created when nil
+-- @return `name` with all recognised attributes removed
+-- @return attrs
+local build_dlim = function (name, attrs)
+    local dlim, r
+
+    attrs = attrs or {}
+    -- at most one separator: '-', '.' or a whitespace character
+    dlim = '[%-%.%s]?'
+    local vcodecs = {
+        -- was '[M]m...', which only matched the literal "Mm" and so
+        -- never recognised "MPEG-4" / "mpeg2"
+        '[Mm][Pp][Ee][Gg]' .. dlim .. '[1234]',
+    }
+    local acodecs = {
+        '[Dd][Tt][Ss]' .. dlim .. '[Hh][Dd]',
+        '[Dd][Dd]' .. dlim .. '[57]%.1',
+    }
+    local sources = {
+        "[Ww][Ee][Bb]" .. dlim .. "[Dd][Ll]",
+        "[Hh][Dd]" .. dlim .. "[Tt][Vv]",
+        "[Hh][Dd]" .. dlim .. "[Tt][Ss]",
+    }
+    local series = {
+        '[Ss]%d%d?' .. dlim .. '[Ee]%d%d?',
+    }
+    local sizes = {
+        '%d%d%d' .. dlim .. '[Mm][Bb]',
+        '%d%d?%.%d%d?' .. dlim .. '[Gg][Bb]',
+    }
+    local depths = {
+        '1[02]' .. dlim .. '[Bb][Ii][Tt]'
+    }
+
+    name, attrs.vcodec = extract(name, vcodecs)
+    name, attrs.source = extract(name, sources)
+    name, attrs.acodecs = extract(name, acodecs)
+    name, attrs.size = extract(name, sizes)
+    name, attrs.depth = extract(name, depths)
+
+    name, r = extract(name, series)
+    if r then
+        -- first digit run is the season, trailing digit run the episode
+        attrs.season = tonumber(r:match('%d+'))
+        attrs.episode = tonumber(r:match('%d+$'))
+    end
+
+    return name, attrs
+end
+
+--- Pull out the attributes that are written as a single token
+-- ("x264", "BluRay", "1080p", "AAC", "5.1", "01x02") plus the year.
+-- Values already present in `attrs` are kept when this pass finds
+-- nothing, so an earlier pass is not wiped out.
+-- @param name  release name to scan
+-- @param attrs table to fill in; a new one is created when nil
+-- @return `name` with all recognised attributes removed
+-- @return attrs
+local build_atom = function (name, attrs)
+    local r, this_year
+
+    attrs = attrs or {}
+    local vcodecs = {
+        "[Aa][Vv]1",
+        "[xXHh]26[345]",
+        "[Aa][Vv][Cc]",
+        "[Hh][Ee][Vv][Cc]",
+        "[Xx][Vv][Ii][Dd]",
+    }
+    local acodecs = {
+        "[Oo][Pp][Uu][Ss]",
+        "[Aa][Aa][Cc]",
+        "[Ee]?[Aa][Cc]3",
+        "[Dd][Tt][Ss]",
+    }
+    local sources = {
+        "[Bb][Ll][Uu][Rr][Aa][Yy]",
+        "[Bb][Rr][Rr][Ii][Pp]",
+        "[Dd][Vv][Dd][Rr][Ii][Pp]",
+        "[Ww][Ee][Bb][Rr][Ii][Pp]",
+        "[Hh][Dd][Rr][Ii][Pp]",
+        "[Rr][Ee][Rr][Ii][Pp]",
+    }
+    local reses = {
+        "2160[Pp]",
+        "1440[Pp]",
+        "1080[Pp]",
+        "720[Pp]",
+        "480[Pp]",
+        "[Uu][Hh][Dd]",
+        "4[Kk]"
+    }
+    local series = {
+        '%d%d[Xx]%d%d',
+    }
+    local channels = {
+        '6[Cc][Hh]',
+        '[57]%.1',
+    }
+
+    -- `r or attrs.x`: assigning the raw result would overwrite an
+    -- existing value with nil whenever this pass has no match
+    name, r = extract(name, vcodecs)
+    attrs.vcodec = r or attrs.vcodec
+    name, r = extract(name, sources)
+    attrs.source = r or attrs.source
+    name, r = extract(name, reses)
+    attrs.res = r or attrs.res
+    name, r = extract(name, acodecs)
+    attrs.acodecs = r or attrs.acodecs
+    name, r = extract(name, channels)
+    attrs.channel = r or attrs.channel
+
+    name, r = extract(name, series)
+    if r then
+        attrs.season = tonumber(r:match('%d+'))
+        attrs.episode = tonumber(r:match('%d+$'))
+    end
+
+    -- the last four-digit run in (1900, current year] is taken as the
+    -- year; it is stored as the matched string, not as a number
+    this_year = tonumber(os.date('%Y'))
+    for y in name:gmatch('%d%d%d%d') do
+        local n = tonumber(y)
+        if n > 1900 and n <= this_year then
+            attrs.year = y
+        end
+    end
+    if attrs.year then
+        name = name:gsub(tostring(attrs.year), '')
+    end
+
+    return name, attrs
+end
+
+--- Collect the low-priority markers (currently only 'SDH') found in
+-- `name` and strip them from it.
+-- @param name  release name to scan
+-- @param attrs table to fill in; a new one is created when nil
+-- @return `name` without the markers
+-- @return attrs, with `low` set to the list of hits when there are any
+local build_low = function (name, attrs)
+    local markers = { 'SDH' }
+    local hits = {}
+
+    for i = 1, #markers do
+        local hit = name:match(markers[i])
+        if hit then
+            hits[#hits + 1] = hit
+        end
+        name = name:gsub(markers[i], '')
+    end
+
+    attrs = attrs or {}
+    if hits[1] ~= nil then
+        attrs.low = hits
+    end
+
+    return name, attrs
+end
+
+--- Split what is left of the name into alphanumeric words.
+-- With more than one word, the last one is stored as `scene` and the
+-- rest as `title`; otherwise everything stays in `title`.
+-- @param name  release name with the other attributes already removed
+-- @param attrs table to fill in; a new one is created when nil, as
+--              the other build_* passes do
+-- @return attrs
+local build_title = function (name, attrs)
+    local words = {}
+
+    attrs = attrs or {}
+    for w in name:gmatch('%w+') do
+        words[#words + 1] = w
+    end
+
+    if #words > 1 then
+        -- table.remove without an index pops the last element
+        attrs.scene = table.remove(words)
+    end
+    attrs.title = words
+
+    return attrs
+end
+
+--- Parse a release name into an attribute table by running every
+-- build_* pass over it in turn.
+-- @param name release name
+-- @return attrs table filled in by the passes
+local build = function (name)
+    local attrs = {}
+    local rest = name
+
+    -- each pass returns (remaining name, attrs); only the first result
+    -- is kept, attrs is filled in place
+    for _, pass in ipairs({ build_dlim, build_atom, build_low }) do
+        rest = pass(rest, attrs)
+    end
+    build_title(rest, attrs)
+
+    return attrs
+end
+
+--- Score how well two attribute tables agree.
+-- Scalar attributes score their weight when equal; list attributes
+-- (such as `title`) score their weight once per element of `a1`'s
+-- list that also occurs in `a2`'s list.
+-- @param a1 attribute table
+-- @param a2 attribute table
+-- @return number, higher means closer
+local weigh = function (a1, a2)
+    local key_score, score
+
+    -- `title`, `acodecs` and `res` are the keys the build_* passes
+    -- store; the former spellings are kept so tables using them still
+    -- get the same weight
+    key_score = {
+        title = 10,
+        name = 10,
+        season = 10,
+        episode = 10,
+        source = 7,
+        scene = 5,
+        vcodec = 3,
+        acodecs = 3,
+        acodec = 3,
+        res = 2,
+        rese = 2,
+        default = 1,
+    }
+
+    score = 0
+    for k, v in pairs(a1) do
+        local other = a2[k]
+        local weight = key_score[k] or key_score.default
+
+        -- nested ifs instead of goto, which plain Lua 5.1 cannot parse
+        if other then
+            if type(v) == 'table' and type(other) == 'table' then
+                -- two tables are never == unless they are the same
+                -- object, so lists are compared element by element
+                local seen = {}
+                for _, item in pairs(other) do
+                    seen[item] = true
+                end
+                for _, item in pairs(v) do
+                    if seen[item] then
+                        score = score + weight
+                    end
+                end
+            elseif v == other then
+                score = score + weight
+            end
+        end
+    end
+
+    return score
+end
+
+--- Pick the value of `tab` whose key looks most like `name`.
+-- Every key is parsed with build and scored against the parsed `name`
+-- with weigh. When no key scores above zero, the entry that next(tab)
+-- yields first is returned.
+-- @param name release name to match
+-- @param tab  table keyed by candidate release names
+-- @return value stored under the best key, or nil for an empty table
+local fuzzy = function (name, tab)
+    local best_key = next(tab)
+    local best_score = 0
+    local wanted = build(name)
+
+    for candidate in pairs(tab) do
+        local s = weigh(wanted, build(candidate))
+        if s > best_score then
+            best_score, best_key = s, candidate
+        end
+    end
+
+    return tab[best_key]
+end
+
+-- functions exported by this module
+local exports = {
+    fuzzy = fuzzy,
+    build = build,
+}
+
+return exports
diff --git a/lib/util.lua b/lib/util.lua
index d52922d..6263507 100644
--- a/lib/util.lua
+++ b/lib/util.lua
@@ -70,6 +70,16 @@ local array_merge = function (a1, a2)
return a
end
+--- Tell whether any value stored in `a` equals `key`.
+-- @param a   table to look through (all entries are visited)
+-- @param key value to look for
+-- @return true when found, false otherwise
+local array_search = function (a, key)
+    local found = false
+
+    for _, v in pairs(a) do
+        if v == key then
+            found = true
+            break
+        end
+    end
+
+    return found
+end
+
local table_print = function (t)
for k, v in pairs(t) do
print( '|'.. k .. '=' .. v .. '|')
@@ -186,6 +196,7 @@ return {
table_print = table_print,
table_match_or_any = table_match_or_any,
array_merge = array_merge,
+ array_search = array_search,
zip_ext_first = zip_ext_first,
string_vid_path_to_name = string_vid_path_to_name,
opensubtitles_hash = opensubtitles_hash,
diff --git a/main.lua b/main.lua
index f54c673..0f62464 100644
--- a/main.lua
+++ b/main.lua
@@ -42,7 +42,7 @@ local sub_needed = function ()
end
local sub_setup = function ()
- local out, name, path, rc
+ local out, name, path, rc, filesize
mp.osd_message('fetching subtitle')
@@ -61,8 +61,12 @@ local sub_setup = function ()
if not util.file_exists(path) then
name = mp.get_property_native('media-title')
end
+ filesize = mp.get_property_native('file-size')
- rc = opensubtitles.search(path, out, name)
+ rc = opensubtitles.search(path, out, {
+ name = name,
+ filesize = filesize
+ })
if not rc then
rc = subscene.search(path, out, name)
end
diff --git a/server/opensubtitles.lua b/server/opensubtitles.lua
index b797f44..f0c6195 100644
--- a/server/opensubtitles.lua
+++ b/server/opensubtitles.lua
@@ -2,6 +2,7 @@
local curl = require 'lib/curl'
local util = require 'lib/util'
+local attr = require 'lib/attr'
-- [[ languages supported by opensubtitles ]] --
local languages = {
@@ -134,14 +135,94 @@ local search_ohash = function (ohash)
end
end
-local search = function (path, out, name)
- local ohash, link
+--- Collect subtitle ids and their release names from a result page.
+-- A line containing '/en/subtitles/<digits>' starts an entry; the
+-- name is then looked for on the following line(s). Entries for which
+-- no name is found are stored under a running counter ("0", "1", ...).
+-- @param page text of the result page
+-- @return table mapping name -> id (string of digits)
+local ids_fetch = function (page)
+    local iter, next_line, no_name, line, id, name, tab
+
+    tab = {}
+    no_name = 0
+    iter = page:gmatch('[^\n\r]+')
+    -- inside an entry a missing line becomes '' rather than nil, so a
+    -- page that ends mid-entry does not raise on the string calls below
+    next_line = function ()
+        return iter() or ''
+    end
+
+    while true do
+        line = iter()
+        if not line then
+            break
+        end
+
+        -- %d+ (not %d*): a link without digits carries no id
+        id = line:match('/en/subtitles/(%d+)')
+        if id then
+            line = next_line() -- movie
+            if line:find('%.%.%.$') then
+                -- name cuts off...
+                name = line:gsub('"[^"]*$', '')
+                name = name:match('[^"]+$')
+            else
+                name = line:gsub('<br/><a rel.*$', '')
+                name = name:match('[^>]+$')
+            end
+
+            if not name then
+                line = next_line()
+
+                if line:find('^%[S%d%dE%d%d%]$') then
+                    -- it's a series
+                    line = next_line()
+                    if line:find('%.%.%.$') then
+                        name = line:gsub('^.*title="', '')
+                        name = name:match('[^"]+')
+                    else
+                        name = line:match('[^<]+')
+                    end
+                end
+            end
+
+            if not name then
+                -- no name: a nil key would make the assignment below
+                -- raise, so fall back to the counter
+                name = tostring(no_name)
+                no_name = no_name + 1
+            end
+
+            tab[name] = id
+        end
+    end
+
+    return tab
+end
+
+--- Search by file size (plus season/episode when the name carries
+-- them) and pick the result whose name is closest to `name`.
+-- @param filesize value appended to the 'moviebytesize-' URL segment
+-- @param name     release name used for season/episode and matching
+-- @return download link, or nil when nothing could be searched/found
+local search_filesize = function (filesize, name)
+    local fetch, hcode, url, id, a, ids, link
+
+    -- either one missing would raise below (nil concatenation / nil
+    -- index); NOTE(review): file-size may be unavailable for some
+    -- inputs such as streams -- confirm against the caller
+    if not filesize or not name then
+        return nil
+    end
+
+    a = attr.build(name)
+
+    url = domain .. '/en' .. '/search/sublanguageid-' .. languages[language]
+    if a.season and a.episode then
+        url = url .. '/season-' .. a.season .. '/episode-' .. a.episode
+    end
+    url = url .. '/moviebytesize-' .. filesize
+
+    fetch, hcode = curl.get(url, nil, nil, tries)
+    if not hcode or not fetch then
+        return nil
+    end
+
+    -- debug output kept as is; the page is now parsed only once
+    print(url)
+    ids = ids_fetch(fetch)
+    util.table_print(ids)
+    id = attr.fuzzy(name, ids)
+    if id then
+        link = domain .. '/en/subtitleserve/sub/' .. id
+        print(link)
+        return link
+    end
+
+    return nil
+end
+
+local search = function (path, out, info)
+ local ohash, link, name
if util.file_exists(path) then
ohash = util.opensubtitles_hash(path)
link = search_ohash(ohash)
- else
- name = name or util.string_vid_path_to_name(path)
+ end
+
+ if not link then
+ name = info.name or util.string_vid_path_to_name(path)
+ link = search_filesize(info.filesize, name)
end
if link then
@@ -151,5 +232,6 @@ end
return {
search_ohash = search_ohash,
+ search_filesize = search_filesize,
search = search
}