Oct-23-2017, 04:50 PM
I recently discovered the OpenSubtitles API, so here's a script that uses SubDB and OpenSubtitles to download subtitles.
I've also recycled my minor project into a hub for sharing Python scripts and projects.
It's a Django-based website; check it out:
ProjectPy
import argparse
import os
import hashlib
import urllib.request as request
import struct
import gzip
from xmlrpc.client import ServerProxy
# File extensions (lowercase, with the leading dot) that are treated as video
# files when looking for subtitles.
VIDEO_FORMATS = [
    '.3g2', '.3gp', '.3gp2', '.3gpp', '.60d', '.ajp', '.asf', '.asx',
    '.avchd', '.avi', '.bik', '.bix', '.box', '.cam', '.divx', '.dmf',
    '.dv', '.dvr-ms', '.evo', '.flc', '.fli', '.flic', '.flv', '.flx',
    '.gvi', '.gvp', '.h264', '.m1v', '.m2p', '.m2ts', '.m2v', '.m4e',
    '.m4v', '.mjp', '.mjpeg', '.mjpg', '.mkv', '.moov', '.mov', '.movhd',
    '.movie', '.movx', '.mp4', '.mpe', '.mpeg', '.mpg', '.mpv', '.mpv2',
    '.mxf', '.nsv', '.nut', '.ogg', '.ogm', '.omf', '.ps', '.qt',
    '.ram', '.rm', '.rmvb', '.swf', '.ts', '.vfw', '.vid', '.video',
    '.viv', '.vivo', '.vob', '.vro', '.wm', '.wmv', '.wmx', '.wrap',
    '.wvx', '.wx', '.x264', '.xvid',
]
class SubDB:
    """Minimal client for the SubDB subtitle service.

    API Link : https://goo.gl/n9B5J4
    """

    # Header sent with every request to the service.
    _DEFAULT_HEADERS = {
        'User-Agent': 'SubDB/1.0 (PPY/0.1; https://projectpy.ml/)',
    }
    LANGUAGES_URL = 'http://api.thesubdb.com/?action=languages'
    # Languages offered when the live list cannot be fetched.
    DEFAULT_LANGUAGES = ['en', 'es', 'fr', 'it', 'nl', 'pl', 'pt', 'ro', 'sv', 'tr']
    # Seconds to wait for the service before giving up, so an unresponsive
    # server cannot hang the script forever.
    TIMEOUT = 30

    def __init__(self):
        self.BASEURL = "http://api.thesubdb.com/?action=download&hash="
        self.HEADERS = dict(self._DEFAULT_HEADERS)

    def get_hash(self, name):
        """Return the MD5 hex digest of the first and last 64 KiB of a file.

        Originally from : https://goo.gl/n9B5J4

        :param name: Path of the file to hash.
        :return: Hex digest string.
        :raises OSError: If the file cannot be opened, or is shorter than
            64 KiB (the backwards seek from the end then fails).
        """
        readsize = 64 * 1024
        with open(name, 'rb') as f:
            data = f.read(readsize)
            f.seek(-readsize, os.SEEK_END)
            data += f.read(readsize)
        return hashlib.md5(data).hexdigest()

    def download_subtitles(self, path, lang='en'):
        """Download subtitles for a video and save them next to it as .srt.

        :param path: The video file path for which to download subs.
        :param lang: Subtitle Language
        :return: True if a subtitle file was written, False if the service
            answered with a status other than 200.
        :raises OSError: On hashing, network or HTTP errors (urlopen raises
            for error statuses); callers use this to fall back to another
            provider.
        """
        url = self.BASEURL + self.get_hash(path) + "&language=" + lang
        req = request.Request(url, headers=self.HEADERS)
        with request.urlopen(req, timeout=self.TIMEOUT) as res:
            if res.getcode() != 200:
                return False
            # Read the body before creating the file so a failed transfer
            # does not leave an empty .srt behind.
            content = res.read()
        with open(os.path.splitext(path)[0] + '.srt', "wb") as sub_file:
            sub_file.write(content)
        print("[SubDB]Subtitles Found for: " + path)
        return True

    @staticmethod
    def get_lang():
        """Return the subtitle languages offered by the service.

        Falls back to DEFAULT_LANGUAGES when the service cannot be reached
        or answers with an error, so callers always get a usable list.

        :return: List of available subtitle languages
        """
        req = request.Request(SubDB.LANGUAGES_URL, headers=SubDB._DEFAULT_HEADERS)
        try:
            with request.urlopen(req, timeout=SubDB.TIMEOUT) as res:
                if res.getcode() == 200:
                    return res.read().decode('utf-8').split(',')
        except OSError:
            # URLError, HTTPError and socket timeouts are all OSError
            # subclasses; any of them means "use the built-in list".
            pass
        return list(SubDB.DEFAULT_LANGUAGES)
class OpenSub:
    """Minimal client for the OpenSubtitles XML-RPC service.

    Some Class Methods Are Copied From : https://goo.gl/7Yohyj
    API Link : https://goo.gl/ZN8Wc1
    """

    # Release-type keywords looked for in the video file name.
    TAGS = ['bluray', 'cam', 'dvb', 'dvd', 'hd-dvd', 'hdtv', 'ppv', 'telecine', 'telesync', 'tv', 'vhs', 'vod',
            'web-dl', 'webrip', 'workprint']
    OPENSUBTITLES_SERVER = 'http://api.opensubtitles.org/xml-rpc'
    USER_AGENT = 'OSTestUserAgentTemp'  # Only for testing
    # Number of bytes summed at each end of the file by get_hash().
    HASH_BLOCK_SIZE = 65536
    # The rest of the script works with two-letter ISO 639-1 codes, while the
    # search call takes three-letter language ids. Unknown values (including
    # ids that are already three letters long) are passed through unchanged.
    LANGUAGE_IDS = {
        'en': 'eng', 'es': 'spa', 'fr': 'fre', 'it': 'ita', 'nl': 'dut',
        'pl': 'pol', 'pt': 'por', 'ro': 'rum', 'sv': 'swe', 'tr': 'tur',
    }

    def __init__(self):
        self.xmlrpc = ServerProxy(self.OPENSUBTITLES_SERVER,
                                  allow_none=True)
        self.language = 'en'
        self.token = None
        self.user = ''  # your username
        self.passw = ''  # your password
        if self.login(self.user, self.passw):
            print("Login Successful")
        else:
            print("OpenSubtitles login Failed. This could reduce the download limit")

    def _get_from_data_or_none(self, key):
        '''Return the key received from data if the status is 200,
        otherwise return None.
        '''
        # A reply without a 'status' entry is treated as a failure.
        status = str(self.data.get('status') or '').split()
        if status and status[0] == '200':
            return self.data.get(key)
        return None

    def login(self, username, password):
        '''Returns the token if login is ok, otherwise None.
        '''
        self.data = self.xmlrpc.LogIn(username, password,
                                      self.language, self.USER_AGENT)
        token = self._get_from_data_or_none('token')
        if token:
            self.token = token
        return token

    def logout(self):
        '''Returns True if logout is ok, otherwise False.
        '''
        data = self.xmlrpc.LogOut(self.token)
        return '200' in (data.get('status') or '')

    def search_subtitles(self, params):
        '''Returns a list with the subtitles info, or None on failure.
        '''
        self.data = self.xmlrpc.SearchSubtitles(self.token, params)
        return self._get_from_data_or_none('data')

    def download_subtitles(self, path, lang=''):
        """Search for, download and save subtitles next to the video as .srt.

        :param path: The video file path for which to download subs.
        :param lang: Subtitle Language
        :return: True if a subtitle file was written, otherwise False.
        """
        payload = [self.create_payload(path, lang)]
        search_result = self.search_subtitles(payload)
        if not search_result:
            return False
        dllink = self.analyse_result(search_result)
        if not dllink:
            # No record carried a positive score, so there is nothing to fetch.
            return False
        with request.urlopen(dllink) as response:
            # The download link serves the subtitle gzip-compressed.
            content = gzip.decompress(response.read())
        try:
            with open(os.path.splitext(path)[0] + '.srt', 'wb') as sub_file:
                sub_file.write(content)
        except PermissionError:
            print("Permision Error: when creating subtitles for {}:".format(path))
            return False
        print("[OpenSubtitles]Subtitles Found for: " + path)
        return True

    def analyse_result(self, result):
        """Pick the highest-scoring record from a search result.

        :param result: Search result to find appropriate subtitles
        :return: Download Link of best match for subtitles, or None when no
            record has a score above zero. On ties the first record wins.
        """
        best_score = 0
        dllink = None
        for record in result:
            score = record.get('Score', 0)
            if score > best_score:
                best_score = score
                dllink = record.get('SubDownloadLink')
        return dllink

    def get_tags(self, path):
        """
        :param path: The video file path for which to download subs.
        :return: tags on video (every TAGS entry that occurs as a substring
            of the lowercased file name, in TAGS order)
        """
        name = os.path.basename(path).lower()
        return [word for word in self.TAGS if word in name]

    def create_payload(self, path, lang):
        """
        :param path: The video file path for which to download subs.
        :param lang: Subtitle Language
        :return: Payload containing data about file
        """
        payload = {
            'moviebytesize': str(os.path.getsize(path)),
            'sublanguageid': self.LANGUAGE_IDS.get(lang, lang),
        }
        tags = self.get_tags(path)
        if tags:
            payload['tags'] = ','.join(tags)
        payload['moviehash'] = self.get_hash(path)
        return payload

    def get_hash(self, path):
        '''Return the 64-bit OpenSubtitles hash of a file as 16 hex digits.

        The hash is the file size plus the sum of the 64-bit little-endian
        words in the first and last 64 KiB, truncated to 64 bits.

        Original from: http://goo.gl/qqfM0

        Returns the string "IOError" if the file cannot be opened and
        "SizeError" if it is smaller than 128 KiB.
        '''
        block = self.HASH_BLOCK_SIZE
        # Explicit little-endian, fixed 8-byte words: the result must not
        # depend on the byte order of the machine running the script.
        words = '<%dq' % (block // 8)
        size = os.path.getsize(path)
        try:
            f = open(path, "rb")
        except IOError:
            return "IOError"
        with f:  # closes the handle on every exit path
            if size < block * 2:
                return "SizeError"
            head = f.read(block)
            f.seek(max(0, size - block), 0)
            tail = f.read(block)
        total = size + sum(struct.unpack(words, head)) + sum(struct.unpack(words, tail))
        # Masking once at the end equals masking after every addition.
        return "%016x" % (total & 0xFFFFFFFFFFFFFFFF)
def down_sub(pathlist, lang=''):
    """Driver function to find subtitles with SubDB and OpenSubtitle

    Videos that already have a sibling .srt file are skipped. SubDB is tried
    first; if it raises, OpenSubtitles is tried for the same file.

    :param pathlist: Iterable of video file paths.
    :param lang: Subtitle language passed to both providers.
    """
    downloader_subdb = SubDB()  # SUBDB
    downloader_os = OpenSub()  # OpenSubtitles
    for path in pathlist:
        if os.path.exists(os.path.splitext(path)[0] + '.srt'):
            continue
        try:
            downloader_subdb.download_subtitles(path, lang)
        except Exception:
            # Only ordinary errors trigger the fallback; KeyboardInterrupt
            # and SystemExit must still stop the script.
            downloader_os.download_subtitles(path, lang)
def is_video(filepath):
    """Return True if the path's extension is a known video format.

    The comparison ignores case, so 'MOVIE.MKV' is recognised as well as
    'movie.mkv'.

    :param filepath: Path (or bare file name) to test.
    """
    ext = os.path.splitext(filepath)[1].lower()
    return ext in VIDEO_FORMATS
def recursive_search(directory, all_vids=None):
    """Collect the paths of all video files below a directory.

    :param directory: Path of Directory to be searched recursively
    :param all_vids: Optional list to append the results to; a new list is
        created for each call when omitted.
    :return: Path string of all video files in a directory/subdirectory
    """
    if all_vids is None:
        # A fresh list per call: a mutable default would keep accumulating
        # results from earlier calls.
        all_vids = []
    try:
        for entry in os.scandir(directory):
            if entry.is_dir():
                # Pass the accumulator down so nested results are kept.
                recursive_search(entry.path, all_vids)
            elif entry.is_file() and is_video(entry.path):
                all_vids.append(entry.path)
    except PermissionError as e:
        # Unreadable directories are reported and skipped.
        print(e)
    return all_vids
def main():
    """Command-line entry point.

    Takes a file or directory path plus an optional subtitle language
    (defaulting to 'en') and downloads subtitles for the video file, or for
    every video found below the directory.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('path', help='Path to the file or directory', type=str)
    cli.add_argument(
        '-l', '--language',
        help='Languages of subtitle as per ISO 639-1 codes',
        choices=SubDB.get_lang(),
    )
    options = cli.parse_args()

    target = options.path
    language = options.language or 'en'

    if os.path.isdir(target):
        videos = recursive_search(target)
    elif is_video(target):
        videos = [target]
    else:
        # A single path that is not a video: nothing to do.
        return
    down_sub(videos, language)
if __name__ == "__main__":
    main()

I've also recycled my minor project into a hub for sharing Python scripts and projects.
It's a Django-based website; check it out: ProjectPy
Reply
