00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
# Descriptive metadata for this module.
__title__ = "revision3XSL_api - XPath and XSLT functions for the www.revision3L.com RSS/HTML"
__author__ = "R.D. Vaughan"
__purpose__ = '''
This python script is intended to perform a variety of utility functions
for the conversion of data to the MNV standard RSS output format.
See this link for the specifications:
http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format
'''

__version__ = "v0.1.1"

# Names of the classes in this module that supply XPath extension functions.
# NOTE(review): presumably read by whatever code loads this module - confirm.
__xpathClassList__ = ['xpathFunctions']

# Names of XSLT extension classes in this module; there are none.
__xsltExtentionList__ = []
00034
00035 import os, sys, re, time, datetime, shutil, urllib, string
00036 from copy import deepcopy
00037
00038
class OutStreamEncoder(object):
    """Wraps a stream with an encoder.

    Unicode strings passed to write() are encoded before they reach the
    wrapped stream; every other attribute is delegated to that stream.
    """
    def __init__(self, outstream, encoding=None):
        """Remember the stream to wrap and the encoding to apply.

        outstream -- object with a write() method that receives the output
        encoding  -- codec name used for unicode strings; when empty or None
                     the file system encoding is used
        """
        self.out = outstream
        # On Python 2 sys.getfilesystemencoding() may return None (meaning
        # "use the system default"), and unicode.encode(None) raises a
        # TypeError, so fall back to the default encoding explicitly.
        self.encoding = (encoding
                         or sys.getfilesystemencoding()
                         or sys.getdefaultencoding())

    def write(self, obj):
        """Wraps the output stream, encoding Unicode strings with the specified encoding.

        Non-unicode objects are handed to the wrapped stream unchanged.
        An IOError raised by the wrapped stream is ignored.
        """
        if isinstance(obj, unicode):
            obj = obj.encode(self.encoding)
        try:
            self.out.write(obj)
        except IOError:
            # Deliberate best effort: a failing output stream (for example a
            # closed pipe) must not abort the script.
            pass

    def __getattr__(self, attr):
        """Delegate everything but write to the stream.

        Raises AttributeError when the wrapped stream itself is not set yet.
        """
        # __getattr__ only runs when normal lookup failed. If "out" is the
        # missing attribute, touching self.out below would recurse forever.
        if attr == 'out':
            raise AttributeError(attr)
        return getattr(self.out, attr)
# Send everything written to the standard streams through the UTF-8
# encoding wrapper so that unicode strings can be written directly.
sys.stderr = OutStreamEncoder(sys.stderr, encoding='utf8')
sys.stdout = OutStreamEncoder(sys.stdout, encoding='utf8')
00066
# Both libraries are required; report a readable error and stop when either
# cannot be imported.
try:
    from StringIO import StringIO
    from lxml import etree
except Exception as e:
    # "except ... as" is accepted from Python 2.6 onward, unlike the older
    # comma form, which is a syntax error on Python 3.
    sys.stderr.write(u'\n! Error - Importing the "lxml" and "StringIO" python libraries failed on error(%s)\n' % e)
    sys.exit(1)
00073
00074
00075
00076
00077
# Dotted text form of the libxml version that lxml reports, used in the
# error message below.
version = '.'.join(str(digit) for digit in etree.LIBXML_VERSION)

# Compare the numeric tuple rather than the dotted string: as text,
# '2.10.0' sorts before '2.7.2', so newer libxml releases would wrongly be
# rejected as too old.
if tuple(etree.LIBXML_VERSION) < (2, 7, 2):
    sys.stderr.write(u'''
! Error - The installed version of the "lxml" python library "libxml" version is too old.
At least "libxml" version 2.7.2 must be installed. Your version is (%s).
''' % version)
    sys.exit(1)
00088
00089
class xpathFunctions(object):
    """Functions specific extending XPath

    NOTE(review): the methods use a module-level name "common" that is not
    defined in the visible part of this file; presumably it is supplied by
    the code that loads this module - confirm.
    """
    def __init__(self):
        """Set up the function list, regular expressions, namespaces and the
        XPath filters used by the extension functions.
        """
        # Names of the methods of this class offered as XPath functions.
        self.functList = ['revision3LinkGeneration', 'revision3Episode', 'revision3checkIfDBItem', ]
        # Patterns tried in order against the enclosure URL; the first group
        # of the first matching pattern is the episode number.
        self.episodeRegex = [
            re.compile(u'''^.+?\\-\\-(?P<episodeno>[0-9]+)\\-\\-.*$''', re.UNICODE),
            ]
        self.namespaces = {
            'atom': "http://www.w3.org/2005/Atom",
            'media': "http://search.yahoo.com/mrss/",
            'itunes':"http://www.itunes.com/dtds/podcast-1.0.dtd",
            'xhtml': "http://www.w3.org/1999/xhtml",
            'mythtv': "http://www.mythtv.org/wiki/MythNetvision_Grabber_Script_Format",
            'cnettv': "http://cnettv.com/mrss/",
            'creativeCommons': "http://backend.userland.com/creativeCommonsRssModule",
            'amp': "http://www.adobe.com/amp/1.0",
            'content': "http://purl.org/rss/1.0/modules/content/",
            }
        # Each entry is [compiled XPath locating a media id, regex or None].
        # With a regex, its first group is the video id; with None the XPath
        # result itself is used (minus a "player-" prefix).
        self.mediaIdFilters = [
            [etree.XPath('//object/@id', namespaces=self.namespaces ), None],
            ]
        self.FullScreen = u'http://revision3.com/show/popupPlayer?video_id=%s&quality=high&offset=0'
        self.FullScreenParser = common.parsers['html'].copy()
    # end __init__()

    def revision3LinkGeneration(self, context, *arg):
        '''Generate a link for the video.
        Call example: 'mnvXpath:revision3LinkGeneration(string(link))'
        context -- not used by this method
        arg[0]  -- URL of the web page that describes the video
        return the url link: the full screen player URL when a media id is
        found in the page, otherwise the unchanged web page URL (also when
        the page cannot be read)
        '''
        webURL = arg[0]
        try:
            tmpHTML = etree.parse(webURL, self.FullScreenParser)
        except Exception as errmsg:
            sys.stderr.write(u"Error reading url(%s) error(%s)\n" % (webURL, errmsg))
            return webURL

        for idXPath, idRegex in self.mediaIdFilters:
            mediaId = idXPath(tmpHTML)
            if not mediaId:
                continue
            if not idRegex:
                # No regex: the attribute value is the id once the
                # "player-" prefix is removed.
                return self.FullScreen % (mediaId[0].strip().replace(u'player-', u''))
            match = idRegex.match(mediaId[0])
            if match:
                return self.FullScreen % (match.groups()[0])
            # Regex did not match: try the next filter.

        # No filter produced a media id.
        return webURL
    # end revision3LinkGeneration()

    def revision3Episode(self, context, *arg):
        '''Parse the download link and extract an episode number
        Call example: 'mnvXpath:revision3Episode(.)'
        context -- not used by this method
        arg[0]  -- node list whose first element has "title" and "enclosure"
                   children; the enclosure "url" attribute is the download link
        return the a massaged title element and an episode element in an array
        (a one item list holding an "xml" element with a "title" child and,
        when a non-zero episode number was found, an "episode" child)
        '''
        item = arg[0][0]
        title = item.find('title').text
        link = item.find('enclosure').attrib['url']

        # None means "no episode number found"; formatting an empty string
        # with %03d, as was done before, raises a TypeError.
        episodeNumber = None
        for regex in self.episodeRegex:
            match = regex.match(link)
            if match:
                episodeNumber = int(match.groups()[0])
                break

        titleElement = etree.XML(u"<xml></xml>")
        if episodeNumber is None:
            # Without an episode number the title is passed through as is.
            etree.SubElement(titleElement, "title").text = title
        else:
            etree.SubElement(titleElement, "title").text = u'Ep%03d: %s' % (episodeNumber, title)
            if episodeNumber:
                etree.SubElement(titleElement, "episode").text = u'%s' % episodeNumber
        return [titleElement]
    # end revision3Episode()

    def revision3checkIfDBItem(self, context, arg):
        '''Use a unique key value pairing to find out if the 'internetcontentarticles' table already
        has a matching item. This is done to save accessing the Internet when not required.
        Call example: 'mnvXpath:revision3checkIfDBItem(.)'
        context -- passed on unchanged to revision3Episode()
        arg     -- node list as accepted by revision3Episode()
        return True if a match was found
        return False if a match was not found
        '''
        # The lookup key is the massaged title produced by revision3Episode().
        massagedTitle = self.revision3Episode(context, arg)[0].find('title').text
        return common.checkIfDBItem('dummy', {'title': massagedTitle, })
    # end revision3checkIfDBItem()
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196