00001 #!/usr/bin/env perl
00002 # @(#)$Header: /home/mythtv/mythtvrep/scripts/twit.tv.pl,v 1.32 2010/07/24 23:28:11 mythtv Exp $
00003 # Auric 2010/01/10 http://web.aanet.com.au/auric/
00004 #
00005 # MythNetvision Grabber Script for TWiT.tv site.
00006 #
# To alter any of the default settings, create or edit
# $HOME/.mythtv/MythNetvision/userGrabberPrefs/twit.tv.cfg
00009 # Format of file
00010 # player=mplayer
00011 # playerargs=-fs -zoom %MEDIAURL%
00012 #
# Settings you can put in this file include:
# Print info/progress messages: 0 - off, 1 - low, 2 - high
00015 # mnvinfo
00016 # Info messages go to: 0 = stderr, filename = filename
00017 # mnvinfoop
00018 # External player to use
00019 # player
00020 # Args to external player %MEDIAURL% will be replaced with content url
00021 # playerargs
00022 # External download to use
00023 # download
00024 # Args to external download %MEDIAURL% will be replaced with content url
00025 # downloadargs
00026 # A network player like a flash or html5 html. TODO 0.24 May not be approved
00027 # netplayer
00028 # Type flash or html5
00029 # netplayertype
# Seconds to cache results. Default 72000
00031 # cachetime
00032 #
00033 ################################################################################
00034 use strict;
00035 use warnings;
00036 use Getopt::Std;
00037 use LWP::Simple;
00038 use HTML::TreeBuilder;
00039 use HTML::Entities;
00040 use Data::Dumper;
00041 use Date::Parse;
00042 use Date::Format;
00043 use Encode;
00044 use Storable;
00045 use File::stat;
00046 use File::Basename;
00047 use FindBin '$Bin', '$Script';
00048 use lib "$Bin/nv_perl_libs";
00049 use mnvcommonsubs;
00050
#################################### Settings #################################
# Read the user's preference file before anything else is set up.
# fileparse() is called in list context here, so mnvloadconfig receives the
# script's base name (".pl" removed), its directory and the suffix, followed
# by "notused".
mnvloadconfig(fileparse($Script, '.pl'), "notused");

#################################### Globals ##################################
# The rcs keyword is expanded at check-in; keep only the revision number.
(my $version = '$Revision: 1.32 $') =~ s/\D*([\d\.]+)\D*/$1/;

# Identity of this grabber as reported by the -v option.
my $command = "twit.tv.pl";
my $commandthumbnail = "twit.tv.png";
my $author = "Auric";

# Details of the site being scraped.
my $site = 'TWiT.tv';
my $description = 'Leo Laporte & Friends';
my $baseurl = 'http://twit.tv';
my $baseicon = 'http://twit.tv/sites/all/themes/twit/img/logo.gif';

# File holding the results of the previous run.
my $store = '/tmp/.' . $site . '.diritemsref.store';

# Variables filled in by getopts().
our $opt_v;
our $opt_T;
our $opt_p;
our $opt_S;

# { "directory name" => [items] }, filled in by builddiritems().
my %diritems = ();
00065
00066 #################################### Site Specific Subs ##########################
# Build the video items for every directory (show) on the site.
# input hash ref to { "directory name" => [array of anonymous hashes] }
# Each anonymous hash has these keys:
# {
#     'dirthumbnail' => $icon,
#     'title' => $title,
#     'mythtv:subtitle' => "",
#     'author' => $author,
#     'pubDate' => $pubDate,
#     'description' => $description,
#     'link' => $url,
#     'player' => $player,
#     'playerargs' => $playerargs,
#     'download' => $download,
#     'downloadargs' => $downloadargs,
#     'media:thumbnailurl' => "",
#     'media:contenturl' => $contenturl,
#     'media:contentlength' => $length,
#     'media:contentduration' => "",
#     'media:contentwidth' => "",
#     'media:contentheight' => "",
#     'media:contentlanguage' => $language,
#     'rating' => "",
#     'mythtv:country' => "",
#     'mythtv:season' => "",
#     'mythtv:episode' => "",
#     'mythtv:customhtml' => ""
# }
# Filling in this hash ref is the whole job of the site specific subs.
# input base url
# output total number of items found
sub builddiritems {
    my ($diritemsref, $baseurl) = @_;

    # show pages first, then the episode pages linked from each of them
    my $vidurlsref = buildvidurls(builddirurls($baseurl));

    my $total = 0;
    for my $dir (keys %{$vidurlsref}) {
        my $dirtotal = 0;
        my $tried = 0;
        for my $vidurl (@{ $vidurlsref->{$dir} }) {
            my $added = builditems($diritemsref, $dir, $vidurl);
            $total += $added;
            $dirtotal += $added;
            $tried++;

            # Carry on while fewer than two episodes have been tried or
            # something has turned up; otherwise give up on this directory
            # to save time.
            next if $tried < 2 || $dirtotal != 0;
            mnvinfomsg(2, "Skipping rest of $dir as nothing found so far");
            last;
        }
        mnvinfomsg(1, "$dir Items found $dirtotal");
    }

    return $total;
}
00120
# Collect the urls of all the podcasts linked from the base page.
# input base url
# return hash ref to { "directory name" => "url" }
# Dies if the page cannot be retrieved or parsed, or if no urls are found.
sub builddirurls {
    my $baseurl = shift @_;

    my %dirurls;

    mnvinfomsg(1, "Getting $baseurl");
    my $content = get($baseurl);
    unless ($content) {
        die "Could not retrieve $baseurl";
    }
    my $tree = HTML::TreeBuilder->new;
    eval { $tree->parse($content); };
    if ($@) {
        die "$baseurl parse failed, $@";
    }
    $tree->eof();

    # look_down compares the class attribute as a whole string, so the
    # "leaf first" element has to be asked for separately from the "leaf" ones.
    my @ptrs;
    my $first = $tree->look_down('class', 'leaf first');
    ($first) and push(@ptrs, $first);
    push(@ptrs, $tree->look_down('class', 'leaf'));

    foreach my $ptr (@ptrs) {
        foreach my $anchor ($ptr->find_by_tag_name('a')) {
            # An anchor without a target cannot lead to a podcast page.
            my $href = $anchor->attr('href');
            (defined($href) && length($href)) or next;
            my $dir = $anchor->as_trimmed_text();
            $dirurls{$dir} = mnvcleantext($baseurl.$href);
        }
    }

    # Everything needed has been copied out, so release the tree explicitly
    # (older HTML::Element versions do not free it on their own).
    $tree->delete();

    #print STDERR Dumper(%dirurls);
    (keys(%dirurls)) or die "No urls found";

    return \%dirurls;
}
00158
# Collect the urls of all the episode pages of every podcast.
# input hash ref to { "directory name" => "url" }
# return hash ref to { "directory name" => [url] }
# A directory whose page cannot be retrieved or parsed is warned about and
# left out; a directory with no episode links gets no entry at all.
# Episode urls are built from the file level $baseurl.
sub buildvidurls {
    my $dirurls = shift @_;

    my %vidurls;

    foreach my $dir (sort(keys(%$dirurls))) {
        mnvinfomsg(1, "Getting $dir $dirurls->{$dir}");
        my $content = get($dirurls->{$dir});
        unless ($content) {
            warn "Could not retrieve $dirurls->{$dir}";
            next;
        }
        my $tree = HTML::TreeBuilder->new;
        eval { $tree->parse($content); };
        if ($@) {
            warn "$dirurls->{$dir} parse failed, $@";
            $tree->delete();
            next;
        }
        $tree->eof();

        # look_down compares the class attribute as a whole string, so the
        # "podcast-number current" element is fetched separately and put
        # first, followed by every plain "podcast-number" element.
        my @ptrs;
        my $current = $tree->look_down('class', 'podcast-number current');
        ($current) and push(@ptrs, $current);
        push(@ptrs, $tree->look_down('class', 'podcast-number'));

        foreach my $urlp (@ptrs) {
            # Ignore elements that do not link anywhere.
            my $href = $urlp->attr('href');
            (defined($href) && length($href)) or next;
            push(@{$vidurls{$dir}}, mnvcleantext($baseurl.$href));
        }

        # One tree is built per directory; release each one explicitly
        # (older HTML::Element versions do not free it on their own).
        $tree->delete();
    }
    #print STDERR Dumper(%vidurls);
    return \%vidurls;
}
00199
# Convert a running time of the form "M:S" or "H:M:S" into seconds.
# input running time string
# return number of seconds, 0 if the string has any other number of fields
sub _durationseconds {
    my $text = shift @_;

    (defined($text)) or return 0;
    # Limit of -1 keeps trailing empty fields so the colon count is honoured.
    my @parts = split(/:/, $text, -1);
    (@parts == 2 || @parts == 3) or return 0;

    my $seconds = 0;
    foreach my $part (@parts) {
        # Use the leading digits of each field; a field without any counts as 0.
        my ($num) = $part =~ /^\s*(\d+)/;
        $seconds = ($seconds * 60) + (defined($num) ? $num : 0);
    }
    return $seconds;
}

# Pull everything needed about one episode out of its parsed page.
# input HTML::TreeBuilder tree of the episode page
# input url of the episode page
# return hash ref with keys links (array ref), title, pubDate, desc, icon
#        and duration, or undef if there is no video link or no title
sub _scrapeepisode {
    my $tree = shift @_;
    my $url = shift @_;

    # Video downloads are the "Download Video" anchors pointing at an mp4.
    my @links;
    foreach my $anchor ($tree->find_by_tag_name('a')) {
        ($anchor->as_trimmed_text() =~ /Download Video/) or next;
        my $href = $anchor->attr('href');
        (defined($href) && $href =~ /^http:.*video.*mp4$/) or next;
        push(@links, mnvcleantext($href));
    }
    (@links) or return;

    # Find the episode entry whose link is the page being processed.
    my @ptrs;
    my $current = $tree->look_down('class', 'podcast-number current');
    ($current) and push(@ptrs, $current);
    push(@ptrs, $tree->look_down('class', 'podcast-number'));
    my $ptr;
    foreach my $candidate (@ptrs) {
        my $testurl = $candidate->attr('href');
        (defined($testurl) && length($testurl)) or next;
        # \Q..\E: the href is data, not a pattern ("?", "." and "+" are common in urls).
        if ($url =~ /http:.*\Q${testurl}\E/) {
            $ptr = $candidate;
            last;
        }
    }

    my $title = ""; my $pubDate = ""; my $desc = "";
    if ($ptr) {
        my $rawtitle = $ptr->attr('title');
        (defined($rawtitle)) and $title = mnvcleantext($rawtitle);
        # Date and description are looked up under the entry's parent element.
        my $parent = $ptr->parent();
        if ($parent) {
            my $dateptr = $parent->look_down('class', 'podcast-date');
            if ($dateptr) {
                # str2time returns undef for a date it cannot parse; leave
                # pubDate empty rather than emit a bogus epoch date.
                my $time = str2time($dateptr->as_trimmed_text());
                if (defined($time)) {
                    $pubDate = mnvcleantext(time2str("%a, %d %b %Y 00:00:00 GMT", $time));
                }
            }
            my $descptr = $parent->find_by_tag_name('p');
            ($descptr) and $desc = mnvcleantext($descptr->as_trimmed_text());
        }
    }
    ($title) or return;

    # Episode cover art, falling back to the file level $baseicon.
    my $icon = $baseicon;
    my $iconptr = $tree->look_down('class', 'imagecache imagecache-coverart');
    if ($iconptr) {
        my $src = $iconptr->attr('src');
        (defined($src)) and $icon = mnvcleantext($src);
    }

    # Running time, shown on the page as "Running time: H:M:S" or "M:S".
    my $duration = "";
    my $durptr = $tree->look_down('class', 'running-time');
    if ($durptr) {
        my $tmpdur = $durptr->as_trimmed_text();
        # NOTE(review): the tail of this substitution was missing from the
        # source this was restored from; stripping the label plus any
        # following whitespace is the assumed intent - confirm.
        if ($tmpdur =~ s/Running time:\s*//) {
            my $seconds = _durationseconds($tmpdur);
            ($seconds > 0) and $duration = mnvcleantext($seconds);
        }
    }

    return {
        'links' => \@links,
        'title' => $title,
        'pubDate' => $pubDate,
        'desc' => $desc,
        'icon' => $icon,
        'duration' => $duration
    };
}

# Build all items for one episode page
# input hash ref to { "directory name" => [array of anonymous hashes] }
# input "directory name"
# input url of the episode page
# output number of items added
# One item is added per video link on the page. When the 'resolution' config
# setting is "high" only videos at least 750 wide are kept, when it is "low"
# only narrower ones; when it is not set every video is kept and the
# resolution, if known, is appended to the title.
sub builditems {
    my $diritemsref = shift @_;
    my $dir = shift @_;
    my $url = shift @_;

    mnvinfomsg(2, "Getting $dir Episode $url");
    my $content = get($url);
    unless ($content) {
        warn "Could not retrieve $url";
        return 0;
    }
    my $tree = HTML::TreeBuilder->new;
    eval { $tree->parse($content); };
    if ($@) {
        warn "$url parse failed, $@";
        $tree->delete();
        return 0;
    }
    $tree->eof();

    # Copy what is needed out of the tree, then release it explicitly on
    # every path (older HTML::Element versions do not free it on their own,
    # and this sub runs once per episode).
    my $episode = _scrapeepisode($tree, $url);
    $tree->delete();
    ($episode) or return 0;

    my $title = $episode->{'title'};
    my $resolution = mnvgetconfig('resolution');
    my $country = "";

    my $count = 0;
    foreach my $contenturl (@{$episode->{'links'}}) {
        # The size is encoded in the file name as _WIDTHxHEIGHT_. Only trust
        # the captures when this match succeeded, otherwise they could still
        # hold values from an earlier match.
        my $width = ""; my $height = "";
        if ($contenturl =~ /_(\d{3,4})x(\d{3,4})_/) {
            $width = $1;
            $height = $2;
        }

        my $titleresolution;
        if ($resolution) {
            $titleresolution = $title;
            # An unknown width is treated as 0, i.e. as low resolution.
            my $numwidth = ($width eq "") ? 0 : $width;
            if (($resolution eq "high") && ($numwidth < 750)) {
                mnvinfomsg(1, "Skipping $contenturl due to wrong resolution");
                next;
            }
            if (($resolution eq "low") && ($numwidth >= 750)) {
                mnvinfomsg(1, "Skipping $contenturl due to wrong resolution");
                next;
            }
        } elsif ($width ne "") {
            $titleresolution = "$title (${width}x${height})";
        } else {
            $titleresolution = $title;
        }

        # Link to the episode page unless a matching network player is configured.
        my $link = $url;
        if ((mnvgetconfig('netplayer')) && ($contenturl)) {
            if (mnvistype(mnvgetconfig('netplayertype'), $contenturl)) {
                my $encodedtitle = decode_entities($title);
                $encodedtitle = mnvURLEncode($encodedtitle);
                $link = mnvcleantext(mnvgetconfig('netplayer')."?title=${encodedtitle}&videofile=").$contenturl;
            } else {
                mnvinfomsg(1, "Not ".mnvgetconfig('netplayertype')." $contenturl");
            }
        }
        push(@{$diritemsref->{$dir}}, {
            'dirthumbnail' => $episode->{'icon'},
            'title' => $titleresolution,
            'mythtv:subtitle' => "",
            'author' => "twit.tv",
            'pubDate' => $episode->{'pubDate'},
            'description' => $episode->{'desc'},
            'link' => $link,
            'player' => mnvgetconfig('player'),
            'playerargs' => mnvgetconfig('playerargs'),
            'download' => mnvgetconfig('download'),
            'downloadargs' => mnvgetconfig('downloadargs'),
            'media:thumbnailurl' => $episode->{'icon'},
            'media:contenturl' => $contenturl,
            'media:contentlength' => "",
            'media:contentduration' => $episode->{'duration'},
            'media:contentwidth' => $width,
            'media:contentheight' => $height,
            'media:contentlanguage' => "",
            'rating' => "",
            'mythtv:country' => $country,
            'mythtv:season' => "",
            'mythtv:episode' => "",
            'mythtv:customhtml' => "no"
        });

        mnvinfomsg(2, "Added $title");
        $count ++;
    }
    return $count;
}
00343
#################################### Main #####################################
# If you copy this for another site, hopefully this section won't need to be
# changed.
getopts('vtTp:S:');

# -v: report the grabber's identity in the form the netvision version expects.
if ($opt_v) {
    ($mnvcommonsubs::netvisionver == 23) and print "$site|TS\n";
    ($mnvcommonsubs::netvisionver > 23) and mnvprintversion($site, $command, $author, $commandthumbnail, $version, $description);
    exit 0;
}

# -T prints the whole tree, -S <text> searches it (-p <n> selects the page).
my $type; my $page = 1; my $search = "";
if ($opt_T) {
    $type = "tree";
} elsif ($opt_S) {
    $type = "search";
    $search = $opt_S;
    ($opt_p) and $page = $opt_p;
} else {
    print STDERR "Must have -T or -S option\n";
    exit 1;
}

$SIG{'INT'} = \&mnvcleanexit;
$SIG{'HUP'} = \&mnvcleanexit;
$SIG{'TERM'} = \&mnvcleanexit;
$SIG{'QUIT'} = \&mnvcleanexit;

my $diritemsref = \%diritems;
my $totalitems = 0; my $filtereditems = 0;

# Reuse the results of a previous run while they are younger than 'cachetime'.
# NOTE(review): $store is a fixed path under /tmp and Storable should only be
# fed trusted data; consider a per-user cache location.
my $loaded = 0;
my $ss = stat($store);
if (($ss) && (time() - $ss->mtime) < mnvgetconfig('cachetime')) {
    # retrieve can die, or return undef on an I/O error. Either way fall
    # back to a fresh scrape instead of failing until the file ages out.
    my $cached = eval { retrieve($store); };
    if ($@ || ref($cached) ne 'HASH') {
        warn "Could not load store, rebuilding, $@";
    } else {
        $diritemsref = $cached;
        $totalitems = mnvnumresults($diritemsref);
        mnvinfomsg(1, "Using previous run data");
        $loaded = 1;
    }
}
unless ($loaded) {
    $totalitems = builddiritems($diritemsref, $baseurl);
    # Don't cache an empty result, it would hide the site's content for the
    # whole cache period.
    if ($totalitems > 0) {
        # store can die, or return undef on an I/O error.
        my $saved = eval { store($diritemsref, $store); };
        ($saved) or warn "Could not save store, $@";
    }
}

mnvrssheader();
print '<channel>
<title>'.$site.'</title>
<link>'.$baseurl.'</link>
<description>'.$description.'</description>'."\n";
if ($type eq "search") {
    $filtereditems = mnvfilter($diritemsref, $search);
    mnvprintsearch($diritemsref, $page);
    mnvinfomsg(1, "Total Items match $filtereditems of $totalitems");
} else {
    mnvprinttree($diritemsref, 4);
    mnvinfomsg(1, "Total Items found $totalitems");
}
print "</channel>\n";
mnvrssfooter();

mnvcleanexit 0;