Hey everyone,
I stumbled on this thread and really like the idea of being able to auto download movies. I've tried using the most recent script that I can find here (from pilGrim), but I'm running into errors.
I know nothing about programming, so I'm hoping I can find some help here.
Here is the output that I am getting:
Code: Select all
218 existing movies
watching for 0 movies
2010-06-24 22:22:15: xxxxxFILENAME1xxxx
No IMDB link - Skipping
2010-06-24 22:22:15: xxxxFILENAME2xxxxx
Traceback (most recent call last):
File "/home/htpc/movie_script/movie_script_RT.py", line 158, in <module>
optimized_movie_title=optimized_movie_title.group()
AttributeError: 'NoneType' object has no attribute 'group'
Here is the exact code that I used after making my modifications:
Code: Select all
import urllib
import xml.dom.minidom
import re
import time
import os
import sys
##########
# MOVIES #
##########
#This script will scan a NZBS.org movie feed and test for movie score, year and if you currently have the movie, if the movie is in the process of a dl
#If conditions are true the script will feed the movie to SABnzbd for download.
#Currently set to download IMDB Score of > 75 with > 1,000 votes or Rotton Tomato Score > 75, year is set to > 1995
#You can change these settings in line 109, 185 & 195
#What you need to run: SABnzbd & Python 2.6. Once installed you can set the script to run on a schedule or just double click.
#A good debug python tool like ActiveState Komodo is helpful
#The script works with NZBS.org news feeds. Create a NZBS.org feed and then use feedburner to process your feed to ensure all works properly.
#You can find scripts that work with newzbin.com here: http://forums.sabnzbd.org/index.php?topic=1335.0
#The most complete TV Script can be found here: http://forums.sabnzbd.org/index.php?topic=1786.0
#This script is a blend of features found in preveously posted scripts. Many thanks to all original posters who conrtibuted parts to the feature set:
#popcornarsonist, xxhds, binhex, doubledrat, Cuchulainn
#Maintained by pilGrim
#Version 1.1
################################### change to your own values #####################################
# set this to the location of your downloads. the first entry must be your live download area
movie_dirs= ["/media/PRORAID_/Movies"]
# a directory that will hold string to match against movies that you want no matter what score they get
watch_dir="/media/PRORAID_/Watched"
# uncomment the following if you don't want to search for a particular set of movies
#watch_dir=""
# your SABNZB address
my_SAB="http://localhost:8080"
# the RSS link to your search
url="http://nzbs.org/rss.php?catid=4&i=xxxxx&h=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx&dl=1&num=100"
################################### change to your own values #####################################
# Search IMDB for movie, bases on the script by Switch
class IMDB:
def __init__(self, config):
# Credit for these expressions go to XBMC.org
self.re = re.compile('><a href="/title/([t0-9]*)/[^>]*>([^<]*)</a> *\(([0-9]*)')
self.re_alt1 = re.compile('<title>([^\(]*)\((\d{4})')
self.re_alt2 = re.compile('href="/title/tt([0-9]*)/fullcredits"')
self.url = 'http://akas.imdb.com/find?s=ttl&q=%s'
self.config = config
def _search(self, data):
''' Executes imdb search and returns first result, if present, in the form of ttID, name, year '''
match = self._match_search_page(data)
if not match:
match = self._match_redirect(data)
return match
def _match_search_page(self, data):
matches = re.findall(self.re, data)
if matches:
return matches[0]
return matches
def _match_redirect(self, data):
m1 = re.search(self.re_alt1, data)
m2 = re.search(self.re_alt2, data)
if m1 and m2:
return (m2.group(1), m1.group(1), m1.group(2))
else:
return '','',''
movie_list = []
for dir in movie_dirs:
for movie in os.listdir(dir):
movie_list.append(movie)
print str(len(movie_list))+" existing movies"
watch_list = []
if len(watch_dir)>0:
watch_list = os.listdir(watch_dir)
print "watching for "+str(len(watch_list))+" movies"
def removeIllegalChars(origstring):
if sys.platform == "win32":
newstring=re.sub("[\\?,]", ".", origstring)
newstring=re.sub("[:]", ";", newstring)
else:
newstring=re.sub("[/?,]", ".", origstring)
# print origstring+" becomes "+newstring
return newstring
def isGoodMovie(imdb_html):
#This is set up to only get movies above an 8.0, with over 1000 votes
if re.compile("<b>[[7]\.[6-9]|[8-9]\.[0-9]]/10</b>").search(imdb_html) and re.compile(">[0-9]*,[0-9][0-9][0-9] votes<").search(imdb_html):
return 1
else:
return 0
def rottenLink2(name):
tomato_link="http://www.rottentomatoes.com/search/full_search.php?search="+name
sock = urllib.urlopen(tomato_link)
htmlSource = sock.read()
sock.close()
rotten_link2 = re.compile("href=\"\/m\/.+?\>").search(htmlSource)
if rotten_link2==None:
return "error"
else:
rotten_link2= re.sub("href=\"","",rotten_link2.group())
rotten_link2= re.sub("\>","",rotten_link2)
rotten_link2="http://www.rottentomatoes.com"+rotten_link2
return rotten_link2
dom=xml.dom.minidom.parse(urllib.urlopen(url))
# go through each entry returned from the above newzbin search
for node in dom.getElementsByTagName("item"):
report_title=node.getElementsByTagName("title")[0].childNodes[0].data
report_title = re.sub("\;","\:",report_title)
report_title = re.sub("\s","\:",report_title)
legal_movie_title = removeIllegalChars(report_title)
print ""
print time.strftime("%Y-%m-%d %H:%M:%S")+": "+ report_title
report_description=node.getElementsByTagName("description")[0].childNodes[0].data
movie_desc = re.search("IMDB:.+\/\"\>", report_description)
if movie_desc:
movie_link = re.search("http.+\/",movie_desc.group())
imdb_link=movie_link.group()
# print(imdb_link)
sock = urllib.urlopen(imdb_link)
htmlSource = sock.read()
sock.close()
# get the movie code from the imdb_link
i = IMDB({})
imdb_code, name, year = i._search(htmlSource)
imdb_rating=re.compile("<b>?/10</b>").search(htmlSource)
optimized_movie_title=re.compile(".+[^\s]").search(name)
optimized_movie_title=optimized_movie_title.group()
optimized_movie_title=re.sub("\s"," ",optimized_movie_title)
optimized_movie_title=re.sub(":","",optimized_movie_title)
optimized_movie_title=re.sub(",","",optimized_movie_title)
rotten_link="http://www.rottentomatoes.com/alias/?type=imdbid&s="+imdb_code
rotten_link2=rottenLink2(name)
sock=urllib.urlopen(rotten_link)
rotten_source=sock.read()
sock.close()
rotten_rating=re.compile("<span.+?percent.+?>.+?<").search(rotten_source)
if (rotten_rating==None):
if (rotten_link2<>"error"):
sock=urllib.urlopen(rotten_link2)
rotten_source=sock.read()
sock.close()
rotten_rating=re.compile("<span.+?percent.+?>.+?<").search(rotten_source)
if (rotten_rating==None):
rotten_rating="00"
else:
rotten_rating=re.sub(".+percent.+?\>","",rotten_rating.group())
rotten_rating=re.sub("\<","",rotten_rating)
print "Tomatometer: " + rotten_rating
print "IMDB rating OVER 75: ", isGoodMovie(htmlSource) == 1
if ((isGoodMovie(htmlSource) == 1) or (int(rotten_rating) > 75) ==1):
print "Good Rating? YES"
download = 1
else:
print "Good Rating? NO, Skipping"
download = 0
#Year check. Only download if newer than year set
if download == 1:
if int(year) > 1995:
download = 1
print "Newer than 1995? YES"
else:
download = 0
print "Newer than 1995? NO, Skipping"
# doesn't get a good imdb score, but am I watching for it?
if download == 0:
for watch_file in watch_list:
regex_watch = re.compile(".*"+watch_file+".*", re.IGNORECASE)
if re.match(regex_watch, optimized_movie_title):
print optimized_movie_title+" has a low score, but is one your watch list - you're the boss!"
download = 1
break
if download == 1:
# it's worth a download but do we have it already?
regex_movie_title_string = removeIllegalChars(optimized_movie_title)
regex_movie_title = re.compile(regex_movie_title_string)
#regex_movie_year = re.compile(".*\([0-9][0-9][0-9][0-9]\).*")
print "Do we have : \"" + regex_movie_title_string+"\"",
for movie_file in movie_list:
if re.match(regex_movie_title, movie_file):
print " - YES have it, Skipping"
download = 0
break
if download == 1:
print " - NO Download it"
#we want it, we don't have it, but is it in the queue already?
queue=xml.dom.minidom.parse(urllib.urlopen(my_SAB+"/sabnzbd/api?mode=qstatus&output=xml"))
for job in queue.getElementsByTagName("job"):
filename=job.getElementsByTagName("filename")[0].childNodes[0].data
if filename == report_title:
print "Already in queue - Skipping"
download = 0
if download == 1:
print time.strftime("%Y-%m-%d %H:%M:%S")+": ++++++ added "+ optimized_movie_title+" ++++++"
movie_list.append(legal_movie_title)
msgid=node.getElementsByTagName("link")[0].childNodes[0].data
msgid = urllib.quote(msgid)
urllib.urlopen(my_SAB+"/sabnzbd/api?mode=addurl&name="+msgid+"&cat=movies")
else:
print "No IMDB link - Skipping"
dom.unlink()
Any help would be appreciated.
Thanks!!!