Scheduling digital terrestrial TV recordings from Twitter

recfriio

How it works

  • Pull search keywords out of my own posts and fetch the iEPG data from テレビ王国 (tv.so-net.ne.jp) — the goo TV listings will get you banned
  • Generate a crontab configuration file and have cron run the friio recording command (a sketch of the generated file follows this list)
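
For reference, the regenerated .cronpython ends up shaped like this. The first and last lines are fixed by the script, each middle line is one recording; the time, channel, duration and title below are made up:

    */10 * * * * python recfriio.py
    30 21 14 11 * /Applications/recfriio-018/recfriio -c 27 -s 3600 -f /Volumes/HitachiNov09n1/friio/<title><subtitle>.ts
    * * * * * crontab .cronpython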

Known issues

  • Couldn't get the character-encoding handling right, so it's brute-forced
  • Couldn't get the RSS handling right either, so that's brute-forced too (a cleaner alternative is sketched after this list)
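
If I ever redo the RSS part properly, something like the sketch below should replace the re.split / re.search brute force. Untested; it assumes the RSS 1.0 feed returned by schedulesBySearch.action with <item rdf:about="http://tv.so-net.ne.jp/schedule/<id>..."> elements, which is the same structure the current regex relies on.

    # Sketch only, not used by recfriio.py: parse the search RSS with ElementTree
    # instead of splitting the raw string with regexes.
    import xml.etree.ElementTree as ET

    RSS_NS = 'http://purl.org/rss/1.0/'
    RDF_NS = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'

    def schedule_ids(rss):
        root = ET.fromstring(rss)
        ids = []
        for item in root.findall('.//{%s}item' % RSS_NS):
            about = item.get('{%s}about' % RDF_NS)
            if about and 'schedule/' in about:
                # the schedule id sits between 'schedule/' and the first '.'
                ids.append(about.split('schedule/')[1].split('.')[0])
        return ids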

Borrowing these

"SREngine: Sein blog: [Python] Implementing httpget" http://blog.srengine.com/2008/01/python-httpget.html
"Converting numeric character references to characters - Python - Snipplr" http://snipplr.com/view.php?codeview&id=11344

recfriio.py

#!/usr/bin/python
# vim:fileencoding=utf-8

import time
import datetime
import re
import urllib
import urllib2
import twitter
from BeautifulSoup import BeautifulSoup
import htmlentitydefs

# Simple HTTP GET helper: urllib2 opener with a custom User-Agent
def httpget(address, user_agent='myagent'):
    opener = urllib2.build_opener()
    opener.addheaders = [('User-agent', user_agent)]
    doc = opener.open(address).read()
    return doc

def htmlentity2unicode(text):
    # Compile the regular expressions
    reference_regex = re.compile(u'&(#x?[0-9a-f]+|[a-z]+);', re.IGNORECASE)
    num16_regex = re.compile(u'#x\d+', re.IGNORECASE)
    num10_regex = re.compile(u'#\d+', re.IGNORECASE)

    result = u''
    i = 0
    while True:
        # Find the next named entity or numeric character reference
        match = reference_regex.search(text, i)
        if match is None:
            result += text[i:]
            break

        result += text[i:match.start()]
        i = match.end()
        name = match.group(1)

        # Named entity reference
        if name in htmlentitydefs.name2codepoint.keys():
            result += unichr(htmlentitydefs.name2codepoint[name])
        # Numeric character reference
        elif num16_regex.match(name):
            # Hexadecimal
            result += unichr(int(u'0'+name[1:], 16))
        elif num10_regex.match(name):
            # Decimal
            result += unichr(int(name[1:]))

    return result

def channel(c):
    # Map the iEPG station id (DFS....) to the physical channel number passed to recfriio
    # NOTE: the key 'DFS00420' is listed twice; in Python the later value ('23') wins
    ch = {'DFS00400':'27', 'DFS00408':'26', 'DFS00410':'25', 'DFS00418':'22', 'DFS00420':'21', 'DFS00428':'24', 'DFS00420':'23', 'DFS05C38':'20', 'DFS00440':'28'}

    return ch[c]

def getIepg(h):
    global crontab
    global tv

    # Take the first schedule id out of the RSS and fetch its iEPG record
    uri = re.search('<item rdf:about="http://tv.so-net.ne.jp/schedule/(.*?)\.', h).group(1)
    iepg = httpget('http://tv.so-net.ne.jp/iepg.tvpid?id=' + uri)
    elements = iepg.decode('shift_jis')
    # The iEPG body appears to use CRLF line endings; the [:-1] slices below drop the trailing '\r'
    title = re.search('title: (.*?)\n', elements).group(1)
    sub = re.search('subtitle: (.*?)\n', elements)
    month = re.search('month: (.*?)\n', elements).group(1)
    date = re.search('date: (.*?)\n', elements).group(1)
    start_h = re.search('start: (.*?):', elements).group(1)
    start_m = re.search('start: ..:(.*?)\n', elements).group(1)
    end_h = re.search('end: (.*?):', elements).group(1)
    end_m = re.search('end: ..:(.*?)\n', elements).group(1)
    station = re.search('station: (.*?)\n', elements).group(1)

    subtitle = ''
    if sub != None:
        subtitle = sub.group(1)

    # Recording length: end minus start, converted to seconds
    # ('minutes' actually holds hours*3600 and 'seconds' holds minutes*60; their sum is the duration in seconds)
    hours = str(datetime.datetime(2009,1,1,int(end_h),int(end_m)) - datetime.datetime(2009,1,1,int(start_h),int(start_m)))
    minutes = int(re.search('([0-9]+?):', hours).group(1))*3600
    seconds = int(re.search('.*?:(.*?):', hours).group(1))*60

    tv = title[:-1]

    # One crontab line: "minute hour day month * recfriio -c <channel> -s <seconds> -f <output>.ts"
    cron = start_m[:-1] + ' ' + start_h + ' ' + date[:-1] + ' ' + month[:-1] + ' * /Applications/recfriio-018/recfriio -c ' + channel(station[:-1]) + ' -s ' + str(minutes + seconds) + ' -f /Volumes/HitachiNov09n1/friio/' + title[:-1] + subtitle[:-1] + '.ts\n'
    crontab += cron

def search(keyword):
    global tv
    global log
    # Search テレビ王国 for the keyword and record the first hit, if any
    rss = httpget('http://tv.so-net.ne.jp/rss/schedulesBySearch.action?stationPlatformId=1&condition.keyword=' + keyword)
    l = len(re.split('<rdf:li', rss))
    if l == 1:
        # no <rdf:li> entries means no search results
        log += '1' + '/'
    else:
        getIepg(rss)
        log += tv + '/'

def main():
    # getIepg() and search() update these via 'global', so declare them here too
    global crontab
    global tv
    global log

    crontab = ''       # crontab configuration being built up
    key = []           # search keywords
    tv = ''            # recorded programme title
    log = 'LOG:'       # log text for the status update posted back to Twitter

    file = open('recfriio_status_id.txt', 'r')
    status_id = file.readline()
    file.close()

    # My own timeline since the last processed status id
    xml = httpget('http://twitter.com/statuses/user_timeline/n000dle.xml?since_id=' + status_id)
    soup = BeautifulSoup(xml)
    tweets = soup.findAll('text')
    id = soup.findAll('id')

    if id:
        status_id = re.search('<id>(.*?)</id>', str(id[0])).group(1)
        for s in tweets:
            # &#37682;&#30011; is the decimal character reference for 録画 ("record");
            # everything before it in the tweet becomes the search keyword
            match = re.search(u'<text>(.*?)&#37682;&#30011;', s.encode('utf-8'))
            if match != None:
                key.append(match.group(1))

    if key:
        for k in key:
            g = htmlentity2unicode(k)
            search(urllib.quote(g.encode('utf-8','replace').replace('?', ' ')))

        api = twitter.Api('      ','      ')   # credentials omitted
        status = api.PostUpdate(log)

        # Keep the recording entries already present in the crontab file
        q = ''
        for line in open('.cronpython','r'):
            print line
            if re.search('\.ts', line):
                q += line

        # Rewrite .cronpython: re-run this script every 10 minutes, keep the old and
        # new recording entries, and reinstall the crontab every minute
        file = open('.cronpython', 'w')
        file.write('*/10 * * * * python recfriio.py\n' + q + crontab.encode('utf-8')
                   + '* * * * * crontab .cronpython\n')
        file.close()

    file = open('recfriio_status_id.txt', 'w')
    file.write(status_id)
    file.close()

if __name__ == '__main__':
    main()

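To get it going: recfriio_status_id.txt has to exist with a recent status id, .cronpython has to exist before the first keyword matches, and cron only picks the generated file up once it has been installed with crontab .cronpython — from then on the */10 line re-runs the script and the last line keeps the installed crontab current. A reservation is then just a post ending in 録画, e.g. 「番組タイトル 録画」 (any title followed by 録画): everything before 録画 becomes the keyword searched on テレビ王国.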