Twitterから地デジの録画予約ができるようにする
使わせてもらいます
「SREngine: Sein blog: 【Python】 httpgetの実装」http://blog.srengine.com/2008/01/python-httpget.html
「数値文字参照から文字へ変換 - Python - Snipplr」http://snipplr.com/view.php?codeview&id=11344
recfriio.py
1 #!/usr/bin/python 2 # vim:fileencoding=utf-8 3 4 import time 5 import datetime 6 import re 7 import urllib 8 import urllib2 9 import twitter 10 from BeautifulSoup import BeautifulSoup 11 import htmlentitydefs 12 13 #urlopenモジュール 14 def httpget(address, user_agent='myagent'): 15 opener = urllib2.build_opener() 16 opener.addheaders = [('User-agent', user_agent)] 17 doc = opener.open(address).read() 18 return doc 19 20 def htmlentity2unicode(text): 21 # 正規表現のコンパイル 22 reference_regex = re.compile(u'&(#x?[0-9a-f]+|[a-z]+);', re.IGNORECASE) 23 num16_regex = re.compile(u'#x\d+', re.IGNORECASE) 24 num10_regex = re.compile(u'#\d+', re.IGNORECASE) 25 26 result = u'' 27 i = 0 28 while True: 29 # 実体参照 or 文字参照を見つける 30 match = reference_regex.search(text, i) 31 if match is None: 32 result += text[i:] 33 break 34 35 result += text[i:match.start()] 36 i = match.end() 37 name = match.group(1) 38 39 # 実体参照 40 if name in htmlentitydefs.name2codepoint.keys(): 41 result += unichr(htmlentitydefs.name2codepoint[name]) 42 # 文字参照 43 elif num16_regex.match(name): 44 # 16進数 45 result += unichr(int(u'0'+name[1:], 16)) 46 elif num10_regex.match(name): 47 # 10進数 48 result += unichr(int(name[1:])) 49 50 return result 51 52 def channel(c): 53 ch = {'DFS00400':'27', 'DFS00408':'26', 'DFS00410':'25', 'DFS00418':'22', 'DFS00420':'21', 'DFS00428': '24', 'DFS00420':'23', 'DFS05C38':'20', 'DFS00440':'28'} 54 55 return ch[c] 56 57 def getIepg(h): 58 global crontab 59 global tv 60 61 uri = re.search('<item rdf:about="http://tv.so-net.ne.jp/schedule/(.*?)\.', h).group(1) 62 iepg = httpget('http://tv.so-net.ne.jp/iepg.tvpid?id=' + uri) 63 elements = iepg.decode('shift_jis') 64 title = re.search('title: (.*?)\n', elements).group(1) 65 sub = re.search('subtitle: (.*?)\n', elements) 66 month = re.search('month: (.*?)\n', elements).group(1) 67 date = re.search('date: (.*?)\n', elements).group(1) 68 start_h = re.search('start: (.*?):', elements).group(1) 69 start_m = re.search('start: ..:(.*?)\n', 
elements).group(1) 70 end_h = re.search('end: (.*?):', elements).group(1) 71 end_m = re.search('end: ..:(.*?)\n', elements).group(1) 72 station = re.search('station: (.*?)\n', elements).group(1) 73 74 subtitle = '' 75 if sub != None: 76 subtitle = sub.group(1) 77 78 hours = str(datetime.datetime(2009,1,1,int(end_h),int(end_m)) - datetime.datetime(2009,1,1,int(start_h ),int(start_m))) 79 minutes = int(re.search('([0-9]+?):', hours).group(1))*3600 80 seconds = int(re.search('.*?:(.*?):', hours).group(1))*60 81 82 tv = title[:-1] 83 84 cron = start_m[:-1] + ' ' + start_h + ' ' + date[:-1] + ' ' + month[:-1] + ' * /Applications/recfriio-018/recfriio -c ' + channel(station[:-1]) + ' -s ' + str(minutes + seconds) + ' -f /Volumes/HitachiNov09n1 /friio/' + title[:-1] + subtitle[:-1] + '.ts\n' 85 crontab += cron 86 87 def search(keyword): 88 global tv 89 global log 90 rss = httpget('http://tv.so-net.ne.jp/rss/schedulesBySearch.action?stationPlatformId=1&condition.keywo rd=' + keyword) 91 l = len(re.split('<rdf:li', rss)) 92 if l == 1: 93 log += '1' + '/' 94 else: 95 getIepg(rss) 96 log += tv + '/' 97 98 def main(): 99 crontab = '' #crontab設定ファイル 100 key = [] #検索用キーワード 101 tv = '' #録画タイトル 102 log = 'LOG:' #post用ログ 103 104 file = open('recfriio_status_id.txt', 'r') 105 status_id = file.readline() 106 107 xml = httpget('http://twitter.com/statuses/user_timeline/n000dle.xml?since_id=' + status_id) 108 soup = BeautifulSoup(xml) 109 tweets = soup.findAll('text') 110 id = soup.findAll('id') 111 112 if id: 113 status_id = re.search('<id>(.*?)</id>', str(id[0])).group(1) 114 for s in tweets: 115 match = re.search(u'<text>(.*?)録画', s.encode('utf-8')) 116 if match != None: 117 key.append(match.group(1)) 118 119 if key: 120 for k in key: 121 g = htmlentity2unicode(k) 122 search(urllib.quote(g.encode('utf-8','replace').replace('?', ' '))) 123 124 api = twitter.Api(' ',' ') 125 status = api.PostUpdate(log) 126 127 q = '' 128 for line in open('.cronpython','r'): 129 print line 130 if 
re.search('\.ts', line): 131 q += line 132 file.close() 133 134 file = open('.cronpython', 'w') 135 file.write('*/10 * * * * python recfriio.py\n' + q + crontab.encode('utf-8') + '* * * * * crontab .cronpython\n') 136 file.close() 137 138 file = open('recfriio_status_id.txt', 'w') 139 file.write(status_id) 140 file.close() 141 142 if __name__ == '__main__': 143 main() 144