「「2008年下半期ライトノベルサイト杯」杯」作成用スクリプト
「「2008年下半期ライトノベルサイト杯」杯」の順位を作るのに作った Python スクリプトを公開しておきます。
(Python 2.5)
# -*- coding: utf-8 -*-
# NOTE: the coding line above must stay on line 1 or 2 of the script file;
# Python 2 refuses non-ASCII literals without it.
"""Rank voting sites by how "unique" the titles they voted for are.

Downloads the book list and the vote list from BASEURL, gives every site
the sum of ``1 / (number of voters)`` over the titles it voted for, and
writes the resulting ranking to ``unique.htm``.

Written for Python 2.5; the code avoids anything that would stop it from
running on Python 3 as well.
"""
from __future__ import division

import sys
import urllib
from operator import itemgetter
from xml.sax.saxutils import escape

try:
    urlopen = urllib.urlopen                # Python 2
except AttributeError:
    from urllib.request import urlopen      # Python 3

BASEURL = 'http://ippo.dip.jp/lightnovel/lnsite2008last/vote/'

# Column names of one tab-separated row of ``data_book_infos``.
BOOK_FIELDS = (
    'code', 'isbn', 'title', 'author', 'illustrator', 'label', 'type',
    'date', 'dummy1', 'price', 'img', 'amazon', 'width', 'high', 'url',
    'dummy2',
)

HTML_TEMPLATE = u'''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>ユニークな作品に投票した人</title>
</head>
<body>
<p>「1/投票者数」を合計した値の順。</p>
<p>理論上最高(その投票者のみが投票しているもので10作全部が占められている)を「100%%」として表記。</p>
<ol>%s</ol>
</body>
</html>
'''


def fetch_lines(url):
    """Download *url*, decode it as UTF-8 and return its lines.

    Prints ``read: <url>`` first. The connection is closed even when
    reading fails.
    """
    sys.stdout.write('read: %s\n' % url)
    f = urlopen(url)
    try:
        raw = f.read()
    finally:
        f.close()
    return raw.decode('utf-8').splitlines()


def parse_books(lines):
    """Turn tab-separated book rows into ``{code: book_dict}``.

    Each book dict maps the BOOK_FIELDS names to the row's columns and
    carries an integer ``vote_count`` that starts at 0. Blank lines are
    skipped.
    """
    books = {}
    for line in lines:
        if not line:
            continue
        book = dict(zip(BOOK_FIELDS, line.split('\t')))
        # Set the counter explicitly so it is an int no matter how many
        # columns the row had.
        book['vote_count'] = 0
        books[book['code']] = book
    return books


def parse_votes(lines, books):
    """Tally tab-separated vote rows.

    Every row must have six columns: section, site name, site URL, book
    code, disable flag and a trailing unused column. Rows whose disable
    flag is a non-empty string are ignored, as are blank lines. Each
    counted vote increments ``vote_count`` on the matching dict in
    *books*.

    Returns ``{(name, url): [book_dict, ...]}``.
    Raises ValueError for a row with a different column count and
    KeyError for a book code missing from *books*.
    """
    votes = {}
    for line in lines:
        if not line:
            continue
        section, name, site_url, code, disable, dummy = line.split('\t')
        if disable:
            continue
        book = books[code]
        book['vote_count'] += 1
        votes.setdefault((name, site_url), []).append(book)
    return votes


def readbooks(baseurl):
    """Fetch ``data_book_infos`` below *baseurl* and parse it."""
    return parse_books(fetch_lines(baseurl + 'data_book_infos'))


def readvotes(baseurl, books):
    """Fetch ``data_votes`` below *baseurl* and tally it into *books*."""
    return parse_votes(fetch_lines(baseurl + 'data_votes'), books)


def score_sites(votes):
    """Return ``[(point, site, booklist), ...]``, highest point first.

    A site's point is the sum of ``1 / vote_count`` over its books, so it
    must be computed only after every vote has been tallied. Ties on
    point are ordered by site, descending.
    """
    data = []
    for site, booklist in votes.items():
        point = sum([1 / book['vote_count'] for book in booklist])
        data.append((point, site, booklist))
    # (point, site) is already unique, so the book lists never need to be
    # compared.
    data.sort(key=itemgetter(0, 1), reverse=True)
    return data


def rank_sites(data):
    """Group consecutive equal points of the sorted *data*.

    Returns ``[(rank, point, [(site, booklist), ...]), ...]`` where rank
    is the 1-based position of the first site of the group, so tied sites
    share a rank and the following rank is skipped.
    """
    ranking = []
    for position, (point, site, booklist) in enumerate(data):
        if not ranking or ranking[-1][1] != point:
            ranking.append((position + 1, point, []))
        ranking[-1][2].append((site, booklist))
    return ranking


def _esc(text):
    """Escape *text* for use in HTML text and double-quoted attributes."""
    return escape(text, {u'"': u'&quot;'})


def _book_link(book, baseurl, emphasize):
    """Return the link to a book's rank page, bold when *emphasize*."""
    title = _esc(book['title'])
    if emphasize:
        title = u'<strong>%s</strong>' % title
    return u'<a href="%sranks/%s">%s</a>' % (
        _esc(baseurl), _esc(book['code']), title)


def _site_item(site, booklist, baseurl):
    """Return the ``<li>`` describing one site and the books it voted for."""
    name, site_url = site
    unique = []     # books nobody else voted for
    others = []     # (vote_count, html) for the remaining books
    for book in booklist:
        voters = book['vote_count']
        if voters == 1:
            unique.append(_book_link(book, baseurl, True))
        else:
            others.append(
                (voters,
                 u'%s %d人' % (_book_link(book, baseurl, False), voters)))
    others.sort()   # least-voted titles first
    parts = []
    if unique:
        parts.append(u',<br />'.join(unique))
    if others:
        parts.append(
            u'<small>%s</small>'
            % u', '.join([text for _voters, text in others]))
    return (
        u'<li><dl><dt><a href="%s">%s</a> (%d タイトル)</dt>'
        u'<dd>%s</dd></dl></li>' % (
            _esc(site_url), _esc(name), len(booklist),
            u',<br />'.join(parts)))


def render_html(ranking, baseurl=BASEURL):
    """Render the output of rank_sites() as a complete XHTML page.

    The theoretical maximum point is 10 (ten titles nobody else voted
    for), so ``point * 10`` is shown as a percentage. All text taken from
    the vote data is HTML-escaped.
    """
    items = []
    for rank, point, sites in ranking:
        site_items = [
            _site_item(site, booklist, baseurl) for site, booklist in sites]
        items.append(
            u'<li value="%d">%.2f%%\n<ul>%s</ul></li>' % (
                rank, point * 10, u'\n'.join(site_items)))
    return HTML_TEMPLATE % u'\n'.join(items)


def main():
    """Download the data, build the ranking and write ``unique.htm``."""
    books = readbooks(BASEURL)
    votes = readvotes(BASEURL, books)
    html = render_html(rank_sites(score_sites(votes)))

    filename = 'unique.htm'
    sys.stdout.write('write: %s\n' % filename)
    # Binary mode: the page is encoded to UTF-8 bytes before writing.
    f = open(filename, 'wb')
    try:
        f.write(html.encode('utf-8'))
    finally:
        f.close()


if __name__ == '__main__':
    main()

# Feel free to reuse this script.
下位の方まで見たい人は実行してみて。
しかしこの下位の方(投票タイトル数自体が少ないが故の場合はさておき)10作フルに投票しているのにもかかわらず低いというのはメジャータイトルが好きということかそれとも……