「「2008年下半期ライトノベルサイト杯」杯」作成用スクリプト

「「2008年下半期ライトノベルサイト杯」杯」 - つちのこ、のこのこ。(はてな番外地)

の順位を作るのに作った Python スクリプトを公開しておきます。
(Python 2.5)

from __future__ import division
import cgi
import urllib

# Base URL of the vote site.  The data file names ('data_book_infos',
# 'data_votes') and the per-book 'ranks/<code>' links are appended to it.
BASEURL = 'http://ippo.dip.jp/lightnovel/lnsite2008last/vote/'

def readbooks(baseurl):
    url = baseurl + 'data_book_infos'
    print 'read:', url
    f = urllib.urlopen(url)
    books = {}
    for i in unicode(f.read(), 'utf').splitlines():
        d = dict(zip(
            ['code', 'isbn', 'title', 'author', 'illustrator',
             'label', 'type', 'date', 'dummy1', 'price', 'img',
             'amazon', 'width', 'high', 'url', 'dummy2', 'vote_count']
            ,i.split('\t') + [0]))
        books[d['code']] = d
    f.close()
    return books

# Download the book table: book code -> record dict.
books = readbooks(BASEURL)

def readvotes(baseurl, books):
    url = baseurl + 'data_votes'
    print 'read:', url
    f = urllib.urlopen(url)
    votes = {}
    for i in unicode(f.read(), 'utf').splitlines():
        section, name, url, code, disable, dummy = i.split('\t')
        if disable:
            continue
        book = books[code]
        book['vote_count'] += 1
        site = name, url
        if site in votes:
            votes[site].append(book)
        else:
            votes[site] = [book]
    f.close()
    return votes

# Download the votes grouped per site; this also fills in each book's
# 'vote_count' inside `books`.
votes = readvotes(BASEURL, books)

# Score every voting site: each book it voted for contributes
# 1 / (number of votes that book received), so rarely chosen titles weigh
# more.  `/` is true division because of the __future__ import.
data = [
    (sum(1 / entry['vote_count'] for entry in titles), voter, titles)
    for voter, titles in votes.items()
]
# Highest score first.  Every tuple holds a distinct site key, so no two
# tuples compare equal and this matches an ascending sort followed by a
# reverse.
data.sort(reverse=True)

# Group the sorted sites into ranks.  Each entry appended to `d` is
# (rank, score, [(site, booklist), ...]).  Sites with the same score share
# one rank, and the next rank is the 1-based position of its first site,
# so ranks run like 1, 2, 2, 4.
#
# Scores are sums of floating-point reciprocals, so two sites with the same
# mathematical score can differ in the last bits depending on summation
# order.  Ties are therefore detected with a small tolerance rather than
# exact inequality.
SAME_POINT_EPSILON = 1e-9

d = []
now_point = None
now_count = count = 0
same_point_list = None
for point, site, booklist in data:
    count += 1
    if now_point is None or abs(point - now_point) > SAME_POINT_EPSILON:
        # A new score starts: flush the previous tie group, if any.
        if same_point_list is not None:
            d.append((now_count, now_point, same_point_list))
        same_point_list = []
        now_point = point
        now_count = count
    same_point_list.append((site, booklist))
# Flush the last group, which the loop never reaches.
if same_point_list:
    d.append((now_count, now_point, same_point_list))

# Render each rank as an <li> holding a nested list of the sites at that
# rank.  For every site, books only that site voted for are shown in bold,
# one per line; the remaining books follow in <small>, ordered by vote
# count.
#
# Titles, book codes, site names and site URLs come from downloaded,
# user-submitted data, so they are HTML-escaped before being placed into
# the markup.  This keeps the XHTML well-formed and prevents markup
# injection.
# NOTE(review): escaping does not validate the URL scheme of the site link
# (e.g. "javascript:"); filter it if the data source is not trusted.
datahtml = []
for count, point, same_point_list in d:
    ul = []
    for (name, url), booklist in same_point_list:
        unique = []
        others = []
        for book in booklist:
            code = cgi.escape(book['code'], True)
            title = cgi.escape(book['title'])
            if book['vote_count'] == 1:
                unique.append(
                    u'<a href="%sranks/%s"><strong>%s</strong></a>' % (
                        BASEURL, code, title))
            else:
                others.append(
                    (book['vote_count'],
                     u'<a href="%sranks/%s">%s</a> %d人' % (
                         BASEURL, code, title, book['vote_count'])))
        # Least-voted (most distinctive) shared titles first.
        others.sort()
        b = []
        if unique:
            b.append(',<br />'.join(unique))
        if others:
            b.append('<small>%s</small>' % ', '.join([x for c, x in others]))
        ul.append(
            u'<li><dl><dt><a href="%s">%s</a> (%d タイトル)</dt><dd>%s</dd></dl></li>' % (
                cgi.escape(url, True), cgi.escape(name), len(booklist),
                ',<br />'.join(b)))
    # point * 10 is the score as a percentage of the maximum of 10 (ten
    # titles that nobody else voted for).
    datahtml.append(
        u'<li value="%d">%.2f%%\n<ul>%s</ul></li>' % (
            count, point * 10, '\n'.join(ul)))

html = u'''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>ユニークな作品に投票した人</title>
</head>
<body>
<p>「1/投票者数」を合計した値の順。</p>
<p>理論上最高(その投票者のみが投票しているもので10作全部が占められている)を「100%%」として表記。</p>

<ol>%s</ol>

</body>''' % '\n'.join(datahtml)
filename = 'unique.htm'
print 'write:', filename
f = open(filename, 'w')
f.write(html.encode('utf'))
f.close()
# 好きに流用してください。

下位の方まで見たい人は実行してみて。

しかしこの下位の方、(投票タイトル数自体が少ないが故の場合はさておき)10作フルに投票しているにもかかわらず低いというのは、メジャータイトルが好きということか、それとも……

参考: 発行部数というのも一桁くらい平気でサバよんでるよね*1 - つちのこ、のこのこ。(はてな番外地)