てきとうなさいと べぇたばん

集合知プログラミングを読む 2章

TOP > てきとうにこらむ > ゲーム作りとプログラミング日記 > 集合知プログラミングを読む 2章


$ python
Python 2.7.5 (default, Mar  9 2014, 22:15:05)
[GCC 4.2.1 Compatible Apple LLVM 5.0 (clang-500.0.68)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> from recommendations import critics
>>> critics
{'Jack Matthews': {'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0, 'You, Me and Dupree': 3.5, 'Superman Returns': 5.0, 'The Night Listener': 3.0}, 'Mick LaSalle': {'Lady in the Water': 3.0, 'Snakes on a Plane': 4.0, 'Just My Luck': 2.0, 'Superman Returns': 3.0, 'You, Me and Dupree': 2.0, 'The Night Listener': 3.0}, 'Claudia Puig': {'Snakes on a Plane': 3.5, 'Just My Luck': 3.0, 'You, Me and Dupree': 2.5, 'Superman Returns': 4.0, 'The Night Listener': 4.5}, 'Lisa Rose': {'Lady in the Water': 2.5, 'Snakes on a Plane': 3.5, 'Just My Luck': 3.0, 'Superman Returns': 3.5, 'The Night Listener': 3.0, 'You, Me and Dupree': 2.5}, 'Toby': {'Snakes on a Plane': 4.5, 'Superman Returns': 4.0, 'You, Me and Dupree': 1.0}, 'Gene Seymour': {'Lady in the Water': 3.0, 'Snakes on a Plane': 3.5, 'Just My Luck': 1.5, 'Superman Returns': 5.0, 'You, Me and Dupree': 3.5, 'The Night Listener': 3.0}, 'Michael Phillips': {'Lady in the Water': 2.5, 'Snakes on a Plane': 3.0, 'Superman Returns': 3.5, 'The Night Listener': 4.0}}
>>> critics['Lisa Rose']
{'Lady in the Water': 2.5, 'Snakes on a Plane': 3.5, 'Just My Luck': 3.0, 'Superman Returns': 3.5, 'The Night Listener': 3.0, 'You, Me and Dupree': 2.5}
>>> critics['Lisa Rose']['Lady in the Water']



>>> from math import sqrt
>>> def dist(name1, name2, title1, title2):
...   return sqrt(pow(critics[name1][title1] - critics[name2][title1], 2) + pow(critics[name1][title2] - critics[name2][title2], 2))
>>> print dist('Toby', 'Mick LaSalle', 'Snakes on a Plane', 'You, Me and Dupree')


>>> def dist(name1, name2, title1, title2):
...     return 1 / (1 + sqrt(pow(critics[name1][title1] - critics[name2][title1], 2) + pow(critics[name1][title2] - critics[name2][title2], 2)))
>>> print dist('Toby', 'Mick LaSalle', 'Snakes on a Plane', 'You, Me and Dupree')

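ある人AさんとBさんについて、すべての製品をまとめた類似性を取りたいのであれば、二人がともに評価している製品それぞれの評点の差を二乗して足しあわせ、その平方根を取ればいい(ユークリッド距離)。なお、以下の sim_distance では平方根を取らず、二乗和をそのまま 1 / (1 + 二乗和) に入れて類似度としている。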

# Returns a distance-based similarity score for person1 and person2
def sim_distance(prefs,person1,person2):
    """Return a distance-based similarity score for person1 and person2.

    The score is ``1 / (1 + sum_of_squares)``, where ``sum_of_squares`` is the
    sum of squared rating differences over the items both people have rated.
    Note that no square root is taken of the sum.

    Args:
        prefs: Mapping of person -> mapping of item -> numeric rating.
        person1: Key of the first person in ``prefs``.
        person2: Key of the second person in ``prefs``.

    Returns:
        0 if the two people share no rated item; otherwise a float in
        (0, 1], where 1.0 means identical ratings on every shared item.

    Raises:
        KeyError: If ``person1`` is not in ``prefs``, or ``person2`` is not
            in ``prefs`` while ``person1`` has at least one rating.
    """
    mine = prefs[person1]
    shared = [item for item in mine if item in prefs[person2]]

    # Nothing in common: there is no basis for a similarity score.
    if not shared:
        return 0

    theirs = prefs[person2]
    sum_of_squares = sum(pow(mine[item] - theirs[item], 2) for item in shared)

    # 1.0 forces true division; under Python 2 a plain `1 / (1 + n)` with
    # integer ratings would truncate to 0.
    return 1.0 / (1 + sum_of_squares)


>>> import recommendations
>>> recommendations.sim_distance(critics, 'Toby', 'Mick LaSalle')
>>> recommendations.sim_distance(critics, 'Lisa Rose', 'Gene Seymour')
>>> recommendations.sim_distance(critics, 'Lisa Rose', 'Lisa Rose')



>>> recommendations.sim_pearson(critics, 'Lisa Rose', 'Gene Seymour')
>>> recommendations.sim_pearson(critics, 'Toby', 'Mick LaSalle')


  • Jaccard係数
  • マンハッタン距離


# Returns the best matches for person from the prefs dictionary. 
# Number of results and similarity function are optional params.
def topMatches(prefs,person,n=5,similarity=sim_pearson):
    """Return the n entries of prefs most similar to person.

    Args:
        prefs: Mapping of key -> mapping of item -> rating.
        person: Key in ``prefs`` to find matches for; it is excluded from
            the results.
        n: Maximum number of matches to return.
        similarity: Callable ``similarity(prefs, person, other)`` returning
            a comparable score, where a higher score means more similar.

    Returns:
        A list of at most ``n`` ``(score, other)`` tuples, ordered from the
        highest score to the lowest.
    """
    scores = [(similarity(prefs, person, other), other)
              for other in prefs if other != person]

    # Rank best-first; without this the slice below would just return the
    # first n entries in dictionary iteration order.
    scores.sort(reverse=True)

    return scores[0:n]

>>> recommendations.topMatches(recommendations.critics, 'Toby', n=3)
[(0.9912407071619299, 'Lisa Rose'), (0.9244734516419049, 'Mick LaSalle'), (0.8934051474415647, 'Claudia Puig')]


def getRecommendations(prefs,person,similarity=sim_pearson):
    """Recommend items for person from a similarity-weighted average of ratings.

    For every item that ``person`` has not rated (or has rated 0), the
    predicted score is the sum of ``rating * similarity`` over all other
    entries that rated it, divided by the sum of those similarities. Entries
    whose similarity to ``person`` is zero or negative are ignored.

    Args:
        prefs: Mapping of key -> mapping of item -> numeric rating.
        person: Key in ``prefs`` to produce recommendations for.
        similarity: Callable ``similarity(prefs, person, other)`` returning
            a numeric score, where a higher score means more similar.

    Returns:
        A list of ``(predicted_score, item)`` tuples, ordered from the
        highest predicted score to the lowest.
    """
    totals = {}
    simSums = {}
    for other in prefs:
        # Do not compare the person with themselves.
        if other == person:
            continue

        sim = similarity(prefs, person, other)

        # Ignore similarity scores of zero or lower.
        if sim <= 0:
            continue

        for item in prefs[other]:
            # Only score items the person has not rated yet.
            if item not in prefs[person] or prefs[person][item] == 0:
                # Similarity * rating.
                totals.setdefault(item, 0)
                totals[item] += prefs[other][item] * sim
                # Running sum of the similarities that contributed.
                simSums.setdefault(item, 0)
                simSums[item] += sim

    # Build the normalised list: weighted total divided by the total weight.
    rankings = [(total / simSums[item], item)
                for item, total in totals.items()]

    # Best predicted score first.
    rankings.sort(reverse=True)
    return rankings

>>> recommendations.getRecommendations(recommendations.critics, 'Toby')
[(3.3477895267131013, 'The Night Listener'), (2.8325499182641614, 'Lady in the Water'), (2.5309807037655645, 'Just My Luck')]



>>> movies = recommendations.transformPrefs(recommendations.critics)
>>> movies
{'Lady in the Water': {'Lisa Rose': 2.5, 'Jack Matthews': 3.0, 'Michael Phillips': 2.5, 'Gene Seymour': 3.0, 'Mick LaSalle': 3.0}, 'Snakes on a Plane': {'Jack Matthews': 4.0, 'Mick LaSalle': 4.0, 'Claudia Puig': 3.5, 'Lisa Rose': 3.5, 'Toby': 4.5, 'Gene Seymour': 3.5, 'Michael Phillips': 3.0}, 'Just My Luck': {'Claudia Puig': 3.0, 'Lisa Rose': 3.0, 'Gene Seymour': 1.5, 'Mick LaSalle': 2.0}, 'Superman Returns': {'Jack Matthews': 5.0, 'Mick LaSalle': 3.0, 'Claudia Puig': 4.0, 'Lisa Rose': 3.5, 'Toby': 4.0, 'Gene Seymour': 5.0, 'Michael Phillips': 3.5}, 'The Night Listener': {'Jack Matthews': 3.0, 'Mick LaSalle': 3.0, 'Claudia Puig': 4.5, 'Lisa Rose': 3.0, 'Gene Seymour': 3.0, 'Michael Phillips': 4.0}, 'You, Me and Dupree': {'Jack Matthews': 3.5, 'Mick LaSalle': 2.0, 'Claudia Puig': 2.5, 'Lisa Rose': 2.5, 'Toby': 1.0, 'Gene Seymour': 3.5}}
>>> recommendations.topMatches(movies, 'Lady in the Water')
[(0.7637626158259785, 'Snakes on a Plane'), (0.4879500364742689, 'Superman Returns'), (0.3333333333333333, 'You, Me and Dupree'), (-0.6123724356957927, 'The Night Listener'), (-0.9449111825230676, 'Just My Luck')]
>>> recommendations.topMatches(movies, 'Just My Luck')
[(0.5555555555555556, 'The Night Listener'), (-0.3333333333333333, 'Snakes on a Plane'), (-0.42289003161103106, 'Superman Returns'), (-0.4856618642571827, 'You, Me and Dupree'), (-0.9449111825230676, 'Lady in the Water')]

2014/11/03 14:29