Difference between most probable answer and answer on the most probable class
The following python notebook with the following distribution with n rows that the most probable answer and the answer on the most probable class are different (see the two last blocks). On this example with the sum as numerical query, the expected value and the answer on the most probable class is \(11 = 1 \times 6 + 2 \times 1 + 3 \times 1\), while the most probable answer is 10.
dist = { 1 : 0.75, 2 : 0.125, 3 : 0.125 } n = 8
import pprint # generate the possible worlds def possible_worlds(dist, n): worlds = [[]] for i in range(n): new_worlds = [] for w in worlds: for v in dist: if len(w) <= i: w.append(v) else: w[i] = v new_worlds.append(w.copy()) worlds = new_worlds return worlds print(len(possible_worlds(dist, n)))
6561
# compute a key for each world based on the values it contains def world_key(w): w.sort() count = 0 current = w[0] key = "" for i in range(len(w)): count+=1 if len(w)-1 == i or w[i+1] != current: key += "{}x{}".format(count, current) if len(w)-1 != i and w[i+1] != current: key += " " current = w[i+1] count = 0 return key # compute the probability of a world def world_prob(w, dist): prob = 1 for v in w: prob*=dist[v] return prob
# computes classes of worlds based on a function computing a key for each world # worlds are in the same class iff they have the same key def world_classes(dist, n, class_key=world_key): worlds = possible_worlds(dist, n) classes = {} for w in worlds: key = class_key(w) if key in classes: classes[key]["count"]+=1 classes[key]["class_prob"]+=world_prob(w, dist) classes[key]["possible_values"].add(world_key(w)) else: wp = world_prob(w, dist) classes[key] = { "world_ex": w, "possible_values": {world_key(w)}, "class_prob": wp, "count": 1 } return classes # the classes of possible worlds based on the function work_key pprint.pprint(world_classes(dist, n))
{'1x1 1x2 6x3': {'class_prob': 2.002716064453125e-05, 'count': 56, 'possible_values': {'1x1 1x2 6x3'}, 'world_ex': [1, 2, 3, 3, 3, 3, 3, 3]}, '1x1 2x2 5x3': {'class_prob': 6.008148193359375e-05, 'count': 168, 'possible_values': {'1x1 2x2 5x3'}, 'world_ex': [1, 2, 2, 3, 3, 3, 3, 3]}, '1x1 3x2 4x3': {'class_prob': 0.00010013580322265625, 'count': 280, 'possible_values': {'1x1 3x2 4x3'}, 'world_ex': [1, 2, 2, 2, 3, 3, 3, 3]}, '1x1 4x2 3x3': {'class_prob': 0.00010013580322265625, 'count': 280, 'possible_values': {'1x1 4x2 3x3'}, 'world_ex': [1, 2, 2, 2, 2, 3, 3, 3]}, '1x1 5x2 2x3': {'class_prob': 6.008148193359375e-05, 'count': 168, 'possible_values': {'1x1 5x2 2x3'}, 'world_ex': [1, 2, 2, 2, 2, 2, 3, 3]}, '1x1 6x2 1x3': {'class_prob': 2.002716064453125e-05, 'count': 56, 'possible_values': {'1x1 6x2 1x3'}, 'world_ex': [1, 2, 2, 2, 2, 2, 2, 3]}, '1x1 7x2': {'class_prob': 2.86102294921875e-06, 'count': 8, 'possible_values': {'1x1 7x2'}, 'world_ex': [1, 2, 2, 2, 2, 2, 2, 2]}, '1x1 7x3': {'class_prob': 2.86102294921875e-06, 'count': 8, 'possible_values': {'1x1 7x3'}, 'world_ex': [1, 3, 3, 3, 3, 3, 3, 3]}, '1x2 7x3': {'class_prob': 4.76837158203125e-07, 'count': 8, 'possible_values': {'1x2 7x3'}, 'world_ex': [2, 3, 3, 3, 3, 3, 3, 3]}, '2x1 1x2 5x3': {'class_prob': 0.0003604888916015625, 'count': 168, 'possible_values': {'2x1 1x2 5x3'}, 'world_ex': [1, 1, 2, 3, 3, 3, 3, 3]}, '2x1 2x2 4x3': {'class_prob': 0.0009012222290039062, 'count': 420, 'possible_values': {'2x1 2x2 4x3'}, 'world_ex': [1, 1, 2, 2, 3, 3, 3, 3]}, '2x1 3x2 3x3': {'class_prob': 0.001201629638671875, 'count': 560, 'possible_values': {'2x1 3x2 3x3'}, 'world_ex': [1, 1, 2, 2, 2, 3, 3, 3]}, '2x1 4x2 2x3': {'class_prob': 0.0009012222290039062, 'count': 420, 'possible_values': {'2x1 4x2 2x3'}, 'world_ex': [1, 1, 2, 2, 2, 2, 3, 3]}, '2x1 5x2 1x3': {'class_prob': 0.0003604888916015625, 'count': 168, 'possible_values': {'2x1 5x2 1x3'}, 'world_ex': [1, 1, 2, 2, 2, 2, 2, 3]}, '2x1 6x2': {'class_prob': 6.008148193359375e-05, 'count': 28, 'possible_values': {'2x1 6x2'}, 'world_ex': [1, 1, 2, 2, 2, 2, 2, 2]}, '2x1 6x3': {'class_prob': 6.008148193359375e-05, 'count': 28, 'possible_values': {'2x1 6x3'}, 'world_ex': [1, 1, 3, 3, 3, 3, 3, 3]}, '2x2 6x3': {'class_prob': 1.6689300537109375e-06, 'count': 28, 'possible_values': {'2x2 6x3'}, 'world_ex': [2, 2, 3, 3, 3, 3, 3, 3]}, '3x1 1x2 4x3': {'class_prob': 0.003604888916015625, 'count': 280, 'possible_values': {'3x1 1x2 4x3'}, 'world_ex': [1, 1, 1, 2, 3, 3, 3, 3]}, '3x1 2x2 3x3': {'class_prob': 0.00720977783203125, 'count': 560, 'possible_values': {'3x1 2x2 3x3'}, 'world_ex': [1, 1, 1, 2, 2, 3, 3, 3]}, '3x1 3x2 2x3': {'class_prob': 0.00720977783203125, 'count': 560, 'possible_values': {'3x1 3x2 2x3'}, 'world_ex': [1, 1, 1, 2, 2, 2, 3, 3]}, '3x1 4x2 1x3': {'class_prob': 0.003604888916015625, 'count': 280, 'possible_values': {'3x1 4x2 1x3'}, 'world_ex': [1, 1, 1, 2, 2, 2, 2, 3]}, '3x1 5x2': {'class_prob': 0.000720977783203125, 'count': 56, 'possible_values': {'3x1 5x2'}, 'world_ex': [1, 1, 1, 2, 2, 2, 2, 2]}, '3x1 5x3': {'class_prob': 0.000720977783203125, 'count': 56, 'possible_values': {'3x1 5x3'}, 'world_ex': [1, 1, 1, 3, 3, 3, 3, 3]}, '3x2 5x3': {'class_prob': 3.337860107421875e-06, 'count': 56, 'possible_values': {'3x2 5x3'}, 'world_ex': [2, 2, 2, 3, 3, 3, 3, 3]}, '4x1 1x2 3x3': {'class_prob': 0.02162933349609375, 'count': 280, 'possible_values': {'4x1 1x2 3x3'}, 'world_ex': [1, 1, 1, 1, 2, 3, 3, 3]}, '4x1 2x2 2x3': {'class_prob': 0.032444000244140625, 'count': 420, 'possible_values': {'4x1 2x2 2x3'}, 'world_ex': [1, 1, 1, 1, 2, 2, 3, 3]}, '4x1 3x2 1x3': {'class_prob': 0.02162933349609375, 'count': 280, 'possible_values': {'4x1 3x2 1x3'}, 'world_ex': [1, 1, 1, 1, 2, 2, 2, 3]}, '4x1 4x2': {'class_prob': 0.0054073333740234375, 'count': 70, 'possible_values': {'4x1 4x2'}, 'world_ex': [1, 1, 1, 1, 2, 2, 2, 2]}, '4x1 4x3': {'class_prob': 0.0054073333740234375, 'count': 70, 'possible_values': {'4x1 4x3'}, 'world_ex': [1, 1, 1, 1, 3, 3, 3, 3]}, '4x2 4x3': {'class_prob': 4.172325134277344e-06, 'count': 70, 'possible_values': {'4x2 4x3'}, 'world_ex': [2, 2, 2, 2, 3, 3, 3, 3]}, '5x1 1x2 2x3': {'class_prob': 0.0778656005859375, 'count': 168, 'possible_values': {'5x1 1x2 2x3'}, 'world_ex': [1, 1, 1, 1, 1, 2, 3, 3]}, '5x1 2x2 1x3': {'class_prob': 0.0778656005859375, 'count': 168, 'possible_values': {'5x1 2x2 1x3'}, 'world_ex': [1, 1, 1, 1, 1, 2, 2, 3]}, '5x1 3x2': {'class_prob': 0.0259552001953125, 'count': 56, 'possible_values': {'5x1 3x2'}, 'world_ex': [1, 1, 1, 1, 1, 2, 2, 2]}, '5x1 3x3': {'class_prob': 0.0259552001953125, 'count': 56, 'possible_values': {'5x1 3x3'}, 'world_ex': [1, 1, 1, 1, 1, 3, 3, 3]}, '5x2 3x3': {'class_prob': 3.337860107421875e-06, 'count': 56, 'possible_values': {'5x2 3x3'}, 'world_ex': [2, 2, 2, 2, 2, 3, 3, 3]}, '6x1 1x2 1x3': {'class_prob': 0.155731201171875, 'count': 56, 'possible_values': {'6x1 1x2 1x3'}, 'world_ex': [1, 1, 1, 1, 1, 1, 2, 3]}, '6x1 2x2': {'class_prob': 0.0778656005859375, 'count': 28, 'possible_values': {'6x1 2x2'}, 'world_ex': [1, 1, 1, 1, 1, 1, 2, 2]}, '6x1 2x3': {'class_prob': 0.0778656005859375, 'count': 28, 'possible_values': {'6x1 2x3'}, 'world_ex': [1, 1, 1, 1, 1, 1, 3, 3]}, '6x2 2x3': {'class_prob': 1.6689300537109375e-06, 'count': 28, 'possible_values': {'6x2 2x3'}, 'world_ex': [2, 2, 2, 2, 2, 2, 3, 3]}, '7x1 1x2': {'class_prob': 0.13348388671875, 'count': 8, 'possible_values': {'7x1 1x2'}, 'world_ex': [1, 1, 1, 1, 1, 1, 1, 2]}, '7x1 1x3': {'class_prob': 0.13348388671875, 'count': 8, 'possible_values': {'7x1 1x3'}, 'world_ex': [1, 1, 1, 1, 1, 1, 1, 3]}, '7x2 1x3': {'class_prob': 4.76837158203125e-07, 'count': 8, 'possible_values': {'7x2 1x3'}, 'world_ex': [2, 2, 2, 2, 2, 2, 2, 3]}, '8x1': {'class_prob': 0.1001129150390625, 'count': 1, 'possible_values': {'8x1'}, 'world_ex': [1, 1, 1, 1, 1, 1, 1, 1]}, '8x2': {'class_prob': 5.960464477539063e-08, 'count': 1, 'possible_values': {'8x2'}, 'world_ex': [2, 2, 2, 2, 2, 2, 2, 2]}, '8x3': {'class_prob': 5.960464477539063e-08, 'count': 1, 'possible_values': {'8x3'}, 'world_ex': [3, 3, 3, 3, 3, 3, 3, 3]}}
# the classes of possible worlds where a class contains the world having the same sum of values pprint.pprint(world_classes(dist, n, sum))
{8: {'class_prob': 0.1001129150390625, 'count': 1, 'possible_values': {'8x1'}, 'world_ex': [1, 1, 1, 1, 1, 1, 1, 1]}, 9: {'class_prob': 0.13348388671875, 'count': 8, 'possible_values': {'7x1 1x2'}, 'world_ex': [1, 1, 1, 1, 1, 1, 1, 2]}, 10: {'class_prob': 0.2113494873046875, 'count': 36, 'possible_values': {'6x1 2x2', '7x1 1x3'}, 'world_ex': [1, 1, 1, 1, 1, 1, 1, 3]}, 11: {'class_prob': 0.1816864013671875, 'count': 112, 'possible_values': {'6x1 1x2 1x3', '5x1 3x2'}, 'world_ex': [1, 1, 1, 1, 1, 1, 2, 3]}, 12: {'class_prob': 0.16113853454589844, 'count': 266, 'possible_values': {'4x1 4x2', '5x1 2x2 1x3', '6x1 2x3'}, 'world_ex': [1, 1, 1, 1, 1, 1, 3, 3]}, 13: {'class_prob': 0.10021591186523438, 'count': 504, 'possible_values': {'5x1 1x2 2x3', '4x1 3x2 1x3', '3x1 5x2'}, 'world_ex': [1, 1, 1, 1, 1, 2, 3, 3]}, 14: {'class_prob': 0.062064170837402344, 'count': 784, 'possible_values': {'3x1 4x2 1x3', '5x1 3x3', '2x1 6x2', '4x1 2x2 2x3'}, 'world_ex': [1, 1, 1, 1, 1, 3, 3, 3]}, 15: {'class_prob': 0.02920246124267578, 'count': 1016, 'possible_values': {'1x1 7x2', '2x1 5x2 1x3', '3x1 3x2 2x3', '4x1 1x2 3x3'}, 'world_ex': [1, 1, 1, 1, 2, 3, 3, 3]}, 16: {'class_prob': 0.0135384202003479, 'count': 1107, 'possible_values': {'1x1 6x2 1x3', '2x1 4x2 2x3', '3x1 2x2 3x3', '4x1 4x3', '8x2'}, 'world_ex': [1, 1, 1, 1, 3, 3, 3, 3]}, 17: {'class_prob': 0.004867076873779297, 'count': 1016, 'possible_values': {'1x1 5x2 2x3', '2x1 3x2 3x3', '3x1 1x2 4x3', '7x2 1x3'}, 'world_ex': [1, 1, 1, 2, 3, 3, 3, 3]}, 18: {'class_prob': 0.0017240047454833984, 'count': 784, 'possible_values': {'3x1 5x3', '1x1 4x2 3x3', '2x1 2x2 4x3', '6x2 2x3'}, 'world_ex': [1, 1, 1, 3, 3, 3, 3, 3]}, 19: {'class_prob': 0.0004639625549316406, 'count': 504, 'possible_values': {'2x1 1x2 5x3', '5x2 3x3', '1x1 3x2 4x3'}, 'world_ex': [1, 1, 2, 3, 3, 3, 3, 3]}, 20: {'class_prob': 0.00012433528900146484, 'count': 266, 'possible_values': {'2x1 6x3', '4x2 4x3', '1x1 2x2 5x3'}, 'world_ex': [1, 1, 3, 3, 3, 3, 3, 3]}, 21: {'class_prob': 2.3365020751953125e-05, 'count': 112, 'possible_values': {'1x1 1x2 6x3', '3x2 5x3'}, 'world_ex': [1, 2, 3, 3, 3, 3, 3, 3]}, 22: {'class_prob': 4.5299530029296875e-06, 'count': 36, 'possible_values': {'1x1 7x3', '2x2 6x3'}, 'world_ex': [1, 3, 3, 3, 3, 3, 3, 3]}, 23: {'class_prob': 4.76837158203125e-07, 'count': 8, 'possible_values': {'1x2 7x3'}, 'world_ex': [2, 3, 3, 3, 3, 3, 3, 3]}, 24: {'class_prob': 5.960464477539063e-08, 'count': 1, 'possible_values': {'8x3'}, 'world_ex': [3, 3, 3, 3, 3, 3, 3, 3]}}
# returns the class with the highest probability def most_probable_classes(classes): keys = [] prob = 0 for k in classes: if prob == classes[k]["class_prob"]: keys.append(k) if prob < classes[k]["class_prob"]: keys = [k] prob = classes[k]["class_prob"] return { "keys": keys, "prob": prob } # compute the most probable answers for a given aggregate function def most_probable_ans(dist, n, agg): classes = world_classes(dist, n, agg) answers = [] mc = most_probable_classes(classes) for k in mc["keys"]: answers.append({ "ans": k, "prob": mc["prob"], "possible_values": classes[k]["possible_values"] }) return answers print(most_probable_ans(dist, n, sum))
[{'ans': 10, 'prob': 0.2113494873046875, 'possible_values': {'6x1 2x2', '7x1 1x3'}}]
# compute the answer on a most probable class for a given aggregation function def most_correct_ans(dist, n, agg): classes = world_classes(dist, n) answers = [] mc = most_probable_classes(classes) for k in mc["keys"]: world_of_most_probable_class = classes[k]["world_ex"] answers.append({ "ans": agg(world_of_most_probable_class), "prob": mc["prob"], "possible_values": k }) return answers print(most_correct_ans(dist, n, sum))
[{'ans': 11, 'prob': 0.155731201171875, 'possible_values': '6x1 1x2 1x3'}]
The case with another distribution. It was almost a good example, but there are two most probable answers :
dist = { 0 : 0.5, 1 : 0.25, 2 : 0.25 } n = 4 print(most_correct_ans(dist, n, sum)) print(most_probable_ans(dist, n, sum))
[{'ans': 3, 'prob': 0.1875, 'possible_values': '2x0 1x1 1x2'}] [{'ans': 2, 'prob': 0.21875, 'possible_values': {'3x0 1x2', '2x0 2x1'}}, {'ans': 3, 'prob': 0.21875, 'possible_values': {'1x0 3x1', '2x0 1x1 1x2'}}]
import statistics dist = { 1 : 0.75, 2 : 0.0625, 3 : 0.0625, 4 : 0.0625, 5 : 0.0625, } d = { 1 : 0.2, 2 : 0.2, 3 : 0.2, 4 : 0.2, 5 : 0.2, } n = 8 print(most_correct_ans(dist, n, sum)) print(most_probable_ans(dist, n, sum))
[{'ans': 8, 'prob': 0.1001129150390625, 'possible_values': '8x1'}] [{'ans': 12, 'prob': 0.13521230220794678, 'possible_values': {'7x1 1x5', '6x1 1x2 1x4', '5x1 2x2 1x3', '4x1 4x2', '6x1 2x3'}}]
import numpy as np def asum(a): s = 0 for i in range(0, len(a)): s = s + a[i]* (i + 1) return s sums = np.apply_along_axis(asum, 1,np.random.multinomial(5, [0.75, 0.0625,0.0625,0.0625,0.0625], size=20)) counts = np.bincount(sums) print(np.argmax(counts))
8