Great Deal! Get Instant $10 FREE in Account on First Order + 10% Cashback on Every Order Order Now

""" This code provides a basic skeleton for the stationary bandit code. It should be adapted by the students for their work. """ import csv, random class BanditSet(object): """ This object represents...

1 answer below »
"""
This code provides a basic skeleton for the
stationary bandit code. It should be adapted
by the students for their work.
"""
import csv, random
class BanditSet(object):
    """
    This object represents a set of arms for a stationary multi-armed
    bandit problem.  It stores a fixed set of arm names together with
    one weight and one selection probability per arm, and keeps them
    across multiple iterations over the data rows.

    This is a skeleton: the per-arm methods are stubs that are meant
    to be filled in.
    """

    def __init__(self, DataRows, ArmNames, ExpRate,
                 DistribParam, DecayRate, RewardWeight):
        """
        Store the data rows, arm names and tuning parameters, and set
        up the starting weight and probability of every arm.

        DataRows     -- the rows that handleRows() iterates over.
        ArmNames     -- one name per arm; must not be empty, because
                        the starting probability is 1 / len(ArmNames).
        ExpRate      -- stored as self.ExplorationRate.
        DistribParam -- stored as self.DistributionParameter.
        DecayRate    -- stored as self.DecayRate.
        RewardWeight -- stored as self.RewardWeight.
        """
        # Store the Data for later use.
        self.Data = DataRows

        # Initialize the parameters.
        self.ExplorationRate = ExpRate
        self.DistributionParameter = DistribParam
        self.DecayRate = DecayRate
        self.RewardWeight = RewardWeight

        # Store items for each of the arms.
        self.Names = ArmNames

        # Store a list for the weights (-1 marks "not yet computed").
        self.Weights = [-1 for I in range(len(ArmNames))]

        # Calculate the starting probability and add it.
        StartProb = 1 / float(len(ArmNames))
        self.Probabilities = [StartProb for I in range(len(ArmNames))]

        # And store the Cumulative Reward
        self.CumulativeReward = 0

    def handleRows(self):
        """
        Process each of the rows and update our running reward
        and the basic probabilities for each one.

        Returns the cumulative reward.  Until the steps below are
        implemented the reward stays at 0.
        """
        # We initialize the cumulative
        # Reward to be 0
        self.CumulativeReward = 0

        # Now iterate over the rows and make each of the choices.
        # The rows live in self.Data (set by __init__); there is no
        # self.Rows attribute.
        for CurrRow in self.Data:
            # TODO: Now pick one from the list of probabilities.
            # TODO: Get the reward value from the row.
            # TODO: Update the reward weight.
            # TODO: And update the probabilities.
            pass

        # Return the cumulative reward.
        return self.CumulativeReward

    def pickArmIndex(self):
        """
        Pick an index based upon the probabilities
        using the cumulative score approach based
        upon a random value.

        Stub: not implemented yet, returns None.
        """
        pass

    def getReward(self, Index):
        """
        Use the arm names to get the reward for the
        chosen arm.

        Stub: not implemented yet, returns None.
        """
        pass

    def updateWeight(self, Index, Reward):
        """
        Update the weight for the chosen index using
        the parameters.

        Stub: not implemented yet.
        """
        pass

    def updateProbability(self, Index):
        """
        Update the probability for the index from its weight.

        Stub: not implemented yet.
        """
        pass

    def normalizeProbabilities(self, Index, Reward):
        """
        Normalize the probability values.

        Stub: not implemented yet.
        """
        pass

"Sample A","Sample B","Sample C","Sample D"
0,0,0,0
1,0,0,0
0,0,1,0
0,0,0,0
0,0,0,1
0,0,0,0
0,0,0,0
0,0,0,0
0,0,0,0
0,0,0,1
0,1,0,1
0,0,0,0
0,1,0,0
0,0,0,1
0,0,0,0
1,1,0,0
0,0,0,0
1,1,0,0
0,0,0,0
0,0,0,0
0,0,0,0
1,1,0,0
1,0,0,0
0,1,1,0
0,0,0,0
0,1,0,0
0,1,0,1
0,1,0,0
0,0,0,0
0,0,1,0
0,0,1,0
0,0,0,1
0,1,0,0
0,0,0,0
1,0,0,0
0,0,0,0
0,0,0,0
0,1,0,0
1,0,1,0
1,1,0,0
0,0,0,0
0,0,0,0
0,0,0,0
0,0,0,0
0,0,0,1
1,0,0,0
0,1,0,1
0,1,0,0
0,0,0,0
0,0,0,0
0,0,0,0
0,0,1,0
1,0,0,1
0,0,0,0
0,1,0,0
0,1,0,0
0,0,0,0
0,0,0,0
0,0,0,0
0,1,0,0
0,0,0,0
0,0,0,0
0,0,1,0
0,0,1,0
0,0,0,0
1,1,0,1
0,0,0,0
0,1,1,0
1,0,0,0
0,1,0,0
1,0,0,0
0,0,0,0
0,0,0,0
0,0,0,0
0,0,1,0
0,1,1,1
1,0,0,0
1,1,1,0
0,0,0,1
0,0,1,0
0,0,1,0
0,0,0,0
0,1,0,0
0,0,0,0
0,0,0,0
1,1,0,0
0,0,0,0
0,0,0,0
0,0,0,0
0,1,0,0
0,1,1,0
0,0,0,0
0,0,0,0
0,0,1,0
0,1,0,0
0,0,0,0
0,0,0,0
0,1,0,1
0,0,0,0
0,0,0,0
0,1,0,0
0,0,0,0
0,0,1,0
1,1,0,0
1,0,0,0
0,0,0,0
0,0,0,0
0,1,0,0
0,1,0,0
0,0,0,0
0,0,1,0
0,0,0,0
0,1,0,0
0,0,0,0
0,0,0,0
0,1,0,0
0,0,0,0
0,1,1,0
0,0,0,0
1,0,1,0
1,0,0,0
0,0,0,0
0,1,0,0
0,0,0,1
0,1,0,0
0,0,0,0
0,0,0,0
0,0,0,1
0,0,0,0
0,0,1,1
0,0,0,0
0,0,1,0
0,0,0,0
0,1,0,0
1,0,0,0
0,1,0,1
0,1,0,0
1,0,0,0
1,0,1,1
0,0,0,0
0,0,0,0
1,1,0,0
0,1,0,0
0,0,0,1
0,0,0,0
0,0,0,0
0,1,0,0
1,0,0,0
0,0,0,0
0,1,0,0
0,0,0,0
0,0,0,0
1,0,0,0
0,1,1,0
0,0,0,0
0,0,0,1
0,0,0,0
1,0,1,0
0,0,0,0
0,0,0,0
0,1,0,0
0,0,1,0
0,0,0,1
0,0,0,0
1,1,0,0
1,0,0,0
0,0,0,0
1,0,0,0
0,1,1,0
0,0,0,0
0,0,0,0
1,0,1,0
0,0,0,0
0,0,0,0
0,0,0,0
0,1,0,0
1,1,0,0
1,0,1,0
0,0,0,0
0,0,0,0
1,1,0,0
0,1,0,0
0,0,0,0
0,1,0,0
1,0,0,0
1,0,0,0
0,1,0,0
0,0,0,0
1,1,0,0
0,0,0,0
0,0,0,1
0,1,0,0
1,0,0,0
0,1,1,0
0,1,0,0
0,0,1,0
1,0,1,0
0,1,0,0
0,0,0,0
0,0,1,0
1,1,0,0
0,0,0,0
0,0,0,0
0,0,0,0
0,0,1,0
0,0,0,0
0,0,0,0
0,1,0,0
0,0,0,0
1,1,0,0
0,0,0,0
0,0,0,0
0,0,0,0
0,1,0,1
0,0,1,0
0,0,1,0
0,0,0,1
0,0,0,0
0,0,0,0
1,0,0,0
1,0,0,0
0,1,0,0
0,0,0,0
0,0,0,0
0,0,0,0
0,0,0,0
1,1,0,0
1,0,0,0
0,1,1,0
0,0,0,0
0,0,0,0
0,0,0,0
1,1,0,0
0,0,0,0
1,0,0,0
0
Answered Same Day Aug 02, 2021

Solution

Swapnil answered on Aug 03 2021
146 Votes
89040/Bandits.py
import csv, random, sys

class BanditSet(object):
    """
    A set of arms for a stationary multi-armed bandit problem.

    Each arm has a name, a weight and a selection probability.  For
    every data row one arm is drawn at random according to the current
    probabilities, its reward is read from the row, and the weights and
    probabilities are updated and renormalised.
    """

    def __init__(self, DataRows, ArmNames, ExpRate, DistribParam, DecayRate, RewardWeight):
        """
        Store the rows, arm names and parameters and give every arm the
        same starting weight and probability (1 / number of arms).

        DataRows     -- iterable of rows; each row is indexed by arm name.
        ArmNames     -- sequence of arm names; must not be empty.
        ExpRate      -- exploration rate used by updateProbability().
        DistribParam -- value blended in with the exploration rate by
                        updateProbability().
        DecayRate    -- factor applied to the old weight by updateWeight().
        RewardWeight -- factor applied to the reward by updateWeight().
        """
        self.Data = DataRows
        self.ExplorationRate = ExpRate
        self.DistributionParameter = DistribParam
        self.DecayRate = DecayRate
        self.RewardWeight = RewardWeight
        self.Names = ArmNames
        StartProb = 1 / float(len(ArmNames))
        self.Probabilities = [StartProb for I in range(len(ArmNames))]
        self.Weights = [StartProb for I in range(len(ArmNames))]
        self.CumulativeReward = 0

    def handleRows(self):
        """
        Run the bandit over every stored row and return the cumulative
        reward.  The row, the chosen arm, its reward and the running
        total are printed for each row.
        """
        self.CumulativeReward = 0

        for CurrRow in self.Data:
            print(CurrRow)
            reward_arm_index = self.pickArmIndex()
            rewardValue = self.getReward(reward_arm_index, CurrRow)
            self.updateWeight(reward_arm_index, rewardValue)
            self.updateProbability(reward_arm_index)
            self.normalizeProbabilities(reward_arm_index, rewardValue)
            # Rewards may arrive as strings (e.g. from csv.DictReader),
            # so convert before accumulating.
            self.CumulativeReward = float(self.CumulativeReward) + float(rewardValue)
            print('choice made: ', reward_arm_index)
            print('reward from choice: ', rewardValue)
            print('cumulative reward: ', self.CumulativeReward)
        return self.CumulativeReward

    def pickArmIndex(self):
        """Draw one arm index at random, weighted by self.Probabilities."""
        return random.choices(range(len(self.Probabilities)), weights=self.Probabilities, k=1)[0]

    def getReward(self, Index, row):
        """Return the value stored in `row` under the name of arm `Index`."""
        choosenArm = self.Names[Index]
        return row[choosenArm]

    def updateWeight(self, Index, Reward):
        """
        Set the weight of arm `Index` to
        DecayRate * old weight + RewardWeight * Reward,
        then rescale all weights so that they sum to 1.
        """
        self.Weights[Index] = (float(self.DecayRate) * float(self.Weights[Index])
                               + float(self.RewardWeight) * float(Reward))
        self._normalizeInPlace(self.Weights)

    def updateProbability(self, Index):
        """
        Recompute the probability of arm `Index` from its weight:
        weight * (1 - ExplorationRate) + ExplorationRate * DistributionParameter.
        Only this one entry changes; call normalizeProbabilities()
        afterwards to make the list sum to 1 again.
        """
        ExpRate = float(self.ExplorationRate)
        self.Probabilities[Index] = (float(self.Weights[Index]) * (1 - ExpRate)
                                     + ExpRate * float(self.DistributionParameter))

    def normalizeProbabilities(self, Index, Reward):
        """
        Rescale all probabilities so that they sum to 1.

        `Index` and `Reward` are accepted for interface compatibility
        and are not used.
        """
        self._normalizeInPlace(self.Probabilities)

    @staticmethod
    def _normalizeInPlace(Values):
        """
        Divide every entry of `Values` by their sum, modifying the list
        in place.  If the sum is zero there is nothing to scale by, so
        the entries fall back to a uniform distribution instead of
        raising ZeroDivisionError.
        """
        if not Values:
            return
        Total = float(sum(Values))
        if Total == 0:
            Values[:] = [1 / float(len(Values))] * len(Values)
        else:
            Values[:] = [float(V) / Total for V in Values]
def main(argv=None):
    """
    Command-line entry point: read a CSV file of rewards and run the
    bandit over it.

    argv -- argument list in the style of sys.argv (program name first,
            CSV path second).  Defaults to sys.argv.

    The CSV header row supplies the arm names; every following row is
    passed to BanditSet as a dict keyed by arm name.

    Raises SystemExit with an explanatory message when the CSV path is
    missing or the file has no header row.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        ProgName = argv[0] if argv else "Bandits.py"
        raise SystemExit("usage: {} <data.csv>".format(ProgName))

    fileName = argv[1]
    ExpRate = .3
    DistribParam = .1
    DecayRate = .6
    RewardWeight = .9

    with open(fileName, newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        ArmNames = reader.fieldnames
        DataRows = list(reader)

    # An empty file yields fieldnames of None; stop here rather than
    # failing later inside BanditSet.
    if not ArmNames:
        raise SystemExit("{}: no header row with arm names found".format(fileName))

    bandits = BanditSet(DataRows, ArmNames, ExpRate, DistribParam, DecayRate, RewardWeight)
    bandits.handleRows()


# Only run when executed as a script, so importing this module has no
# side effects.
if __name__ == "__main__":
    main()
89040/BanditsData.csv
"Sample A","Sample B","Sample C","Sample...
SOLUTION.PDF

Answer To This Question Is Available To Download

Related Questions & Answers

More Questions »

Submit New Assignment

Copy and Paste Your Assignment Here