# The Q-table and the two environment counters (incremented in `update`)
# all start out as MATRIX_SIZE x MATRIX_SIZE zero matrices.
Q = np.matrix(np.zeros((MATRIX_SIZE, MATRIX_SIZE)))
env_police = np.matrix(np.zeros((MATRIX_SIZE, MATRIX_SIZE)))
env_drugs = np.matrix(np.zeros((MATRIX_SIZE, MATRIX_SIZE)))

# State used for the first action sample and the first update below.
initial_state = 1
def available_actions(state):
    """Return the actions that can be taken from ``state``.

    Row ``state`` of the module-level matrix ``M`` is scanned and the column
    indices of every entry that is ``>= 0`` are returned.

    Args:
        state: Row index into ``M``.

    Returns:
        The second index array produced by ``np.where`` for the row, i.e.
        the column positions of the non-negative entries.
    """
    # NOTE(review): taking element [1] of np.where implies the row is at
    # least 2-D (as with np.matrix); M is defined outside this view - confirm.
    row = M[state, ]
    return np.where(row >= 0)[1]
def sample_next_action(available_actions_range):
    """Pick one action uniformly at random from the given candidates.

    Args:
        available_actions_range: Non-empty 1-D sequence or array of
            candidate action indices.

    Returns:
        The chosen action as a plain ``int``.

    Raises:
        ValueError: If ``available_actions_range`` is empty (raised by
            ``np.random.choice``).
    """
    # Sample from the argument rather than the module-level
    # `available_action`, so the function honours whatever candidates the
    # caller passes in.
    # Requesting a scalar (no `size`) avoids calling int() on a 1-element
    # array, a conversion NumPy >= 1.25 deprecates.
    return int(np.random.choice(available_actions_range))
def collect_environmental_data(action):
    """Report which environment markers are present at ``action``.

    Args:
        action: Value tested for membership in the module-level ``police``
            and ``drug_traces`` collections (both defined outside this
            function).

    Returns:
        A list holding ``'p'`` if ``action`` is in ``police`` and ``'d'`` if
        it is in ``drug_traces``, in that order; empty if neither applies.
    """
    markers = ['p'] if action in police else []
    if action in drug_traces:
        markers.append('d')
    return markers
# Candidate actions reachable from the initial state; kept as a module-level
# name because other code in this file reads `available_action`.
available_action = available_actions(initial_state)

# Randomly chosen first action among those candidates.
action = sample_next_action(available_action)
def update(current_state, action, gamma):
    """Apply one Q-table update for ``(current_state, action)`` and score Q.

    The new value is ``M[current_state, action] + gamma * max(Q[action, :])``.
    If several columns of row ``action`` tie for the maximum, one of them is
    picked at random. The environment counters ``env_police`` and
    ``env_drugs`` are incremented for this transition when
    ``collect_environmental_data(action)`` reports ``'p'`` / ``'d'``.

    Args:
        current_state: Row index of the state the agent is leaving.
        action: Column index of the action taken; also used as the row index
            of the next state in ``Q``.
        gamma: Discount factor multiplied with the best next-state value.

    Returns:
        ``np.sum(Q / np.max(Q) * 100)`` when the largest entry of ``Q`` is
        positive, otherwise ``0``.
    """
    next_row = Q[action, ]
    # Column indices holding the row maximum.
    best_columns = np.where(next_row == np.max(next_row))[1]

    # Extract a plain int without calling int() on a 1-element array, which
    # NumPy >= 1.25 deprecates. The RNG is only consulted on ties.
    if best_columns.shape[0] > 1:
        best_column = int(np.random.choice(best_columns))
    else:
        best_column = int(best_columns[0])

    best_value = Q[action, best_column]
    Q[current_state, action] = M[current_state, action] + gamma * best_value

    # Record what was observed in the environment for this transition.
    environment = collect_environmental_data(action)
    if 'p' in environment:
        env_police[current_state, action] += 1
    if 'd' in environment:
        env_drugs[current_state, action] += 1

    # Normalised score; guarded so an all-non-positive Q never divides by
    # zero or a negative maximum.
    q_max = np.max(Q)
    if q_max > 0:
        return np.sum(Q / q_max * 100)
    return 0
# Run a single update from the initial state with the sampled action; the
# score returned by update() is not used here.
update(initial_state, action, gamma)
def available_actions_with_env_help(state):
    """Return the actions available from ``state``, avoiding negative ones.

    Candidates are the columns of row ``state`` of ``M`` with entries
    ``>= 0``. If any candidate has a negative entry in ``env_matrix_snap``,
    the candidates are narrowed to those with a non-negative entry, unless
    that would leave nothing, in which case the full candidate set is kept.

    Args:
        state: Row index into ``M`` and ``env_matrix_snap``.

    Returns:
        Array of column indices of the selected actions.
    """
    candidates = np.where(M[state, ] >= 0)[1]
    env_scores = env_matrix_snap[state, candidates]

    # Nothing flagged as negative: every candidate stays eligible.
    if not np.sum(env_scores < 0):
        return candidates

    # NOTE(review): indexing [0] assumes env_scores is a 1 x k row (as with
    # np.matrix); env_matrix_snap is defined outside this view - confirm.
    preferred = candidates[np.array(env_scores)[0] >= 0]
    if len(preferred) > 0:
        return preferred
    return candidates