function critic = update_critic(~, critic, batch, critic_lr, gamma, ~, ~)
%UPDATE_CRITIC Apply one Adam step to the critic using a sampled batch.
%
%   critic = UPDATE_CRITIC(~, critic, batch, critic_lr, gamma, ~, ~)
%
%   Inputs
%     critic    - network accepted by predict/adamupdate with two inputs
%                 (state, action).
%     batch     - struct array with fields state, action, reward and
%                 next_state; each field is concatenated along dimension 1.
%     critic_lr - learning rate forwarded to adamupdate.
%     gamma     - discount factor applied to the bootstrapped Q-values.
%     The first, sixth and seventh arguments are accepted but ignored.
%
%   Output
%     critic    - the critic after the Adam update.
%
%   Optimizer state: the Adam moment estimates and the iteration counter
%   are stored in persistent variables so that they carry over between
%   calls, as adamupdate requires. Previously the moments were reset to []
%   on every call while the iteration was fixed at 1000, which discarded
%   all gradient history and applied a bias correction that did not match
%   the freshly initialised moments. The state is shared by every caller
%   of this function; run "clear update_critic" to reset it before
%   starting a new training run or switching to a different network.

    persistent averageGrad averageSqGrad iteration
    if isempty(iteration)
        iteration = 0;
    end

    % Stack the per-sample fields of the batch along dimension 1.
    states      = cat(1, batch.state);
    actions     = cat(1, batch.action);
    rewards     = cat(1, batch.reward);
    next_states = cat(1, batch.next_state);

    % Label the arrays as batch-by-channel for the network.
    dl_states      = dlarray(states, 'BC');
    dl_actions     = dlarray(actions, 'BC');
    dl_next_states = dlarray(next_states, 'BC');

    % Bootstrapped Q-value of the next states.
    % NOTE(review): this evaluates the critic being trained with the
    % actions stored in the batch, not a separate target network with
    % next-state actions -- confirm this is intended.
    target_next_Q_values = predict(critic, dl_next_states, dl_actions);

    % Align rewards with the prediction's second dimension before adding.
    rewards = reshape(rewards, [], size(target_next_Q_values, 2));

    % NOTE(review): no terminal-state mask is applied here; verify whether
    % the batch carries a done flag that should zero the bootstrap term.
    target_Q_values = rewards + gamma .* target_next_Q_values;
    target_Q_values = reshape(target_Q_values, 1, []);

    % gradient_Critic is defined elsewhere; presumably it returns the loss
    % gradients with respect to the critic's learnable parameters.
    critic_gradients = gradient_Critic(critic, dl_states, dl_actions, target_Q_values);

    % Feed the running moments back in and advance the iteration by one.
    % The counter is only committed after adamupdate succeeds.
    next_iteration = iteration + 1;
    [critic, averageGrad, averageSqGrad] = adamupdate(critic, critic_gradients, ...
        averageGrad, averageSqGrad, next_iteration, critic_lr);
    iteration = next_iteration;
end