Created
May 29, 2025 17:28
-
-
Save DiegoHernanSalazar/9f0ab168921356d1e97efc464c056364 to your computer and use it in GitHub Desktop.
Stanford Online / DeepLearning.AI. Unsupervised Learning, Recommender Systems and Reinforcement Learning: State-Action Value Function Example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# State Action Value Function Example\n", | |
| "\n", | |
| "In this Jupyter notebook, you can modify the Mars rover example to see how the values of Q(s,a) change as you vary the rewards and the discount factor." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import numpy as np # NumPy (aliased as 'np') for array handling and numeric computations\n", | |
| "from utils import * # Import ALL '*' helper functions from utils" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Do not modify\n", | |
| "num_states = 6 # states s = 1,2,3,4,5,6\n", | |
| "num_actions = 2 # two actions: a = <- (go left), a = -> (go right)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 12, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "terminal_left_reward = 100 # reward = 100 at terminal state s = 1 \n", | |
| "terminal_right_reward = 40 # reward = 40 at terminal state s = 6\n", | |
| "each_step_reward = 0 # reward = 0 at the other (non-terminal) states s = 2,3,4,5\n", | |
| "\n", | |
| "# Discount factor\n", | |
| "gamma = 0.5 # gamma = 0.5\n", | |
| "\n", | |
| "# Probability of going in the wrong direction\n", | |
| "misstep_prob = 0" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "image/png": "\n", | |
| "text/plain": [ | |
| "<Figure size 864x144 with 1 Axes>" | |
| ] | |
| }, | |
| "metadata": { | |
| "needs_background": "light" | |
| }, | |
| "output_type": "display_data" | |
| }, | |
| { | |
| "data": { | |
| "image/png": "\n", | |
| "text/plain": [ | |
| "<Figure size 1296x144 with 1 Axes>" | |
| ] | |
| }, | |
| "metadata": { | |
| "needs_background": "light" | |
| }, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "generate_visualization(terminal_left_reward, terminal_right_reward, each_step_reward, gamma, misstep_prob)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.7.6" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 5 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment