SuperSecureHuman
/

BipedalWalker-v3-TD3

@@ -29,23 +29,29 @@
   {
    "cell_type": "code",
    "execution_count": null,
    "outputs": [],
    "source": [
     "config = {\n",
     "    \"policy_type\": \"MlpPolicy\",\n",
     "    \"env_name\": \"BipedalWalker-v3\",\n",
     "}"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   }
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "outputs": [],
    "source": [
     "run = wandb.init(\n",
@@ -55,13 +61,7 @@
     "    monitor_gym=True,  # auto-upload the videos of agents playing the game\n",
     "    save_code=True,  # optional\n",
     ")"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   }
   },
   {
    "cell_type": "code",
@@ -80,10 +80,9 @@
    "source": [
     "import gym\n",
     "\n",
-    "# First, we create our environment called LunarLander-v2\n",
     "env = gym.make(\"BipedalWalker-v3\")\n",
     "\n",
-    "# Then we reset this environment\n",
     "observation = env.reset()\n",
     "\n",
     "for _ in range(200):\n",
@@ -92,7 +91,6 @@
     "  print(\"Action taken:\", action)\n",
     "  env.render()\n",
     "\n",
-    "\n",
     "  # Do this action in the environment and get\n",
     "  # next_state, reward, done and info\n",
     "  observation, reward, done, info = env.step(action)\n",
@@ -143,31 +141,31 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
-   "source": [
-    "eval_env = make_vec_env(\"BipedalWalker-v3\", n_envs=1)"
-   ],
    "metadata": {
-    "collapsed": false,
     "pycharm": {
      "name": "#%%\n"
     }
-   }
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
-   "source": [
-    "callback_on_best = StopTrainingOnRewardThreshold(reward_threshold=300, verbose=1)\n",
-    "eval_callback = EvalCallback(eval_env, callback_on_new_best=callback_on_best, verbose=1)"
-   ],
    "metadata": {
-    "collapsed": false,
     "pycharm": {
      "name": "#%%\n"
     }
-   }
   },
   {
    "cell_type": "code",
@@ -200,44 +198,44 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
-   "source": [
-    "env_id = 'BipedalWalker-v3'"
-   ],
    "metadata": {
-    "collapsed": false,
     "pycharm": {
      "name": "#%%\n"
     }
-   }
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
-   "source": [
-    "model.learn(total_timesteps=50000000, callback=[WandbCallback() , eval_callback])"
-   ],
    "metadata": {
-    "collapsed": false,
     "pycharm": {
      "name": "#%%\n"
     }
-   }
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "outputs": [],
-   "source": [
-    "model.save('300-Trained.zip')"
-   ],
    "metadata": {
-    "collapsed": false,
     "pycharm": {
      "name": "#%%\n"
     }
-   }
   },
   {
    "cell_type": "code",
@@ -278,18 +276,6 @@
     "eval_env.close()"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "de40c367",
-   "metadata": {
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   },
-   "outputs": [],
-   "source": []
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -313,48 +299,26 @@
     "\n",
     "from huggingface_sb3 import package_to_hub\n",
     "\n",
-    "# PLACE the variables you've just defined two cells above\n",
-    "# Define the name of the environment\n",
     "env_id = \"BipedalWalker-v3\"\n",
     "\n",
-    "# TODO: Define the model architecture we used\n",
     "model_architecture = \"TD3\"\n",
     "model_name = \"TD3_BipedalWalker-v3\"\n",
     "\n",
-    "## Define a repo_id\n",
-    "## repo_id is the id of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name} for instance ThomasSimonini/ppo-LunarLander-v2\n",
-    "## CHANGE WITH YOUR REPO ID\n",
     "repo_id = \"SuperSecureHuman/BipedalWalker-v3-TD3\"\n",
     "\n",
-    "## Define the commit message\n",
     "commit_message = \"Upload score 300 trained bipedal walker\"\n",
     "\n",
-    "# Create the evaluation env\n",
     "eval_env = DummyVecEnv([lambda: gym.make(env_id)])\n",
     "\n",
-    "# PLACE the package_to_hub function you've just filled here\n",
     "package_to_hub(model=model, # Our trained model\n",
     "               model_name=model_name, # The name of our trained model \n",
     "               model_architecture=model_architecture, # The model architecture we used: in our case TD3\n",
     "               env_id=env_id, # Name of the environment\n",
     "               eval_env=eval_env, # Evaluation Environment\n",
-    "               repo_id=repo_id, # id of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name} for instance ThomasSimonini/ppo-LunarLander-v2\n",
-    "               commit_message=commit_message)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "outputs": [],
-   "source": [
     "eval_env.close()"
-   ],
-   "metadata": {
-    "collapsed": false,
-    "pycharm": {
-     "name": "#%%\n"
-    }
-   }
   }
  ],
  "metadata": {
@@ -373,7 +337,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.12"
   },
   "toc": {
    "base_numbering": 1,
@@ -420,4 +384,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 5
-}

   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "cc1d81f5",
+   "metadata": {
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
    "outputs": [],
    "source": [
     "config = {\n",
     "    \"policy_type\": \"MlpPolicy\",\n",
     "    \"env_name\": \"BipedalWalker-v3\",\n",
     "}"
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "d9c45ab2",
+   "metadata": {
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
    "outputs": [],
    "source": [
     "run = wandb.init(\n",
     "    monitor_gym=True,  # auto-upload the videos of agents playing the game\n",
     "    save_code=True,  # optional\n",
     ")"
+   ]
   },
   {
    "cell_type": "code",
    "source": [
     "import gym\n",
     "\n",
+    "\n",
     "env = gym.make(\"BipedalWalker-v3\")\n",
     "\n",
     "observation = env.reset()\n",
     "\n",
     "for _ in range(200):\n",
     "  print(\"Action taken:\", action)\n",
     "  env.render()\n",
     "\n",
     "  # Do this action in the environment and get\n",
     "  # next_state, reward, done and info\n",
     "  observation, reward, done, info = env.step(action)\n",
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "7ca36c14",
    "metadata": {
     "pycharm": {
      "name": "#%%\n"
     }
+   },
+   "outputs": [],
+   "source": [
+    "eval_env = make_vec_env(\"BipedalWalker-v3\", n_envs=1)"
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "94fe286d",
    "metadata": {
     "pycharm": {
      "name": "#%%\n"
     }
+   },
+   "outputs": [],
+   "source": [
+    "callback_on_best = StopTrainingOnRewardThreshold(reward_threshold=300, verbose=1)\n",
+    "eval_callback = EvalCallback(eval_env, callback_on_new_best=callback_on_best, verbose=1)"
+   ]
   },
   {
    "cell_type": "code",
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "65c99875",
    "metadata": {
     "pycharm": {
      "name": "#%%\n"
     }
+   },
+   "outputs": [],
+   "source": [
+    "env_id = 'BipedalWalker-v3'"
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "71b5ef7f",
    "metadata": {
     "pycharm": {
      "name": "#%%\n"
     }
+   },
+   "outputs": [],
+   "source": [
+    "model.learn(total_timesteps=50000000, callback=[WandbCallback() , eval_callback])"
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "b18e1309",
    "metadata": {
     "pycharm": {
      "name": "#%%\n"
     }
+   },
+   "outputs": [],
+   "source": [
+    "model.save('300-Trained.zip')"
+   ]
   },
   {
    "cell_type": "code",
     "eval_env.close()"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
     "\n",
     "from huggingface_sb3 import package_to_hub\n",
     "\n",
     "env_id = \"BipedalWalker-v3\"\n",
     "\n",
     "model_architecture = \"TD3\"\n",
     "model_name = \"TD3_BipedalWalker-v3\"\n",
     "\n",
     "repo_id = \"SuperSecureHuman/BipedalWalker-v3-TD3\"\n",
     "\n",
     "commit_message = \"Upload score 300 trained bipedal walker\"\n",
     "\n",
     "eval_env = DummyVecEnv([lambda: gym.make(env_id)])\n",
     "\n",
     "package_to_hub(model=model, # Our trained model\n",
     "               model_name=model_name, # The name of our trained model \n",
     "               model_architecture=model_architecture, # The model architecture we used: in our case TD3\n",
     "               env_id=env_id, # Name of the environment\n",
     "               eval_env=eval_env, # Evaluation Environment\n",
+    "               repo_id=repo_id, # id of the model repository from the Hugging Face Hub\n",
+    "               commit_message=commit_message)\n",
     "eval_env.close()"
+   ]
   }
  ],
  "metadata": {
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
+   "version": "3.8.0"
   },
   "toc": {
    "base_numbering": 1,
  },
  "nbformat": 4,
  "nbformat_minor": 5
+}