Arunprakash-A · August 18, 2024 08:34 · Jul 19, 2024 · Jul 19, 2024 · Jul 17, 2024 · Jul 17, 2024
diff --git a/gradientaccumulation-for-continual-pretraining.ipynb b/gradientaccumulation-for-continual-pretraining.ipynb
@@ -1388,11 +1388,10 @@
       },
       "source": [
         "* Note that the GPU memory required to train the model is 7 GB (as if we used SGD).\n",
-        "* This approach gives us a better test performance.",
-
-            "* BS:1, GAS:10 then in 100 iterations,  # of weight updates will be 10",
-            "* BS:2, GAS:10 then in 50 iterations,  # of weight updates will be 5",
-            "* BS:10, GAS:10 then in 1 iterations,  # of weight updates will be 1" 
+        "* This approach gives us a better test performance.\n",        
+            "* BS:1, GAS:10 then in 100 iterations,  # of weight updates will be 10 \n",
+            "* BS:2, GAS:10 then in 50 iterations,  # of weight updates will be 5 \n",
+            "* BS:10, GAS:10 then in 10 iterations,  # of weight updates will be 1 \n" 
       ]
     },
     {

diff --git a/gradientaccumulation-for-continual-pretraining.ipynb b/gradientaccumulation-for-continual-pretraining.ipynb
@@ -1388,7 +1388,11 @@
       },
       "source": [
         "* Note that the GPU memory required to train the model is 7 GB (as if we used SGD).\n",
-        "* This approach gives us a better test performance."
+        "* This approach gives us a better test performance.",
+
+            "* BS:1, GAS:10 then in 100 iterations,  # of weight updates will be 10",
+            "* BS:2, GAS:10 then in 50 iterations,  # of weight updates will be 5",
+            "* BS:10, GAS:10 then in 1 iterations,  # of weight updates will be 1" 
       ]
     },
     {

diff --git a/gradientaccumulation-for-continual-pretraining.ipynb b/gradientaccumulation-for-continual-pretraining.ipynb
@@ -1,5 +1,15 @@
 {
   "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "<a href=\"https://colab.research.google.com/gist/Arunprakash-A/c27ebe06e6c8fbd21263fc54013bbf49/gradientaccumulation-for-continual-pretraining.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+      ]
+    },
     {
       "cell_type": "markdown",
       "metadata": {
@@ -1394,7 +1404,8 @@
   "metadata": {
     "colab": {
       "provenance": [],
-      "gpuType": "T4"
+      "gpuType": "T4",
+      "include_colab_link": true
     },
     "kernelspec": {
       "display_name": "Python(hf)",

diff --git a/gradientaccumulation-for-continual-pretraining.ipynb b/gradientaccumulation-for-continual-pretraining.ipynb