model.zero_grad() # Reset gradients tensors for i, (inputs, labels) in enumerate(training_set): predictions = model(inputs) # Forward pass loss = loss_function(predictions, labels) # Compute loss function loss = loss / accumulation_steps # Normalize our loss (if averaged) loss.backward() # Backward pass if (i+1) % accumulation_steps == 0: # Wait for several backward steps optimizer.step() # Now we can do an optimizer step model.zero_grad() # Reset gradients tensors if (i+1) % evaluation_steps == 0: # Evaluate the model when we... evaluate_model() # ...have no gradients accumulated