import torch

def multi_dim_cross_entropy_test():
    """CrossEntropyLoss needs the shapes out of prediction order: rather than
    (bs, seq_len, num_labels), it needs:
        input:  (bs, num_labels, seq_len)
        target: (bs, seq_len), with one class index in 0..num_labels-1 per position
    """
    # Example of target with class indices
    torch.manual_seed(0)
    loss = torch.nn.CrossEntropyLoss(reduction='none')
    bs, num_labels, seq_len = 2, 5, 4
    logits = torch.randn(bs, num_labels, seq_len, requires_grad=True)
    target = torch.randint(num_labels, (bs, seq_len))
    output = loss(logits, target)  # per-position losses, shape (bs, seq_len)
    assert output.shape == (bs, seq_len)
    return output
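
If the logits come out of a model in (bs, seq_len, num_labels) order, as sequence models typically emit them, permuting the last two dimensions gives the layout CrossEntropyLoss expects. A minimal sketch under that assumption; the shape names match the docstring above, and everything besides the loss setup is illustrative, not from the original snippet:

import torch

torch.manual_seed(0)
loss = torch.nn.CrossEntropyLoss(reduction='none')
bs, seq_len, num_labels = 2, 4, 5
logits = torch.randn(bs, seq_len, num_labels)      # model-output order
target = torch.randint(num_labels, (bs, seq_len))  # one class index per position

# (bs, seq_len, num_labels) -> (bs, num_labels, seq_len)
per_position = loss(logits.permute(0, 2, 1), target)  # shape (bs, seq_len)

# Equivalent: flatten every position into the batch dimension
flat = loss(logits.reshape(-1, num_labels), target.reshape(-1)).view(bs, seq_len)
assert torch.allclose(per_position, flat)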
sort ~/.bash_history | uniq | awk '{print ": :0:;"$0}' >> ~/.zsh_history
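
One caveat: zsh's extended-history format is `: <start>:<elapsed>;<command>`, so the `": :0:;"` prefix above leaves the timestamp empty and adds a stray colon that zsh may not parse. If imported entries come out garbled, a variant such as `awk '{print ": "systime()":0;"$0}'` writes a real epoch timestamp instead (this assumes GNU awk, which provides systime()). Note also that `sort | uniq` deduplicates at the cost of the original chronological order.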
model.zero_grad()                                   # Reset gradients tensors
for i, (inputs, labels) in enumerate(training_set):
    predictions = model(inputs)                     # Forward pass
    loss = loss_function(predictions, labels)       # Compute loss function
    loss = loss / accumulation_steps                # Normalize our loss (if averaged)
    loss.backward()                                 # Backward pass
    if (i+1) % accumulation_steps == 0:             # Wait for several backward steps
        optimizer.step()                            # Now we can do an optimizer step
        model.zero_grad()                           # Reset gradients tensors
        if (i+1) % evaluation_steps == 0:           # Evaluate the model when we...
            evaluate_model()                        # ...have no gradients accumulated
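
A self-contained version of the same gradient-accumulation pattern, runnable as-is: the linear model, MSE loss, optimizer, and synthetic batches below are stand-ins invented for illustration, not part of the original snippet.

import torch

torch.manual_seed(0)
model = torch.nn.Linear(10, 1)                      # toy stand-in model
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
loss_function = torch.nn.MSELoss()
accumulation_steps = 4

# Synthetic training set: 16 batches of (inputs, labels)
training_set = [(torch.randn(8, 10), torch.randn(8, 1)) for _ in range(16)]

model.zero_grad()                                   # Reset gradient tensors
for i, (inputs, labels) in enumerate(training_set):
    predictions = model(inputs)                     # Forward pass
    loss = loss_function(predictions, labels)       # Compute loss
    loss = loss / accumulation_steps                # Normalize (loss is batch-averaged)
    loss.backward()                                 # Gradients accumulate across calls
    if (i + 1) % accumulation_steps == 0:           # Every accumulation_steps batches...
        optimizer.step()                            # ...take one optimizer step
        model.zero_grad()                           # ...and reset gradients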