Created May 2, 2025 16:50
Param∆ (parameter deltas) enables zero-cost transfer of post-training capabilities to a new base model.
import torch
from transformers import AutoModelForCausalLM

# Load the original base model, its post-trained (instruction-tuned) version,
# and the new base model the capabilities should be transferred to.
llama3_base = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B")
llama3_inst = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
llama31_base = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B")

# Calculate the parameter delta: Δθ = θ_post - θ_base
delta_params = {}
with torch.no_grad():
    for name, param in llama3_base.named_parameters():
        delta_params[name] = llama3_inst.get_parameter(name) - param

# Create the Param∆ model by applying the delta to the new base: θ_Param∆ = θ'_base + Δθ
param_delta_model = llama31_base
with torch.no_grad():
    for name, param in param_delta_model.named_parameters():
        if name in delta_params:
            param.add_(delta_params[name])

# Save the resulting model
param_delta_model.save_pretrained("llama31-with-llama3-inst-capabilities")
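
A minimal usage sketch of the merged checkpoint. This assumes the directory saved above and that the Llama 3 Instruct tokenizer and chat template remain appropriate for the transferred capabilities; the prompt text is purely illustrative.

from transformers import AutoModelForCausalLM, AutoTokenizer

# Tokenizer from the post-trained model, since the delta carries its chat behavior.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
model = AutoModelForCausalLM.from_pretrained("llama31-with-llama3-inst-capabilities")

# Build a chat-formatted prompt and generate a reply with the Param∆ model.
messages = [{"role": "user", "content": "Summarize what a parameter delta is."}]
inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
outputs = model.generate(inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))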