Skip to content

Instantly share code, notes, and snippets.

View YangWang92's full-sized avatar
🎯
Focusing

Yang Wang YangWang92

🎯
Focusing
View GitHub Profile
@YangWang92
YangWang92 / reshard.py
Created March 6, 2023 06:04 — forked from benob/reshard.py
Script to decompose/recompose LLaMA LLM models with a different number of shards.
# script to decompose/recompose a llama model into a different number of shards
# note that it needs roughly 2x the full model size in CPU memory
import os
import json
import sys
import torch
import glob
if len(sys.argv) != 4:
@YangWang92
YangWang92 / onnx_t5.py
Created February 20, 2021 08:29 — forked from patil-suraj/onnx_t5.py
Speeding up T5 with onnx 🚀
import inspect
import logging
import os
from pathlib import Path
import torch
from psutil import cpu_count
from transformers import T5Config, T5ForConditionalGeneration, T5Tokenizer
from transformers.generation_utils import GenerationMixin
from transformers.modeling_outputs import BaseModelOutputWithPast, Seq2SeqLMOutput