# you have got to be shitting me
import argparse

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

parser = argparse.ArgumentParser(
    prog='loraize',
    description='Apply one or more LoRAs to a model, and then save it',
    epilog='BOTTOM TEXT')
parser.add_argument(
    'model',
    type=str,
    help='path or HF name of a base model',
)
parser.add_argument(
    'lora',
    type=str,
    help='one or more LoRAs to apply',
    nargs='+')
parser.add_argument(
    'output_dir',
    type=str,
    help='output directory',
)
args = parser.parse_args()

print(f"Loading base model: {args.model}")
model = AutoModelForCausalLM.from_pretrained(
    args.model,
    return_dict=True,
    torch_dtype=torch.bfloat16,
    device_map="cpu",
)

# Merge each LoRA into the running model so later adapters stack on top of
# earlier merges, rather than re-wrapping the untouched base model each pass.
for lora in args.lora:
    print(f"Loading LoRA: {lora}")
    model = PeftModel.from_pretrained(
        model,
        lora,
        device_map="cpu",
    )
    print("Good luck, bitches. Unloading.")
    print("This gon' take a sec.")
    model = model.merge_and_unload()

tokenizer = AutoTokenizer.from_pretrained(args.model)
model.save_pretrained(args.output_dir, safe_serialization=True, max_shard_size='10GB')
tokenizer.save_pretrained(args.output_dir)
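
# Example invocation, assuming this file is saved as loraize.py (the model
# name and adapter paths below are hypothetical placeholders):
#
#   python loraize.py mistralai/Mistral-7B-v0.1 ./lora-a ./lora-b ./merged-out
#
# This would merge ./lora-a into the base weights, then ./lora-b into that
# result, and write safetensors shards plus the tokenizer to ./merged-out.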