
Typically, we should be able to save a merged base + PEFT model, like this:

import torch
from transformers import AutoTokenizer, AutoModel, AutoConfig
from peft import PeftModel


# Loading base MNTP model, along with custom code that enables bidirectional connections in decoder-only LLMs
tokenizer = AutoTokenizer.from_pretrained(
    "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp"
)
config = AutoConfig.from_pretrained(
    "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp", trust_remote_code=True
)
model = AutoModel.from_pretrained(
    "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp",
    trust_remote_code=True,
    config=config,
    torch_dtype=torch.bfloat16,
    device_map="cuda" if torch.cuda.is_available() else "cpu",
)
model = PeftModel.from_pretrained(
    model,
    "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp",
)
model = model.merge_and_unload()  # This can take several minutes on CPU

model.save_pretrained("LLM2Vec-Mistral-7B-Instruct-v2-mnt-merged")

but it throws an error (similar to https://github.com/huggingface/transformers/issues/26972):

[out]:

/usr/local/lib/python3.10/dist-packages/transformers/integrations/peft.py:391: FutureWarning: The `active_adapter` method is deprecated and will be removed in a future version.
  warnings.warn(
---------------------------------------------------------------------------
UnboundLocalError                         Traceback (most recent call last)
[<ipython-input-3-db27a2801af8>](https://localhost:8080/#) in <cell line: 1>()
----> 1 model.save_pretrained("LLM2Vec-Mistral-7B-Instruct-v2-mnt-merged")

3 frames
/usr/local/lib/python3.10/dist-packages/transformers/integrations/peft.py in active_adapters(self)
    383 
    384         # For previous PEFT versions
--> 385         if isinstance(active_adapters, str):
    386             active_adapters = [active_adapters]
    387 

UnboundLocalError: local variable 'active_adapters' referenced before assignment

Tested on:

transformers==4.38.2
peft==0.10.0
accelerate==0.29.2

How to save the LLM2Vec model as a HuggingFace PreTrainedModel object?

1 Answer


Wrap the LLM2Vec object in a custom PreTrainedModel, as in https://stackoverflow.com/a/74109727/610569.

We can try this:

import torch
from transformers import PreTrainedModel, PretrainedConfig
from transformers import AutoTokenizer, AutoModel, AutoConfig
from peft import PeftModel
from llm2vec import LLM2Vec

class MistralEncoderConfig(PretrainedConfig):
    model_type = 'MistralEncoder'
    def __init__(self, pooling_mode="mean", max_length=512, **kwargs):
        super().__init__(**kwargs)
        self.pooling_mode = pooling_mode
        self.max_length = max_length


class MistralEncoder(PreTrainedModel):
    config_class = MistralEncoderConfig
    def __init__(self, config):
        super().__init__(config)

        self.tokenizer = AutoTokenizer.from_pretrained(
            "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp"
            )

        self.base_peft_config = AutoConfig.from_pretrained(
            "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp", trust_remote_code=True
        )

        self.base_peft_model = AutoModel.from_pretrained(
            "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp",
            trust_remote_code=True,
            config=self.base_peft_config,
            torch_dtype=torch.bfloat16,
            device_map="cuda" if torch.cuda.is_available() else "cpu",
        )
        self.base_peft_model = PeftModel.from_pretrained(
            self.base_peft_model,
            "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp",
        )
        self.base_peft_model = self.base_peft_model.merge_and_unload()  # This can take several minutes on CPU

        self.config = config

        self.model = LLM2Vec(self.base_peft_model, self.tokenizer, pooling_mode=self.config.pooling_mode, max_length=self.config.max_length)
        
    def forward(self, input):
        return self.model(input) 

    def encode(self, input):
        return self.model.encode(input)

config = MistralEncoderConfig(pooling_mode="mean", max_length=512)
model = MistralEncoder(config)
model.save_pretrained('mistral-encoder')
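
Optionally, the custom classes can also be registered with the Auto classes so the saved checkpoint resolves through AutoConfig/AutoModel as well (a sketch; the registration key simply reuses the model_type declared above):

# Register the custom config/model so AutoConfig/AutoModel can resolve
# checkpoints saved with MistralEncoderConfig.
AutoConfig.register("MistralEncoder", MistralEncoderConfig)
AutoModel.register(MistralEncoderConfig, MistralEncoder)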

And to load the new model:

model = MistralEncoder.from_pretrained('mistral-encoder')
model.encode(["hello world", "foo bar"])

[out]:

tensor([[ 3.5312, -4.6875, -5.4688,  ..., -0.6406,  2.2188,  2.1562],
        [-3.1562, -1.4922, -1.4219,  ..., -2.7188,  3.3750,  1.7344]])
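
Since encode returns a plain tensor of sentence embeddings, downstream use is straightforward; for example, a cosine-similarity check between the two sentences (a sketch using only PyTorch):

import torch.nn.functional as F

emb = model.encode(["hello world", "foo bar"])
# Cosine similarity between the two sentence embeddings.
sim = F.cosine_similarity(emb[0], emb[1], dim=0)
print(sim.item())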
