From f88f01d723718aa01eb9644d5d059eb8b9027a07 Mon Sep 17 00:00:00 2001
From: Swati Allabadi
Date: Tue, 9 Dec 2025 07:20:38 +0000
Subject: [PATCH 1/3] Adding base class and Hf class

Signed-off-by: Swati Allabadi
---
 .../finetune/experimental/core/model.py | 143 ++++++++++++++++++
 1 file changed, 143 insertions(+)

diff --git a/QEfficient/finetune/experimental/core/model.py b/QEfficient/finetune/experimental/core/model.py
index d647b73a6..1903a0f1e 100644
--- a/QEfficient/finetune/experimental/core/model.py
+++ b/QEfficient/finetune/experimental/core/model.py
@@ -4,3 +4,146 @@
 # SPDX-License-Identifier: BSD-3-Clause
 #
 # -----------------------------------------------------------------------------
+
+import logging
+from abc import ABC, abstractmethod
+from typing import Any, Dict, Optional, Type
+
+import torch
+import torch.nn as nn
+from transformers import AutoTokenizer, BitsAndBytesConfig
+import transformers
+
+from QEfficient.finetune.experimental.core.component_registry import registry
+from QEfficient.finetune.experimental.utils.dataset_helper import insert_pad_token
+
+logger = get_logger(__name__)
+
+
+class BaseModel(nn.Module, ABC):
+    """Shared skeleton for every finetunable model in the system."""
+
+    def __init__(self, model_name: str, **model_kwargs: Any) -> None:
+        super().__init__()
+        self.model_name = model_name
+        self.model_kwargs: Dict[str, Any] = model_kwargs
+        self._model: Optional[nn.Module] = None
+        self._tokenizer: Any = None  # HF tokenizers are not nn.Modules.
+
+    # Factory constructor: load model after __init__ finishes
+    @classmethod
+    def create(cls, model_name: str, **model_kwargs: Any) -> "BaseModel":
+        obj = cls(model_name, **model_kwargs)
+        module = obj.load_model()
+        if not isinstance(module, nn.Module):
+            raise TypeError(f"load_model() must return nn.Module, got {type(module)}")
+        obj._model = module
+        obj.add_module("_wrapped_model", module)  # register
+        return obj
+
+    @abstractmethod
+    def load_model(self) -> nn.Module:
+        """Create and return the underlying torch.nn.Module."""
+        ...
+
+    def load_tokenizer(self) -> Any:
+        """Override if the model exposes a tokenizer."""
+        raise NotImplementedError(f"{type(self).__name__} does not provide a tokenizer.")
+
+    # Lazy accessors
+    @property
+    def model(self) -> nn.Module:
+        if self._model is None:
+            raise RuntimeError("Model not loaded; use .create(...) to load.")
+        return self._model
+
+    @property
+    def tokenizer(self) -> Any:
+        if self._tokenizer is None:
+            self._tokenizer = self.load_tokenizer()
+        return self._tokenizer
+
+    # nn.Module API surface
+    def forward(self, *args, **kwargs):
+        return self.model(*args, **kwargs)
+
+    def get_input_embeddings(self):
+        if hasattr(self.model, "get_input_embeddings"):
+            return self.model.get_input_embeddings()
+        logger.log_rank_zero(f"Model {self.model_name} does not expose input embeddings", logging.WARNING)
+        return None
+
+    def resize_token_embeddings(self, new_num_tokens: int) -> None:
+        if hasattr(self.model, "resize_token_embeddings"):
+            self.model.resize_token_embeddings(new_num_tokens)
+        else:
+            logger.log_rank_zero(f"Model {self.model_name} cannot resize token embeddings", logging.WARNING)
+
+    # optional
+    def to(self, *args, **kwargs):
+        self.model.to(*args, **kwargs)
+        return self
+
+    def train(self, mode: bool = True):
+        self.model.train(mode)
+        return super().train(mode)
+
+    def eval(self):
+        return self.train(False)
+
+
+@registry.model("hf")
+class HFModel(BaseModel):
+    """HuggingFace-backed model with optional quantization."""
+
+    def __init__(
+        self,
+        model_name: str,
+        auto_class_name: str = "AutoModelForCausalLM",
+        *,
+        tokenizer_name: Optional[str] = None,
+        **model_kwargs: Any,
+    ) -> None:
+        super().__init__(model_name, **model_kwargs)
+        self.tokenizer_name = tokenizer_name or model_name
+        self.auto_class: Type = self._resolve_auto_class(auto_class_name)
+
+    @staticmethod
+    def _resolve_auto_class(auto_class_name: str) -> Type:
+        if not hasattr(transformers, auto_class_name):
+            candidates = sorted(name for name in dir(transformers) if name.startswith("AutoModel"))
+            raise ValueError(
+                f"Unsupported Auto class '{auto_class_name}'. Available candidates: {', '.join(candidates)}"
+            )
+        return getattr(transformers, auto_class_name)
+
+    # def _build_quant_config(self) -> Optional[BitsAndBytesConfig]:
+    #     if not self.model_kwargs.get("load_in_4bit"):
+    #         return None
+    #     return BitsAndBytesConfig(
+    #         load_in_4bit=True,
+    #         bnb_4bit_quant_type=self.model_kwargs.get("bnb_4bit_quant_type", "nf4"),
+    #         bnb_4bit_compute_dtype=self.model_kwargs.get("bnb_4bit_compute_dtype", torch.float16),
+    #         bnb_4bit_use_double_quant=self.model_kwargs.get("bnb_4bit_use_double_quant", True),
+    #     )
+
+    def configure_model_kwargs(self) -> Dict[str, Any]:
+        """Hook for subclasses to tweak HF `.from_pretrained` kwargs."""
+        extra = dict(self.model_kwargs)
+        # extra["quantization_config"] = self._build_quant_config()
+        return extra
+
+    def load_model(self) -> nn.Module:
+        logger.log_rank_zero(f"Loading HuggingFace model '{self.model_name}' via {self.auto_class.__name__}")
+
+        return self.auto_class.from_pretrained(
+            self.model_name,
+            **self.configure_model_kwargs(),
+        )
+
+    def load_tokenizer(self) -> AutoTokenizer:
+        """Load Hugging Face tokenizer."""
+        logger.log_rank_zero(f"Loading tokenizer '{self.tokenizer_name}'")
+        tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name)
+        insert_pad_token(tokenizer)
+        return tokenizer
From defab15b4c8cbbc1e43cc09dbdd449dd6034308b Mon Sep 17 00:00:00 2001
From: Swati Allabadi
Date: Wed, 10 Dec 2025 21:16:16 +0000
Subject: [PATCH 2/3] Adding unit test cases

Signed-off-by: Swati Allabadi
---
 .../finetune/experimental/core/model.py       |  15 +-
 .../finetune/experimental/tests/test_model.py | 143 ++++++++++++++++++
 2 files changed, 150 insertions(+), 8 deletions(-)
 create mode 100644 QEfficient/finetune/experimental/tests/test_model.py

diff --git a/QEfficient/finetune/experimental/core/model.py b/QEfficient/finetune/experimental/core/model.py
index 1903a0f1e..2f967d85d 100644
--- a/QEfficient/finetune/experimental/core/model.py
+++ b/QEfficient/finetune/experimental/core/model.py
@@ -9,13 +9,13 @@
 from abc import ABC, abstractmethod
 from typing import Any, Dict, Optional, Type
 
-import torch
 import torch.nn as nn
-from transformers import AutoTokenizer, BitsAndBytesConfig
+from transformers import AutoTokenizer
 import transformers
+from transformers.utils.logging import get_logger
 
 from QEfficient.finetune.experimental.core.component_registry import registry
-from QEfficient.finetune.experimental.utils.dataset_helper import insert_pad_token
+from QEfficient.finetune.experimental.core.utils.dataset_utils import insert_pad_token
 
 logger = get_logger(__name__)
 
@@ -38,7 +38,6 @@ def create(cls, model_name: str, **model_kwargs: Any) -> "BaseModel":
         if not isinstance(module, nn.Module):
             raise TypeError(f"load_model() must return nn.Module, got {type(module)}")
         obj._model = module
-        obj.add_module("_wrapped_model", module)  # register
         return obj
 
     @abstractmethod
@@ -70,14 +69,14 @@ def forward(self, *args, **kwargs):
     def get_input_embeddings(self):
         if hasattr(self.model, "get_input_embeddings"):
            return self.model.get_input_embeddings()
-        logger.log_rank_zero(f"Model {self.model_name} does not expose input embeddings", logging.WARNING)
+        logger.log(logging.WARNING, f"Model {self.model_name} does not expose input embeddings")
         return None
 
     def resize_token_embeddings(self, new_num_tokens: int) -> None:
         if hasattr(self.model, "resize_token_embeddings"):
             self.model.resize_token_embeddings(new_num_tokens)
         else:
-            logger.log_rank_zero(f"Model {self.model_name} cannot resize token embeddings", logging.WARNING)
+            logger.log(logging.WARNING, f"Model {self.model_name} cannot resize token embeddings")
 
     # optional
     def to(self, *args, **kwargs):
@@ -134,7 +133,7 @@ def configure_model_kwargs(self) -> Dict[str, Any]:
         return extra
 
     def load_model(self) -> nn.Module:
-        logger.log_rank_zero(f"Loading HuggingFace model '{self.model_name}' via {self.auto_class.__name__}")
+        logger.info(f"Loading HuggingFace model '{self.model_name}' via {self.auto_class.__name__}")
 
         return self.auto_class.from_pretrained(
             self.model_name,
@@ -143,7 +142,7 @@
 
     def load_tokenizer(self) -> AutoTokenizer:
         """Load Hugging Face tokenizer."""
-        logger.log_rank_zero(f"Loading tokenizer '{self.tokenizer_name}'")
+        logger.info(f"Loading tokenizer '{self.tokenizer_name}'")
         tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name)
         insert_pad_token(tokenizer)
         return tokenizer
diff --git a/QEfficient/finetune/experimental/tests/test_model.py b/QEfficient/finetune/experimental/tests/test_model.py
new file mode 100644
index 000000000..5174f971f
--- /dev/null
+++ b/QEfficient/finetune/experimental/tests/test_model.py
@@ -0,0 +1,143 @@
+import pytest
+import torch
+import torch.nn as nn
+from unittest import mock
+
+import transformers
+from QEfficient.finetune.experimental.core import model
+from QEfficient.finetune.experimental.core.model import BaseModel, HFModel
+
+
+class TestMockModel(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.linear = nn.Linear(2, 2)
+
+    def forward(self, x):
+        return self.linear(x)
+
+
+class TestCustomModel(BaseModel):
+    def __init__(self, model_name):
+        super().__init__(model_name)
+        print("init of custom class")
+
+    def load_model(self) -> nn.Module:
+        return TestMockModel()
+
+    def load_tokenizer(self):
+        return "dummy-tokenizer"
+
+
+# BaseModel tests
+def test_model_property_errors_if_not_created():
+    m = TestCustomModel("dummy")
+    with pytest.raises(RuntimeError):
+        _ = m.model  # must call .create()
+
+
+def test_create_builds_and_registers():
+    breakpoint()
+    m = TestCustomModel.create("dummy")
+    # inner model exists and registered
+    assert "_model" in m._modules
+    assert isinstance(m.model, TestMockModel)
+    # forward works
+    out = m(torch.zeros(1, 2))
+    assert out.shape == (1, 2)
+
+
+def test_tokenizer_lazy_loading():
+    m = TestCustomModel.create("dummy")
+    assert m._tokenizer is None
+    tok = m.tokenizer
+    assert tok == "dummy-tokenizer"
+    assert m._tokenizer == tok
+
+
+def test_to_moves_inner_and_returns_self():
+    m = TestCustomModel.create("dummy")
+    with mock.patch.object(TestMockModel, "to", autospec=True) as mocked_to:
+        ret = m.to("cuda:0")
+    mocked_to.assert_called_once_with(m.model, "cuda:0")
+    assert ret is m
+
+
+def test_train_eval_sync_flags():
+    m = TestCustomModel.create("dummy")
+    m.eval()
+    assert m.training is False
+    assert m.model.training is False
+    m.train()
+    assert m.training is True
+    assert m.model.training is True
+
+
+def test_resize_token_embeddings_and_get_input_embeddings_warn(monkeypatch):
+    m = TestCustomModel.create("dummy")
+
+    # resize_token_embeddings: underlying model lacks the method, should warn and not raise
+    with mock.patch("QEfficient.finetune.experimental.core.model.logger.log") as mocked_log:
+        m.resize_token_embeddings(10)
+        mocked_log.assert_called_once()
+
+    # get_input_embeddings: underlying model lacks method, should warn and return None
+    with mock.patch("QEfficient.finetune.experimental.core.model.logger.log") as mocked_log:
+        assert m.get_input_embeddings() is None
+        mocked_log.assert_called_once()
+
+
+def test_state_dict_contains_inner_params():
+    m = TestCustomModel.create("dummy")
+    sd = m.state_dict()
+    # should contain params from TestMockModel.linear
+    assert any("linear.weight" in k for k in sd)
+    assert any("linear.bias" in k for k in sd)
+
+
+# HFModel tests
+def test_hfmodel_invalid_auto_class_raises():
+    with pytest.raises(ValueError):
+        HFModel.create("hf-name", auto_class_name="AutoDoesNotExist")
+
+
+def test_hfmodel_loads_auto_and_tokenizer(monkeypatch):
+    # fake HF Auto class
+    class FakeAuto(nn.Module):
+        @classmethod
+        def from_pretrained(cls, name, **kwargs):
+            inst = cls()
+            inst.loaded = (name, kwargs)
+            return inst
+
+        def forward(self, x):
+            return x
+
+    fake_tok = mock.Mock()
+
+    # Monkeypatch transformer classes used in HFModel
+    monkeypatch.setattr(
+        "QEfficient.finetune.experimental.core.model.transformers.AutoModelForCausalLM",
+        FakeAuto,
+        raising=False,
+    )
+    monkeypatch.setattr(
+        model,
+        "AutoTokenizer",
+        mock.Mock(from_pretrained=mock.Mock(return_value=fake_tok)),
+    )
+    monkeypatch.setattr(
+        "QEfficient.finetune.experimental.core.model.insert_pad_token",
+        mock.Mock(),
+        raising=False,
+    )
+
+    m = HFModel.create("hf-name")
+    assert isinstance(m.model, FakeAuto)
+
+    # load tokenizer
+    tok = m.load_tokenizer()
+
+    # tokenizer was loaded and pad token inserted
+    model.AutoTokenizer.from_pretrained.assert_called_once_with("hf-name")
+    model.insert_pad_token.assert_called_once_with(fake_tok)
From 51ae86a914f23ef8bcbfaecad28295b29aa60991 Mon Sep 17 00:00:00 2001
From: Swati Allabadi
Date: Fri, 19 Dec 2025 09:31:09 +0000
Subject: [PATCH 3/3] Addressing review comments

Signed-off-by: Swati Allabadi
---
 .../experimental/core/component_registry.py   | 14 ++++++++++++--
 .../finetune/experimental/core/model.py       |  5 ++--
 .../finetune/experimental/tests/test_model.py | 27 ++++++++++++-------
 3 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/QEfficient/finetune/experimental/core/component_registry.py b/QEfficient/finetune/experimental/core/component_registry.py
index 7744d71e6..d1f948031 100644
--- a/QEfficient/finetune/experimental/core/component_registry.py
+++ b/QEfficient/finetune/experimental/core/component_registry.py
@@ -5,7 +5,6 @@
 #
 # -----------------------------------------------------------------------------
 
-
 import logging
-from typing import Callable, Dict, Optional, Type
+from typing import Any, Callable, Dict, Optional, Type
 
@@ -198,3 +197,14 @@ def list_callbacks(self) -> list[str]:
 
 # Global registry instance
 registry = ComponentRegistry()
+
+
+class ComponentFactory:
+    @staticmethod
+    def create_model(model_type: str, model_name: str, **kwargs) -> Any:
+        """Create a model instance."""
+        model_class = registry.get_model(model_type)
+        if model_class is None:
+            raise ValueError(f"Unknown model: {model_type}. Available: {registry.list_models()}")
+        model_instance = model_class.create(model_name, **kwargs)
+        return model_instance
diff --git a/QEfficient/finetune/experimental/core/model.py b/QEfficient/finetune/experimental/core/model.py
index 2f967d85d..57e4f9761 100644
--- a/QEfficient/finetune/experimental/core/model.py
+++ b/QEfficient/finetune/experimental/core/model.py
@@ -34,6 +34,7 @@ def __init__(self, model_name: str, **model_kwargs: Any) -> None:
     @classmethod
     def create(cls, model_name: str, **model_kwargs: Any) -> "BaseModel":
         obj = cls(model_name, **model_kwargs)
+        # load model after __init__ finishes
         module = obj.load_model()
         if not isinstance(module, nn.Module):
             raise TypeError(f"load_model() must return nn.Module, got {type(module)}")
@@ -42,8 +43,8 @@ def create(cls, model_name: str, **model_kwargs: Any) -> "BaseModel":
 
     @abstractmethod
     def load_model(self) -> nn.Module:
-        """Create and return the underlying torch.nn.Module."""
-        ...
+        """Load and return the underlying torch.nn.Module."""
+        pass
 
     def load_tokenizer(self) -> Any:
         """Override if the model exposes a tokenizer."""
diff --git a/QEfficient/finetune/experimental/tests/test_model.py b/QEfficient/finetune/experimental/tests/test_model.py
index 5174f971f..6bc031dc3 100644
--- a/QEfficient/finetune/experimental/tests/test_model.py
+++ b/QEfficient/finetune/experimental/tests/test_model.py
@@ -1,3 +1,10 @@
+# -----------------------------------------------------------------------------
+#
+# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+# SPDX-License-Identifier: BSD-3-Clause
+#
+# -----------------------------------------------------------------------------
+
 import pytest
 import torch
 import torch.nn as nn
@@ -6,6 +13,8 @@
 import transformers
 from QEfficient.finetune.experimental.core import model
 from QEfficient.finetune.experimental.core.model import BaseModel, HFModel
+from QEfficient.finetune.experimental.core.component_registry import registry
+from QEfficient.finetune.experimental.core.component_registry import ComponentFactory
 
 
 class TestMockModel(nn.Module):
@@ -17,6 +26,7 @@ def forward(self, x):
         return self.linear(x)
 
 
+@registry.model("testcustom")
 class TestCustomModel(BaseModel):
     def __init__(self, model_name):
         super().__init__(model_name)
@@ -37,8 +47,7 @@
 
 
 def test_create_builds_and_registers():
-    breakpoint()
-    m = TestCustomModel.create("dummy")
+    m = ComponentFactory.create_model("testcustom", "dummy")
     # inner model exists and registered
     assert "_model" in m._modules
     assert isinstance(m.model, TestMockModel)
@@ -48,7 +57,7 @@ def test_create_builds_and_registers():
 
 
 def test_tokenizer_lazy_loading():
-    m = TestCustomModel.create("dummy")
+    m = ComponentFactory.create_model("testcustom", "dummy")
     assert m._tokenizer is None
     tok = m.tokenizer
     assert tok == "dummy-tokenizer"
@@ -56,7 +65,7 @@ def test_tokenizer_lazy_loading():
 
 
 def test_to_moves_inner_and_returns_self():
-    m = TestCustomModel.create("dummy")
+    m = ComponentFactory.create_model("testcustom", "dummy")
     with mock.patch.object(TestMockModel, "to", autospec=True) as mocked_to:
         ret = m.to("cuda:0")
     mocked_to.assert_called_once_with(m.model, "cuda:0")
@@ -64,7 +73,7 @@ def test_to_moves_inner_and_returns_self():
 
 
 def test_train_eval_sync_flags():
-    m = TestCustomModel.create("dummy")
+    m = ComponentFactory.create_model("testcustom", "dummy")
     m.eval()
     assert m.training is False
     assert m.model.training is False
@@ -74,7 +83,7 @@ def test_train_eval_sync_flags():
 
 
 def test_resize_token_embeddings_and_get_input_embeddings_warn(monkeypatch):
-    m = TestCustomModel.create("dummy")
+    m = ComponentFactory.create_model("testcustom", "dummy")
 
     # resize_token_embeddings: underlying model lacks the method, should warn and not raise
     with mock.patch("QEfficient.finetune.experimental.core.model.logger.log") as mocked_log:
@@ -88,7 +97,7 @@ def test_resize_token_embeddings_and_get_input_embeddings_warn(monkeypatch):
 
 
 def test_state_dict_contains_inner_params():
-    m = TestCustomModel.create("dummy")
+    m = ComponentFactory.create_model("testcustom", "dummy")
     sd = m.state_dict()
     # should contain params from TestMockModel.linear
     assert any("linear.weight" in k for k in sd)
@@ -98,7 +107,7 @@ def test_state_dict_contains_inner_params():
 # HFModel tests
 def test_hfmodel_invalid_auto_class_raises():
     with pytest.raises(ValueError):
-        HFModel.create("hf-name", auto_class_name="AutoDoesNotExist")
+        ComponentFactory.create_model("hf", "hf-name", auto_class_name="AutoDoesNotExist")
 
 
 def test_hfmodel_loads_auto_and_tokenizer(monkeypatch):
@@ -131,7 +140,7 @@ def forward(self, x):
         mock.Mock(),
         raising=False,
     )
 
-    m = HFModel.create("hf-name")
+    m = ComponentFactory.create_model("hf", "hf-name")
     assert isinstance(m.model, FakeAuto)
 
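
---

Usage sketch (illustrative, not part of the patches): the snippet below wires
together the pieces these commits introduce -- ComponentFactory resolves the
"hf" key that HFModel registers via @registry.model("hf"), and BaseModel.create()
loads the wrapped module after __init__. The checkpoint id "gpt2" is only a
placeholder; any extra kwargs are forwarded unchanged to from_pretrained.

    from QEfficient.finetune.experimental.core.component_registry import ComponentFactory

    # Resolve the registered HFModel class and build it via BaseModel.create(),
    # which instantiates the wrapper and then loads the underlying nn.Module.
    model = ComponentFactory.create_model(
        "hf",                                   # registry key for HFModel
        "gpt2",                                 # placeholder HF checkpoint id
        auto_class_name="AutoModelForCausalLM",
    )

    tokenizer = model.tokenizer                 # lazy: calls load_tokenizer() once and inserts a pad token
    batch = tokenizer("hello world", return_tensors="pt")
    outputs = model(**batch)                    # BaseModel.forward() delegates to the wrapped module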
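For a non-HF backend, subclassing follows the same pattern the unit tests use
with TestCustomModel; TinyModel and the "tiny" key below are hypothetical names:

    import torch.nn as nn

    from QEfficient.finetune.experimental.core.component_registry import ComponentFactory, registry
    from QEfficient.finetune.experimental.core.model import BaseModel


    @registry.model("tiny")                     # hypothetical key, analogous to "testcustom"
    class TinyModel(BaseModel):
        def load_model(self) -> nn.Module:
            # Any nn.Module works; create() validates the type and assigns it to
            # _model, so state_dict()/train()/eval() all reach the inner module.
            return nn.Linear(8, 2)


    m = ComponentFactory.create_model("tiny", "tiny-demo")
    assert m.model.out_features == 2            # wrapped module is reachable via .model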