togethercomputer · stainless-app · Jan 5, 2026 · Dec 17, 2025 · Dec 18, 2025 · Dec 19, 2025
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "2.0.0-alpha.11"
+  ".": "2.0.0-alpha.12"
 }
diff --git a/.stats.yml b/.stats.yml
@@ -1,4 +1,4 @@
 configured_endpoints: 44
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2Ftogetherai-817bdc0e9a5082575f07386056968f56af20cbc40cbbc716ab4b8c4ec9220b53.yml
-openapi_spec_hash: 30b3f6d251dfd02bca8ffa3f755e7574
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2Ftogetherai-a12a8f76dabc5f09bc2925b8d82b9d365c82b5eb5d183afcffd9f459dc058bcb.yml
+openapi_spec_hash: 5d5efee2b3de68d1a572788125dbf77d
 config_hash: 9749f2f8998aa6b15452b2187ff675b9
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,30 @@
 # Changelog
 
+## 2.0.0-alpha.12 (2026-01-05)
+
+Full Changelog: [v2.0.0-alpha.11...v2.0.0-alpha.12](https://github.com/togethercomputer/together-py/compare/v2.0.0-alpha.11...v2.0.0-alpha.12)
+
+### Features
+
+* Support VLM finetuning ([e4428b3](https://github.com/togethercomputer/together-py/commit/e4428b3c86080286643b0e287ff02ac6b8cd3864))
+* VLM Support update ([97c74a3](https://github.com/togethercomputer/together-py/commit/97c74a38da1ea0a7717b0172f5cd65bb85bcaee4))
+
+
+### Bug Fixes
+
+* use async_to_httpx_files in patch method ([dc293e6](https://github.com/togethercomputer/together-py/commit/dc293e68b49cce5b0c8437e94152e369bb09b625))
+
+
+### Chores
+
+* **internal:** add `--fix` argument to lint script ([c29463d](https://github.com/togethercomputer/together-py/commit/c29463dbe8a18fa02bf436ae4cbdd6b59644e641))
+* **internal:** codegen related update ([f7499fc](https://github.com/togethercomputer/together-py/commit/f7499fcd931834fcd16210cd25e14dc5b328fb0e))
+
+
+### Documentation
+
+* add more examples ([a048344](https://github.com/togethercomputer/together-py/commit/a048344c0daeeab4d7fefd41d3554bde860dd9d5))
+
 ## 2.0.0-alpha.11 (2025-12-16)
 
 Full Changelog: [v2.0.0-alpha.10...v2.0.0-alpha.11](https://github.com/togethercomputer/together-py/compare/v2.0.0-alpha.10...v2.0.0-alpha.11)

diff --git a/LICENSE b/LICENSE
@@ -186,7 +186,7 @@
       same "printed page" as the copyright notice for easier
       identification within third-party archives.
 
-   Copyright 2025 Together
+   Copyright 2026 Together
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.

diff --git a/README.md b/README.md
@@ -137,7 +137,7 @@ stream = client.chat.completions.create(
     messages=[
         {
             "role": "user",
-            "content": "Say this is a test",
+            "content": "Say this is a test!",
         }
     ],
     model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
@@ -158,7 +158,7 @@ stream = await client.chat.completions.create(
     messages=[
         {
             "role": "user",
-            "content": "Say this is a test",
+            "content": "Say this is a test!",
         }
     ],
     model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "together"
-version = "2.0.0-alpha.11"
+version = "2.0.0-alpha.12"
 description = "The official Python library for the together API"
 dynamic = ["readme"]
 license = "Apache-2.0"

diff --git a/scripts/lint b/scripts/lint
@@ -4,8 +4,13 @@ set -e
 
 cd "$(dirname "$0")/.."
 
-echo "==> Running ruff"
-uv run ruff check .
+if [ "$1" = "--fix" ]; then
+  echo "==> Running ruff with --fix"
+  uv run ruff check . --fix
+else
+  echo "==> Running ruff"
+  uv run ruff check .
+fi
 
 echo "==> Running pyright"
 uv run pyright

diff --git a/src/together/_base_client.py b/src/together/_base_client.py
@@ -1774,7 +1774,7 @@ async def patch(
         options: RequestOptions = {},
     ) -> ResponseT:
         opts = FinalRequestOptions.construct(
-            method="patch", url=path, json_data=body, files=to_httpx_files(files), **options
+            method="patch", url=path, json_data=body, files=await async_to_httpx_files(files), **options
         )
         return await self.request(cast_to, opts)
 

diff --git a/src/together/_version.py b/src/together/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 __title__ = "together"
-__version__ = "2.0.0-alpha.11"  # x-release-please-version
+__version__ = "2.0.0-alpha.12"  # x-release-please-version
diff --git a/src/together/lib/cli/api/fine_tuning.py b/src/together/lib/cli/api/fine_tuning.py
@@ -176,6 +176,12 @@ def fine_tuning(ctx: click.Context) -> None:
     help="Whether to mask the user messages in conversational data or prompts in instruction data. "
     "`auto` will automatically determine whether to mask the inputs based on the data format.",
 )
+@click.option(
+    "--train-vision",
+    type=bool,
+    default=False,
+    help="Whether to train the vision encoder. Only supported for multimodal models.",
+)
 @click.option(
     "--from-checkpoint",
     type=str,
@@ -231,6 +237,7 @@ def create(
     lora_dropout: float | None,
     lora_alpha: float | None,
     lora_trainable_modules: str | None,
+    train_vision: bool,
     suffix: str | None,
     wandb_api_key: str | None,
     wandb_base_url: str | None,
@@ -272,6 +279,7 @@ def create(
         lora_dropout=lora_dropout,
         lora_alpha=lora_alpha,
         lora_trainable_modules=lora_trainable_modules,
+        train_vision=train_vision,
         suffix=suffix,
         wandb_api_key=wandb_api_key,
         wandb_base_url=wandb_base_url,
@@ -363,6 +371,10 @@ def create(
             simpo_gamma=simpo_gamma or 0,
         )
 
+    if model_limits.supports_vision:
+        # Don't show price estimation for multimodal models yet
+        confirm = True
+
     finetune_price_estimation_result = client.fine_tuning.estimate_price(
         training_file=training_file,
         validation_file=validation_file,
@@ -426,9 +438,7 @@ def list(ctx: click.Context) -> None:
                 "Price": f"""${
                     finetune_price_to_dollars(float(str(i.total_price)))
                 }""",  # convert to string for mypy typing
-                "Progress": generate_progress_bar(
-                    i, datetime.now().astimezone(), use_rich=False
-                ),
+                "Progress": generate_progress_bar(i, datetime.now().astimezone(), use_rich=False),
             }
         )
     table = tabulate(display_list, headers="keys", tablefmt="grid", showindex=True)
@@ -449,9 +459,7 @@ def retrieve(ctx: click.Context, fine_tune_id: str) -> None:
     response.events = None
 
     rprint(JSON.from_data(response.model_json_schema()))
-    progress_text = generate_progress_bar(
-        response, datetime.now().astimezone(), use_rich=True
-    )
+    progress_text = generate_progress_bar(response, datetime.now().astimezone(), use_rich=True)
     prefix = f"Status: [bold]{response.status}[/bold],"
     rprint(f"{prefix} {progress_text}")
 

diff --git a/src/together/lib/cli/api/utils.py b/src/together/lib/cli/api/utils.py
@@ -28,9 +28,7 @@ def convert(  # pyright: ignore[reportImplicitOverride]
             return int(value)
         except ValueError:
             self.fail(
-                _("{value!r} is not a valid {number_type}.").format(
-                    value=value, number_type=self.name
-                ),
+                _("{value!r} is not a valid {number_type}.").format(value=value, number_type=self.name),
                 param,
                 ctx,
             )
@@ -39,7 +37,7 @@ def convert(  # pyright: ignore[reportImplicitOverride]
 class BooleanWithAutoParamType(click.ParamType):
     name = "boolean_or_auto"
 
-    def convert( # pyright: ignore[reportImplicitOverride]
+    def convert(  # pyright: ignore[reportImplicitOverride]
         self, value: str, param: click.Parameter | None, ctx: click.Context | None
     ) -> bool | Literal["auto"] | None:
         if value == "auto":
@@ -48,9 +46,7 @@ def convert( # pyright: ignore[reportImplicitOverride]
             return bool(value)
         except ValueError:
             self.fail(
-                _("{value!r} is not a valid {type}.").format(
-                    value=value, type=self.name
-                ),
+                _("{value!r} is not a valid {type}.").format(value=value, type=self.name),
                 param,
                 ctx,
             )
@@ -119,17 +115,13 @@ def generate_progress_bar(
                 return progress
 
             elapsed_time = (current_time - update_at).total_seconds()
-            ratio_filled = min(
-                elapsed_time / finetune_job.progress.seconds_remaining, 1.0
-            )
+            ratio_filled = min(elapsed_time / finetune_job.progress.seconds_remaining, 1.0)
             percentage = ratio_filled * 100
             filled = math.ceil(ratio_filled * _PROGRESS_BAR_WIDTH)
             bar = "█" * filled + "░" * (_PROGRESS_BAR_WIDTH - filled)
             time_left = "N/A"
             if finetune_job.progress.seconds_remaining > elapsed_time:
-                time_left = _human_readable_time(
-                    finetune_job.progress.seconds_remaining - elapsed_time
-                )
+                time_left = _human_readable_time(finetune_job.progress.seconds_remaining - elapsed_time)
             time_text = f"{time_left} left"
             progress = f"Progress: {bar} [bold]{percentage:>3.0f}%[/bold] [yellow]{time_text}[/yellow]"
 

diff --git a/src/together/lib/constants.py b/src/together/lib/constants.py
@@ -37,6 +37,12 @@
 # maximum number of GB sized files we support finetuning for
 MAX_FILE_SIZE_GB = 50.1
 
+# Multimodal limits
+MAX_IMAGES_PER_EXAMPLE = 10
+MAX_IMAGE_BYTES = 10 * 1024 * 1024  # 10MB
+# Max length = data-URI header + padded base64 size (4 chars per 3-byte group, rounded up)
+MAX_BASE64_IMAGE_LENGTH = len("data:image/jpeg;base64,") + 4 * ((MAX_IMAGE_BYTES + 2) // 3)
+
 # expected columns for Parquet files
 PARQUET_EXPECTED_COLUMNS = ["input_ids", "attention_mask", "labels"]
 

diff --git a/src/together/lib/resources/fine_tuning.py b/src/together/lib/resources/fine_tuning.py
@@ -22,6 +22,7 @@
     CosineLRSchedulerArgs,
     LinearLRSchedulerArgs,
     FinetuneTrainingLimits,
+    FinetuneMultimodalParams,
 )
 
 AVAILABLE_TRAINING_METHODS = {
@@ -51,6 +52,7 @@ def create_finetune_request(
     lora_dropout: float | None = 0,
     lora_alpha: float | None = None,
     lora_trainable_modules: str | None = "all-linear",
+    train_vision: bool = False,
     suffix: str | None = None,
     wandb_api_key: str | None = None,
     wandb_base_url: str | None = None,
@@ -207,6 +209,13 @@ def create_finetune_request(
             simpo_gamma=simpo_gamma,
         )
 
+    if model_limits.supports_vision:
+        multimodal_params = FinetuneMultimodalParams(train_vision=train_vision)
+    elif not model_limits.supports_vision and train_vision:
+        raise ValueError(f"Vision encoder training is not supported for the non-multimodal model `{model}`")
+    else:
+        multimodal_params = None
+
     finetune_request = FinetuneRequest(
         model=model,
         training_file=training_file,
@@ -227,6 +236,7 @@ def create_finetune_request(
         wandb_project_name=wandb_project_name,
         wandb_name=wandb_name,
         training_method=training_method_cls,  # pyright: ignore[reportPossiblyUnboundVariable]
+        multimodal_params=multimodal_params,
         from_checkpoint=from_checkpoint,
         from_hf_model=from_hf_model,
         hf_model_revision=hf_model_revision,
@@ -238,7 +248,10 @@ def create_finetune_request(
 
     return finetune_request, training_type_pe, training_method_pe
 
-def create_price_estimation_params(finetune_request: FinetuneRequest) -> tuple[pe_params.TrainingType, pe_params.TrainingMethod]:
+
+def create_price_estimation_params(
+    finetune_request: FinetuneRequest,
+) -> tuple[pe_params.TrainingType, pe_params.TrainingMethod]:
     training_type_cls: pe_params.TrainingType
     if isinstance(finetune_request.training_type, FullTrainingType):
         training_type_cls = pe_params.TrainingTypeFullTrainingType(
@@ -275,6 +288,7 @@ def create_price_estimation_params(finetune_request: FinetuneRequest) -> tuple[p
 
     return training_type_cls, training_method_cls
 
+
 def get_model_limits(client: Together, model: str) -> FinetuneTrainingLimits:
     """
     Requests training limits for a specific model

diff --git a/src/together/lib/types/fine_tuning.py b/src/together/lib/types/fine_tuning.py
@@ -189,6 +189,7 @@ class TrainingMethodUnknown(BaseModel):
 
     method: str
 
+
 TrainingMethod: TypeAlias = Union[
     TrainingMethodSFT,
     TrainingMethodDPO,
@@ -202,6 +203,7 @@ class FinetuneTrainingLimits(BaseModel):
     min_learning_rate: float
     full_training: Optional[FinetuneFullTrainingLimits] = None
     lora_training: Optional[FinetuneLoraTrainingLimits] = None
+    supports_vision: bool = False
 
 
 class LinearLRSchedulerArgs(BaseModel):
@@ -249,6 +251,7 @@ class EmptyLRScheduler(BaseModel):
     lr_scheduler_type: Literal[""]
     lr_scheduler_args: None = None
 
+
 class UnknownLRScheduler(BaseModel):
     """
     Unknown learning rate scheduler
@@ -268,6 +271,14 @@ class UnknownLRScheduler(BaseModel):
 ]
 
 
+class FinetuneMultimodalParams(BaseModel):
+    """
+    Multimodal fine-tuning parameters; `train_vision` sets whether to train the vision encoder
+    """
+
+    train_vision: bool = False
+
+
 class FinetuneProgress(BaseModel):
     """
     Fine-tune job progress
@@ -303,6 +314,9 @@ class FinetuneResponse(BaseModel):
     from_checkpoint: Optional[str] = None
     """Checkpoint used to continue training"""
 
+    multimodal_params: Optional[FinetuneMultimodalParams] = None
+    """Multimodal parameters"""
+
     from_hf_model: Optional[str] = None
     """Hugging Face Hub repo to start training from"""
 
@@ -467,6 +481,9 @@ class FinetuneRequest(BaseModel):
     training_method: TrainingMethod = Field(default_factory=TrainingMethodSFT)
     # from step
     from_checkpoint: Union[str, None] = None
+    # multimodal parameters
+    multimodal_params: Union[FinetuneMultimodalParams, None] = None
+    # hugging face related fields
     from_hf_model: Union[str, None] = None
     hf_model_revision: Union[str, None] = None
     # hf related fields