diff --git a/.gitignore b/.gitignore index 9edc9823c..c9d5349f7 100644 --- a/.gitignore +++ b/.gitignore @@ -68,3 +68,4 @@ docs/cuopt/build cpp/include/cuopt/semantic_version.hpp !datasets/quadratic_programming !datasets/quadratic_programming/** +dev_scripts/test_c_api diff --git a/GRPC_ARCHITECTURE.md b/GRPC_ARCHITECTURE.md new file mode 100644 index 000000000..817ba742e --- /dev/null +++ b/GRPC_ARCHITECTURE.md @@ -0,0 +1,82 @@ +# gRPC-based Remote Solve Architecture + +## Overview + +cuOpt remote solve uses gRPC for transport and protobuf-generated stubs for the service API. +The request/response payloads are serialized with a protobuf-based serializer that maps +cuOpt data structures to protobuf messages. This preserves existing semantics while +moving the network layer to a standard, well-supported RPC stack. + +## Service Definition + +The gRPC service is defined in `cpp/src/linear_programming/utilities/cuopt_remote_service.proto` +and imports the message schema in `cuopt_remote.proto`. Code is generated by `protoc` +plus `grpc_cpp_plugin` during the build. + +Core RPCs include: + +- `SubmitJob` / `UploadAndSubmit` +- `CheckStatus` +- `GetResult` / `StreamResult` +- `StreamLogs` +- `CancelJob` +- `DeleteResult` +- `GetIncumbents` + +## Components + +### gRPC Server (`cuopt_grpc_server`) + +- Source: `cpp/cuopt_grpc_server.cpp` +- Implements `CuOptRemoteService` and owns the worker process pool. +- Workers communicate with the main server process via shared memory + pipes. +- For results, the server calls `to_host()` before serialization. +- Supports streaming logs and incumbents through gRPC streaming endpoints. + +### gRPC Client Path (C++) + +- Client logic lives in `cpp/src/linear_programming/utilities/remote_solve_grpc.cpp` + and is used by `remote_solve.cu` and `cuopt_cli`. +- The client serializes problems using the protobuf serializer, submits them + via gRPC, and deserializes results back into cuOpt solution objects. + +### Serialization Layer + +- Default serializer: `cpp/src/linear_programming/utilities/protobuf_serializer.cu` +- Interface: `cpp/include/cuopt/linear_programming/utilities/remote_serialization.hpp` +- Optional plugin override: `CUOPT_SERIALIZER_LIB` can load a custom serializer. +- The serializer uses protobuf message types defined in `cuopt_remote.proto`. + +## Data Flow (LP/MIP) + +1. Client builds a problem (LP/MIP). +2. Serializer converts the problem + settings into protobuf bytes. +3. gRPC `SubmitJob` or `UploadAndSubmit` sends the bytes to the server. +4. Server deserializes to cuOpt data structures. +5. Server runs `solve_lp` / `solve_mip` in a worker process. +6. Server calls `to_host()` and serializes the solution to protobuf bytes. +7. Client retrieves results via `GetResult` / `StreamResult` and deserializes. + +## Generated Code (protoc output) + +Generated files are written to the CMake binary directory (not checked into source): + +- `cuopt_remote.pb.cc/.h` +- `cuopt_remote_service.pb.cc/.h` +- `cuopt_remote_service.grpc.pb.cc/.h` + +## Build Integration + +`cpp/CMakeLists.txt` drives code generation: + +- Locates `protoc` and `grpc_cpp_plugin` +- Runs `protoc` to generate the `*.pb.cc/.h` sources +- Adds generated sources to the `cuopt` library +- Builds `cuopt_grpc_server` only when gRPC is available + +## Security Notes + +- Service stubs and message parsing are generated by `protoc` and `grpc_cpp_plugin`. +- Payload serialization uses protobuf message APIs rather than hand-written parsing. 
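As a concrete illustration of the bullet above, here is a minimal sketch of the protobuf message API, modeled on the incumbent streaming in `cpp/cuopt_grpc_server.cpp`. The `cuopt::remote::Incumbent` accessors mirror the server code; the helper function names are illustrative only, and the authoritative schema lives in `cuopt_remote.proto`.

```cpp
#include <string>
#include <vector>

#include "cuopt_remote.pb.h"  // generated by protoc from cuopt_remote.proto

// Encode an incumbent the way the server's MIP callback does.
std::vector<char> encode_incumbent(const std::string& job_id,
                                   double objective,
                                   const std::vector<double>& assignment)
{
  cuopt::remote::Incumbent msg;
  msg.set_job_id(job_id);
  msg.set_objective(objective);
  for (double v : assignment) {
    msg.add_assignment(v);
  }
  std::vector<char> buffer(msg.ByteSizeLong());
  msg.SerializeToArray(buffer.data(), static_cast<int>(buffer.size()));
  return buffer;
}

// Decoding goes through generated parsing code, which rejects malformed
// input instead of crashing -- the point of the notes above.
bool decode_incumbent(const std::vector<char>& buffer, cuopt::remote::Incumbent* out)
{
  return out->ParseFromArray(buffer.data(), static_cast<int>(buffer.size()));
}
```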
+- gRPC provides HTTP/2 framing, flow control, and standard status codes.
diff --git a/build.sh b/build.sh
index b5c35f510..330f7c0eb 100755
--- a/build.sh
+++ b/build.sh
@@ -15,7 +15,7 @@ REPODIR=$(cd "$(dirname "$0")"; pwd)
 LIBCUOPT_BUILD_DIR=${LIBCUOPT_BUILD_DIR:=${REPODIR}/cpp/build}
 LIBMPS_PARSER_BUILD_DIR=${LIBMPS_PARSER_BUILD_DIR:=${REPODIR}/cpp/libmps_parser/build}
-VALIDARGS="clean libcuopt libmps_parser cuopt_mps_parser cuopt cuopt_server cuopt_sh_client docs deb -a -b -g -fsanitize -tsan -msan -v -l= --verbose-pdlp --build-lp-only --no-fetch-rapids --skip-c-python-adapters --skip-tests-build --skip-routing-build --skip-fatbin-write --host-lineinfo [--cmake-args=\\\"\\\"] [--cache-tool=] -n --allgpuarch --ci-only-arch --show_depr_warn -h --help"
+VALIDARGS="clean libcuopt libmps_parser cuopt_mps_parser cuopt cuopt_server cuopt_sh_client cuopt_grpc_server docs deb -a -b -g -fsanitize -tsan -msan -v -l= --verbose-pdlp --build-lp-only --no-fetch-rapids --skip-c-python-adapters --skip-tests-build --skip-routing-build --skip-fatbin-write --host-lineinfo [--cmake-args=\\\"\\\"] [--cache-tool=] -n --allgpuarch --ci-only-arch --show_depr_warn -h --help"
 HELP="$0 [ ...] [ ...]
 where is:
    clean - remove all existing build artifacts and configuration (start over)
@@ -25,6 +25,7 @@ HELP="$0 [ ...] [ ...]
    cuopt - build the cuopt Python package
    cuopt_server - build the cuopt_server Python package
    cuopt_sh_client - build cuopt self host client
+   cuopt_grpc_server - build the cuopt gRPC server executable (prototype)
    docs - build the docs
    deb - build deb package (requires libcuopt to be built first)
 and is:
@@ -53,7 +54,7 @@ HELP="$0 [ ...] [ ...]
    --show_depr_warn - show cmake deprecation warnings
    -h - print this text
- default action (no args) is to build and install 'libcuopt' then 'cuopt' then 'docs' targets
+ default action (no args) is to build and install 'libcuopt', 'cuopt', 'cuopt_grpc_server', then 'docs'
 libcuopt build dir is: ${LIBCUOPT_BUILD_DIR}
@@ -171,6 +172,13 @@ function cmakeArgs {
       ARGS=${ARGS//$EXTRA_CMAKE_ARGS/}
       # Filter the full argument down to just the extra string that will be added to cmake call
       EXTRA_CMAKE_ARGS=$(echo "$EXTRA_CMAKE_ARGS" | grep -Eo "\".+\"" | sed -e 's/^"//' -e 's/"$//')
+    else
+      # Support unquoted --cmake-args=VALUE form.
+      EXTRA_CMAKE_ARGS=$(echo "$ARGS" | { grep -Eo "\-\-cmake\-args=[^ ]+" || true; })
+      if [[ -n ${EXTRA_CMAKE_ARGS} ]]; then
+        ARGS=${ARGS//$EXTRA_CMAKE_ARGS/}
+        EXTRA_CMAKE_ARGS=${EXTRA_CMAKE_ARGS#--cmake-args=}
+      fi
     fi
   fi
@@ -280,6 +288,52 @@ if ! contains_string "DFIND_MPS_PARSER_CPP" "${EXTRA_CMAKE_ARGS[@]}"; then
   EXTRA_CMAKE_ARGS+=("-DFIND_MPS_PARSER_CPP=ON")
 fi
+# Prefer config packages to avoid mixing system and conda find modules.
+if ! contains_string "CMAKE_FIND_PACKAGE_PREFER_CONFIG" "${EXTRA_CMAKE_ARGS[@]}"; then
+  EXTRA_CMAKE_ARGS+=("-DCMAKE_FIND_PACKAGE_PREFER_CONFIG=ON")
+fi
+
+# Default to the active install prefix for dependency lookup unless overridden.
+if ! contains_string "CMAKE_PREFIX_PATH" "${EXTRA_CMAKE_ARGS[@]}"; then
+  EXTRA_CMAKE_ARGS+=("-DCMAKE_PREFIX_PATH=${INSTALL_PREFIX}")
+fi
+
+# If conda-provided protobuf/grpc configs are available, prefer them by default.
+if [ -d "${INSTALL_PREFIX}/lib/cmake/protobuf" ]; then
+  if ! contains_string "Protobuf_DIR" "${EXTRA_CMAKE_ARGS[@]}"; then
+    EXTRA_CMAKE_ARGS+=("-DProtobuf_DIR=${INSTALL_PREFIX}/lib/cmake/protobuf")
+  fi
+fi
+if [ -d "${INSTALL_PREFIX}/lib/cmake/grpc" ]; then
+  if !
contains_string "gRPC_DIR" "${EXTRA_CMAKE_ARGS[@]}"; then + EXTRA_CMAKE_ARGS+=("-DgRPC_DIR=${INSTALL_PREFIX}/lib/cmake/grpc") + fi +fi + +# Prefer conda's ZLIB config if available to avoid system static zlib references. +if [ -d "${INSTALL_PREFIX}/lib/cmake/ZLIB" ]; then + if ! contains_string "ZLIB_DIR" "${EXTRA_CMAKE_ARGS[@]}"; then + EXTRA_CMAKE_ARGS+=("-DZLIB_DIR=${INSTALL_PREFIX}/lib/cmake/ZLIB") + fi +fi + +# Avoid pulling system ZLIB config that references missing libz.a. +if [ -d "/usr/lib64/cmake/ZLIB" ] || [ -d "/lib64/cmake/ZLIB" ]; then + if ! contains_string "CMAKE_IGNORE_PATH" "${EXTRA_CMAKE_ARGS[@]}"; then + EXTRA_CMAKE_ARGS+=("-DCMAKE_IGNORE_PATH=/usr/lib64/cmake/ZLIB;/lib64/cmake/ZLIB") + fi +fi + +# Prefer shared zlib if FindZLIB is used. +if ! contains_string "ZLIB_USE_STATIC_LIBS" "${EXTRA_CMAKE_ARGS[@]}"; then + EXTRA_CMAKE_ARGS+=("-DZLIB_USE_STATIC_LIBS=OFF") +fi + +# Hint FindZLIB to use the active prefix if no config is found. +if ! contains_string "ZLIB_ROOT" "${EXTRA_CMAKE_ARGS[@]}"; then + EXTRA_CMAKE_ARGS+=("-DZLIB_ROOT=${INSTALL_PREFIX}") +fi + # If clean given, run it prior to any other steps if hasArg clean; then # If the dirs to clean are mounted dirs in a container, the @@ -362,6 +416,13 @@ fi if buildAll || hasArg libcuopt; then mkdir -p "${LIBCUOPT_BUILD_DIR}" cd "${LIBCUOPT_BUILD_DIR}" + # If the cache points at system ZLIB, clear it so updated hints take effect. + if [ -f "${LIBCUOPT_BUILD_DIR}/CMakeCache.txt" ]; then + if grep -Eq "/(usr/)?lib64/cmake/ZLIB" "${LIBCUOPT_BUILD_DIR}/CMakeCache.txt"; then + rm -f "${LIBCUOPT_BUILD_DIR}/CMakeCache.txt" + rm -rf "${LIBCUOPT_BUILD_DIR}/CMakeFiles" + fi + fi cmake -DDEFINE_ASSERT=${DEFINE_ASSERT} \ -DDEFINE_BENCHMARK="${DEFINE_BENCHMARK}" \ -DDEFINE_PDLP_VERBOSE_MODE=${DEFINE_PDLP_VERBOSE_MODE} \ @@ -394,6 +455,49 @@ if buildAll || hasArg libcuopt; then fi fi +################################################################################ +# Build the cuopt gRPC server (prototype) +if buildAll || hasArg cuopt_grpc_server; then + mkdir -p "${LIBCUOPT_BUILD_DIR}" + cd "${LIBCUOPT_BUILD_DIR}" + + # Ensure gRPC is enabled and configured in this build directory. 
+ cmake -DDEFINE_ASSERT=${DEFINE_ASSERT} \ + -DDEFINE_BENCHMARK="${DEFINE_BENCHMARK}" \ + -DDEFINE_PDLP_VERBOSE_MODE=${DEFINE_PDLP_VERBOSE_MODE} \ + -DLIBCUOPT_LOGGING_LEVEL="${LOGGING_ACTIVE_LEVEL}" \ + -DCMAKE_INSTALL_PREFIX="${INSTALL_PREFIX}" \ + -DCMAKE_CUDA_ARCHITECTURES=${CUOPT_CMAKE_CUDA_ARCHITECTURES} \ + -DDISABLE_DEPRECATION_WARNING=${BUILD_DISABLE_DEPRECATION_WARNING} \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DFETCH_RAPIDS=${FETCH_RAPIDS} \ + -DBUILD_LP_ONLY=${BUILD_LP_ONLY} \ + -DBUILD_SANITIZER=${BUILD_SANITIZER} \ + -DBUILD_TSAN=${BUILD_TSAN} \ + -DBUILD_MSAN=${BUILD_MSAN} \ + -DSKIP_C_PYTHON_ADAPTERS=${SKIP_C_PYTHON_ADAPTERS} \ + -DBUILD_TESTS=$((1 - ${SKIP_TESTS_BUILD})) \ + -DSKIP_ROUTING_BUILD=${SKIP_ROUTING_BUILD} \ + -DWRITE_FATBIN=${WRITE_FATBIN} \ + -DHOST_LINEINFO=${HOST_LINEINFO} \ + -DINSTALL_TARGET="${INSTALL_TARGET}" \ + -DCUOPT_ENABLE_GRPC=ON \ + "${CACHE_ARGS[@]}" \ + "${EXTRA_CMAKE_ARGS[@]}" \ + "${REPODIR}"/cpp + + # Build the server target + cmake --build "${LIBCUOPT_BUILD_DIR}" --target cuopt_grpc_server ${VERBOSE_FLAG} -j"${PARALLEL_LEVEL}" + + # Install the server executable + if [ -z "${INSTALL_TARGET}" ]; then + echo "Skipping install of cuopt_grpc_server (-n flag set)" + else + install -m 755 "${LIBCUOPT_BUILD_DIR}/cuopt_grpc_server" "${INSTALL_PREFIX}/bin/" + echo "Installed cuopt_grpc_server to ${INSTALL_PREFIX}/bin/" + fi +fi + ################################################################################ # Build deb package if hasArg deb; then @@ -414,8 +518,20 @@ fi if buildAll || hasArg cuopt; then cd "${REPODIR}"/python/cuopt - # $EXTRA_CMAKE_ARGS gets concatenated into a string with [*] and then we find/replace spaces with semi-colons - SKBUILD_CMAKE_ARGS="-DCMAKE_PREFIX_PATH=${INSTALL_PREFIX};-DCMAKE_LIBRARY_PATH=${LIBCUOPT_BUILD_DIR};-DCMAKE_CUDA_ARCHITECTURES=${CUOPT_CMAKE_CUDA_ARCHITECTURES};${EXTRA_CMAKE_ARGS[*]// /;}" \ + # Convert EXTRA_CMAKE_ARGS into a semicolon-delimited list, escaping + # any semicolons in values so scikit-build-core treats each -D as one arg. + SKBUILD_EXTRA_ARGS=() + for extra_arg in "${EXTRA_CMAKE_ARGS[@]}"; do + SKBUILD_EXTRA_ARGS+=("${extra_arg//;/\\;}") + done + SKBUILD_EXTRA_ARGS_JOINED="" + if [ ${#SKBUILD_EXTRA_ARGS[@]} -gt 0 ]; then + SKBUILD_EXTRA_ARGS_JOINED="$(IFS=';'; echo "${SKBUILD_EXTRA_ARGS[*]}")" + fi + SKBUILD_CMAKE_ARGS="-DCMAKE_PREFIX_PATH=${INSTALL_PREFIX};-DCMAKE_LIBRARY_PATH=${LIBCUOPT_BUILD_DIR};-DCMAKE_CUDA_ARCHITECTURES=${CUOPT_CMAKE_CUDA_ARCHITECTURES}" + if [ -n "${SKBUILD_EXTRA_ARGS_JOINED}" ]; then + SKBUILD_CMAKE_ARGS="${SKBUILD_CMAKE_ARGS};${SKBUILD_EXTRA_ARGS_JOINED}" + fi python "${PYTHON_ARGS_FOR_INSTALL[@]}" . 
fi @@ -423,7 +539,18 @@ fi if buildAll || hasArg cuopt_mps_parser; then cd "${REPODIR}"/python/cuopt/cuopt/linear_programming - SKBUILD_CMAKE_ARGS="-DCMAKE_PREFIX_PATH=${INSTALL_PREFIX};-DCMAKE_LIBRARY_PATH=${LIBCUOPT_BUILD_DIR};-DCMAKE_CUDA_ARCHITECTURES=${CUOPT_CMAKE_CUDA_ARCHITECTURES};${EXTRA_CMAKE_ARGS[*]// /;}" \ + SKBUILD_EXTRA_ARGS=() + for extra_arg in "${EXTRA_CMAKE_ARGS[@]}"; do + SKBUILD_EXTRA_ARGS+=("${extra_arg//;/\\;}") + done + SKBUILD_EXTRA_ARGS_JOINED="" + if [ ${#SKBUILD_EXTRA_ARGS[@]} -gt 0 ]; then + SKBUILD_EXTRA_ARGS_JOINED="$(IFS=';'; echo "${SKBUILD_EXTRA_ARGS[*]}")" + fi + SKBUILD_CMAKE_ARGS="-DCMAKE_PREFIX_PATH=${INSTALL_PREFIX};-DCMAKE_LIBRARY_PATH=${LIBCUOPT_BUILD_DIR};-DCMAKE_CUDA_ARCHITECTURES=${CUOPT_CMAKE_CUDA_ARCHITECTURES}" + if [ -n "${SKBUILD_EXTRA_ARGS_JOINED}" ]; then + SKBUILD_CMAKE_ARGS="${SKBUILD_CMAKE_ARGS};${SKBUILD_EXTRA_ARGS_JOINED}" + fi python "${PYTHON_ARGS_FOR_INSTALL[@]}" . fi diff --git a/ci/build_wheel_libcuopt.sh b/ci/build_wheel_libcuopt.sh index 640562ed3..1e4b9520b 100755 --- a/ci/build_wheel_libcuopt.sh +++ b/ci/build_wheel_libcuopt.sh @@ -1,5 +1,5 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 set -euo pipefail @@ -17,7 +17,10 @@ fi # Install Boost and TBB bash ci/utils/install_boost_tbb.sh -export SKBUILD_CMAKE_ARGS="-DCUOPT_BUILD_WHEELS=ON;-DDISABLE_DEPRECATION_WARNING=ON" +# Install Protobuf + gRPC (protoc + grpc_cpp_plugin) +bash ci/utils/install_protobuf_grpc.sh + +export SKBUILD_CMAKE_ARGS="-DCUOPT_BUILD_WHEELS=ON;-DDISABLE_DEPRECATION_WARNING=ON;-DCUOPT_ENABLE_GRPC=ON" # For pull requests we are enabling assert mode. if [ "$RAPIDS_BUILD_TYPE" = "pull-request" ]; then diff --git a/ci/utils/install_protobuf_grpc.sh b/ci/utils/install_protobuf_grpc.sh new file mode 100644 index 000000000..73956d58b --- /dev/null +++ b/ci/utils/install_protobuf_grpc.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +# Install Protobuf and gRPC C++ development libraries +if [ -f /etc/os-release ]; then + . /etc/os-release + if [[ "$ID" == "rocky" ]]; then + echo "Detected Rocky Linux. Installing Protobuf + gRPC via dnf..." + # Enable PowerTools (Rocky 8) or CRB (Rocky 9) repository for protobuf-devel + if [[ "${VERSION_ID%%.*}" == "8" ]]; then + dnf config-manager --set-enabled powertools || dnf config-manager --set-enabled PowerTools || true + elif [[ "${VERSION_ID%%.*}" == "9" ]]; then + dnf config-manager --set-enabled crb || true + fi + # Protobuf (headers + protoc) + dnf install -y protobuf-devel protobuf-compiler + + # gRPC C++ (headers/libs + grpc_cpp_plugin for codegen) + # Package names can vary by repo; try the common ones first. + dnf install -y grpc-devel grpc-plugins || dnf install -y grpc-devel || true + elif [[ "$ID" == "ubuntu" ]]; then + echo "Detected Ubuntu. Installing Protobuf + gRPC via apt..." + apt-get update + # Protobuf (headers + protoc) + apt-get install -y libprotobuf-dev protobuf-compiler + + # gRPC C++ (headers/libs + grpc_cpp_plugin for codegen) + apt-get install -y libgrpc++-dev protobuf-compiler-grpc + else + echo "Unknown OS: $ID. Please install Protobuf + gRPC development libraries manually." + exit 1 + fi +else + echo "/etc/os-release not found. 
Cannot determine OS. Please install Protobuf + gRPC development libraries manually." + exit 1 +fi diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 37977ea28..80ef30e15 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -25,6 +25,7 @@ dependencies: - doxygen=1.9.1 - fastapi - gcc_linux-aarch64=14.* +- grpc-cpp - ipython - jsonref==1.1.0 - libboost-devel @@ -32,6 +33,7 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev +- libprotobuf - libraft-headers==26.2.*,>=0.0.0a0 - librmm==26.2.*,>=0.0.0a0 - make diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 0eaa7000a..98f3d7ef4 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -25,6 +25,7 @@ dependencies: - doxygen=1.9.1 - fastapi - gcc_linux-64=14.* +- grpc-cpp - ipython - jsonref==1.1.0 - libboost-devel @@ -32,6 +33,7 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev +- libprotobuf - libraft-headers==26.2.*,>=0.0.0a0 - librmm==26.2.*,>=0.0.0a0 - make diff --git a/conda/environments/all_cuda-131_arch-aarch64.yaml b/conda/environments/all_cuda-131_arch-aarch64.yaml index fb23f887a..187e0b3df 100644 --- a/conda/environments/all_cuda-131_arch-aarch64.yaml +++ b/conda/environments/all_cuda-131_arch-aarch64.yaml @@ -25,6 +25,7 @@ dependencies: - doxygen=1.9.1 - fastapi - gcc_linux-aarch64=14.* +- grpc-cpp - ipython - jsonref==1.1.0 - libboost-devel @@ -32,6 +33,7 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev +- libprotobuf - libraft-headers==26.2.*,>=0.0.0a0 - librmm==26.2.*,>=0.0.0a0 - make diff --git a/conda/environments/all_cuda-131_arch-x86_64.yaml b/conda/environments/all_cuda-131_arch-x86_64.yaml index 501729acd..031efb877 100644 --- a/conda/environments/all_cuda-131_arch-x86_64.yaml +++ b/conda/environments/all_cuda-131_arch-x86_64.yaml @@ -25,6 +25,7 @@ dependencies: - doxygen=1.9.1 - fastapi - gcc_linux-64=14.* +- grpc-cpp - ipython - jsonref==1.1.0 - libboost-devel @@ -32,6 +33,7 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev +- libprotobuf - libraft-headers==26.2.*,>=0.0.0a0 - librmm==26.2.*,>=0.0.0a0 - make diff --git a/conda/recipes/libcuopt/recipe.yaml b/conda/recipes/libcuopt/recipe.yaml index b4cccd714..c6462df8d 100644 --- a/conda/recipes/libcuopt/recipe.yaml +++ b/conda/recipes/libcuopt/recipe.yaml @@ -29,7 +29,7 @@ cache: export CXXFLAGS=$(echo $CXXFLAGS | sed -E 's@\-fdebug\-prefix\-map[^ ]*@@g') set +x - ./build.sh -n -v ${BUILD_EXTRA_FLAGS} libmps_parser libcuopt deb --allgpuarch --cmake-args=\"-DCMAKE_INSTALL_LIBDIR=lib\" + ./build.sh -n -v ${BUILD_EXTRA_FLAGS} libmps_parser libcuopt deb --allgpuarch --cmake-args=\"-DCMAKE_INSTALL_LIBDIR=lib -DBUILD_REMOTE_SERVER=ON -DCUOPT_ENABLE_GRPC=ON\" secrets: - AWS_ACCESS_KEY_ID - AWS_SECRET_ACCESS_KEY @@ -60,13 +60,17 @@ cache: - cmake ${{ cmake_version }} - make - ninja + - grpc-cpp + - libprotobuf - tbb-devel - zlib - bzip2 host: - cpp-argparse - cuda-version =${{ cuda_version }} + - grpc-cpp - libraft-headers =${{ minor_version }} + - libprotobuf - librmm =${{ minor_version }} - rapids-logger =0.2 - cuda-nvtx-dev @@ -146,6 +150,8 @@ outputs: - ${{ pin_subpackage("libmps-parser", exact=True) }} - libboost-devel - cuda-version =${{ cuda_version }} + - grpc-cpp + - libprotobuf - rapids-logger =0.2 - librmm =${{ minor_version }} - 
libcublas @@ -155,6 +161,7 @@ outputs: - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }} - ${{ pin_subpackage("libmps-parser", exact=True) }} - libboost-devel + - libprotobuf - librmm =${{ minor_version }} - cuda-nvrtc - libcudss @@ -165,6 +172,7 @@ outputs: - libcudss - libcurand - libcusparse + - libprotobuf - librmm - libboost - libboost_iostreams diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index b6f9a12e1..c7005baa2 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -235,7 +235,145 @@ include(${rapids-cmake-dir}/cpm/rapids_logger.cmake) rapids_cpm_rapids_logger(BUILD_EXPORT_SET cuopt-exports INSTALL_EXPORT_SET cuopt-exports) create_logger_macros(CUOPT "cuopt::default_logger()" include/cuopt) +# Ensure downstream consumers prefer config packages (avoids Protobuf +# module/config mixing when gRPC also depends on Protobuf). +include(${rapids-cmake-dir}/export/detail/post_find_package_code.cmake) +set(CUOPT_DEP_PREFER_CONFIG ON) +rapids_export_post_find_package_code(BUILD rapids_logger + "set(CMAKE_FIND_PACKAGE_PREFER_CONFIG ON)" + EXPORT_SET cuopt-exports + CONDITION CUOPT_DEP_PREFER_CONFIG +) +rapids_export_post_find_package_code(INSTALL rapids_logger + "set(CMAKE_FIND_PACKAGE_PREFER_CONFIG ON)" + EXPORT_SET cuopt-exports + CONDITION CUOPT_DEP_PREFER_CONFIG +) + +# Ensure ZLIB resolves to the active prefix for consumers (avoid broken system +# ZLIB config referencing missing libz.a). +set(CUOPT_ZLIB_HINT_CODE + "set(_CUOPT_PREFIX \"\${CMAKE_CURRENT_LIST_DIR}/../../..\")\n" + "if(EXISTS \"\${_CUOPT_PREFIX}/lib/cmake/ZLIB\")\n" + " set(ZLIB_DIR \"\${_CUOPT_PREFIX}/lib/cmake/ZLIB\")\n" + "endif()\n" + "set(ZLIB_ROOT \"\${_CUOPT_PREFIX}\")\n" + "set(ZLIB_USE_STATIC_LIBS OFF)\n" + "list(APPEND CMAKE_IGNORE_PATH \"/usr/lib64/cmake/ZLIB\" \"/lib64/cmake/ZLIB\")\n" +) +rapids_export_post_find_package_code(BUILD rapids_logger + "${CUOPT_ZLIB_HINT_CODE}" + EXPORT_SET cuopt-exports + CONDITION CUOPT_DEP_PREFER_CONFIG +) +rapids_export_post_find_package_code(INSTALL rapids_logger + "${CUOPT_ZLIB_HINT_CODE}" + EXPORT_SET cuopt-exports + CONDITION CUOPT_DEP_PREFER_CONFIG +) + find_package(CUDSS REQUIRED) +if(NOT CUDSS_INCLUDE AND DEFINED cudss_INCLUDE_DIR) + set(CUDSS_INCLUDE "${cudss_INCLUDE_DIR}") +endif() +if(NOT CUDSS_LIB_FILE AND DEFINED cudss_LIBRARY_DIR) + set(CUDSS_LIB_FILE "${cudss_LIBRARY_DIR}/libcudss.so.0") +endif() +if(NOT CUDSS_MT_LIB_FILE AND DEFINED cudss_LIBRARY_DIR) + set(CUDSS_MT_LIB_FILE "${cudss_LIBRARY_DIR}/libcudss_mtlayer_gomp.so.0") +endif() +if(TARGET cudss AND NOT CUDSS_LIBRARIES) + set(CUDSS_LIBRARIES cudss) +endif() + +# Protocol Buffers for remote solve serialization +# +# Use plain find_package so system installs (that don't ship ProtobufConfig.cmake) +# can fall back to CMake's built-in FindProtobuf module. +find_package(Protobuf REQUIRED) + +# Normalize target names across Protobuf providers (config vs module). +if(TARGET Protobuf::libprotobuf AND NOT TARGET protobuf::libprotobuf) + add_library(protobuf::libprotobuf ALIAS Protobuf::libprotobuf) +endif() +if(TARGET Protobuf::protoc AND NOT TARGET protobuf::protoc) + add_executable(protobuf::protoc ALIAS Protobuf::protoc) +endif() +include_directories(${Protobuf_INCLUDE_DIRS}) +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + +# gRPC for remote solve service. +# +# We treat gRPC as required, but allow it to come from either: +# - an installed CMake package (gRPCConfig.cmake), or +# - an in-tree build (e.g. python/libcuopt uses FetchContent(grpc), which defines gRPC::grpc++). 
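+# Either way, gRPC::grpc++ is used for linking below, and grpc_cpp_plugin
+# (as an imported target or an installed executable) drives code generation.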
+if(NOT TARGET gRPC::grpc++)
+  find_package(gRPC CONFIG REQUIRED)
+endif()
+set(CUOPT_ENABLE_GRPC ON)
+add_compile_definitions(CUOPT_ENABLE_GRPC)
+message(STATUS "gRPC enabled (target gRPC::grpc++ is available)")
+
+# Generate C++ code from .proto file
+set(PROTO_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/linear_programming/utilities/cuopt_remote.proto")
+set(PROTO_SRCS "${CMAKE_CURRENT_BINARY_DIR}/cuopt_remote.pb.cc")
+set(PROTO_HDRS "${CMAKE_CURRENT_BINARY_DIR}/cuopt_remote.pb.h")
+
+# Find protoc compiler (provided by config package or target)
+if(TARGET protobuf::protoc)
+  get_target_property(_PROTOBUF_PROTOC protobuf::protoc IMPORTED_LOCATION_RELEASE)
+  if(NOT _PROTOBUF_PROTOC)
+    get_target_property(_PROTOBUF_PROTOC protobuf::protoc IMPORTED_LOCATION)
+  endif()
+else()
+  set(_PROTOBUF_PROTOC ${Protobuf_PROTOC_EXECUTABLE})
+endif()
+
+if(NOT _PROTOBUF_PROTOC)
+  message(FATAL_ERROR "protoc not found (Protobuf_PROTOC_EXECUTABLE is empty)")
+endif()
+
+# Generate gRPC service code if gRPC is available
+if(CUOPT_ENABLE_GRPC)
+  if(TARGET grpc_cpp_plugin)
+    set(_GRPC_CPP_PLUGIN_EXECUTABLE "$<TARGET_FILE:grpc_cpp_plugin>")
+  else()
+    find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin)
+    if(NOT _GRPC_CPP_PLUGIN_EXECUTABLE)
+      message(FATAL_ERROR "grpc_cpp_plugin not found")
+    endif()
+  endif()
+
+  set(GRPC_PROTO_FILE "${CMAKE_CURRENT_SOURCE_DIR}/src/linear_programming/utilities/cuopt_remote_service.proto")
+  set(GRPC_PROTO_SRCS "${CMAKE_CURRENT_BINARY_DIR}/cuopt_remote_service.pb.cc")
+  set(GRPC_PROTO_HDRS "${CMAKE_CURRENT_BINARY_DIR}/cuopt_remote_service.pb.h")
+  set(GRPC_SERVICE_SRCS "${CMAKE_CURRENT_BINARY_DIR}/cuopt_remote_service.grpc.pb.cc")
+  set(GRPC_SERVICE_HDRS "${CMAKE_CURRENT_BINARY_DIR}/cuopt_remote_service.grpc.pb.h")
+
+  add_custom_command(
+    OUTPUT "${GRPC_PROTO_SRCS}" "${GRPC_PROTO_HDRS}" "${GRPC_SERVICE_SRCS}" "${GRPC_SERVICE_HDRS}"
+    COMMAND ${_PROTOBUF_PROTOC}
+    ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR}
+         --grpc_out ${CMAKE_CURRENT_BINARY_DIR}
+         --plugin=protoc-gen-grpc=${_GRPC_CPP_PLUGIN_EXECUTABLE}
+         --proto_path ${CMAKE_CURRENT_SOURCE_DIR}/src/linear_programming/utilities
+         ${GRPC_PROTO_FILE}
+    DEPENDS ${GRPC_PROTO_FILE} ${PROTO_FILE}
+    COMMENT "Generating gRPC C++ code from cuopt_remote_service.proto"
+    VERBATIM
+  )
+endif()
+
+add_custom_command(
+  OUTPUT "${PROTO_SRCS}" "${PROTO_HDRS}"
+  COMMAND ${_PROTOBUF_PROTOC}
+  ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR}
+       --proto_path ${CMAKE_CURRENT_SOURCE_DIR}/src/linear_programming/utilities
+       ${PROTO_FILE}
+  DEPENDS ${PROTO_FILE}
+  COMMENT "Generating C++ code from cuopt_remote.proto"
+  VERBATIM
+)
 if(BUILD_TESTS)
   include(cmake/thirdparty/get_gtest.cmake)
@@ -246,9 +384,19 @@ add_subdirectory(src)
 if (HOST_LINEINFO)
   set_source_files_properties(${CUOPT_SRC_FILES} DIRECTORY ${CMAKE_SOURCE_DIR} PROPERTIES COMPILE_OPTIONS "-g1")
 endif()
-add_library(cuopt SHARED
-  ${CUOPT_SRC_FILES}
-)
+if(CUOPT_ENABLE_GRPC)
+  add_library(cuopt SHARED
+    ${CUOPT_SRC_FILES}
+    ${PROTO_SRCS}
+    ${GRPC_PROTO_SRCS}
+    ${GRPC_SERVICE_SRCS}
+  )
+else()
+  add_library(cuopt SHARED
+    ${CUOPT_SRC_FILES}
+    ${PROTO_SRCS}
+  )
+endif()
 set_target_properties(cuopt
   PROPERTIES BUILD_RPATH "\$ORIGIN"
@@ -354,12 +502,18 @@ target_link_libraries(cuopt
   rapids_logger::rapids_logger
   CCCL::CCCL
   raft::raft
-  cuopt::mps_parser
   ${CUDSS_LIB_FILE}
   PRIVATE
+    protobuf::libprotobuf
     ${CUOPT_PRIVATE_CUDA_LIBS}
+    cuopt::mps_parser_static # Static link - symbols embedded in libcuopt.so
 )
+
+# Link gRPC if available
+if(CUOPT_ENABLE_GRPC)
+  target_link_libraries(cuopt PRIVATE gRPC::grpc++)
+endif()
+
+#
################################################################################################## # - generate tests -------------------------------------------------------------------------------- @@ -564,6 +718,42 @@ if(BUILD_LP_BENCHMARKS) endif() endif() +# cuopt_grpc_server - gRPC-based remote server (prototype) +if(CUOPT_ENABLE_GRPC) + add_executable(cuopt_grpc_server cuopt_grpc_server.cpp) + target_compile_options(cuopt_grpc_server + PRIVATE "$<$:${CUOPT_CXX_FLAGS}>" + ) + target_include_directories(cuopt_grpc_server + PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/src" + "${CMAKE_CURRENT_SOURCE_DIR}/include" + "${CMAKE_CURRENT_SOURCE_DIR}/libmps_parser/include" + "${CMAKE_CURRENT_BINARY_DIR}" + PUBLIC + "$" + "$" + ) + target_link_libraries(cuopt_grpc_server + PUBLIC + cuopt + OpenMP::OpenMP_CXX + PRIVATE + protobuf::libprotobuf + gRPC::grpc++ + ) + target_link_options(cuopt_grpc_server PRIVATE -Wl,--enable-new-dtags) + + install(TARGETS cuopt_grpc_server + COMPONENT runtime + RUNTIME DESTINATION ${_BIN_DEST} + ) + message(STATUS "Building cuopt_grpc_server (gRPC-based remote solve prototype)") +endif() + +# ################################################################################################## +# - Pluggable Serializers -------------------------------------------------------------------------- +# No serializer plugins are built. # ################################################################################################## # - CPack has to be the last item in the cmake file------------------------------------------------- diff --git a/cpp/cuopt_cli.cpp b/cpp/cuopt_cli.cpp index 5023cefc6..81c93f40b 100644 --- a/cpp/cuopt_cli.cpp +++ b/cpp/cuopt_cli.cpp @@ -1,23 +1,32 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ +#include #include #include #include +#include +#if CUOPT_ENABLE_GRPC +#include +#endif #include #include +// CUDA headers - only included for local solve path #include #include - #include #include #include +#include +#include +#include +#include #include #include #include @@ -29,6 +38,41 @@ static char cuda_module_loading_env[] = "CUDA_MODULE_LOADING=EAGER"; +namespace { +std::atomic handling_crash_signal{false}; + +void write_stderr(const char* msg) +{ + if (!msg) { return; } + ::write(STDERR_FILENO, msg, std::strlen(msg)); +} + +void crash_signal_handler(int signum) +{ + if (handling_crash_signal.exchange(true)) { _Exit(128 + signum); } + write_stderr( + "cuopt_cli: received fatal signal; gRPC stream may have been closed due to message size " + "mismatch (check --max-message-mb / CUOPT_GRPC_MAX_MESSAGE_MB)\n"); + std::signal(signum, SIG_DFL); + raise(signum); +} + +void terminate_handler() +{ + std::cerr << "cuopt_cli: terminating due to unhandled exception; gRPC stream may have been " + "closed due to message size mismatch (check --max-message-mb / " + "CUOPT_GRPC_MAX_MESSAGE_MB)" + << std::endl; + std::abort(); +} + +void install_crash_handlers() +{ + std::set_terminate(terminate_handler); + std::signal(SIGABRT, crash_signal_handler); +} +} // namespace + /** * @file cuopt_cli.cpp * @brief Command line interface for solving Linear Programming (LP) and Mixed Integer Programming @@ -66,6 +110,108 @@ static char cuda_module_loading_env[] = "CUDA_MODULE_LOADING=EAGER"; */ inline auto make_async() { return std::make_shared(); } +/** + * @brief Create a data_model_view_t from mps_data_model_t + * + * This creates a non-owning view with spans pointing to the CPU data in the mps_data_model. + * Used for remote solve where data stays in CPU memory. 
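+ * The view is non-owning: its spans borrow the mps_data_model's vectors, so
+ * the model must outlive any use of the returned view.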
+ * + * @param mps_data_model The owning mps_data_model_t + * @return data_model_view_t with spans pointing to the mps_data_model's vectors + */ +template +cuopt::linear_programming::data_model_view_t create_view_from_mps_data_model( + const cuopt::mps_parser::mps_data_model_t& mps_data_model) +{ + cuopt::linear_programming::data_model_view_t view; + + view.set_maximize(mps_data_model.get_sense()); + + if (!mps_data_model.get_constraint_matrix_values().empty()) { + view.set_csr_constraint_matrix(mps_data_model.get_constraint_matrix_values().data(), + mps_data_model.get_constraint_matrix_values().size(), + mps_data_model.get_constraint_matrix_indices().data(), + mps_data_model.get_constraint_matrix_indices().size(), + mps_data_model.get_constraint_matrix_offsets().data(), + mps_data_model.get_constraint_matrix_offsets().size()); + } + + if (!mps_data_model.get_constraint_bounds().empty()) { + view.set_constraint_bounds(mps_data_model.get_constraint_bounds().data(), + mps_data_model.get_constraint_bounds().size()); + } + + if (!mps_data_model.get_objective_coefficients().empty()) { + view.set_objective_coefficients(mps_data_model.get_objective_coefficients().data(), + mps_data_model.get_objective_coefficients().size()); + } + + view.set_objective_scaling_factor(mps_data_model.get_objective_scaling_factor()); + view.set_objective_offset(mps_data_model.get_objective_offset()); + + if (!mps_data_model.get_variable_lower_bounds().empty()) { + view.set_variable_lower_bounds(mps_data_model.get_variable_lower_bounds().data(), + mps_data_model.get_variable_lower_bounds().size()); + } + + if (!mps_data_model.get_variable_upper_bounds().empty()) { + view.set_variable_upper_bounds(mps_data_model.get_variable_upper_bounds().data(), + mps_data_model.get_variable_upper_bounds().size()); + } + + if (!mps_data_model.get_variable_types().empty()) { + view.set_variable_types(mps_data_model.get_variable_types().data(), + mps_data_model.get_variable_types().size()); + } + + if (!mps_data_model.get_row_types().empty()) { + view.set_row_types(mps_data_model.get_row_types().data(), + mps_data_model.get_row_types().size()); + } + + if (!mps_data_model.get_constraint_lower_bounds().empty()) { + view.set_constraint_lower_bounds(mps_data_model.get_constraint_lower_bounds().data(), + mps_data_model.get_constraint_lower_bounds().size()); + } + + if (!mps_data_model.get_constraint_upper_bounds().empty()) { + view.set_constraint_upper_bounds(mps_data_model.get_constraint_upper_bounds().data(), + mps_data_model.get_constraint_upper_bounds().size()); + } + + view.set_objective_name(mps_data_model.get_objective_name()); + view.set_problem_name(mps_data_model.get_problem_name()); + + if (!mps_data_model.get_variable_names().empty()) { + view.set_variable_names(mps_data_model.get_variable_names()); + } + + if (!mps_data_model.get_row_names().empty()) { + view.set_row_names(mps_data_model.get_row_names()); + } + + if (!mps_data_model.get_initial_primal_solution().empty()) { + view.set_initial_primal_solution(mps_data_model.get_initial_primal_solution().data(), + mps_data_model.get_initial_primal_solution().size()); + } + + if (!mps_data_model.get_initial_dual_solution().empty()) { + view.set_initial_dual_solution(mps_data_model.get_initial_dual_solution().data(), + mps_data_model.get_initial_dual_solution().size()); + } + + if (mps_data_model.has_quadratic_objective()) { + view.set_quadratic_objective_matrix(mps_data_model.get_quadratic_objective_values().data(), + mps_data_model.get_quadratic_objective_values().size(), + 
mps_data_model.get_quadratic_objective_indices().data(), + mps_data_model.get_quadratic_objective_indices().size(), + mps_data_model.get_quadratic_objective_offsets().data(), + mps_data_model.get_quadratic_objective_offsets().size()); + } + + return view; +} + /** * @brief Handle logger when error happens before logger is initialized * @param settings Solver settings @@ -83,13 +229,18 @@ inline cuopt::init_logger_t dummy_logger( * @param file_path Path to the MPS format input file containing the optimization problem * @param initial_solution_file Path to initial solution file in SOL format * @param settings_strings Map of solver parameters + * @param is_remote_solve Whether remote solve is enabled (skips CUDA handle creation) */ int run_single_file(const std::string& file_path, const std::string& initial_solution_file, bool solve_relaxation, - const std::map& settings_strings) + const std::map& settings_strings, + bool is_remote_solve) { - const raft::handle_t handle_{}; + // Only create raft handle for local solve - it triggers CUDA initialization + std::unique_ptr handle_ptr; + if (!is_remote_solve) { handle_ptr = std::make_unique(); } + cuopt::linear_programming::solver_settings_t settings; try { @@ -122,13 +273,15 @@ int run_single_file(const std::string& file_path, return -1; } - auto op_problem = - cuopt::linear_programming::mps_data_model_to_optimization_problem(&handle_, mps_data_model); - - const bool is_mip = - (op_problem.get_problem_category() == cuopt::linear_programming::problem_category_t::MIP || - op_problem.get_problem_category() == cuopt::linear_programming::problem_category_t::IP) && - !solve_relaxation; + // Determine if this is a MIP problem by checking variable types + bool has_integers = false; + for (const auto& vt : mps_data_model.get_variable_types()) { + if (vt == 'I' || vt == 'B') { + has_integers = true; + break; + } + } + const bool is_mip = has_integers && !solve_relaxation; try { auto initial_solution = @@ -154,13 +307,29 @@ int run_single_file(const std::string& file_path, return -1; } + // Create a non-owning view from the mps_data_model + // solve_lp/solve_mip will handle remote vs local solve based on env vars + auto view = create_view_from_mps_data_model(mps_data_model); + try { + // Pass handle_ptr.get() - can be nullptr for remote solve if (is_mip) { auto& mip_settings = settings.get_mip_settings(); - auto solution = cuopt::linear_programming::solve_mip(op_problem, mip_settings); + auto solution = cuopt::linear_programming::solve_mip(handle_ptr.get(), view, mip_settings); + if (solution.get_error_status().get_error_type() != cuopt::error_type_t::Success) { + auto log = dummy_logger(settings); + CUOPT_LOG_ERROR("MIP solve failed: %s", solution.get_error_status().what()); + return -1; + } } else { auto& lp_settings = settings.get_pdlp_settings(); - auto solution = cuopt::linear_programming::solve_lp(op_problem, lp_settings); + auto solution = cuopt::linear_programming::solve_lp(handle_ptr.get(), view, lp_settings); + if (solution.get_error_status().get_error_type() != cuopt::error_type_t::Success) { + auto log = dummy_logger(settings); + CUOPT_LOG_ERROR("LP solve failed: %s", solution.get_error_status().what()); + return -1; + } + // Note: Solution output is now handled by solve_lp/solve_lp_remote via CUOPT_LOG_INFO } } catch (const std::exception& e) { CUOPT_LOG_ERROR("Error: %s", e.what()); @@ -238,6 +407,7 @@ int set_cuda_module_loading(int argc, char* argv[]) */ int main(int argc, char* argv[]) { + install_crash_handlers(); if 
(set_cuda_module_loading(argc, argv) != 0) { return 1; } // Get the version string from the version_config.hpp file @@ -249,7 +419,7 @@ int main(int argc, char* argv[]) argparse::ArgumentParser program("cuopt_cli", version_string); // Define all arguments with appropriate defaults and help messages - program.add_argument("filename").help("input mps file").nargs(1).required(); + program.add_argument("filename").help("input mps file").nargs(argparse::nargs_pattern::optional); // FIXME: use a standard format for initial solution file program.add_argument("--initial-solution") @@ -261,6 +431,11 @@ int main(int argc, char* argv[]) .default_value(false) .implicit_value(true); + program.add_argument("--print-grpc-max") + .help("print gRPC max message sizes (client default and server if configured)") + .default_value(false) + .implicit_value(true); + program.add_argument("--presolve") .help("enable/disable presolve (default: true for MIP problems, false for LP problems)") .default_value(true) @@ -328,25 +503,87 @@ int main(int argc, char* argv[]) settings_strings[param_name] = program.get(arg_name.c_str()); } } - // Get the values - std::string file_name = program.get("filename"); - const auto initial_solution_file = program.get("--initial-solution"); const auto solve_relaxation = program.get("--relaxation"); + const auto print_grpc_max = program.get("--print-grpc-max"); + + if (print_grpc_max) { +#if CUOPT_ENABLE_GRPC + constexpr int64_t kMiB = 1024LL * 1024; + const int64_t client_default_bytes = 256LL * kMiB; + int64_t client_effective_bytes = client_default_bytes; + if (const char* env_mb = std::getenv("CUOPT_GRPC_MAX_MESSAGE_MB")) { + try { + int64_t mb = std::stoll(env_mb); + if (mb <= 0) { + client_effective_bytes = -1; + } else { + client_effective_bytes = mb * kMiB; + } + } catch (...) { + } + } + std::cout << "Client default max message MiB: " << (client_default_bytes / kMiB) << "\n"; + if (client_effective_bytes < 0) { + std::cout << "Client effective max message MiB: unlimited\n"; + } else { + std::cout << "Client effective max message MiB: " << (client_effective_bytes / kMiB) << "\n"; + } - // All arguments are parsed as string, default values are parsed as int if unused. - const auto num_gpus = program.is_used("--num-gpus") - ? std::stoi(program.get("--num-gpus")) - : program.get("--num-gpus"); + const char* host = std::getenv("CUOPT_REMOTE_HOST"); + const char* port = std::getenv("CUOPT_REMOTE_PORT"); + + if (host && port) { + std::string status; + std::string error_message; + int64_t result_size_bytes = 0; + int64_t max_message_bytes = 0; + const std::string address = std::string(host) + ":" + port; + cuopt::linear_programming::grpc_remote::check_status(address, + "__cuopt_max_message_probe__", + status, + error_message, + &result_size_bytes, + &max_message_bytes); + std::cout << "Server max message MiB: " << (max_message_bytes / (1024 * 1024)) << "\n"; + } else { + std::cout << "Server max message MiB: (unavailable; set CUOPT_REMOTE_HOST/PORT)\n"; + } +#else + std::cout << "gRPC support is disabled in this build.\n"; +#endif + return 0; + } + + if (!program.is_used("filename")) { + std::cerr << "filename: 1 argument(s) expected. 0 provided." 
<< std::endl; + std::cerr << program; + return 1; + } + + // Get the values + std::string file_name = program.get("filename"); + + // Check for remote solve BEFORE any CUDA initialization + const bool is_remote_solve = cuopt::linear_programming::is_remote_solve_enabled(); std::vector> memory_resources; - for (int i = 0; i < std::min(raft::device_setter::get_device_count(), num_gpus); ++i) { - cudaSetDevice(i); - memory_resources.push_back(make_async()); - rmm::mr::set_per_device_resource(rmm::cuda_device_id{i}, memory_resources.back().get()); + if (!is_remote_solve) { + // Only initialize CUDA resources for local solve + // All arguments are parsed as string, default values are parsed as int if unused. + const auto num_gpus = program.is_used("--num-gpus") + ? std::stoi(program.get("--num-gpus")) + : program.get("--num-gpus"); + + for (int i = 0; i < std::min(raft::device_setter::get_device_count(), num_gpus); ++i) { + cudaSetDevice(i); + memory_resources.push_back(make_async()); + rmm::mr::set_per_device_resource(rmm::cuda_device_id{i}, memory_resources.back().get()); + } + cudaSetDevice(0); } - cudaSetDevice(0); - return run_single_file(file_name, initial_solution_file, solve_relaxation, settings_strings); + return run_single_file( + file_name, initial_solution_file, solve_relaxation, settings_strings, is_remote_solve); } diff --git a/cpp/cuopt_grpc_server.cpp b/cpp/cuopt_grpc_server.cpp new file mode 100644 index 000000000..cb13d8b57 --- /dev/null +++ b/cpp/cuopt_grpc_server.cpp @@ -0,0 +1,3025 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @file cuopt_grpc_server.cpp + * @brief gRPC-based remote solve server with full worker process infrastructure + * + * This server uses gRPC for client communication but preserves the + * existing worker process infrastructure: + * - Worker processes with shared memory job queues + * - Pipe-based IPC for problem/result data + * - Result tracking and retrieval threads + * - Log streaming + * + * Only the client-facing network layer is different (gRPC vs TCP). + */ + +#ifdef CUOPT_ENABLE_GRPC + +#include +#include "cuopt_remote.pb.h" +#include "cuopt_remote_service.grpc.pb.h" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using grpc::Server; +using grpc::ServerBuilder; +using grpc::ServerContext; +using grpc::ServerReaderWriter; +using grpc::ServerWriter; +using grpc::Status; +using grpc::StatusCode; + +using namespace cuopt::linear_programming; +// Note: NOT using "using namespace cuopt::remote" to avoid JobStatus enum conflict + +// ============================================================================ +// Shared Memory Structures (must match between main process and workers) +// ============================================================================ + +constexpr size_t MAX_JOBS = 100; +constexpr size_t MAX_RESULTS = 100; + +template +void copy_cstr(char (&dst)[N], const std::string& src) +{ + std::snprintf(dst, N, "%s", src.c_str()); +} + +template +void copy_cstr(char (&dst)[N], const char* src) +{ + std::snprintf(dst, N, "%s", src ? 
src : ""); +} + +// Job queue entry - small fixed size, data stored in separate per-job shared memory or sent via +// pipe +struct JobQueueEntry { + char job_id[64]; + uint32_t problem_type; // 0 = LP, 1 = MIP + uint64_t data_size; // Size of problem data (uint64 for large problems) + char shm_data_name[128]; // Name of per-job shared memory segment (shm mode only) + std::atomic ready; // Job is ready to be processed + std::atomic claimed; // Worker has claimed this job + std::atomic worker_pid; // PID of worker that claimed this job (0 if none) + std::atomic cancelled; // Job has been cancelled (worker should skip) + // Pipe mode fields + std::atomic worker_index; // Index of worker that claimed this job (-1 if none) + std::atomic data_sent; // Server has sent data to worker's pipe (pipe mode) +}; + +// Result queue entry - small fixed size, data stored in separate per-result shared memory or pipe +struct ResultQueueEntry { + char job_id[64]; + uint32_t status; // 0 = success, 1 = error, 2 = cancelled + uint64_t data_size; // Size of result data (uint64 for large results) + char shm_data_name[128]; // Name of per-result shared memory segment (shm mode only) + char error_message[1024]; + std::atomic ready; // Result is ready + std::atomic retrieved; // Result has been retrieved + std::atomic worker_index; // Index of worker that produced this result (pipe mode) +}; + +// Shared memory control block +struct SharedMemoryControl { + std::atomic shutdown_requested; + std::atomic active_workers; +}; + +// ============================================================================ +// Job status tracking (main process only) +// ============================================================================ + +enum class JobStatus { QUEUED, PROCESSING, COMPLETED, FAILED, NOT_FOUND, CANCELLED }; + +struct IncumbentEntry { + double objective = 0.0; + std::vector assignment; +}; + +struct JobInfo { + std::string job_id; + JobStatus status; + std::chrono::steady_clock::time_point submit_time; + std::vector result_data; + std::vector incumbents; + bool is_mip; + std::string error_message; + bool is_blocking; +}; + +struct JobWaiter { + std::mutex mutex; + std::condition_variable cv; + std::vector result_data; + std::string error_message; + bool success; + bool ready; + std::atomic waiters{0}; + JobWaiter() : success(false), ready(false) {} +}; + +// ============================================================================ +// Global state +// ============================================================================ + +std::atomic keep_running{true}; +std::map job_tracker; +std::mutex tracker_mutex; +std::condition_variable result_cv; + +std::map> waiting_threads; +std::mutex waiters_mutex; + +JobQueueEntry* job_queue = nullptr; +ResultQueueEntry* result_queue = nullptr; +SharedMemoryControl* shm_ctrl = nullptr; + +std::vector worker_pids; + +struct ServerConfig { + int port = 8765; + int num_workers = 1; + bool verbose = true; + bool use_pipes = true; + bool log_to_console = false; + // gRPC max message size in MiB. 0 => unlimited (gRPC uses -1 internally). 
+ int max_message_mb = 256; + bool enable_tls = false; + bool require_client = false; + std::string tls_cert_path; + std::string tls_key_path; + std::string tls_root_path; +}; + +ServerConfig config; + +struct WorkerPipes { + int to_worker_fd; + int from_worker_fd; + int worker_read_fd; + int worker_write_fd; + int incumbent_from_worker_fd; + int worker_incumbent_write_fd; +}; + +std::vector worker_pipes; + +std::mutex pending_data_mutex; +std::map> pending_job_data; + +// Large payloads uploaded via gRPC streaming are spooled to disk to avoid +// holding multi-GB request buffers in the server process. +struct PendingJobFile { + std::string path; + uint64_t size_bytes{}; +}; +std::mutex pending_files_mutex; +std::map pending_job_files; + +const char* SHM_JOB_QUEUE = "/cuopt_job_queue"; +const char* SHM_RESULT_QUEUE = "/cuopt_result_queue"; +const char* SHM_CONTROL = "/cuopt_control"; + +const std::string LOG_DIR = "/tmp/cuopt_logs"; +inline std::string get_log_file_path(const std::string& job_id) +{ + return LOG_DIR + "/job_" + job_id + ".log"; +} + +const std::string UPLOAD_DIR = "/tmp/cuopt_uploads"; +inline std::string get_upload_file_path(const std::string& upload_id) +{ + return UPLOAD_DIR + "/upload_" + upload_id + ".bin"; +} + +// ============================================================================ +// Signal handling +// ============================================================================ + +void signal_handler(int signal) +{ + if (signal == SIGINT || signal == SIGTERM) { + std::cout << "\n[gRPC Server] Received shutdown signal\n"; + keep_running = false; + if (shm_ctrl) { shm_ctrl->shutdown_requested = true; } + result_cv.notify_all(); + } +} + +// ============================================================================ +// Forward declarations +// ============================================================================ + +std::string generate_job_id(); +void ensure_log_dir_exists(); +void delete_log_file(const std::string& job_id); +void ensure_upload_dir_exists(); +void delete_upload_file(const std::string& upload_id); +void cleanup_shared_memory(); +void spawn_workers(); +void wait_for_workers(); +void worker_monitor_thread(); +void result_retrieval_thread(); +void incumbent_retrieval_thread(); + +// Pipe and shared memory functions +static bool write_to_pipe(int fd, const void* data, size_t size); +static bool read_from_pipe(int fd, void* data, size_t size, int timeout_ms = 120000); +static bool send_job_data_pipe(int worker_idx, const std::vector& data); +static bool send_job_data_pipe_file(int worker_idx, + const std::string& path, + uint64_t expected_size); +static bool recv_job_data_pipe(int fd, uint64_t expected_size, std::vector& data); +static bool send_result_pipe(int fd, const std::vector& data); +static bool send_incumbent_pipe(int fd, const std::vector& data); +static bool recv_incumbent_pipe(int fd, std::vector& data); +static bool recv_result_pipe(int worker_idx, uint64_t expected_size, std::vector& data); +static std::string create_job_shm(const std::string& job_id, + const std::vector& data, + const char* suffix); +static bool read_job_shm(const char* shm_name, size_t data_size, std::vector& data); +static std::string write_result_shm(const std::string& job_id, const std::vector& data); +static void cleanup_job_shm(const char* shm_name); + +constexpr int64_t kMiB = 1024LL * 1024; +constexpr int64_t kGiB = 1024LL * 1024 * 1024; + +class IncumbentPipeCallback : public cuopt::internals::get_solution_callback_t { + public: + 
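+  // Solver callback: each invocation copies the new incumbent off-device,
+  // packs it into a cuopt::remote::Incumbent message, and writes it to the
+  // worker's incumbent pipe for the server-side incumbent retrieval thread.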
IncumbentPipeCallback(std::string job_id, int fd) : job_id_(std::move(job_id)), fd_(fd) {} + + void get_solution(void* data, void* objective_value) override + { + if (fd_ < 0 || n_variables == 0) { return; } + + double objective = 0.0; + std::vector assignment; + assignment.resize(n_variables); + + if (isFloat) { + std::vector tmp(n_variables); + if (cudaMemcpy(tmp.data(), data, n_variables * sizeof(float), cudaMemcpyDeviceToHost) != + cudaSuccess) { + return; + } + for (size_t i = 0; i < n_variables; ++i) { + assignment[i] = static_cast(tmp[i]); + } + float obj = 0.0f; + if (cudaMemcpy(&obj, objective_value, sizeof(float), cudaMemcpyDeviceToHost) != cudaSuccess) { + return; + } + objective = static_cast(obj); + } else { + if (cudaMemcpy( + assignment.data(), data, n_variables * sizeof(double), cudaMemcpyDeviceToHost) != + cudaSuccess) { + return; + } + double obj = 0.0; + if (cudaMemcpy(&obj, objective_value, sizeof(double), cudaMemcpyDeviceToHost) != + cudaSuccess) { + return; + } + objective = obj; + } + + cuopt::remote::Incumbent msg; + msg.set_job_id(job_id_); + msg.set_objective(objective); + for (double v : assignment) { + msg.add_assignment(v); + } + + std::vector buffer(msg.ByteSizeLong()); + if (!msg.SerializeToArray(buffer.data(), buffer.size())) { return; } + std::cout << "[Worker] Incumbent callback job_id=" << job_id_ << " obj=" << objective + << " vars=" << assignment.size() << "\n"; + std::cout.flush(); + send_incumbent_pipe(fd_, buffer); + } + + private: + std::string job_id_; + int fd_; +}; + +static void store_simple_result(const std::string& job_id, + int worker_id, + int status, + const char* error_message) +{ + for (size_t i = 0; i < MAX_RESULTS; ++i) { + if (!result_queue[i].ready) { + copy_cstr(result_queue[i].job_id, job_id); + result_queue[i].status = status; + result_queue[i].data_size = 0; + result_queue[i].shm_data_name[0] = '\0'; + result_queue[i].worker_index = worker_id; + copy_cstr(result_queue[i].error_message, error_message); + result_queue[i].error_message[sizeof(result_queue[i].error_message) - 1] = '\0'; + result_queue[i].retrieved = false; + result_queue[i].ready = true; + break; + } + } +} + +// ============================================================================ +// Worker Infrastructure (shared with the remote solve server implementation) +// ============================================================================ +void cleanup_shared_memory() +{ + if (job_queue) { + munmap(job_queue, sizeof(JobQueueEntry) * MAX_JOBS); + shm_unlink(SHM_JOB_QUEUE); + } + if (result_queue) { + munmap(result_queue, sizeof(ResultQueueEntry) * MAX_RESULTS); + shm_unlink(SHM_RESULT_QUEUE); + } + if (shm_ctrl) { + munmap(shm_ctrl, sizeof(SharedMemoryControl)); + shm_unlink(SHM_CONTROL); + } +} + +void worker_process(int worker_id) +{ + std::cout << "[Worker " << worker_id << "] Started (PID: " << getpid() << ")\n"; + + // Increment active worker count + shm_ctrl->active_workers++; + + // NOTE: We create raft::handle_t AFTER stdout redirect (per-job) so that + // CUDA logging uses the redirected output streams. 
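+  // One-time serializer setup follows, then the main loop: atomically claim a
+  // queued job, read its payload (pipe or shm mode), solve, and publish the
+  // result to the result queue.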
+
+  // Get serializer
+  auto serializer = get_serializer();
+
+  while (!shm_ctrl->shutdown_requested) {
+    // Find a job to process
+    int job_slot = -1;
+    for (size_t i = 0; i < MAX_JOBS; ++i) {
+      if (job_queue[i].ready && !job_queue[i].claimed) {
+        // Try to claim this job atomically
+        bool expected = false;
+        if (job_queue[i].claimed.compare_exchange_strong(expected, true)) {
+          job_queue[i].worker_pid = getpid();      // Record our PID
+          job_queue[i].worker_index = worker_id;   // Record worker index for pipe mode
+          job_slot = i;
+          break;
+        }
+      }
+    }
+
+    if (job_slot < 0) {
+      // No job available, sleep briefly
+      usleep(10000);  // 10ms
+      continue;
+    }
+
+    // Process the job
+    JobQueueEntry& job = job_queue[job_slot];
+    std::string job_id(job.job_id);
+    bool is_mip = (job.problem_type == 1);
+
+    // Check if the job was cancelled before we start processing
+    if (job.cancelled) {
+      std::cout << "[Worker " << worker_id << "] Job cancelled before processing: " << job_id
+                << "\n";
+      std::cout.flush();
+
+      // Cleanup job input shm (shm mode only)
+      if (!config.use_pipes) { cleanup_job_shm(job.shm_data_name); }
+
+      // Store a cancelled result in the result queue
+      store_simple_result(job_id, worker_id, 2, "Job was cancelled");
+
+      // Clear the job slot (don't exit/restart the worker)
+      job.worker_pid = 0;
+      job.worker_index = -1;
+      job.data_sent = false;
+      job.ready = false;
+      job.claimed = false;
+      job.cancelled = false;
+      continue;  // Go back to waiting for the next job
+    }
+
+    std::cout << "[Worker " << worker_id << "] Processing job: " << job_id
+              << " (type: " << (is_mip ? "MIP" : "LP") << ")\n";
+    std::cout.flush();
+
+    std::string log_file = get_log_file_path(job_id);
+
+    // Create the RAFT handle before calling the solver
+    std::cout << "[Worker] Creating raft::handle_t...\n" << std::flush;
+
+    raft::handle_t handle;
+
+    std::cout << "[Worker] Handle created, starting solve...\n" << std::flush;
+
+    // Read problem data (pipe mode or shm mode)
+    std::vector<uint8_t> request_data;
+    bool read_success = false;
+    if (config.use_pipes) {
+      // Pipe mode: read from the pipe (blocks until the server writes data).
+      // No need to wait for the data_sent flag - the pipe read naturally blocks.
+      int read_fd = worker_pipes[worker_id].worker_read_fd;
+      read_success = recv_job_data_pipe(read_fd, job.data_size, request_data);
+      if (!read_success) {
+        std::cerr << "[Worker " << worker_id << "] Failed to read job data from pipe\n";
+      }
+    } else {
+      // SHM mode: read from shared memory
+      read_success = read_job_shm(job.shm_data_name, job.data_size, request_data);
+      if (!read_success) {
+        std::cerr << "[Worker " << worker_id << "] Failed to read job data from shm\n";
+      }
+      // Cleanup job input shm now that we've read it
+      cleanup_job_shm(job.shm_data_name);
+    }
+
+    if (!read_success) {
+      // Store an error result
+      store_simple_result(job_id, worker_id, 1, "Failed to read job data");
+      // Clear the job slot
+      job.worker_pid = 0;
+      job.worker_index = -1;
+      job.data_sent = false;
+      job.ready = false;
+      job.claimed = false;
+      continue;
+    }
+
+    std::vector<uint8_t> result_data;
+    std::string error_message;
+    bool success = false;
+
+    try {
+      cuopt::mps_parser::mps_data_model_t<int, double> mps_data;
+      if (is_mip) {
+        mip_solver_settings_t<int, double> settings;
+        std::unique_ptr incumbent_cb;
+        settings.log_file = log_file;
+        settings.log_to_console = config.log_to_console;
+
+        if (serializer->deserialize_mip_request(request_data, mps_data, settings)) {
+          bool enable_incumbents = true;
+          cuopt::remote::SolveMIPRequest mip_request;
+          if (mip_request.ParseFromArray(request_data.data(), request_data.size()) &&
+              mip_request.has_enable_incumbents()) {
+            enable_incumbents = mip_request.enable_incumbents();
+          }
+          if (enable_incumbents) {
+            incumbent_cb = std::make_unique(
+              job_id, worker_pipes[worker_id].worker_incumbent_write_fd);
+            settings.set_mip_callback(incumbent_cb.get());
+            std::cout << "[Worker] Registered incumbent callback for job_id=" << job_id
+                      << " callbacks=" << settings.get_mip_callbacks().size() << "\n";
+            std::cout.flush();
+          } else {
+            std::cout << "[Worker] Skipping incumbent callback for job_id=" << job_id << "\n";
+            std::cout.flush();
+          }
+          std::cout << "[Worker] Calling solve_mip...\n" << std::flush;
+          auto solution = solve_mip(&handle, mps_data, settings);
+          std::cout << "[Worker] solve_mip done\n" << std::flush;
+          solution.to_host(handle.get_stream());
+          result_data = serializer->serialize_mip_solution(solution);
+          success = true;
+        } else {
+          error_message = "Failed to deserialize MIP request";
+        }
+      } else {
+        pdlp_solver_settings_t<int, double> settings;
+        settings.log_file = log_file;
+        settings.log_to_console = config.log_to_console;
+
+        if (serializer->deserialize_lp_request(request_data, mps_data, settings)) {
+          std::cout << "[Worker] Calling solve_lp...\n" << std::flush;
+          auto solution = solve_lp(&handle, mps_data, settings);
+          std::cout << "[Worker] solve_lp done\n" << std::flush;
+          solution.to_host(handle.get_stream());
+          result_data = serializer->serialize_lp_solution(solution);
+          success = true;
+        } else {
+          error_message = "Failed to deserialize LP request";
+        }
+      }
+    } catch (const std::exception& e) {
+      error_message = std::string("Exception: ") + e.what();
+    }
+
+    // Store the result (pipe mode: write to pipe, shm mode: write to shared memory)
+    if (config.use_pipes) {
+      // PIPE MODE: Set result_queue metadata FIRST, THEN write to the pipe.
+      // This avoids a deadlock: the server process's result_retrieval_thread
+      // needs to see ready=true before it will read from the pipe, but if we
+      // wrote a large result to the pipe first, we would block waiting for a
+      // reader that never comes.
+
+      // Find a free result slot and populate the metadata
+      int result_slot = -1;
+      for (size_t i = 0; i < MAX_RESULTS; ++i) {
+        if (!result_queue[i].ready) {
+          result_slot = i;
+          ResultQueueEntry& result = result_queue[i];
+          copy_cstr(result.job_id, job_id);
+          result.status = success ? 0 : 1;
+          result.data_size = success ? result_data.size() : 0;
+          result.shm_data_name[0] = '\0';  // Not used in pipe mode
+          result.worker_index = worker_id;
+          if (!success) { copy_cstr(result.error_message, error_message); }
+          result.retrieved = false;
+          // Set ready=true BEFORE writing to the pipe so the reader thread starts
+          // reading. This prevents deadlock with results larger than the pipe buffer.
+          result.ready = true;
+          if (config.verbose) {
+            std::cout << "[Worker " << worker_id << "] Enqueued result metadata for job " << job_id
+                      << " in result_slot=" << result_slot << " status=" << result.status
+                      << " data_size=" << result.data_size << "\n";
+            std::cout.flush();
+          }
+          break;
+        }
+      }
+
+      // Now write the result data to the pipe (the reader thread should be ready to receive)
+      if (success && !result_data.empty() && result_slot >= 0) {
+        int write_fd = worker_pipes[worker_id].worker_write_fd;
+        if (config.verbose) {
+          std::cout << "[Worker " << worker_id << "] Writing " << result_data.size()
+                    << " bytes of result payload to pipe for job " << job_id << "\n";
+          std::cout.flush();
+        }
+        bool write_success = send_result_pipe(write_fd, result_data);
+        if (!write_success) {
+          std::cerr << "[Worker " << worker_id << "] Failed to write result to pipe\n";
+          std::cerr.flush();
+          // Mark as failed in the result queue
+          result_queue[result_slot].status = 1;
+          copy_cstr(result_queue[result_slot].error_message, "Failed to write result to pipe");
+        } else if (config.verbose) {
+          std::cout << "[Worker " << worker_id << "] Finished writing result payload for job "
+                    << job_id << "\n";
+          std::cout.flush();
+        }
+      } else if (config.verbose) {
+        std::cout << "[Worker " << worker_id << "] No result payload write needed for job "
+                  << job_id << " (success=" << success << ", result_slot=" << result_slot
+                  << ", payload_bytes=" << result_data.size() << ")\n";
+        std::cout.flush();
+      }
+    } else {
+      // SHM mode: store the result in shared memory
+      for (size_t i = 0; i < MAX_RESULTS; ++i) {
+        if (!result_queue[i].ready) {
+          ResultQueueEntry& result = result_queue[i];
+          copy_cstr(result.job_id, job_id);
+          result.status = success ? 0 : 1;
+          result.worker_index = worker_id;
+          if (success && !result_data.empty()) {
+            // Create per-result shared memory
+            std::string shm_name = write_result_shm(job_id, result_data);
+            if (shm_name.empty()) {
+              // Failed to create shm - report an error
+              result.status = 1;
+              result.data_size = 0;
+              result.shm_data_name[0] = '\0';
+              copy_cstr(result.error_message, "Failed to create shared memory for result");
+            } else {
+              result.data_size = result_data.size();
+              copy_cstr(result.shm_data_name, shm_name);
+            }
+          } else if (!success) {
+            copy_cstr(result.error_message, error_message);
+            result.data_size = 0;
+            result.shm_data_name[0] = '\0';
+          } else {
+            result.data_size = 0;
+            result.shm_data_name[0] = '\0';
+          }
+          result.retrieved = false;
+          result.ready = true;  // Mark as ready last
+          break;
+        }
+      }
+    }
+
+    // Clear the job slot
+    job.worker_pid = 0;
+    job.worker_index = -1;
+    job.data_sent = false;
+    job.ready = false;
+    job.claimed = false;
+    job.cancelled = false;
+
+    std::cout << "[Worker " << worker_id << "] Completed job: " << job_id
+              << " (success: " << success << ")\n";
+  }
+
+  shm_ctrl->active_workers--;
+  std::cout << "[Worker " << worker_id << "] Stopped\n";
+  _exit(0);
+}
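+
+// The loop above publishes work through two flags per slot: `ready` says a job
+// is present, `claimed` is the ownership bit. A minimal, self-contained sketch
+// of the same claim handshake (illustrative only; kept out of the build):
+#if 0
+#include <atomic>
+
+struct example_slot_t {
+  std::atomic<bool> ready{false};
+  std::atomic<bool> claimed{false};
+};
+
+// Exactly one contender wins the compare-exchange on a given slot, so two
+// workers can never claim the same job.
+static int example_try_claim(example_slot_t* slots, int n)
+{
+  for (int i = 0; i < n; ++i) {
+    if (slots[i].ready.load() && !slots[i].claimed.load()) {
+      bool expected = false;
+      if (slots[i].claimed.compare_exchange_strong(expected, true)) { return i; }
+    }
+  }
+  return -1;  // nothing available
+}
+#endif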
+
+// Create pipes for a worker (incumbent always, data/result in pipe mode)
+bool create_worker_pipes(int worker_id)
+{
+  // Ensure worker_pipes has enough slots
+  while (static_cast<int>(worker_pipes.size()) <= worker_id) {
+    worker_pipes.push_back({-1, -1, -1, -1, -1, -1});
+  }
+
+  WorkerPipes& wp = worker_pipes[worker_id];
+
+  if (config.use_pipes) {
+    // Create a pipe for server -> worker data
+    int input_pipe[2];
+    if (pipe(input_pipe) < 0) {
+      std::cerr << "[Server] Failed to create input pipe for worker " << worker_id << "\n";
+      return false;
+    }
+    wp.worker_read_fd = input_pipe[0];  // Worker reads from this
+    wp.to_worker_fd = input_pipe[1];    // Server writes to this
+
+    // Create a pipe for worker -> server results
+    int output_pipe[2];
+    if (pipe(output_pipe) < 0) {
+      std::cerr << "[Server] Failed to create output pipe for worker " << worker_id << "\n";
+      close(input_pipe[0]);
+      close(input_pipe[1]);
+      return false;
+    }
+    wp.from_worker_fd = output_pipe[0];   // Server reads from this
+    wp.worker_write_fd = output_pipe[1];  // Worker writes to this
+  }
+
+  int incumbent_pipe[2];
+  if (pipe(incumbent_pipe) < 0) {
+    std::cerr << "[Server] Failed to create incumbent pipe for worker " << worker_id << "\n";
+    if (config.use_pipes) {
+      if (wp.worker_read_fd >= 0) close(wp.worker_read_fd);
+      if (wp.to_worker_fd >= 0) close(wp.to_worker_fd);
+      if (wp.from_worker_fd >= 0) close(wp.from_worker_fd);
+      if (wp.worker_write_fd >= 0) close(wp.worker_write_fd);
+      wp.worker_read_fd = -1;
+      wp.to_worker_fd = -1;
+      wp.from_worker_fd = -1;
+      wp.worker_write_fd = -1;
+    }
+    return false;
+  }
+  wp.incumbent_from_worker_fd = incumbent_pipe[0];   // Server reads from this
+  wp.worker_incumbent_write_fd = incumbent_pipe[1];  // Worker writes to this
+
+  return true;
+}
+
+// Close the server-side pipe ends for a worker (called when restarting)
+void close_worker_pipes_server(int worker_id)
+{
+  if (worker_id < 0 || worker_id >= static_cast<int>(worker_pipes.size())) return;
+
+  WorkerPipes& wp = worker_pipes[worker_id];
+  if (config.use_pipes) {
+    if (wp.to_worker_fd >= 0) {
+      close(wp.to_worker_fd);
+      wp.to_worker_fd = -1;
+    }
+    if (wp.from_worker_fd >= 0) {
+      close(wp.from_worker_fd);
+      wp.from_worker_fd = -1;
+    }
+  }
+  if (wp.incumbent_from_worker_fd >= 0) {
+    close(wp.incumbent_from_worker_fd);
+    wp.incumbent_from_worker_fd = -1;
+  }
+}
+
+// Close the worker-side pipe ends in the parent after fork
+void close_worker_pipes_child_ends(int worker_id)
+{
+  if (worker_id < 0 || worker_id >= static_cast<int>(worker_pipes.size())) return;
+
+  WorkerPipes& wp = worker_pipes[worker_id];
+  if (config.use_pipes) {
+    if (wp.worker_read_fd >= 0) {
+      close(wp.worker_read_fd);
+      wp.worker_read_fd = -1;
+    }
+    if (wp.worker_write_fd >= 0) {
+      close(wp.worker_write_fd);
+      wp.worker_write_fd = -1;
+    }
+  }
+  if (wp.worker_incumbent_write_fd >= 0) {
+    close(wp.worker_incumbent_write_fd);
+    wp.worker_incumbent_write_fd = -1;
+  }
+}
+
+pid_t spawn_worker(int worker_id, bool is_replacement)
+{
+  if (is_replacement) { close_worker_pipes_server(worker_id); }
+
+  if (!create_worker_pipes(worker_id)) {
+    std::cerr << "[Server] Failed to create pipes for "
+              << (is_replacement ? "replacement worker " : "worker ") << worker_id << "\n";
+    return -1;
+  }
+
+  pid_t pid = fork();
+  if (pid < 0) {
+    std::cerr << "[Server] Failed to fork " << (is_replacement ? "replacement worker " : "worker ")
+              << worker_id << "\n";
+    close_worker_pipes_server(worker_id);
+    return -1;
+  } else if (pid == 0) {
+    // Child process
+    if (config.use_pipes) {
+      // Close all other workers' pipe fds
+      for (int j = 0; j < static_cast<int>(worker_pipes.size()); ++j) {
+        if (j != worker_id) {
+          if (worker_pipes[j].worker_read_fd >= 0) close(worker_pipes[j].worker_read_fd);
+          if (worker_pipes[j].worker_write_fd >= 0) close(worker_pipes[j].worker_write_fd);
+          if (worker_pipes[j].to_worker_fd >= 0) close(worker_pipes[j].to_worker_fd);
+          if (worker_pipes[j].from_worker_fd >= 0) close(worker_pipes[j].from_worker_fd);
+          if (worker_pipes[j].incumbent_from_worker_fd >= 0) {
+            close(worker_pipes[j].incumbent_from_worker_fd);
+          }
+          if (worker_pipes[j].worker_incumbent_write_fd >= 0) {
+            close(worker_pipes[j].worker_incumbent_write_fd);
+          }
+        }
+      }
+      // Close the server ends of our pipes
+      close(worker_pipes[worker_id].to_worker_fd);
+      close(worker_pipes[worker_id].from_worker_fd);
+    }
+    if (worker_pipes[worker_id].incumbent_from_worker_fd >= 0) {
+      close(worker_pipes[worker_id].incumbent_from_worker_fd);
+      worker_pipes[worker_id].incumbent_from_worker_fd = -1;
+    }
+    worker_process(worker_id);
+    _exit(0);  // Should not reach here
+  }
+
+  // Parent: close the worker ends of the new pipes
+  close_worker_pipes_child_ends(worker_id);
+  return pid;
+}
+
+void spawn_workers()
+{
+  for (int i = 0; i < config.num_workers; ++i) {
+    pid_t pid = spawn_worker(i, false);
+    if (pid < 0) { continue; }
+    worker_pids.push_back(pid);
+  }
+}
+
+void wait_for_workers()
+{
+  for (pid_t pid : worker_pids) {
+    int status;
+    waitpid(pid, &status, 0);
+  }
+  worker_pids.clear();
+}
+
+// Spawn a single replacement worker and return its PID
+pid_t spawn_single_worker(int worker_id) { return spawn_worker(worker_id, true); }
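+
+// The spawn path above follows the standard fork()+pipe() fd discipline: each
+// process closes the pipe ends it does not own. Condensed sketch (illustrative
+// only; error paths and the extra incumbent pipe omitted):
+#if 0
+#include <unistd.h>
+
+static pid_t example_fork_with_pipe(int& server_write_fd, int& worker_read_fd)
+{
+  int fds[2];
+  if (pipe(fds) < 0) { return -1; }
+  pid_t pid = fork();
+  if (pid < 0) {
+    close(fds[0]);
+    close(fds[1]);
+    return -1;
+  }
+  if (pid == 0) {
+    close(fds[1]);            // worker: close the unused write end
+    worker_read_fd = fds[0];
+  } else {
+    close(fds[0]);            // server: close the unused read end
+    server_write_fd = fds[1];
+  }
+  return pid;
+}
+#endif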
+
+// Mark jobs being processed by a dead worker as failed (or cancelled if the job was cancelled)
+void mark_worker_jobs_failed(pid_t dead_worker_pid)
+{
+  for (size_t i = 0; i < MAX_JOBS; ++i) {
+    if (job_queue[i].ready && job_queue[i].claimed && job_queue[i].worker_pid == dead_worker_pid) {
+      std::string job_id(job_queue[i].job_id);
+      bool was_cancelled = job_queue[i].cancelled;
+
+      if (was_cancelled) {
+        std::cerr << "[Server] Worker " << dead_worker_pid
+                  << " killed for cancelled job: " << job_id << "\n";
+      } else {
+        std::cerr << "[Server] Worker " << dead_worker_pid
+                  << " died while processing job: " << job_id << "\n";
+      }
+
+      // Cleanup job data
+      if (config.use_pipes) {
+        // Pipe mode: remove from pending data if not yet sent
+        {
+          std::lock_guard lock(pending_data_mutex);
+          pending_job_data.erase(job_id);
+        }
+        {
+          std::lock_guard lock(pending_files_mutex);
+          auto itf = pending_job_files.find(job_id);
+          if (itf != pending_job_files.end()) {
+            unlink(itf->second.path.c_str());
+            pending_job_files.erase(itf);
+          }
+        }
+      } else {
+        // SHM mode: cleanup the job input shm (the worker may not have done it)
+        cleanup_job_shm(job_queue[i].shm_data_name);
+      }
+
+      // Store a result in the result queue (cancelled or failed)
+      for (size_t j = 0; j < MAX_RESULTS; ++j) {
+        if (!result_queue[j].ready) {
+          copy_cstr(result_queue[j].job_id, job_id);
+          result_queue[j].status = was_cancelled ? 2 : 1;  // 2=cancelled, 1=error
+          result_queue[j].data_size = 0;
+          result_queue[j].shm_data_name[0] = '\0';
+          result_queue[j].worker_index = -1;
+          copy_cstr(result_queue[j].error_message,
+                    was_cancelled ? "Job was cancelled" : "Worker process died unexpectedly");
+          result_queue[j].retrieved = false;
+          result_queue[j].ready = true;
+          break;
+        }
+      }
+
+      // Clear the job slot
+      job_queue[i].worker_pid = 0;
+      job_queue[i].worker_index = -1;
+      job_queue[i].data_sent = false;
+      job_queue[i].ready = false;
+      job_queue[i].claimed = false;
+      job_queue[i].cancelled = false;
+
+      // Update the job tracker
+      {
+        std::lock_guard lock(tracker_mutex);
+        auto it = job_tracker.find(job_id);
+        if (it != job_tracker.end()) {
+          if (was_cancelled) {
+            it->second.status = JobStatus::CANCELLED;
+            it->second.error_message = "Job was cancelled";
+          } else {
+            it->second.status = JobStatus::FAILED;
+            it->second.error_message = "Worker process died unexpectedly";
+          }
+        }
+      }
+    }
+  }
+}
+
+// Worker monitor thread - detects dead workers and restarts them
+void worker_monitor_thread()
+{
+  std::cout << "[Server] Worker monitor thread started\n";
+  std::cout.flush();
+
+  while (keep_running) {
+    // Check all worker PIDs for dead workers
+    for (size_t i = 0; i < worker_pids.size(); ++i) {
+      pid_t pid = worker_pids[i];
+      if (pid <= 0) continue;
+
+      int status;
+      pid_t result = waitpid(pid, &status, WNOHANG);
+
+      if (result == pid) {
+        // Worker has exited
+        int exit_code = WIFEXITED(status) ? WEXITSTATUS(status) : -1;
+        bool signaled = WIFSIGNALED(status);
+        int signal_num = signaled ? WTERMSIG(status) : 0;
+
+        if (signaled) {
+          std::cerr << "[Server] Worker " << pid << " killed by signal " << signal_num << "\n";
+          std::cerr.flush();
+        } else if (exit_code != 0) {
+          std::cerr << "[Server] Worker " << pid << " exited with code " << exit_code << "\n";
+          std::cerr.flush();
+        } else {
+          // Clean exit during shutdown - don't restart
+          if (shm_ctrl && shm_ctrl->shutdown_requested) {
+            worker_pids[i] = 0;
+            continue;
+          }
+          std::cerr << "[Server] Worker " << pid << " exited unexpectedly\n";
+          std::cerr.flush();
+        }
+
+        // Mark any jobs this worker was processing as failed
+        mark_worker_jobs_failed(pid);
+
+        // Spawn a replacement worker (unless shutting down)
+        if (keep_running && shm_ctrl && !shm_ctrl->shutdown_requested) {
+          pid_t new_pid = spawn_single_worker(static_cast<int>(i));
+          if (new_pid > 0) {
+            worker_pids[i] = new_pid;
+            std::cout << "[Server] Restarted worker " << i << " with PID " << new_pid << "\n";
+            std::cout.flush();
+          } else {
+            worker_pids[i] = 0;  // Failed to restart
+          }
+        } else {
+          worker_pids[i] = 0;
+        }
+      }
+    }
+
+    // Check every 100ms
+    std::this_thread::sleep_for(std::chrono::milliseconds(100));
+  }
+
+  std::cout << "[Server] Worker monitor thread stopped\n";
+  std::cout.flush();
+}
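+
+// The monitor above reaps with waitpid(WNOHANG) and classifies each exit.
+// That reap-and-classify step in isolation (illustrative only):
+#if 0
+#include <sys/wait.h>
+
+// Returns true if `pid` has exited; reports signal vs. exit code.
+static bool example_reap_nonblocking(pid_t pid, bool& signaled, int& code_or_signal)
+{
+  int status = 0;
+  if (waitpid(pid, &status, WNOHANG) != pid) { return false; }  // still running
+  signaled = WIFSIGNALED(status);
+  code_or_signal = signaled ? WTERMSIG(status)
+                            : (WIFEXITED(status) ? WEXITSTATUS(status) : -1);
+  return true;
+}
+#endif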
+
+void result_retrieval_thread()
+{
+  std::cout << "[Server] Result retrieval thread started\n";
+  std::cout.flush();
+
+  while (keep_running) {
+    bool found = false;
+
+    // PIPE MODE: Check for jobs that need data sent to workers
+    if (config.use_pipes) {
+      for (size_t i = 0; i < MAX_JOBS; ++i) {
+        if (job_queue[i].ready && job_queue[i].claimed && !job_queue[i].data_sent &&
+            !job_queue[i].cancelled) {
+          std::string job_id(job_queue[i].job_id);
+          int worker_idx = job_queue[i].worker_index;
+
+          std::cout << "[DEBUG RRT] Found job needing data: " << job_id
+                    << " worker_idx=" << worker_idx << std::endl;
+          std::cout.flush();
+
+          if (worker_idx >= 0) {
+            // Prefer file-backed payloads (streaming upload).
+            PendingJobFile pending_file;
+            bool have_file = false;
+            {
+              std::lock_guard lock(pending_files_mutex);
+              auto itf = pending_job_files.find(job_id);
+              if (itf != pending_job_files.end()) {
+                pending_file = itf->second;
+                pending_job_files.erase(itf);
+                have_file = true;
+              }
+            }
+
+            if (have_file) {
+              if (config.verbose) {
+                std::cout << "[Server] Sending file-backed payload to worker " << worker_idx
+                          << " for job " << job_id << " size=" << pending_file.size_bytes << "\n";
+                std::cout.flush();
+              }
+              bool ok =
+                send_job_data_pipe_file(worker_idx, pending_file.path, pending_file.size_bytes);
+              unlink(pending_file.path.c_str());  // best-effort cleanup
+              if (ok) {
+                job_queue[i].data_sent = true;
+              } else {
+                std::cerr << "[Server] Failed to send file-backed payload to worker " << worker_idx
+                          << "\n";
+                job_queue[i].cancelled = true;
+              }
+              found = true;
+              continue;
+            }
+
+            // Get the pending job data
+            std::vector<uint8_t> job_data;
+            {
+              std::lock_guard lock(pending_data_mutex);
+              std::cout << "[DEBUG RRT] Looking for pending data, pending_job_data size="
+                        << pending_job_data.size() << std::endl;
+              std::cout.flush();
+              auto it = pending_job_data.find(job_id);
+              if (it != pending_job_data.end()) {
+                job_data = std::move(it->second);
+                pending_job_data.erase(it);
+                std::cout << "[DEBUG RRT] Found and moved data for job " << job_id
+                          << ", size=" << job_data.size() << std::endl;
+                std::cout.flush();
+              } else {
+                std::cout << "[DEBUG RRT] NOT FOUND in pending_job_data for job_id=" << job_id
+                          << std::endl;
+                std::cout.flush();
+              }
+            }
+
+            if (!job_data.empty()) {
+              // Send the data to the worker's pipe
+              if (send_job_data_pipe(worker_idx, job_data)) {
+                job_queue[i].data_sent = true;
+                if (config.verbose) {
+                  std::cout << "[Server] Sent " << job_data.size() << " bytes to worker "
+                            << worker_idx << " for job " << job_id << "\n";
+                }
+              } else {
+                std::cerr << "[Server] Failed to send job data to worker " << worker_idx << "\n";
+                // Mark the job as failed
+                job_queue[i].cancelled = true;
+              }
+              found = true;
+            }
+          }
+        }
+      }
+    }
+
+    // Check for completed results
+    for (size_t i = 0; i < MAX_RESULTS; ++i) {
+      if (result_queue[i].ready && !result_queue[i].retrieved) {
+        std::string job_id(result_queue[i].job_id);
+        uint32_t result_status = result_queue[i].status;
+        bool success = (result_status == 0);
+        bool cancelled = (result_status == 2);
+        int worker_idx = result_queue[i].worker_index;
+        if (config.verbose) {
+          std::cout << "[Server] Detected ready result_slot=" << i << " for job " << job_id
+                    << " status=" << result_status << " data_size=" << result_queue[i].data_size
+                    << " worker_idx=" << worker_idx << "\n";
+          std::cout.flush();
+        }
+
+        std::vector<uint8_t> result_data;
+        std::string error_message;
+
+        if (success && result_queue[i].data_size > 0) {
+          if (config.use_pipes) {
+            // Pipe mode: read the result from the worker's output pipe
+            if (config.verbose) {
+              std::cout << "[Server] Reading " << result_queue[i].data_size
+                        << " bytes from worker pipe for job " << job_id << "\n";
+              std::cout.flush();
+            }
+            if (!recv_result_pipe(worker_idx, result_queue[i].data_size, result_data)) {
+              error_message = "Failed to read result data from pipe";
+              success = false;
+            }
+          } else {
+            // SHM mode: read from shared memory
+            if (!read_job_shm(
+                  result_queue[i].shm_data_name, result_queue[i].data_size, result_data)) {
+              error_message = "Failed to read result data from shared memory";
+              success = false;
+            }
+            // Cleanup the result shm after reading
+            cleanup_job_shm(result_queue[i].shm_data_name);
+          }
+        } else if (!success) {
+          error_message = result_queue[i].error_message;
+        }
+
+        // Check if there's a blocking waiter
+        {
+          std::lock_guard lock(waiters_mutex);
+          auto wit = waiting_threads.find(job_id);
+          if (wit != waiting_threads.end()) {
+            // Wake up all waiting threads sharing this waiter
+            auto waiter = wit->second;
+            {
+              std::lock_guard waiter_lock(waiter->mutex);
+              waiter->result_data = std::move(result_data);
+              waiter->error_message = error_message;
+              waiter->success = success;
+              waiter->ready = true;
+            }
+            waiter->cv.notify_all();
+            waiting_threads.erase(wit);
+          }
+        }
+
+        // Update the job tracker
+        {
+          std::lock_guard lock(tracker_mutex);
+          auto it = job_tracker.find(job_id);
+          if (it != job_tracker.end()) {
+            if (success) {
+              it->second.status = JobStatus::COMPLETED;
+              it->second.result_data = result_data;
+              if (config.verbose) {
+                std::cout << "[Server] Marked job COMPLETED in job_tracker: " << job_id
+                          << " result_bytes=" << result_data.size() << "\n";
+                std::cout.flush();
+              }
+            } else if (cancelled) {
+              it->second.status = JobStatus::CANCELLED;
+              it->second.error_message = error_message;
+              if (config.verbose) {
+                std::cout << "[Server] Marked job CANCELLED in job_tracker: " << job_id
+                          << " msg=" << error_message << "\n";
+                std::cout.flush();
+              }
+            } else {
+              it->second.status = JobStatus::FAILED;
+              it->second.error_message = error_message;
+              if (config.verbose) {
+                std::cout << "[Server] Marked job FAILED in job_tracker: " << job_id
+                          << " msg=" << error_message << "\n";
+                std::cout.flush();
+              }
+            }
+          } else if (config.verbose) {
+            std::cout << "[Server] WARNING: result for unknown job_id (not in job_tracker): "
+                      << job_id << "\n";
+            std::cout.flush();
+          }
+        }
+
+        result_queue[i].retrieved = true;
+        result_queue[i].worker_index = -1;
+        result_queue[i].ready = false;  // Free the slot
+        found = true;
+      }
+    }
+
+    if (!found) {
+      usleep(10000);  // 10ms
+    }
+
+    result_cv.notify_all();
+  }
+
+  std::cout << "[Server] Result retrieval thread stopped\n";
+  std::cout.flush();
+}
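+
+// Result delivery above wakes blocked waiters through a shared
+// mutex/condition_variable/ready-flag triple. The core handoff, reduced to its
+// essentials (illustrative only):
+#if 0
+#include <condition_variable>
+#include <mutex>
+#include <string>
+#include <utility>
+
+struct example_waiter_t {
+  std::mutex mutex;
+  std::condition_variable cv;
+  bool ready = false;
+  std::string payload;
+};
+
+static void example_publish(example_waiter_t& w, std::string data)
+{
+  {
+    std::lock_guard lock(w.mutex);
+    w.payload = std::move(data);
+    w.ready = true;  // set under the lock, before notifying
+  }
+  w.cv.notify_all();
+}
+
+static std::string example_await(example_waiter_t& w)
+{
+  std::unique_lock lock(w.mutex);
+  w.cv.wait(lock, [&w] { return w.ready; });  // tolerates spurious wakeups
+  return w.payload;
+}
+#endif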
+
+void incumbent_retrieval_thread()
+{
+  std::cout << "[Server] Incumbent retrieval thread started\n";
+  std::cout.flush();
+
+  while (keep_running) {
+    std::vector<pollfd> pfds;
+    pfds.reserve(worker_pipes.size());
+    for (const auto& wp : worker_pipes) {
+      if (wp.incumbent_from_worker_fd >= 0) {
+        pollfd pfd;
+        pfd.fd = wp.incumbent_from_worker_fd;
+        pfd.events = POLLIN;
+        pfd.revents = 0;
+        pfds.push_back(pfd);
+      }
+    }
+
+    if (pfds.empty()) {
+      std::this_thread::sleep_for(std::chrono::milliseconds(100));
+      continue;
+    }
+
+    int poll_result = poll(pfds.data(), pfds.size(), 100);
+    if (poll_result < 0) {
+      if (errno == EINTR) continue;
+      std::cerr << "[Server] poll() failed in incumbent thread: " << strerror(errno) << "\n";
+      std::this_thread::sleep_for(std::chrono::milliseconds(100));
+      continue;
+    }
+    if (poll_result == 0) { continue; }
+
+    for (const auto& pfd : pfds) {
+      if (!(pfd.revents & POLLIN)) { continue; }
+      std::vector<uint8_t> data;
+      if (!recv_incumbent_pipe(pfd.fd, data)) { continue; }
+      if (data.empty()) { continue; }
+
+      cuopt::remote::Incumbent incumbent_msg;
+      if (!incumbent_msg.ParseFromArray(data.data(), data.size())) {
+        std::cerr << "[Server] Failed to parse incumbent payload\n";
+        continue;
+      }
+
+      const std::string job_id = incumbent_msg.job_id();
+      if (job_id.empty()) { continue; }
+
+      IncumbentEntry entry;
+      entry.objective = incumbent_msg.objective();
+      entry.assignment.reserve(incumbent_msg.assignment_size());
+      for (int i = 0; i < incumbent_msg.assignment_size(); ++i) {
+        entry.assignment.push_back(incumbent_msg.assignment(i));
+      }
+
+      {
+        std::lock_guard lock(tracker_mutex);
+        auto it = job_tracker.find(job_id);
+        if (it != job_tracker.end()) {
+          it->second.incumbents.push_back(std::move(entry));
+          std::cout << "[Server] Stored incumbent job_id=" << job_id
+                    << " idx=" << (it->second.incumbents.size() - 1)
+                    << " obj=" << incumbent_msg.objective()
+                    << " vars=" << incumbent_msg.assignment_size() << "\n";
+          std::cout.flush();
+        }
+      }
+    }
+  }
+
+  std::cout << "[Server] Incumbent retrieval thread stopped\n";
+  std::cout.flush();
+}
+
+static std::string create_job_shm(const std::string& job_id,
+                                  const std::vector<uint8_t>& data,
+                                  const char* prefix)
+{
+  std::string shm_name = std::string("/cuopt_") + prefix + "_" + job_id;
+
+  int fd = shm_open(shm_name.c_str(), O_CREAT | O_RDWR, 0666);
+  if (fd < 0) {
+    std::cerr << "[Server] Failed to create shm " << shm_name << ": " << strerror(errno) << "\n";
+    return "";
+  }
+
+  if (ftruncate(fd, data.size()) < 0) {
+    std::cerr << "[Server] Failed to size shm " << shm_name << ": " << strerror(errno) << "\n";
+    close(fd);
+    shm_unlink(shm_name.c_str());
+    return "";
+  }
+
+  void* ptr = mmap(nullptr, data.size(), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+  close(fd);
+
+  if (ptr == MAP_FAILED) {
+    std::cerr << "[Server] Failed to map shm " << shm_name << ": " << strerror(errno) << "\n";
+    shm_unlink(shm_name.c_str());
+    return "";
+  }
+
+  memcpy(ptr, data.data(), data.size());
+  munmap(ptr, data.size());
+
+  return shm_name;
+}
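+
+// create_job_shm() pairs with read_job_shm()/cleanup_job_shm() defined further
+// below; the contract is write-once, read-once, unlink. Round-trip usage sketch
+// (illustrative only):
+#if 0
+static bool example_shm_round_trip(const std::string& job_id,
+                                   const std::vector<uint8_t>& payload)
+{
+  // Producer: copy the payload into a fresh POSIX shm segment.
+  std::string name = create_job_shm(job_id, payload, "job");
+  if (name.empty()) { return false; }
+
+  // Consumer: map the segment back, then unlink it.
+  std::vector<uint8_t> bytes;
+  bool ok = read_job_shm(name.c_str(), payload.size(), bytes);
+  cleanup_job_shm(name.c_str());  // shm_unlink; freed once unmapped everywhere
+  return ok && bytes == payload;
+}
+#endif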
+
+static int64_t get_upload_mem_threshold_bytes()
+{
+  // Default to 1 GiB; set the env var CUOPT_GRPC_UPLOAD_MEM_THRESHOLD_BYTES to override.
+  // 0 => always use a file, -1 => always use memory (not recommended for huge uploads).
+  const char* val = std::getenv("CUOPT_GRPC_UPLOAD_MEM_THRESHOLD_BYTES");
+  if (!val || val[0] == '\0') { return kGiB; }
+  try {
+    return std::stoll(val);
+  } catch (...) {
+    return kGiB;
+  }
+}
+
+static std::string read_file_to_string(const std::string& path)
+{
+  std::ifstream in(path, std::ios::in | std::ios::binary);
+  if (!in.is_open()) { return ""; }
+  std::ostringstream ss;
+  ss << in.rdbuf();
+  return ss.str();
+}
+
+// Read data from a per-job shared memory segment
+static bool read_job_shm(const char* shm_name, size_t data_size, std::vector<uint8_t>& data)
+{
+  int fd = shm_open(shm_name, O_RDONLY, 0666);
+  if (fd < 0) {
+    std::cerr << "[Worker] Failed to open shm " << shm_name << ": " << strerror(errno) << "\n";
+    return false;
+  }
+
+  void* ptr = mmap(nullptr, data_size, PROT_READ, MAP_SHARED, fd, 0);
+  close(fd);
+
+  if (ptr == MAP_FAILED) {
+    std::cerr << "[Worker] Failed to map shm " << shm_name << ": " << strerror(errno) << "\n";
+    return false;
+  }
+
+  data.resize(data_size);
+  memcpy(data.data(), ptr, data_size);
+  munmap(ptr, data_size);
+
+  return true;
+}
+
+// Write data to a per-result shared memory segment
+static std::string write_result_shm(const std::string& job_id, const std::vector<uint8_t>& data)
+{
+  return create_job_shm(job_id, data, "result");
+}
+
+// Cleanup a per-job shared memory segment
+static void cleanup_job_shm(const char* shm_name)
+{
+  if (shm_name[0] != '\0') { shm_unlink(shm_name); }
+}
+
+// Write all bytes to an fd, looping over partial writes.
+// (Despite the name, this works for any fd - pipes and regular files alike.)
+static bool write_to_pipe(int fd, const void* data, size_t size)
+{
+  const uint8_t* ptr = static_cast<const uint8_t*>(data);
+  size_t remaining = size;
+  while (remaining > 0) {
+    ssize_t written = ::write(fd, ptr, remaining);
+    if (written <= 0) {
+      if (errno == EINTR) continue;
+      return false;
+    }
+    ptr += written;
+    remaining -= written;
+  }
+  return true;
+}
+
+// Read all data from a pipe (handles partial reads) with a timeout.
+// timeout_ms: milliseconds to wait for data (default 120000 = 2 minutes)
+static bool read_from_pipe(int fd, void* data, size_t size, int timeout_ms)
+{
+  uint8_t* ptr = static_cast<uint8_t*>(data);
+  size_t remaining = size;
+  while (remaining > 0) {
+    // Use poll() to wait for data with a timeout
+    struct pollfd pfd;
+    pfd.fd = fd;
+    pfd.events = POLLIN;
+
+    int poll_result = poll(&pfd, 1, timeout_ms);
+    if (poll_result < 0) {
+      if (errno == EINTR) continue;
+      std::cerr << "[Server] poll() failed on pipe: " << strerror(errno) << "\n";
+      return false;
+    }
+    if (poll_result == 0) {
+      std::cerr << "[Server] Timeout waiting for pipe data (waited " << timeout_ms << "ms)\n";
+      return false;
+    }
+    if (pfd.revents & (POLLERR | POLLHUP | POLLNVAL)) {
+      std::cerr << "[Server] Pipe error/hangup detected\n";
+      return false;
+    }
+
+    ssize_t nread = ::read(fd, ptr, remaining);
+    if (nread <= 0) {
+      if (errno == EINTR) continue;
+      if (nread == 0) { std::cerr << "[Server] Pipe EOF (writer closed)\n"; }
+      return false;
+    }
+    ptr += nread;
+    remaining -= nread;
+  }
+  return true;
+}
+
+// Send job data to a worker via pipe (length-prefixed)
+static bool send_job_data_pipe(int worker_idx, const std::vector<uint8_t>& data)
+{
+  if (worker_idx < 0 || worker_idx >= static_cast<int>(worker_pipes.size())) { return false; }
+  int fd = worker_pipes[worker_idx].to_worker_fd;
+  if (fd < 0) return false;
+
+  // Send the size first
+  uint64_t size = data.size();
+  if (!write_to_pipe(fd, &size, sizeof(size))) return false;
+  // Send the data
+  if (size > 0 && !write_to_pipe(fd, data.data(), data.size())) return false;
+  return true;
+}
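+
+// Every pipe message above uses the same frame: an 8-byte native-endian length
+// followed by the payload (both ends run on the same host, so endianness is not
+// negotiated). The framing against a memory buffer (illustrative only):
+#if 0
+#include <cstdint>
+#include <cstring>
+#include <vector>
+
+static std::vector<uint8_t> example_encode_frame(const std::vector<uint8_t>& payload)
+{
+  uint64_t size = payload.size();
+  std::vector<uint8_t> frame(sizeof(size) + payload.size());
+  memcpy(frame.data(), &size, sizeof(size));
+  if (!payload.empty()) { memcpy(frame.data() + sizeof(size), payload.data(), payload.size()); }
+  return frame;
+}
+
+static bool example_decode_frame(const std::vector<uint8_t>& frame,
+                                 std::vector<uint8_t>& payload)
+{
+  uint64_t size = 0;
+  if (frame.size() < sizeof(size)) { return false; }
+  memcpy(&size, frame.data(), sizeof(size));
+  if (frame.size() - sizeof(size) < size) { return false; }
+  payload.assign(frame.begin() + sizeof(size), frame.begin() + sizeof(size) + size);
+  return true;
+}
+#endif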
+
+// Stream job data from a file to the worker via pipe (length-prefixed).
+// This avoids holding the entire job payload in server memory.
+static bool send_job_data_pipe_file(int worker_idx, const std::string& path, uint64_t expected_size)
+{
+  if (worker_idx < 0 || worker_idx >= static_cast<int>(worker_pipes.size())) { return false; }
+  int pipe_fd = worker_pipes[worker_idx].to_worker_fd;
+  if (pipe_fd < 0) return false;
+
+  int file_fd = open(path.c_str(), O_RDONLY);
+  if (file_fd < 0) {
+    std::cerr << "[Server] Failed to open payload file: " << path << " err=" << strerror(errno)
+              << "\n";
+    return false;
+  }
+
+  // Send the size first (the worker validates it against expected_size in shared memory).
+  uint64_t size = expected_size;
+  if (!write_to_pipe(pipe_fd, &size, sizeof(size))) {
+    close(file_fd);
+    return false;
+  }
+
+  std::vector<uint8_t> buf(kMiB);  // 1 MiB copy buffer
+  uint64_t remaining = size;
+  while (remaining > 0) {
+    size_t to_read = buf.size();
+    if (remaining < to_read) { to_read = static_cast<size_t>(remaining); }
+
+    ssize_t nread = ::read(file_fd, buf.data(), to_read);
+    if (nread < 0) {
+      if (errno == EINTR) continue;
+      std::cerr << "[Server] Failed reading payload file: " << path << " err=" << strerror(errno)
+                << "\n";
+      close(file_fd);
+      return false;
+    }
+    if (nread == 0) {
+      std::cerr << "[Server] Unexpected EOF reading payload file: " << path << "\n";
+      close(file_fd);
+      return false;
+    }
+    if (!write_to_pipe(pipe_fd, buf.data(), static_cast<size_t>(nread))) {
+      close(file_fd);
+      return false;
+    }
+    remaining -= static_cast<uint64_t>(nread);
+  }
+
+  close(file_fd);
+  return true;
+}
+
+// Receive job data from a pipe (length-prefixed) - called by the worker
+static bool recv_job_data_pipe(int fd, uint64_t expected_size, std::vector<uint8_t>& data)
+{
+  // Read the size
+  uint64_t size;
+  if (!read_from_pipe(fd, &size, sizeof(size))) return false;
+  if (size != expected_size) {
+    std::cerr << "[Worker] Size mismatch: expected " << expected_size << ", got " << size << "\n";
+    return false;
+  }
+  // Read the data
+  data.resize(size);
+  if (size > 0 && !read_from_pipe(fd, data.data(), size)) return false;
+  return true;
+}
+
+// Send result data to the server via pipe (length-prefixed) - called by the worker
+static bool send_result_pipe(int fd, const std::vector<uint8_t>& data)
+{
+  // Send the size first
+  uint64_t size = data.size();
+  if (!write_to_pipe(fd, &size, sizeof(size))) return false;
+  // Send the data
+  if (size > 0 && !write_to_pipe(fd, data.data(), data.size())) return false;
+  return true;
+}
+
+// Send incumbent data to the server via pipe (length-prefixed) - called by the worker
+static bool send_incumbent_pipe(int fd, const std::vector<uint8_t>& data)
+{
+  uint64_t size = data.size();
+  if (!write_to_pipe(fd, &size, sizeof(size))) return false;
+  if (size > 0 && !write_to_pipe(fd, data.data(), data.size())) return false;
+  return true;
+}
+
+// Receive incumbent data from a worker via pipe (length-prefixed)
+static bool recv_incumbent_pipe(int fd, std::vector<uint8_t>& data)
+{
+  uint64_t size;
+  if (!read_from_pipe(fd, &size, sizeof(size))) return false;
+  data.resize(size);
+  if (size > 0 && !read_from_pipe(fd, data.data(), size)) return false;
+  return true;
+}
+
+// Receive result data from a worker via pipe (length-prefixed)
+static bool recv_result_pipe(int worker_idx, uint64_t expected_size, std::vector<uint8_t>& data)
+{
+  if (worker_idx < 0 || worker_idx >= static_cast<int>(worker_pipes.size())) { return false; }
+  int fd = worker_pipes[worker_idx].from_worker_fd;
+  if (fd < 0) return false;
+
+  // Read the size
+  uint64_t size;
+  if (!read_from_pipe(fd, &size, sizeof(size))) return false;
+  if (size != expected_size) {
+    std::cerr << "[Server] Result size mismatch: expected " << expected_size << ", got " << size
+              << "\n";
+    return false;
+  }
+  // Read the data
+  data.resize(size);
+  if (size > 0 && !read_from_pipe(fd, data.data(), size)) return false;
+  return true;
+}
+
+// Submit a job asynchronously (returns job_id)
+std::pair<bool, std::string> submit_job_async(const std::vector<uint8_t>& request_data, bool is_mip)
+{
+  std::string job_id = generate_job_id();
+
+  std::string shm_name;
+  if (config.use_pipes) {
+    // Pipe mode: store the data in the pending map (sent when a worker claims the job)
+    {
+      std::lock_guard lock(pending_data_mutex);
+      pending_job_data[job_id] = request_data;
+    }
+  } else {
+    // SHM mode: create per-job shared memory for the problem data
+    shm_name = create_job_shm(job_id, request_data, "job");
+    if (shm_name.empty()) { return {false, "Failed to create shared memory for job data"}; }
+  }
+
+  // Find a free job slot
+  for (size_t i = 0; i < MAX_JOBS; ++i) {
+    if (!job_queue[i].ready && !job_queue[i].claimed) {
+      copy_cstr(job_queue[i].job_id, job_id);
+      job_queue[i].problem_type = is_mip ? 1 : 0;
+      job_queue[i].data_size = request_data.size();
+      if (!config.use_pipes) {
+        copy_cstr(job_queue[i].shm_data_name, shm_name);
+      } else {
+        job_queue[i].shm_data_name[0] = '\0';
+      }
+      job_queue[i].worker_pid = 0;
+      job_queue[i].worker_index = -1;
+      job_queue[i].data_sent = false;
+      job_queue[i].claimed = false;
+      job_queue[i].cancelled = false;
+      job_queue[i].ready = true;  // Mark as ready last
+
+      // Track the job
+      {
+        std::lock_guard lock(tracker_mutex);
+        JobInfo info;
+        info.job_id = job_id;
+        info.status = JobStatus::QUEUED;
+        info.submit_time = std::chrono::steady_clock::now();
+        info.is_mip = is_mip;
+        info.is_blocking = false;
+        job_tracker[job_id] = info;
+      }
+
+      if (config.verbose) { std::cout << "[Server] Job submitted (async): " << job_id << "\n"; }
+
+      return {true, job_id};
+    }
+  }
+
+  // No free slot - cleanup
+  if (config.use_pipes) {
+    std::lock_guard lock(pending_data_mutex);
+    pending_job_data.erase(job_id);
+  } else {
+    shm_unlink(shm_name.c_str());
+  }
+  return {false, "Job queue full"};
+}
+
+// Check job status
+JobStatus check_job_status(const std::string& job_id, std::string& message)
+{
+  std::lock_guard lock(tracker_mutex);
+  auto it = job_tracker.find(job_id);
+
+  if (it == job_tracker.end()) {
+    message = "Job ID not found";
+    return JobStatus::NOT_FOUND;
+  }
+
+  // If the status is QUEUED, check whether the job has been claimed by a worker
+  // (which means it's now PROCESSING)
+  if (it->second.status == JobStatus::QUEUED) {
+    for (size_t i = 0; i < MAX_JOBS; ++i) {
+      if (job_queue[i].ready && job_queue[i].claimed &&
+          std::string(job_queue[i].job_id) == job_id) {
+        it->second.status = JobStatus::PROCESSING;
+        break;
+      }
+    }
+  }
+
+  switch (it->second.status) {
+    case JobStatus::QUEUED: message = "Job is queued"; break;
+    case JobStatus::PROCESSING: message = "Job is being processed"; break;
+    case JobStatus::COMPLETED: message = "Job completed"; break;
+    case JobStatus::FAILED: message = "Job failed: " + it->second.error_message; break;
+    case JobStatus::CANCELLED: message = "Job was cancelled"; break;
+    default: message = "Unknown status";
+  }
+
+  return it->second.status;
+}
+
+// Check whether a job is MIP (vs LP)
+bool get_job_is_mip(const std::string& job_id)
+{
+  std::lock_guard lock(tracker_mutex);
+  auto it = job_tracker.find(job_id);
+  if (it == job_tracker.end()) {
+    return false;  // Default to LP if not found
+  }
+  return it->second.is_mip;
+}
+
+// Get a job result
+bool get_job_result(const std::string& job_id,
+                    std::vector<uint8_t>& result_data,
+                    std::string& error_message)
+{
+  std::lock_guard lock(tracker_mutex);
+  auto it = job_tracker.find(job_id);
+
+  if (it == job_tracker.end()) {
+    error_message = "Job ID not found";
+    return false;
+  }
+
+  if (it->second.status == JobStatus::COMPLETED) {
+    result_data = it->second.result_data;
+    return true;
+  } else if (it->second.status == JobStatus::FAILED) {
+    error_message = it->second.error_message;
+    return false;
+  } else {
+    error_message = "Job not completed yet";
+    return false;
+  }
+}
+
+// Wait for a job to complete (blocking).
+// This uses condition variables - the thread sleeps until the job is done.
+bool wait_for_result(const std::string& job_id,
+                     std::vector<uint8_t>& result_data,
+                     std::string& error_message)
+{
+  // First check whether the job already completed
+  {
+    std::lock_guard lock(tracker_mutex);
+    auto it = job_tracker.find(job_id);
+
+    if (it == job_tracker.end()) {
+      error_message = "Job ID not found";
+      return false;
+    }
+
+    // If already in a terminal state, return immediately
+    if (it->second.status == JobStatus::COMPLETED) {
+      result_data = it->second.result_data;
+      return true;
+    } else if (it->second.status == JobStatus::FAILED) {
+      error_message = it->second.error_message;
+      return false;
+    } else if (it->second.status == JobStatus::CANCELLED) {
+      error_message = "Job was cancelled";
+      return false;
+    }
+  }
+
+  // Job is still running - reuse or create a shared waiter
+  std::shared_ptr waiter;
+  {
+    std::lock_guard lock(waiters_mutex);
+    auto it = waiting_threads.find(job_id);
+    if (it != waiting_threads.end()) {
+      waiter = it->second;
+    } else {
+      waiter = std::make_shared();
+      waiting_threads[job_id] = waiter;
+    }
+  }
+
+  if (config.verbose) {
+    std::cout << "[Server] WAIT_FOR_RESULT: waiting for job " << job_id << "\n";
+  }
+
+  waiter->waiters.fetch_add(1, std::memory_order_relaxed);
+
+  // Wait on the condition variable - this thread sleeps until signaled
+  {
+    std::unique_lock lock(waiter->mutex);
+    waiter->cv.wait(lock, [&waiter] { return waiter->ready; });
+  }
+
+  if (config.verbose) {
+    std::cout << "[Server] WAIT_FOR_RESULT: job " << job_id
+              << " completed, success=" << waiter->success << "\n";
+  }
+
+  if (waiter->success) {
+    if (waiter->waiters.load(std::memory_order_relaxed) > 1) {
+      result_data = waiter->result_data;
+    } else {
+      result_data = std::move(waiter->result_data);
+    }
+    waiter->waiters.fetch_sub(1, std::memory_order_relaxed);
+    return true;
+  } else {
+    error_message = waiter->error_message;
+    waiter->waiters.fetch_sub(1, std::memory_order_relaxed);
+    return false;
+  }
+}
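+
+// wait_for_result() copies the payload while other waiters remain and lets the
+// last waiter steal the buffer. The same idea as a tiny helper (illustrative
+// only; assumes consumers are otherwise serialized, as after a CV wakeup here):
+#if 0
+#include <atomic>
+#include <utility>
+#include <vector>
+
+// Copy for all but the final consumer; the final consumer moves the buffer.
+static std::vector<uint8_t> example_take_or_copy(std::vector<uint8_t>& shared,
+                                                 std::atomic<int>& consumers_left)
+{
+  if (consumers_left.fetch_sub(1) > 1) { return shared; }  // others remain: copy
+  return std::move(shared);                                // last one: move
+}
+#endif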
+
+void ensure_log_dir_exists()
+{
+  struct stat st;
+  if (stat(LOG_DIR.c_str(), &st) != 0) { mkdir(LOG_DIR.c_str(), 0755); }
+}
+
+void ensure_upload_dir_exists()
+{
+  struct stat st;
+  if (stat(UPLOAD_DIR.c_str(), &st) != 0) { mkdir(UPLOAD_DIR.c_str(), 0755); }
+}
+
+// Delete the log file for a job
+void delete_log_file(const std::string& job_id)
+{
+  std::string log_file = get_log_file_path(job_id);
+  unlink(log_file.c_str());  // Ignore errors if the file doesn't exist
+}
+
+void delete_upload_file(const std::string& upload_id)
+{
+  // Uploads may be stored as either ".bin" (protobuf payload) or ".bin.mps" (raw MPS payload).
+  std::string f0 = get_upload_file_path(upload_id);
+  unlink(f0.c_str());
+  std::string f1 = f0 + ".mps";
+  unlink(f1.c_str());
+}
+
+// Cancel a job - returns: 0=success, 1=job_not_found, 2=already_completed, 3=already_cancelled.
+// Also reports the job's status after the cancel attempt via job_status_out.
+int cancel_job(const std::string& job_id, JobStatus& job_status_out, std::string& message)
+{
+  std::lock_guard lock(tracker_mutex);
+  auto it = job_tracker.find(job_id);
+
+  if (it == job_tracker.end()) {
+    message = "Job ID not found";
+    job_status_out = JobStatus::NOT_FOUND;
+    return 1;
+  }
+
+  JobStatus current_status = it->second.status;
+
+  // Can't cancel completed jobs
+  if (current_status == JobStatus::COMPLETED) {
+    message = "Cannot cancel completed job";
+    job_status_out = JobStatus::COMPLETED;
+    return 2;
+  }
+
+  // Already cancelled
+  if (current_status == JobStatus::CANCELLED) {
+    message = "Job already cancelled";
+    job_status_out = JobStatus::CANCELLED;
+    return 3;
+  }
+
+  // Can't cancel failed jobs
+  if (current_status == JobStatus::FAILED) {
+    message = "Cannot cancel failed job";
+    job_status_out = JobStatus::FAILED;
+    return 2;
+  }
+
+  // Find the job in the shared memory queue
+  for (size_t i = 0; i < MAX_JOBS; ++i) {
+    if (job_queue[i].ready && strcmp(job_queue[i].job_id, job_id.c_str()) == 0) {
+      // Check whether the job is being processed by a worker
+      pid_t worker_pid = job_queue[i].worker_pid;
+
+      if (worker_pid > 0 && job_queue[i].claimed) {
+        // Job is being processed - kill the worker
+        if (config.verbose) {
+          std::cout << "[Server] Cancelling running job " << job_id << " (killing worker "
+                    << worker_pid << ")\n";
+        }
+        // Mark cancelled BEFORE killing so the worker monitor path (mark_worker_jobs_failed)
+        // reliably observes was_cancelled=true and reports CANCELLED rather than FAILED.
+        job_queue[i].cancelled = true;
+        kill(worker_pid, SIGKILL);
+      } else {
+        // Job is queued but not yet claimed - mark it as cancelled
+        if (config.verbose) { std::cout << "[Server] Cancelling queued job " << job_id << "\n"; }
+        job_queue[i].cancelled = true;
+      }
+
+      // Update the job tracker
+      it->second.status = JobStatus::CANCELLED;
+      it->second.error_message = "Job cancelled by user";
+      job_status_out = JobStatus::CANCELLED;
+      message = "Job cancelled successfully";
+
+      // Delete the log file for this job
+      delete_log_file(job_id);
+
+      // Wake up any threads waiting for this job
+      {
+        std::lock_guard wlock(waiters_mutex);
+        auto wit = waiting_threads.find(job_id);
+        if (wit != waiting_threads.end()) {
+          auto waiter = wit->second;
+          {
+            std::lock_guard waiter_lock(waiter->mutex);
+            waiter->error_message = "Job cancelled by user";
+            waiter->success = false;
+            waiter->ready = true;
+          }
+          waiter->cv.notify_all();
+          waiting_threads.erase(wit);
+        }
+      }
+
+      return 0;
+    }
+  }
+
+  // Job not found in the queue (it might have already finished processing).
+  // Re-check the status since we hold the lock.
+  if (it->second.status == JobStatus::COMPLETED) {
+    message = "Cannot cancel completed job";
+    job_status_out = JobStatus::COMPLETED;
+    return 2;
+  }
+
+  // Job must be in flight or in an edge case - mark it as cancelled anyway
+  it->second.status = JobStatus::CANCELLED;
+  it->second.error_message = "Job cancelled by user";
+  job_status_out = JobStatus::CANCELLED;
+  message = "Job cancelled";
+
+  // Wake up any threads waiting for this job
+  {
+    std::lock_guard wlock(waiters_mutex);
+    auto wit = waiting_threads.find(job_id);
+    if (wit != waiting_threads.end()) {
+      auto waiter = wit->second;
+      {
+        std::lock_guard waiter_lock(waiter->mutex);
+        waiter->error_message = "Job cancelled by user";
+        waiter->success = false;
+        waiter->ready = true;
+      }
+      waiter->cv.notify_all();
+      waiting_threads.erase(wit);
+    }
+  }
+
+  return 0;
+}
+
+// ============================================================================
+// Helper Functions
+// ============================================================================
+
+std::string generate_job_id()
+{
+  static std::random_device rd;
+  static std::mt19937_64 gen(rd());
+  static std::uniform_int_distribution<uint64_t> dis;
+
+  std::stringstream ss;
+  ss << std::hex << dis(gen);
+  return ss.str();
+}
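+
+// For orientation before the service implementation below: the call sequence a
+// client would use against this service. Hypothetical client code - the channel
+// target is invented and the LP request is left unpopulated (illustrative only):
+#if 0
+#include <grpcpp/grpcpp.h>
+
+#include "cuopt_remote_service.grpc.pb.h"
+
+static void example_submit_and_wait()
+{
+  auto channel = grpc::CreateChannel("localhost:50051", grpc::InsecureChannelCredentials());
+  auto stub = cuopt::remote::CuOptRemoteService::NewStub(channel);
+
+  cuopt::remote::SubmitJobRequest req;
+  req.mutable_lp_request();  // a real client fills in problem + settings here
+
+  cuopt::remote::SubmitJobResponse resp;
+  grpc::ClientContext submit_ctx;
+  if (!stub->SubmitJob(&submit_ctx, req, &resp).ok()) { return; }
+
+  // Block until the solve finishes (the server parks the RPC on a waiter).
+  cuopt::remote::WaitRequest wait_req;
+  wait_req.set_job_id(resp.job_id());
+  cuopt::remote::ResultResponse result;
+  grpc::ClientContext wait_ctx;
+  stub->WaitForResult(&wait_ctx, wait_req, &result);
+}
+#endif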
+
+// ============================================================================
+// gRPC Service Implementation
+// ============================================================================
+
+class CuOptRemoteServiceImpl final : public cuopt::remote::CuOptRemoteService::Service {
+ public:
+  // SubmitJob - Submit an LP/MIP job for async processing
+  Status SubmitJob(ServerContext* context,
+                   const cuopt::remote::SubmitJobRequest* request,
+                   cuopt::remote::SubmitJobResponse* response) override
+  {
+    std::string job_id = generate_job_id();
+
+    // Determine the problem type and serialize the request
+    bool is_lp = request->has_lp_request();
+    std::vector<uint8_t> job_data;
+
+    if (is_lp) {
+      // Serialize the LP request
+      auto& lp_req = request->lp_request();
+      if (config.verbose) {
+        std::cerr << "[gRPC] SubmitJob LP fields: bytes=" << lp_req.ByteSizeLong()
+                  << " objective_scaling_factor=" << lp_req.problem().objective_scaling_factor()
+                  << " objective_offset=" << lp_req.problem().objective_offset()
+                  << " iteration_limit=" << lp_req.settings().iteration_limit()
+                  << " method=" << lp_req.settings().method() << std::endl;
+      }
+      size_t size = lp_req.ByteSizeLong();
+      job_data.resize(size);
+      if (!lp_req.SerializeToArray(job_data.data(), size)) {
+        return Status(StatusCode::INTERNAL, "Failed to serialize LP request");
+      }
+    } else if (request->has_mip_request()) {
+      // Serialize the MIP request
+      auto& mip_req = request->mip_request();
+      size_t size = mip_req.ByteSizeLong();
+      job_data.resize(size);
+      if (!mip_req.SerializeToArray(job_data.data(), size)) {
+        return Status(StatusCode::INTERNAL, "Failed to serialize MIP request");
+      }
+    } else {
+      return Status(StatusCode::INVALID_ARGUMENT, "No problem data provided");
+    }
+
+    // Find and reserve a free slot in the shared job queue.
+    //
+    // NOTE: Unlike the legacy socket server (single-threaded accept loop),
+    // gRPC can dispatch multiple SubmitJob RPCs concurrently. We must reserve
+    // a slot so that two SubmitJob calls don't pick the same entry.
+    //
+    // We use `claimed=true` as a temporary reservation while `ready=false`.
+    int job_idx = -1;
+    for (size_t i = 0; i < MAX_JOBS; ++i) {
+      if (job_queue[i].ready.load()) { continue; }
+      bool expected_claimed = false;
+      if (job_queue[i].claimed.compare_exchange_strong(expected_claimed, true)) {
+        job_idx = static_cast<int>(i);
+        break;
+      }
+    }
+
+    if (job_idx < 0) { return Status(StatusCode::RESOURCE_EXHAUSTED, "Job queue full"); }
+
+    // Initialize the job queue entry
+    copy_cstr(job_queue[job_idx].job_id, job_id);
+    job_queue[job_idx].problem_type = is_lp ? 0 : 1;
+    job_queue[job_idx].data_size = job_data.size();
+    // `claimed` is currently true as a reservation; keep it until the entry is fully initialized.
+    job_queue[job_idx].cancelled.store(false);
+    job_queue[job_idx].worker_index.store(-1);
+    job_queue[job_idx].data_sent.store(false);
+    job_queue[job_idx].shm_data_name[0] = '\0';  // Not used in pipe mode
+
+    // Store the job data for pipe mode
+    {
+      std::lock_guard lock(pending_data_mutex);
+      pending_job_data[job_id] = std::move(job_data);
+      std::cout << "[DEBUG SubmitJob] Stored " << pending_job_data[job_id].size()
+                << " bytes for job " << job_id << " in pending_job_data" << std::endl;
+      std::cout.flush();
+    }
+
+    // Add to the job tracker
+    {
+      std::lock_guard lock(tracker_mutex);
+      job_tracker[job_id] = JobInfo{
+        job_id, JobStatus::QUEUED, std::chrono::steady_clock::now(), {}, {}, !is_lp, "", false};
+    }
+
+    // Publish the job to workers: release the reservation and set ready last.
+    job_queue[job_idx].claimed.store(false);
+    job_queue[job_idx].ready.store(true);
+
+    response->set_job_id(job_id);
+    response->set_message("Job submitted successfully");
+
+    if (config.verbose) {
+      std::cout << "[gRPC] Job submitted: " << job_id << " (type=" << (is_lp ? "LP" : "MIP") << ")"
+                << std::endl;
+    }
+
+    return Status::OK;
+  }
+
+  Status UploadAndSubmit(ServerContext* context,
+                         ServerReaderWriter<cuopt::remote::UploadJobResponse,
+                                            cuopt::remote::UploadJobRequest>* stream) override
+  {
+    (void)context;
+
+    if (!config.use_pipes) {
+      return Status(StatusCode::FAILED_PRECONDITION,
+                    "UploadAndSubmit currently requires pipe mode (do not use --use-shm)");
+    }
+
+    ensure_upload_dir_exists();
+
+    cuopt::remote::UploadJobRequest in;
+    cuopt::remote::UploadJobResponse out;
+
+    const int64_t max_message_bytes =
+      (config.max_message_mb <= 0) ? -1 : (static_cast<int64_t>(config.max_message_mb) * kMiB);
+    auto set_upload_error =
+      [&](const std::string& upload_id, const std::string& message, int64_t committed_size) {
+        std::string full_message = message;
+        if (full_message.find("max_message_mb=") == std::string::npos) {
+          full_message += " (max_message_mb=" + std::to_string(config.max_message_mb) + ")";
+        }
+        out.Clear();
+        auto* err = out.mutable_error();
+        err->set_upload_id(upload_id);
+        err->set_message(full_message);
+        err->set_committed_size(committed_size);
+        err->set_max_message_bytes(max_message_bytes);
+      };
+
+    // The first message must be UploadStart.
+    if (!stream->Read(&in) || !in.has_start()) {
+      set_upload_error("", "First message must be UploadStart", 0);
+      stream->Write(out);
+      return Status(StatusCode::INVALID_ARGUMENT, "Missing UploadStart");
+    }
+
+    const auto& start = in.start();
+    std::string upload_id = start.upload_id().empty() ? generate_job_id() : start.upload_id();
+    bool is_mip = (start.problem_type() == cuopt::remote::MIP);
+    std::string upload_path = get_upload_file_path(upload_id);
+    int64_t committed = 0;
+
+    const int64_t threshold_bytes = get_upload_mem_threshold_bytes();
+    const int64_t total_size_hint = start.total_size();
+    const bool force_file = (threshold_bytes == 0) || start.resume();
+    bool use_memory = !force_file;
+    if (threshold_bytes >= 0 && total_size_hint > 0 && total_size_hint > threshold_bytes) {
+      use_memory = false;
+    }
+    if (threshold_bytes < 0) { use_memory = true; }
+
+    int fd = -1;
+    std::vector<uint8_t> mem_buffer;
+    auto cleanup_file = [&]() {
+      if (fd >= 0) {
+        close(fd);
+        delete_upload_file(upload_id);
+        fd = -1;
+      }
+    };
+
+    if (config.verbose) {
+      std::cout << "[gRPC] UploadAndSubmit start upload_id=" << upload_id
+                << " total_size=" << total_size_hint << " threshold_bytes=" << threshold_bytes
+                << " resume=" << (start.resume() ? 1 : 0) << " use_memory=" << (use_memory ? 1 : 0)
+                << " upload_path=" << upload_path << "\n";
+      std::cout.flush();
+    }
+
+    auto open_upload_file = [&](bool resume) -> bool {
+      int flags = O_CREAT | O_WRONLY;
+      flags |= resume ? O_APPEND : O_TRUNC;
+      fd = open(upload_path.c_str(), flags | O_CLOEXEC, 0600);
+      if (fd < 0) {
+        set_upload_error(
+          upload_id, std::string("Failed to open upload file: ") + strerror(errno), committed);
+        stream->Write(out);
+        return false;
+      }
+      if (config.verbose) {
+        struct stat st;
+        if (fstat(fd, &st) == 0) {
+          std::cout << "[gRPC] Upload file opened path=" << upload_path << " mode=" << std::oct
+                    << (st.st_mode & 0777) << std::dec << " uid=" << st.st_uid
+                    << " gid=" << st.st_gid << "\n";
+        } else {
+          std::cout << "[gRPC] Upload file opened path=" << upload_path
+                    << " fstat_failed err=" << strerror(errno) << "\n";
+        }
+        std::cout.flush();
+      }
+      if (resume) {
+        struct stat st;
+        if (stat(upload_path.c_str(), &st) == 0) { committed = static_cast<int64_t>(st.st_size); }
+      }
+      return true;
+    };
+
+    if (!use_memory) {
+      if (!open_upload_file(start.resume())) {
+        return Status(StatusCode::INTERNAL, "Failed to open upload file");
+      }
+    }
+
+    // Ack the start with the committed size (resume point).
+    out.Clear();
+    auto* ack = out.mutable_ack();
+    ack->set_upload_id(upload_id);
+    ack->set_committed_size(committed);
+    ack->set_max_message_bytes(max_message_bytes);
+    stream->Write(out);
+
+    // Read chunks until finish.
+    while (stream->Read(&in)) {
+      if (in.has_chunk()) {
+        const auto& ch = in.chunk();
+        if (ch.upload_id() != upload_id) {
+          set_upload_error(upload_id, "upload_id mismatch", committed);
+          stream->Write(out);
+          cleanup_file();
+          return Status(StatusCode::INVALID_ARGUMENT, "upload_id mismatch");
+        }
+        if (ch.offset() != committed) {
+          set_upload_error(upload_id, "Non-sequential chunk offset", committed);
+          stream->Write(out);
+          close(fd);
+          return Status(StatusCode::OUT_OF_RANGE, "Non-sequential chunk offset");
+        }
+
+        const std::string& data = ch.data();
+        if (!data.empty()) {
+          if (use_memory) {
+            // Switch to a file if the threshold is exceeded or an unknown size grows too large.
+            if (threshold_bytes >= 0 &&
+                committed + static_cast<int64_t>(data.size()) > threshold_bytes) {
+              if (config.verbose) {
+                std::cout << "[gRPC] Upload spill to disk upload_id=" << upload_id
+                          << " committed=" << committed << " chunk=" << data.size()
+                          << " threshold_bytes=" << threshold_bytes << "\n";
+                std::cout.flush();
+              }
+              if (!open_upload_file(false)) {
+                return Status(StatusCode::INTERNAL, "Failed to open upload file");
+              }
+              if (!mem_buffer.empty()) {
+                if (!write_to_pipe(fd, mem_buffer.data(), mem_buffer.size())) {
+                  set_upload_error(upload_id, "Failed to spill memory buffer to disk", committed);
+                  stream->Write(out);
+                  cleanup_file();
+                  return Status(StatusCode::INTERNAL, "Failed to spill buffer");
+                }
+                mem_buffer.clear();
+              }
+              use_memory = false;
+            }
+          }
+
+          if (use_memory) {
+            mem_buffer.insert(mem_buffer.end(), data.begin(), data.end());
+          } else {
+            if (!write_to_pipe(fd, data.data(), data.size())) {
+              set_upload_error(upload_id, "Failed to write chunk to disk", committed);
+              stream->Write(out);
+              cleanup_file();
+              return Status(StatusCode::INTERNAL, "Failed to write chunk");
+            }
+          }
+          committed += static_cast<int64_t>(data.size());
+        }
+
+        // Light progress logging for large uploads
+        if (config.verbose && (committed % (256LL * kMiB) < static_cast<int64_t>(data.size()))) {
+          std::cout << "[gRPC] Upload progress upload_id=" << upload_id
+                    << " committed=" << committed << " bytes\n";
+          std::cout.flush();
+        }
+
+        out.Clear();
+        auto* chunk_ack = out.mutable_ack();
+        chunk_ack->set_upload_id(upload_id);
+        chunk_ack->set_committed_size(committed);
+        chunk_ack->set_max_message_bytes(max_message_bytes);
+        stream->Write(out);
+        continue;
+      }
+
+      if (in.has_finish()) {
+        const auto& fin = in.finish();
+        if (fin.upload_id() != upload_id) {
+          set_upload_error(upload_id, "upload_id mismatch on finish", committed);
+          stream->Write(out);
+          cleanup_file();
+          return Status(StatusCode::INVALID_ARGUMENT, "upload_id mismatch on finish");
+        }
+        break;
+      }
+
+      set_upload_error(upload_id, "Unexpected message type during upload", committed);
+      stream->Write(out);
+      cleanup_file();
+      return Status(StatusCode::INVALID_ARGUMENT, "Unexpected message type");
+    }
+
+    if (fd >= 0) { close(fd); }
+
+    if (total_size_hint > 0 && committed != total_size_hint) {
+      set_upload_error(upload_id,
+                       std::string("Upload incomplete: committed size mismatch (max_message_mb=") +
+                         std::to_string(config.max_message_mb) + ")",
+                       committed);
+      stream->Write(out);
+      cleanup_file();
+      return Status(StatusCode::OUT_OF_RANGE, "Upload incomplete: committed size mismatch");
+    }
+
+    // Enqueue the job using the file-backed payload or the in-memory buffer
+    std::string job_id = generate_job_id();
+
+    int job_idx = -1;
+    for (size_t i = 0; i < MAX_JOBS; ++i) {
+      if (job_queue[i].ready.load()) { continue; }
+      bool expected_claimed = false;
+      if (job_queue[i].claimed.compare_exchange_strong(expected_claimed, true)) {
+        job_idx = static_cast<int>(i);
+        break;
+      }
+    }
+    if (job_idx < 0) {
+      set_upload_error(upload_id, "Job queue full", committed);
+      stream->Write(out);
+      cleanup_file();
+      return Status(StatusCode::RESOURCE_EXHAUSTED, "Job queue full");
+    }
+
+    copy_cstr(job_queue[job_idx].job_id, job_id);
+    job_queue[job_idx].problem_type = is_mip ? 1 : 0;
+    job_queue[job_idx].data_size = static_cast<uint64_t>(committed);
+    job_queue[job_idx].cancelled.store(false);
+    job_queue[job_idx].worker_index.store(-1);
+    job_queue[job_idx].data_sent.store(false);
+    job_queue[job_idx].shm_data_name[0] = '\0';
+    if (use_memory) {
+      std::lock_guard lock(pending_data_mutex);
+      pending_job_data[job_id] = std::move(mem_buffer);
+    } else {
+      std::lock_guard lock(pending_files_mutex);
+      pending_job_files[job_id] = PendingJobFile{upload_path, static_cast<uint64_t>(committed)};
+    }
+
+    if (config.verbose) {
+      std::cout << "[gRPC] UploadAndSubmit stored payload upload_id=" << upload_id
+                << " job_id=" << job_id << " bytes=" << committed
+                << " storage=" << (use_memory ? "memory" : "file") << "\n";
+      std::cout.flush();
+    }
+
+    {
+      std::lock_guard lock(tracker_mutex);
+      job_tracker[job_id] = JobInfo{
+        job_id, JobStatus::QUEUED, std::chrono::steady_clock::now(), {}, {}, is_mip, "", false};
+    }
+
+    job_queue[job_idx].claimed.store(false);
+    job_queue[job_idx].ready.store(true);
+
+    out.Clear();
+    out.mutable_submit()->set_job_id(job_id);
+    out.mutable_submit()->set_message("Job submitted successfully");
+    stream->Write(out);
+
+    if (config.verbose) {
+      std::cout << "[gRPC] UploadAndSubmit enqueued job: " << job_id
+                << " (type=" << (is_mip ? "MIP" : "LP") << ", bytes=" << committed << ")\n";
+      std::cout.flush();
+    }
+
+    return Status::OK;
+  }
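+
+  // The matching client loop for UploadAndSubmit: send UploadStart, stream
+  // sequential chunks while treating ack.committed_size as the confirmed resume
+  // point, then UploadFinish and read the final submit message. Illustrative
+  // only; the 4 MiB chunk size and the cuopt::remote::LP enum value are
+  // assumptions here.
+#if 0
+#include <algorithm>
+
+static bool example_upload(cuopt::remote::CuOptRemoteService::Stub& stub,
+                           const std::vector<uint8_t>& payload,
+                           bool is_mip,
+                           std::string& job_id_out)
+{
+  grpc::ClientContext ctx;
+  auto stream = stub.UploadAndSubmit(&ctx);
+
+  cuopt::remote::UploadJobRequest out;
+  cuopt::remote::UploadJobResponse in;
+
+  auto* start = out.mutable_start();
+  start->set_problem_type(is_mip ? cuopt::remote::MIP : cuopt::remote::LP);
+  start->set_total_size(static_cast<int64_t>(payload.size()));
+  if (!stream->Write(out) || !stream->Read(&in) || !in.has_ack()) { return false; }
+
+  const std::string upload_id = in.ack().upload_id();
+  int64_t offset = in.ack().committed_size();  // 0 for fresh uploads
+  const int64_t chunk_size = 4LL * 1024 * 1024;
+
+  while (offset < static_cast<int64_t>(payload.size())) {
+    int64_t n = std::min<int64_t>(chunk_size, static_cast<int64_t>(payload.size()) - offset);
+    out.Clear();
+    auto* chunk = out.mutable_chunk();
+    chunk->set_upload_id(upload_id);
+    chunk->set_offset(offset);
+    chunk->set_data(payload.data() + offset, static_cast<size_t>(n));
+    if (!stream->Write(out) || !stream->Read(&in) || !in.has_ack()) { return false; }
+    offset = in.ack().committed_size();  // server-confirmed progress
+  }
+
+  out.Clear();
+  out.mutable_finish()->set_upload_id(upload_id);
+  if (!stream->Write(out) || !stream->Read(&in) || !in.has_submit()) { return false; }
+  job_id_out = in.submit().job_id();
+  stream->WritesDone();
+  return stream->Finish().ok();
+}
+#endif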
-1 : (static_cast(config.max_message_mb) * kMiB); + response->set_max_message_bytes(max_bytes); + + int64_t result_size_bytes = 0; + if (status == JobStatus::COMPLETED) { + std::lock_guard lock(tracker_mutex); + auto it = job_tracker.find(job_id); + if (it != job_tracker.end()) { result_size_bytes = it->second.result_data.size(); } + } + response->set_result_size_bytes(result_size_bytes); + + return Status::OK; + } + + // GetResult - Retrieve completed job result + Status GetResult(ServerContext* context, + const cuopt::remote::GetResultRequest* request, + cuopt::remote::ResultResponse* response) override + { + std::string job_id = request->job_id(); + + std::lock_guard lock(tracker_mutex); + auto it = job_tracker.find(job_id); + + if (it == job_tracker.end()) { return Status(StatusCode::NOT_FOUND, "Job not found"); } + + if (it->second.status != JobStatus::COMPLETED && it->second.status != JobStatus::FAILED) { + return Status(StatusCode::UNAVAILABLE, "Result not ready"); + } + + if (it->second.status == JobStatus::FAILED) { + response->set_error_message(it->second.error_message); + return Status::OK; + } + + // Parse result data + if (it->second.is_mip) { + cuopt::remote::MIPSolution mip_solution; + if (!mip_solution.ParseFromArray(it->second.result_data.data(), + it->second.result_data.size())) { + return Status(StatusCode::INTERNAL, "Failed to parse MIP result"); + } + response->mutable_mip_solution()->CopyFrom(mip_solution); + } else { + cuopt::remote::LPSolution lp_solution; + if (!lp_solution.ParseFromArray(it->second.result_data.data(), + it->second.result_data.size())) { + return Status(StatusCode::INTERNAL, "Failed to parse LP result"); + } + response->mutable_lp_solution()->CopyFrom(lp_solution); + } + + if (config.verbose) { std::cout << "[gRPC] Result retrieved for job: " << job_id << std::endl; } + + return Status::OK; + } + + Status StreamResult(ServerContext* context, + const cuopt::remote::GetResultRequest* request, + ServerWriter* writer) override + { + (void)context; + std::string job_id = request->job_id(); + + std::vector bytes; + bool is_mip = false; + { + std::lock_guard lock(tracker_mutex); + auto it = job_tracker.find(job_id); + if (it == job_tracker.end()) { + cuopt::remote::ResultChunk chunk; + chunk.set_job_id(job_id); + chunk.set_offset(0); + chunk.set_done(true); + chunk.set_error_message("Job not found"); + writer->Write(chunk); + return Status::OK; + } + + if (it->second.status != JobStatus::COMPLETED) { + cuopt::remote::ResultChunk chunk; + chunk.set_job_id(job_id); + chunk.set_offset(0); + chunk.set_done(true); + chunk.set_error_message("Result not ready"); + writer->Write(chunk); + return Status::OK; + } + + bytes = + it->second.result_data; // copy; acceptable for now (can optimize with shared_ptr later) + is_mip = it->second.is_mip; + } + + const size_t chunk_size = kMiB; // 1 MiB + size_t offset = 0; + while (offset < bytes.size()) { + size_t n = bytes.size() - offset; + if (n > chunk_size) { n = chunk_size; } + + cuopt::remote::ResultChunk chunk; + chunk.set_job_id(job_id); + chunk.set_offset(static_cast(offset)); + chunk.set_data(reinterpret_cast(bytes.data() + offset), n); + chunk.set_done(false); + + if (!writer->Write(chunk)) { break; } // client cancelled + offset += n; + } + + cuopt::remote::ResultChunk done; + done.set_job_id(job_id); + done.set_offset(static_cast(bytes.size())); + done.set_done(true); + // encode type hint in error_message is ugly; leave empty (client can infer by trying parse or + // via status/is_mip call if needed). 
For now client will track is_mip separately. + done.set_error_message(""); + writer->Write(done); + + if (config.verbose) { + std::cout << "[gRPC] StreamResult finished job_id=" << job_id << " bytes=" << bytes.size() + << " is_mip=" << (is_mip ? 1 : 0) << "\n"; + std::cout.flush(); + } + + return Status::OK; + } + + // Other RPCs - stubs for now + Status DeleteResult(ServerContext* context, + const cuopt::remote::DeleteRequest* request, + cuopt::remote::DeleteResponse* response) override + { + std::string job_id = request->job_id(); + + { + std::lock_guard lock(tracker_mutex); + job_tracker.erase(job_id); + } + + delete_log_file(job_id); + + response->set_message("Result deleted"); + + if (config.verbose) { std::cout << "[gRPC] Result deleted for job: " << job_id << std::endl; } + + return Status::OK; + } + + Status CancelJob(ServerContext* context, + const cuopt::remote::CancelRequest* request, + cuopt::remote::CancelResponse* response) override + { + (void)context; + std::string job_id = request->job_id(); + + JobStatus internal_status = JobStatus::NOT_FOUND; + std::string message; + int rc = cancel_job(job_id, internal_status, message); + + // Map internal status -> protobuf JobStatus + cuopt::remote::JobStatus pb_status = cuopt::remote::NOT_FOUND; + switch (internal_status) { + case JobStatus::QUEUED: pb_status = cuopt::remote::QUEUED; break; + case JobStatus::PROCESSING: pb_status = cuopt::remote::PROCESSING; break; + case JobStatus::COMPLETED: pb_status = cuopt::remote::COMPLETED; break; + case JobStatus::FAILED: pb_status = cuopt::remote::FAILED; break; + case JobStatus::CANCELLED: pb_status = cuopt::remote::CANCELLED; break; + case JobStatus::NOT_FOUND: pb_status = cuopt::remote::NOT_FOUND; break; + } + + response->set_job_status(pb_status); + response->set_message(message); + + // Map rc -> ResponseStatus for backward-compatible response payload. + // (We still return gRPC Status::OK; clients should check fields.) 
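+    // Editorial client-side sketch (illustrative, not part of this patch):
+    // callers should branch on the payload fields rather than the transport
+    // status, since this handler returns gRPC OK in every case:
+    //
+    //   cuopt::remote::CancelRequest req;
+    //   req.set_job_id(job_id);
+    //   cuopt::remote::CancelResponse resp;
+    //   grpc::ClientContext ctx;
+    //   if (stub->CancelJob(&ctx, req, &resp).ok() &&
+    //       resp.status() == cuopt::remote::SUCCESS) { /* cancel succeeded */ }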
+ if (rc == 0 || rc == 3) { + response->set_status(cuopt::remote::SUCCESS); + } else if (rc == 1) { + response->set_status(cuopt::remote::ERROR_NOT_FOUND); + } else { + response->set_status(cuopt::remote::ERROR_INVALID_REQUEST); + } + + if (config.verbose) { + std::cout << "[gRPC] CancelJob job_id=" << job_id << " rc=" << rc + << " status=" << static_cast(pb_status) << " msg=" << message << "\n"; + std::cout.flush(); + } + + return Status::OK; + } + + Status WaitForResult(ServerContext* context, + const cuopt::remote::WaitRequest* request, + cuopt::remote::ResultResponse* response) override + { + (void)context; + const std::string job_id = request->job_id(); + + std::vector result_data; + std::string error_message; + bool ok = wait_for_result(job_id, result_data, error_message); + + if (!ok) { + response->set_status(cuopt::remote::ERROR_SOLVE_FAILED); + response->set_error_message(error_message); + return Status::OK; + } + + response->set_status(cuopt::remote::SUCCESS); + response->set_error_message(""); + + // Determine LP vs MIP from job tracker + bool is_mip = get_job_is_mip(job_id); + if (is_mip) { + cuopt::remote::MIPSolution mip_solution; + if (!mip_solution.ParseFromArray(result_data.data(), result_data.size())) { + response->set_status(cuopt::remote::ERROR_INTERNAL); + response->set_error_message("Failed to parse MIP result"); + return Status::OK; + } + response->mutable_mip_solution()->CopyFrom(mip_solution); + } else { + cuopt::remote::LPSolution lp_solution; + if (!lp_solution.ParseFromArray(result_data.data(), result_data.size())) { + response->set_status(cuopt::remote::ERROR_INTERNAL); + response->set_error_message("Failed to parse LP result"); + return Status::OK; + } + response->mutable_lp_solution()->CopyFrom(lp_solution); + } + + if (config.verbose) { + std::cout << "[gRPC] WaitForResult finished job_id=" << job_id + << " bytes=" << result_data.size() << " is_mip=" << (is_mip ? 1 : 0) << "\n"; + std::cout.flush(); + } + + return Status::OK; + } + + Status StreamLogs(ServerContext* context, + const cuopt::remote::StreamLogsRequest* request, + ServerWriter* writer) override + { + const std::string job_id = request->job_id(); + int64_t from_byte = request->from_byte(); + const std::string log_path = get_log_file_path(job_id); + + // Wait for the log file to appear (job might not have started yet). + int waited_ms = 0; + while (!context->IsCancelled()) { + struct stat st; + if (stat(log_path.c_str(), &st) == 0) { break; } + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + waited_ms += 50; + // Give up quickly if job doesn't exist. 
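+      // Editorial note: the 2 s check below is a liveness probe, not a hard
+      // deadline -- if the job exists but its log file has not appeared yet,
+      // waited_ms resets to zero and the wait continues until the client
+      // cancels the stream.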
+ if (waited_ms >= 2000) { + std::string msg; + JobStatus s = check_job_status(job_id, msg); + if (s == JobStatus::NOT_FOUND) { + cuopt::remote::LogMessage m; + m.set_line("Job not found"); + m.set_byte_offset(from_byte); + m.set_job_complete(true); + writer->Write(m); + return Status::OK; + } + // else job exists but log not yet created; keep waiting + waited_ms = 0; + } + } + + std::ifstream in(log_path, std::ios::in | std::ios::binary); + if (!in.is_open()) { + cuopt::remote::LogMessage m; + m.set_line("Failed to open log file"); + m.set_byte_offset(from_byte); + m.set_job_complete(true); + writer->Write(m); + return Status::OK; + } + + if (from_byte > 0) { in.seekg(from_byte, std::ios::beg); } + + int64_t current_offset = from_byte; + std::string line; + + while (!context->IsCancelled()) { + std::streampos before = in.tellg(); + if (before >= 0) { current_offset = static_cast(before); } + + if (std::getline(in, line)) { + // Account for the newline consumed by getline (1 byte) if present in file. + std::streampos after = in.tellg(); + int64_t next_offset = current_offset; + if (after >= 0) { + next_offset = static_cast(after); + } else { + // tellg can be -1 at EOF; approximate + next_offset = current_offset + static_cast(line.size()); + } + + cuopt::remote::LogMessage m; + m.set_line(line); + m.set_byte_offset(next_offset); + m.set_job_complete(false); + if (!writer->Write(m)) { break; } + continue; + } + + // No new line available: clear EOF and sleep briefly + if (in.eof()) { + in.clear(); + } else if (in.fail()) { + in.clear(); + } + + // If job is in terminal state and we've drained file, finish the stream. + std::string msg; + JobStatus s = check_job_status(job_id, msg); + if (s == JobStatus::COMPLETED || s == JobStatus::FAILED || s == JobStatus::CANCELLED) { + // One last attempt to read any remaining partial line + std::streampos before2 = in.tellg(); + if (before2 >= 0) { current_offset = static_cast(before2); } + if (std::getline(in, line)) { + std::streampos after2 = in.tellg(); + int64_t next_offset2 = current_offset + static_cast(line.size()); + if (after2 >= 0) { next_offset2 = static_cast(after2); } + cuopt::remote::LogMessage m; + m.set_line(line); + m.set_byte_offset(next_offset2); + m.set_job_complete(false); + writer->Write(m); + } + + cuopt::remote::LogMessage done; + done.set_line(""); + done.set_byte_offset(current_offset); + done.set_job_complete(true); + writer->Write(done); + return Status::OK; + } + + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } + + return Status::OK; + } + + Status GetIncumbents(ServerContext* context, + const cuopt::remote::IncumbentRequest* request, + cuopt::remote::IncumbentResponse* response) override + { + (void)context; + const std::string job_id = request->job_id(); + int64_t from_index = request->from_index(); + int32_t max_count = request->max_count(); + + if (from_index < 0) { from_index = 0; } + + std::lock_guard lock(tracker_mutex); + auto it = job_tracker.find(job_id); + if (it == job_tracker.end()) { return Status(StatusCode::NOT_FOUND, "Job not found"); } + + const auto& incumbents = it->second.incumbents; + int64_t available = static_cast(incumbents.size()); + if (from_index > available) { from_index = available; } + + int64_t count = available - from_index; + if (max_count > 0 && count > max_count) { count = max_count; } + + for (int64_t i = 0; i < count; ++i) { + const auto& inc = incumbents[static_cast(from_index + i)]; + auto* out = response->add_incumbents(); + out->set_index(from_index + i); + 
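+      // Indices are absolute positions in the job's incumbent history, so a
+      // client can resume polling from next_index without gaps or duplicates.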
out->set_objective(inc.objective); + for (double v : inc.assignment) { + out->add_assignment(v); + } + out->set_job_id(job_id); + } + + response->set_next_index(available); + bool done = + (it->second.status == JobStatus::COMPLETED || it->second.status == JobStatus::FAILED || + it->second.status == JobStatus::CANCELLED); + response->set_job_complete(done); + if (config.verbose) { + std::cout << "[gRPC] GetIncumbents job_id=" << job_id << " from=" << from_index + << " returned=" << response->incumbents_size() << " next=" << available + << " done=" << (done ? 1 : 0) << "\n"; + std::cout.flush(); + } + return Status::OK; + } +}; + +// ============================================================================ +// Main +// ============================================================================ + +void print_usage(const char* prog) +{ + std::cout + << "Usage: " << prog << " [options]\n" + << "Options:\n" + << " -p, --port PORT Listen port (default: 8765)\n" + << " -w, --workers NUM Number of worker processes (default: 1)\n" + << " --use-shm Use per-job shared memory for payload transfer (default: pipes)\n" + << " --max-message-mb N gRPC max send/recv message size in MiB (default: 256, " + "0=unlimited)\n" + << " --tls Enable TLS (requires --tls-cert and --tls-key)\n" + << " --tls-cert PATH Path to PEM-encoded server certificate\n" + << " --tls-key PATH Path to PEM-encoded server private key\n" + << " --tls-root PATH Path to PEM root certs for client verification\n" + << " --require-client-cert Require and verify client certs (mTLS)\n" + << " --log-to-console Enable solver log output to console (default: off)\n" + << " -q, --quiet Reduce verbosity\n" + << " -h, --help Show this help\n"; +} + +int main(int argc, char** argv) +{ + std::cerr << "[DEBUG] Starting cuopt_grpc_server main()" << std::endl; + std::cerr.flush(); + + // Parse arguments + for (int i = 1; i < argc; i++) { + std::string arg = argv[i]; + if (arg == "-p" || arg == "--port") { + if (i + 1 < argc) { config.port = std::stoi(argv[++i]); } + } else if (arg == "-w" || arg == "--workers") { + if (i + 1 < argc) { config.num_workers = std::stoi(argv[++i]); } + } else if (arg == "--use-shm") { + config.use_pipes = false; + } else if (arg == "--max-message-mb") { + if (i + 1 < argc) { config.max_message_mb = std::stoi(argv[++i]); } + } else if (arg == "--tls") { + config.enable_tls = true; + } else if (arg == "--tls-cert") { + if (i + 1 < argc) { config.tls_cert_path = argv[++i]; } + } else if (arg == "--tls-key") { + if (i + 1 < argc) { config.tls_key_path = argv[++i]; } + } else if (arg == "--tls-root") { + if (i + 1 < argc) { config.tls_root_path = argv[++i]; } + } else if (arg == "--require-client-cert") { + config.require_client = true; + } else if (arg == "--log-to-console") { + config.log_to_console = true; + } else if (arg == "-q" || arg == "--quiet") { + config.verbose = false; + } else if (arg == "-h" || arg == "--help") { + print_usage(argv[0]); + return 0; + } + } + + std::cerr << "[DEBUG] After argument parsing" << std::endl; + std::cerr.flush(); + + std::cout << "cuOpt gRPC Remote Solve Server\n" + << "==============================\n" + << "Port: " << config.port << "\n" + << "Workers: " << config.num_workers << "\n" + << std::endl; + std::cout.flush(); + + std::cerr << "[DEBUG] After printing header" << std::endl; + std::cerr.flush(); + + // Setup signal handling + signal(SIGINT, signal_handler); + signal(SIGTERM, signal_handler); + + // Create log directory + ensure_log_dir_exists(); + ensure_upload_dir_exists(); + 
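+  // Editorial note: initialization order matters below -- the shared-memory
+  // queues must exist before spawn_workers() maps them, and the gRPC listener
+  // comes up only after the worker pool is in place.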
+  std::cerr << "[DEBUG] About to initialize shared memory" << std::endl;
+  std::cerr.flush();
+
+  // Avoid stale shared-memory state from prior crashed/force-killed runs.
+  // Old worker processes may still have the old segments mapped; unlinking
+  // here ensures this server creates fresh segments with clean state.
+  shm_unlink(SHM_JOB_QUEUE);
+  shm_unlink(SHM_RESULT_QUEUE);
+  shm_unlink(SHM_CONTROL);
+
+  // Create and map the shared-memory segments
+  int shm_fd = shm_open(SHM_JOB_QUEUE, O_CREAT | O_RDWR, 0600);
+  if (shm_fd < 0) {
+    std::cerr << "[Server] Failed to create shared memory for job queue: " << strerror(errno)
+              << "\n";
+    return 1;
+  }
+  std::cerr << "[DEBUG] shm_open succeeded for job queue" << std::endl;
+  std::cerr.flush();
+
+  if (ftruncate(shm_fd, sizeof(JobQueueEntry) * MAX_JOBS) < 0) {
+    std::cerr << "[Server] Failed to ftruncate job queue: " << strerror(errno) << "\n";
+    close(shm_fd);
+    return 1;
+  }
+  job_queue = static_cast<JobQueueEntry*>(
+    mmap(nullptr, sizeof(JobQueueEntry) * MAX_JOBS, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0));
+  close(shm_fd);
+
+  if (job_queue == MAP_FAILED) {
+    std::cerr << "[Server] Failed to mmap job queue: " << strerror(errno) << "\n";
+    return 1;
+  }
+  std::cerr << "[DEBUG] job_queue mmap succeeded" << std::endl;
+  std::cerr.flush();
+
+  int result_shm_fd = shm_open(SHM_RESULT_QUEUE, O_CREAT | O_RDWR, 0600);
+  if (result_shm_fd < 0) {
+    std::cerr << "[Server] Failed to create result queue shm: " << strerror(errno) << "\n";
+    return 1;
+  }
+  if (ftruncate(result_shm_fd, sizeof(ResultQueueEntry) * MAX_RESULTS) < 0) {
+    std::cerr << "[Server] Failed to ftruncate result queue: " << strerror(errno) << "\n";
+    close(result_shm_fd);
+    return 1;
+  }
+  result_queue = static_cast<ResultQueueEntry*>(mmap(nullptr,
+                                                     sizeof(ResultQueueEntry) * MAX_RESULTS,
+                                                     PROT_READ | PROT_WRITE,
+                                                     MAP_SHARED,
+                                                     result_shm_fd,
+                                                     0));
+  close(result_shm_fd);
+  if (result_queue == MAP_FAILED) {
+    std::cerr << "[Server] Failed to mmap result queue: " << strerror(errno) << "\n";
+    return 1;
+  }
+  std::cerr << "[DEBUG] result_queue mmap succeeded" << std::endl;
+  std::cerr.flush();
+
+  int ctrl_shm_fd = shm_open(SHM_CONTROL, O_CREAT | O_RDWR, 0600);
+  if (ctrl_shm_fd < 0) {
+    std::cerr << "[Server] Failed to create control shm: " << strerror(errno) << "\n";
+    return 1;
+  }
+  if (ftruncate(ctrl_shm_fd, sizeof(SharedMemoryControl)) < 0) {
+    std::cerr << "[Server] Failed to ftruncate control: " << strerror(errno) << "\n";
+    close(ctrl_shm_fd);
+    return 1;
+  }
+  shm_ctrl = static_cast<SharedMemoryControl*>(
+    mmap(nullptr, sizeof(SharedMemoryControl), PROT_READ | PROT_WRITE, MAP_SHARED, ctrl_shm_fd, 0));
+  close(ctrl_shm_fd);
+  if (shm_ctrl == MAP_FAILED) {
+    std::cerr << "[Server] Failed to mmap control: " << strerror(errno) << "\n";
+    return 1;
+  }
+  std::cerr << "[DEBUG] shm_ctrl mmap succeeded" << std::endl;
+  std::cerr.flush();
+
+  // Initialize queue entries to a clean state
+  for (size_t i = 0; i < MAX_JOBS; ++i) {
+    memset(&job_queue[i], 0, sizeof(JobQueueEntry));
+    job_queue[i].ready.store(false);
+    job_queue[i].claimed.store(false);
+    job_queue[i].cancelled.store(false);
+    job_queue[i].worker_index.store(-1);
+  }
+
+  for (size_t i = 0; i < MAX_RESULTS; ++i) {
+    memset(&result_queue[i], 0, sizeof(ResultQueueEntry));
+    result_queue[i].ready.store(false);
+    result_queue[i].retrieved.store(false);
+  }
+
+  shm_ctrl->shutdown_requested.store(false);
+  shm_ctrl->active_workers.store(0);
+
+  std::cerr << "[DEBUG] About to spawn workers" << std::endl;
+  std::cerr.flush();
+
+  // Spawn worker processes
+  spawn_workers();
+
+  std::cerr << "[DEBUG]
spawn_workers() returned, " << worker_pids.size() << " workers spawned"
+            << std::endl;
+  std::cerr.flush();
+
+  // Start result retrieval thread
+  std::thread result_thread(result_retrieval_thread);
+
+  // Start incumbent retrieval thread
+  std::thread incumbent_thread(incumbent_retrieval_thread);
+
+  // Start worker monitor thread
+  std::thread monitor_thread(worker_monitor_thread);
+
+  // Start gRPC server
+  std::string server_address = "0.0.0.0:" + std::to_string(config.port);
+  CuOptRemoteServiceImpl service;
+
+  ServerBuilder builder;
+  std::shared_ptr<grpc::ServerCredentials> creds;
+  if (config.enable_tls) {
+    if (config.tls_cert_path.empty() || config.tls_key_path.empty()) {
+      std::cerr << "[Server] TLS enabled but --tls-cert/--tls-key not provided\n";
+      return 1;
+    }
+    grpc::SslServerCredentialsOptions ssl_opts;
+    grpc::SslServerCredentialsOptions::PemKeyCertPair key_cert;
+    key_cert.cert_chain  = read_file_to_string(config.tls_cert_path);
+    key_cert.private_key = read_file_to_string(config.tls_key_path);
+    if (key_cert.cert_chain.empty() || key_cert.private_key.empty()) {
+      std::cerr << "[Server] Failed to read TLS cert/key files\n";
+      return 1;
+    }
+    ssl_opts.pem_key_cert_pairs.push_back(key_cert);
+
+    if (!config.tls_root_path.empty()) {
+      ssl_opts.pem_root_certs = read_file_to_string(config.tls_root_path);
+      if (ssl_opts.pem_root_certs.empty()) {
+        std::cerr << "[Server] Failed to read TLS root cert file\n";
+        return 1;
+      }
+    }
+
+    if (config.require_client) {
+      if (ssl_opts.pem_root_certs.empty()) {
+        std::cerr << "[Server] --require-client-cert requires --tls-root\n";
+        return 1;
+      }
+      ssl_opts.client_certificate_request =
+        GRPC_SSL_REQUEST_AND_REQUIRE_CLIENT_CERTIFICATE_AND_VERIFY;
+    } else if (!ssl_opts.pem_root_certs.empty()) {
+      ssl_opts.client_certificate_request = GRPC_SSL_REQUEST_CLIENT_CERTIFICATE_AND_VERIFY;
+    }
+
+    creds = grpc::SslServerCredentials(ssl_opts);
+  } else {
+    creds = grpc::InsecureServerCredentials();
+  }
+
+  builder.AddListeningPort(server_address, creds);
+  builder.RegisterService(&service);
+  // Allow large LP/MIP payloads (e.g. large MPS problems).
+  // Note: gRPC uses -1 to mean unlimited.
+  const int64_t max_bytes =
+    (config.max_message_mb <= 0) ? -1 : (static_cast<int64_t>(config.max_message_mb) * kMiB);
+  const int channel_limit =
+    (max_bytes <= 0)
+      ? -1
+      : static_cast<int>(std::min<int64_t>(max_bytes, std::numeric_limits<int>::max()));
+  builder.SetMaxReceiveMessageSize(channel_limit);
+  builder.SetMaxSendMessageSize(channel_limit);
+
+  std::unique_ptr<Server> server(builder.BuildAndStart());
+  std::cout << "[gRPC Server] Listening on " << server_address << std::endl;
+  std::cout << "[gRPC Server] Workers: " << config.num_workers << std::endl;
+  std::cout << "[gRPC Server] Max message MiB: " << config.max_message_mb << std::endl;
+  std::cout << "[gRPC Server] Press Ctrl+C to shutdown" << std::endl;
+
+  // Wait for shutdown signal. We can't rely on the signal handler to break server->Wait(),
+  // so use a helper thread that calls Shutdown() when keep_running flips to false.
+  std::thread shutdown_thread([&server]() {
+    while (keep_running.load()) {
+      std::this_thread::sleep_for(std::chrono::milliseconds(100));
+    }
+    if (server) { server->Shutdown(); }
+  });
+
+  server->Wait();
+  if (shutdown_thread.joinable()) shutdown_thread.join();
+
+  // Cleanup
+  std::cout << "\n[Server] Shutting down..."
<< std::endl; + keep_running = false; + shm_ctrl->shutdown_requested = true; + result_cv.notify_all(); + + if (result_thread.joinable()) result_thread.join(); + if (incumbent_thread.joinable()) incumbent_thread.join(); + if (monitor_thread.joinable()) monitor_thread.join(); + + wait_for_workers(); + cleanup_shared_memory(); + + std::cout << "[Server] Shutdown complete" << std::endl; + return 0; +} + +#else // !CUOPT_ENABLE_GRPC + +#include + +int main() +{ + std::cerr << "Error: cuopt_grpc_server requires gRPC support.\n" + << "Rebuild with gRPC enabled (CUOPT_ENABLE_GRPC=ON)" << std::endl; + return 1; +} + +#endif // CUOPT_ENABLE_GRPC diff --git a/cpp/include/cuopt/linear_programming/data_model_view.hpp b/cpp/include/cuopt/linear_programming/data_model_view.hpp new file mode 100644 index 000000000..296097d8b --- /dev/null +++ b/cpp/include/cuopt/linear_programming/data_model_view.hpp @@ -0,0 +1,58 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +/** + * @file data_model_view.hpp + * @brief Provides data_model_view_t in the cuopt::linear_programming namespace. + * + * This header provides access to the data_model_view_t class, a non-owning view + * over LP/MIP problem data. The view uses span to hold pointers that can + * reference either host or device memory, making it suitable for both local + * GPU-based solves and remote CPU-based solves. + * + * The canonical implementation lives in cuopt::mps_parser for historical reasons + * and to maintain mps_parser as a standalone library. This header provides + * convenient aliases in the cuopt::linear_programming namespace. + */ + +#include +#include + +namespace cuopt::linear_programming { + +/** + * @brief Non-owning span type that can point to either host or device memory. + * + * This is an alias to the span type defined in mps_parser. The span holds + * a pointer and size, but does not own the underlying memory. + * + * @tparam T Element type + */ +template +using span = cuopt::mps_parser::span; + +/** + * @brief Non-owning view of LP/MIP problem data. + * + * This is an alias to the data_model_view_t defined in mps_parser. + * The view stores problem data (constraint matrix, bounds, objective, etc.) + * as span members, which can point to either host or device memory. + * + * Key features for remote solve support: + * - Non-owning: does not allocate or free memory + * - Memory-agnostic: spans can point to host OR device memory + * - Serializable: host data can be directly serialized for remote solve + * + * @tparam i_t Integer type for indices (typically int) + * @tparam f_t Floating point type for values (typically float or double) + */ +template +using data_model_view_t = cuopt::mps_parser::data_model_view_t; + +} // namespace cuopt::linear_programming diff --git a/cpp/include/cuopt/linear_programming/mip/solver_solution.hpp b/cpp/include/cuopt/linear_programming/mip/solver_solution.hpp index 6ff8d324b..4d896cf7d 100644 --- a/cpp/include/cuopt/linear_programming/mip/solver_solution.hpp +++ b/cpp/include/cuopt/linear_programming/mip/solver_solution.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -51,10 +52,38 @@ class mip_solution_t : public base_solution_t { rmm::cuda_stream_view stream_view); mip_solution_t(const cuopt::logic_error& error_status, rmm::cuda_stream_view stream_view); + // CPU-only constructors for remote solve + mip_solution_t(std::vector solution, + std::vector var_names, + f_t objective, + f_t mip_gap, + mip_termination_status_t termination_status, + f_t max_constraint_violation, + f_t max_int_violation, + f_t max_variable_bound_violation, + solver_stats_t stats); + + mip_solution_t(mip_termination_status_t termination_status, solver_stats_t stats); + mip_solution_t(const cuopt::logic_error& error_status); + bool is_mip() const override { return true; } + + /** + * @brief Check if solution data is stored in device (GPU) memory + * @return true if data is in GPU memory, false if in CPU memory + */ + bool is_device_memory() const; + const rmm::device_uvector& get_solution() const; rmm::device_uvector& get_solution(); + /** + * @brief Returns the solution in host (CPU) memory. + * Only valid when is_device_memory() returns false. + */ + std::vector& get_solution_host(); + const std::vector& get_solution_host() const; + f_t get_objective_value() const; f_t get_mip_gap() const; f_t get_solution_bound() const; @@ -75,8 +104,105 @@ class mip_solution_t : public base_solution_t { void write_to_sol_file(std::string_view filename, rmm::cuda_stream_view stream_view) const; void log_summary() const; + //============================================================================ + // Setters for remote solve deserialization + //============================================================================ + + /** + * @brief Set the solution in host memory + * @param solution The solution vector + */ + void set_solution_host(std::vector solution); + + /** + * @brief Set the objective value + */ + void set_objective(f_t value); + + /** + * @brief Set the MIP gap + */ + void set_mip_gap(f_t value); + + /** + * @brief Set the solution bound + */ + void set_solution_bound(f_t value); + + /** + * @brief Set total solve time + */ + void set_total_solve_time(double value); + + /** + * @brief Set presolve time + */ + void set_presolve_time(double value); + + /** + * @brief Set max constraint violation + */ + void set_max_constraint_violation(f_t value); + + /** + * @brief Set max integer violation + */ + void set_max_int_violation(f_t value); + + /** + * @brief Set max variable bound violation + */ + void set_max_variable_bound_violation(f_t value); + + /** + * @brief Set number of nodes + */ + void set_nodes(i_t value); + + /** + * @brief Set number of simplex iterations + */ + void set_simplex_iterations(i_t value); + + /** + * @brief Get error string + */ + std::string get_error_string() const; + + /** + * @brief Get number of nodes + */ + i_t get_nodes() const; + + /** + * @brief Get number of simplex iterations + */ + i_t get_simplex_iterations() const; + + /** + * @brief Copy solution data from GPU to CPU memory. + * + * After calling this method, is_device_memory() will return false and + * the solution can be accessed via get_solution_host(). + * This is useful for remote solve scenarios where serialization requires + * CPU-accessible data. + * + * If the solution is already in CPU memory, this is a no-op. 
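+   *
+   * Editorial sketch (illustrative, not part of this patch):
+   *   auto sol = solve_mip(handle_ptr, view, settings);
+   *   sol.to_host(handle_ptr->get_stream());      // copy results to CPU
+   *   const auto& x = sol.get_solution_host();    // now serializable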
+ * + * @param stream_view The CUDA stream to use for the copy + */ + void to_host(rmm::cuda_stream_view stream_view); + private: - rmm::device_uvector solution_; + // GPU (device) storage - populated for local GPU solves + std::unique_ptr> solution_; + + // CPU (host) storage - populated for remote solves + std::unique_ptr> solution_host_; + + // Flag indicating where solution data is stored + bool is_device_memory_ = true; + std::vector var_names_; f_t objective_; f_t mip_gap_; diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp index c5fe96ef1..9d7e34831 100644 --- a/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp +++ b/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -168,6 +169,42 @@ class optimization_problem_solution_t : public base_solution_t { const raft::handle_t* handler_ptr, bool deep_copy); + /** + * @brief Construct an optimization problem solution with CPU (host) memory storage. + * Used for remote solve scenarios where no GPU is available. + * + * @param[in] primal_solution The primal solution in host memory + * @param[in] dual_solution The dual solution in host memory + * @param[in] reduced_cost The reduced cost in host memory + * @param[in] objective_name The objective name + * @param[in] var_names The variables names + * @param[in] row_names The rows name + * @param[in] termination_stats The termination statistics + * @param[in] termination_status The termination reason + */ + optimization_problem_solution_t(std::vector primal_solution, + std::vector dual_solution, + std::vector reduced_cost, + const std::string objective_name, + const std::vector& var_names, + const std::vector& row_names, + additional_termination_information_t& termination_stats, + pdlp_termination_status_t termination_status); + + /** + * @brief Construct an empty solution for CPU-only scenarios (e.g., remote solve error) + * + * @param[in] termination_status Reason for termination + */ + optimization_problem_solution_t(pdlp_termination_status_t termination_status); + + /** + * @brief Construct an error solution for CPU-only scenarios + * + * @param[in] error_status The error object + */ + optimization_problem_solution_t(cuopt::logic_error error_status); + /** * @brief Set the solve time in seconds * @@ -234,6 +271,40 @@ class optimization_problem_solution_t : public base_solution_t { */ rmm::device_uvector& get_reduced_cost(); + /** + * @brief Check if solution data is stored in device (GPU) memory + * + * @return true if data is in GPU memory, false if in CPU memory + */ + bool is_device_memory() const; + + /** + * @brief Returns the primal solution in host (CPU) memory. + * Only valid when is_device_memory() returns false. + * + * @return std::vector& The host memory container for the primal solution. + */ + std::vector& get_primal_solution_host(); + const std::vector& get_primal_solution_host() const; + + /** + * @brief Returns the dual solution in host (CPU) memory. + * Only valid when is_device_memory() returns false. + * + * @return std::vector& The host memory container for the dual solution. 
+ */ + std::vector& get_dual_solution_host(); + const std::vector& get_dual_solution_host() const; + + /** + * @brief Returns the reduced cost in host (CPU) memory. + * Only valid when is_device_memory() returns false. + * + * @return std::vector& The host memory container for the reduced cost. + */ + std::vector& get_reduced_cost_host(); + const std::vector& get_reduced_cost_host() const; + /** * @brief Get termination reason * @return Termination reason @@ -255,6 +326,128 @@ class optimization_problem_solution_t : public base_solution_t { pdlp_warm_start_data_t& get_pdlp_warm_start_data(); + //============================================================================ + // Setters for host solution data (used by remote solve deserialization) + //============================================================================ + + /** + * @brief Set the primal solution in host memory + * @param solution The primal solution vector + */ + void set_primal_solution_host(std::vector solution); + + /** + * @brief Set the dual solution in host memory + * @param solution The dual solution vector + */ + void set_dual_solution_host(std::vector solution); + + /** + * @brief Set the reduced cost in host memory + * @param reduced_cost The reduced cost vector + */ + void set_reduced_cost_host(std::vector reduced_cost); + + /** + * @brief Set the termination statistics + * @param stats The termination statistics + */ + void set_termination_stats(const additional_termination_information_t& stats); + + //============================================================================ + // Getters for termination statistics + //============================================================================ + + /** + * @brief Get the L2 primal residual + * @return L2 primal residual + */ + f_t get_l2_primal_residual() const; + + /** + * @brief Get the L2 dual residual + * @return L2 dual residual + */ + f_t get_l2_dual_residual() const; + + /** + * @brief Get the primal objective value + * @return Primal objective + */ + f_t get_primal_objective() const; + + /** + * @brief Get the dual objective value + * @return Dual objective + */ + f_t get_dual_objective() const; + + /** + * @brief Get the duality gap + * @return Gap + */ + f_t get_gap() const; + + /** + * @brief Get number of iterations + * @return Number of iterations + */ + i_t get_nb_iterations() const; + + /** + * @brief Check if solved by PDLP + * @return true if solved by PDLP + */ + bool get_solved_by_pdlp() const; + + /** + * @brief Set L2 primal residual + * @param value The value + */ + void set_l2_primal_residual(f_t value); + + /** + * @brief Set L2 dual residual + * @param value The value + */ + void set_l2_dual_residual(f_t value); + + /** + * @brief Set primal objective + * @param value The value + */ + void set_primal_objective(f_t value); + + /** + * @brief Set dual objective + * @param value The value + */ + void set_dual_objective(f_t value); + + /** + * @brief Set gap + * @param value The value + */ + void set_gap(f_t value); + + /** + * @brief Set number of iterations + * @param value The value + */ + void set_nb_iterations(i_t value); + + /** + * @brief Set solved by PDLP flag + * @param value The value + */ + void set_solved_by_pdlp(bool value); + + /** + * @brief Get error string + * @return Error message string + */ + std::string get_error_string() const; + /** * @brief Writes the solver_solution object as a JSON object to the 'filename' file using * 'stream_view' to transfer the data from device to host before it is written to the file. 
@@ -282,12 +475,39 @@ class optimization_problem_solution_t : public base_solution_t { void copy_from(const raft::handle_t* handle_ptr, const optimization_problem_solution_t& other); + /** + * @brief Copy solution data from GPU to CPU memory. + * + * After calling this method, is_device_memory() will return false and + * the solution can be accessed via get_primal_solution_host(), etc. + * This is useful for remote solve scenarios where serialization requires + * CPU-accessible data. + * + * If the solution is already in CPU memory, this is a no-op. + * + * @param stream_view The CUDA stream to use for the copy + */ + void to_host(rmm::cuda_stream_view stream_view); + private: void write_additional_termination_statistics_to_file(std::ofstream& myfile); - rmm::device_uvector primal_solution_; - rmm::device_uvector dual_solution_; - rmm::device_uvector reduced_cost_; + // GPU (device) storage - populated for local GPU solves + std::unique_ptr> primal_solution_; + std::unique_ptr> dual_solution_; + std::unique_ptr> reduced_cost_; + + // CPU (host) storage - populated for remote solves + std::unique_ptr> primal_solution_host_; + std::unique_ptr> dual_solution_host_; + std::unique_ptr> reduced_cost_host_; + + // Flag indicating where solution data is stored + bool is_device_memory_ = true; + + // Flag indicating if solved by PDLP (vs dual simplex) + bool solved_by_pdlp_ = true; + pdlp_warm_start_data_t pdlp_warm_start_data_; pdlp_termination_status_t termination_status_; diff --git a/cpp/include/cuopt/linear_programming/solve.hpp b/cpp/include/cuopt/linear_programming/solve.hpp index 364fee30a..11303309a 100644 --- a/cpp/include/cuopt/linear_programming/solve.hpp +++ b/cpp/include/cuopt/linear_programming/solve.hpp @@ -1,12 +1,13 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ #pragma once +#include #include #include #include @@ -14,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -107,4 +109,74 @@ optimization_problem_t mps_data_model_to_optimization_problem( raft::handle_t const* handle_ptr, const cuopt::mps_parser::mps_data_model_t& data_model); +/** + * @brief Convert a data_model_view_t to an optimization_problem_t. + * + * This function copies data from the view (which points to GPU memory) + * into an owning optimization_problem_t. + * + * @tparam i_t Data type of indexes + * @tparam f_t Data type of the variables and their weights in the equations + * + * @param[in] handle_ptr A raft::handle_t object with its corresponding CUDA stream. + * @param[in] view A data_model_view_t object with spans pointing to GPU memory + * @return optimization_problem_t owning container for the problem + */ +template +optimization_problem_t data_model_view_to_optimization_problem( + raft::handle_t const* handle_ptr, const data_model_view_t& view); + +/** + * @brief Linear programming solve function using data_model_view_t. + * + * This overload accepts a non-owning data_model_view_t which can point to either + * GPU memory (for local solves) or CPU memory (for remote solves). + * The solve path is automatically determined by checking the CUOPT_REMOTE_HOST + * and CUOPT_REMOTE_PORT environment variables. + * + * @note Both primal and dual solutions are zero-initialized. 
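+ *
+ * Editorial sketch (illustrative only; assumes host-resident CSR arrays
+ * vals/cols/offs, bounds b, row types, and objective c):
+ *   cuopt::linear_programming::data_model_view_t<int, double> view;
+ *   view.set_csr_constraint_matrix(vals, nnz, cols, nnz, offs, m + 1);
+ *   view.set_constraint_bounds(b, m);
+ *   view.set_row_types(types, m);
+ *   view.set_objective_coefficients(c, n);
+ *   auto sol = solve_lp(handle_ptr, view);  // remote if CUOPT_REMOTE_* is set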
+ * + * @tparam i_t Data type of indexes + * @tparam f_t Data type of the variables and their weights in the equations + * + * @param[in] handle_ptr A raft::handle_t object with its corresponding CUDA stream. + * @param[in] view A data_model_view_t with spans pointing to problem data + * @param[in] settings A pdlp_solver_settings_t object with the settings for the PDLP + * solver. + * @param[in] problem_checking If true, the problem is checked for consistency. + * @param[in] use_pdlp_solver_mode If true, the PDLP hyperparameters coming from the + * pdlp_solver_mode are used. + * @return optimization_problem_solution_t owning container for the solver solution + */ +template +optimization_problem_solution_t solve_lp( + raft::handle_t const* handle_ptr, + const data_model_view_t& view, + pdlp_solver_settings_t const& settings = pdlp_solver_settings_t{}, + bool problem_checking = true, + bool use_pdlp_solver_mode = true); + +/** + * @brief Mixed integer programming solve function using data_model_view_t. + * + * This overload accepts a non-owning data_model_view_t which can point to either + * GPU memory (for local solves) or CPU memory (for remote solves). + * The solve path is automatically determined by checking the CUOPT_REMOTE_HOST + * and CUOPT_REMOTE_PORT environment variables. + * + * @tparam i_t Data type of indexes + * @tparam f_t Data type of the variables and their weights in the equations + * + * @param[in] handle_ptr A raft::handle_t object with its corresponding CUDA stream. + * @param[in] view A data_model_view_t with spans pointing to problem data + * @param[in] settings A mip_solver_settings_t object with the settings for the MIP + * solver. + * @return mip_solution_t owning container for the solver solution + */ +template +mip_solution_t solve_mip( + raft::handle_t const* handle_ptr, + const data_model_view_t& view, + mip_solver_settings_t const& settings = mip_solver_settings_t{}); + } // namespace cuopt::linear_programming diff --git a/cpp/include/cuopt/linear_programming/utilities/callbacks_implems.hpp b/cpp/include/cuopt/linear_programming/utilities/callbacks_implems.hpp index f0cd74c24..d73bb34bc 100644 --- a/cpp/include/cuopt/linear_programming/utilities/callbacks_implems.hpp +++ b/cpp/include/cuopt/linear_programming/utilities/callbacks_implems.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -40,8 +40,10 @@ class default_get_solution_callback_t : public get_solution_callback_t { void get_solution(void* data, void* objective_value) override { - PyObject* numba_matrix = get_numba_matrix(data, n_variables); - PyObject* numpy_array = get_numba_matrix(objective_value, 1); + PyObject* numba_matrix = + data_on_device() ? get_numba_matrix(data, n_variables) : get_numpy_array(data, n_variables); + PyObject* numpy_array = + data_on_device() ? 
get_numba_matrix(objective_value, 1) : get_numpy_array(objective_value, 1); PyObject* res = PyObject_CallMethod(this->pyCallbackClass, "get_solution", "(OO)", numba_matrix, numpy_array); Py_DECREF(numba_matrix); @@ -77,8 +79,10 @@ class default_set_solution_callback_t : public set_solution_callback_t { void set_solution(void* data, void* objective_value) override { - PyObject* numba_matrix = get_numba_matrix(data, n_variables); - PyObject* numpy_array = get_numba_matrix(objective_value, 1); + PyObject* numba_matrix = + data_on_device() ? get_numba_matrix(data, n_variables) : get_numpy_array(data, n_variables); + PyObject* numpy_array = + data_on_device() ? get_numba_matrix(objective_value, 1) : get_numpy_array(objective_value, 1); PyObject* res = PyObject_CallMethod(this->pyCallbackClass, "set_solution", "(OO)", numba_matrix, numpy_array); Py_DECREF(numba_matrix); diff --git a/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp b/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp index e1a75747d..abe49a2be 100644 --- a/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp +++ b/cpp/include/cuopt/linear_programming/utilities/cython_solve.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -25,9 +25,19 @@ namespace cython { // aggregate for call_solve() return type // to be exposed to cython: struct linear_programming_ret_t { + // GPU (device) storage - populated for local GPU solves std::unique_ptr primal_solution_; std::unique_ptr dual_solution_; std::unique_ptr reduced_cost_; + + // CPU (host) storage - populated for remote solves + std::vector primal_solution_host_; + std::vector dual_solution_host_; + std::vector reduced_cost_host_; + + // Flag indicating where solution data is stored + bool is_device_memory_ = true; + /* -- PDLP Warm Start Data -- */ std::unique_ptr current_primal_solution_; std::unique_ptr current_dual_solution_; @@ -64,8 +74,15 @@ struct linear_programming_ret_t { }; struct mip_ret_t { + // GPU (device) storage - populated for local GPU solves std::unique_ptr solution_; + // CPU (host) storage - populated for remote solves + std::vector solution_host_; + + // Flag indicating where solution data is stored + bool is_device_memory_ = true; + linear_programming::mip_termination_status_t termination_status_; error_type_t error_status_; std::string error_message_; diff --git a/cpp/include/cuopt/linear_programming/utilities/internals.hpp b/cpp/include/cuopt/linear_programming/utilities/internals.hpp index 90d856b23..ef8e0bea8 100644 --- a/cpp/include/cuopt/linear_programming/utilities/internals.hpp +++ b/cpp/include/cuopt/linear_programming/utilities/internals.hpp @@ -21,6 +21,7 @@ class Callback { }; enum class base_solution_callback_type { GET_SOLUTION, SET_SOLUTION }; +enum class callback_memory_location { DEVICE, HOST }; class base_solution_callback_t : public Callback { public: @@ -31,11 +32,18 @@ class base_solution_callback_t : public Callback { this->n_variables = n_variables_; } + void set_memory_location(callback_memory_location location) { memory_location = location; } + + callback_memory_location get_memory_location() const { return memory_location; } + + bool data_on_device() const { return memory_location == callback_memory_location::DEVICE; } + virtual 
base_solution_callback_type get_type() const = 0; protected: - bool isFloat = true; - size_t n_variables = 0; + bool isFloat = true; + size_t n_variables = 0; + callback_memory_location memory_location = callback_memory_location::DEVICE; }; class get_solution_callback_t : public base_solution_callback_t { diff --git a/cpp/include/cuopt/linear_programming/utilities/remote_serialization.hpp b/cpp/include/cuopt/linear_programming/utilities/remote_serialization.hpp new file mode 100644 index 000000000..c25a3a878 --- /dev/null +++ b/cpp/include/cuopt/linear_programming/utilities/remote_serialization.hpp @@ -0,0 +1,191 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace cuopt::linear_programming { + +/** + * @brief Abstract interface for serializing/deserializing cuOpt problems and solutions. + * + * This interface allows users to provide custom serialization implementations + * for different wire formats (protobuf, JSON, msgpack, custom binary, etc.). + * + * The default implementation uses Protocol Buffers and is built into libcuopt. + * Users can provide their own implementation by: + * 1. Implementing this interface + * 2. Compiling to a shared library + * 3. Setting CUOPT_SERIALIZER_LIB environment variable to the library path + * + * @tparam i_t Index type (int32_t or int64_t) + * @tparam f_t Float type (float or double) + */ +template +class remote_serializer_t { + public: + virtual ~remote_serializer_t() = default; + + //============================================================================ + // Problem Serialization + //============================================================================ + + /** + * @brief Serialize an LP problem with settings to a byte buffer. + * + * @param view The problem data view (can point to CPU or GPU memory) + * @param settings Solver settings + * @return Serialized byte buffer ready for network transmission + */ + virtual std::vector serialize_lp_request( + const mps_parser::data_model_view_t& view, + const pdlp_solver_settings_t& settings) = 0; + + /** + * @brief Serialize a MIP problem with settings to a byte buffer. + * + * @param view The problem data view (can point to CPU or GPU memory) + * @param settings Solver settings + * @return Serialized byte buffer ready for network transmission + */ + virtual std::vector serialize_mip_request( + const mps_parser::data_model_view_t& view, + const mip_solver_settings_t& settings) = 0; + + //============================================================================ + // Solution Deserialization + //============================================================================ + + /** + * @brief Deserialize an LP solution from a byte buffer. + * + * @param data The serialized solution bytes received from the server + * @return The deserialized LP solution object + */ + virtual optimization_problem_solution_t deserialize_lp_solution( + const std::vector& data) = 0; + + /** + * @brief Deserialize a MIP solution from a byte buffer. 
+ * + * @param data The serialized solution bytes received from the server + * @return The deserialized MIP solution object + */ + virtual mip_solution_t deserialize_mip_solution(const std::vector& data) = 0; + + //============================================================================ + // Server-side: Request Deserialization & Response Serialization + //============================================================================ + + /** + * @brief Check if serialized data is an LP or MIP request. + * + * @param data The serialized request bytes + * @return true if MIP request, false if LP request + */ + virtual bool is_mip_request(const std::vector& data) = 0; + + /** + * @brief Deserialize an LP request (problem + settings) from bytes. + * + * This is used by the server to receive problems from clients. + * + * @param data The serialized request bytes + * @param[out] view_data CPU storage that will be populated with problem data + * @param[out] settings Settings will be populated here + * @return true on success, false on parse error + */ + virtual bool deserialize_lp_request(const std::vector& data, + mps_parser::mps_data_model_t& view_data, + pdlp_solver_settings_t& settings) = 0; + + /** + * @brief Deserialize a MIP request (problem + settings) from bytes. + * + * @param data The serialized request bytes + * @param[out] view_data CPU storage that will be populated with problem data + * @param[out] settings Settings will be populated here + * @return true on success, false on parse error + */ + virtual bool deserialize_mip_request(const std::vector& data, + mps_parser::mps_data_model_t& view_data, + mip_solver_settings_t& settings) = 0; + + /** + * @brief Serialize an LP solution to bytes for sending to client. + * + * @param solution The LP solution to serialize + * @return Serialized byte buffer + */ + virtual std::vector serialize_lp_solution( + const optimization_problem_solution_t& solution) = 0; + + /** + * @brief Serialize a MIP solution to bytes for sending to client. + * + * @param solution The MIP solution to serialize + * @return Serialized byte buffer + */ + virtual std::vector serialize_mip_solution(const mip_solution_t& solution) = 0; + + //============================================================================ + // Metadata + //============================================================================ + + /** + * @brief Get the serialization format name (for logging/debugging). + * + * @return Format name string (e.g., "protobuf", "json", "msgpack") + */ + virtual std::string format_name() const = 0; + + /** + * @brief Get version of the serialization protocol. + * + * @return Protocol version number + */ + virtual uint32_t protocol_version() const = 0; +}; + +/** + * @brief Factory function type for creating serializer instances. + * + * Custom serializer libraries must export a function with this signature + * named "create_cuopt_serializer". + */ +template +using serializer_factory_t = std::unique_ptr> (*)(); + +/** + * @brief Get the default (protobuf) serializer instance. + * + * @return Shared pointer to the default serializer + */ +template +std::shared_ptr> get_default_serializer(); + +/** + * @brief Get the currently configured serializer. + * + * Returns the custom serializer if CUOPT_SERIALIZER_LIB is set, + * otherwise returns the default protobuf serializer. 
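+ *
+ * Editorial sketch of the plugin contract (illustrative; mirrors
+ * serializer_factory_t above, and assumes the symbol is looked up by name,
+ * hence extern "C"; my_serializer_t is a hypothetical implementation):
+ *   extern "C" std::unique_ptr<remote_serializer_t<int, double>>
+ *   create_cuopt_serializer() { return std::make_unique<my_serializer_t>(); }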
+ * + * @return Shared pointer to the serializer + */ +template +std::shared_ptr> get_serializer(); + +} // namespace cuopt::linear_programming diff --git a/cpp/include/cuopt/linear_programming/utilities/remote_solve.hpp b/cpp/include/cuopt/linear_programming/utilities/remote_solve.hpp new file mode 100644 index 000000000..898a1cba5 --- /dev/null +++ b/cpp/include/cuopt/linear_programming/utilities/remote_solve.hpp @@ -0,0 +1,130 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace cuopt::linear_programming { + +/** + * @brief Configuration for remote solve connection + */ +struct remote_solve_config_t { + std::string host; + int port; +}; + +/** + * @brief Check if remote solve is enabled via environment variables. + * + * Remote solve is enabled when both CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT + * environment variables are set. + * + * @return std::optional containing the remote config if + * remote solve is enabled, std::nullopt otherwise + */ +inline std::optional get_remote_solve_config() +{ + const char* host = std::getenv("CUOPT_REMOTE_HOST"); + const char* port = std::getenv("CUOPT_REMOTE_PORT"); + + if (host != nullptr && port != nullptr && host[0] != '\0' && port[0] != '\0') { + try { + int port_num = std::stoi(port); + return remote_solve_config_t{std::string(host), port_num}; + } catch (...) { + // Invalid port number, fall back to local solve + return std::nullopt; + } + } + return std::nullopt; +} + +/** + * @brief Check if remote solve is enabled. + * + * @return true if CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT are both set + */ +inline bool is_remote_solve_enabled() { return get_remote_solve_config().has_value(); } + +/** + * @brief Solve an LP problem on a remote server. + * + * @tparam i_t Index type (int32_t) + * @tparam f_t Float type (float or double) + * @param config Remote server configuration + * @param view Problem data view + * @param settings Solver settings + * @return Solution from the remote server + */ +template +optimization_problem_solution_t solve_lp_remote( + const remote_solve_config_t& config, + const cuopt::mps_parser::data_model_view_t& view, + const pdlp_solver_settings_t& settings); + +/** + * @brief Solve a MIP problem on a remote server. + * + * @tparam i_t Index type (int32_t) + * @tparam f_t Float type (float or double) + * @param config Remote server configuration + * @param view Problem data view + * @param settings Solver settings + * @return Solution from the remote server + */ +template +mip_solution_t solve_mip_remote( + const remote_solve_config_t& config, + const cuopt::mps_parser::data_model_view_t& view, + const mip_solver_settings_t& settings); + +/** + * @brief Job status enumeration for remote jobs. + */ +enum class remote_job_status_t { + QUEUED, ///< Job is waiting in queue + PROCESSING, ///< Job is being processed by a worker + COMPLETED, ///< Job completed successfully + FAILED, ///< Job failed with an error + NOT_FOUND, ///< Job ID not found on server + CANCELLED ///< Job was cancelled +}; + +/** + * @brief Result of a cancel job request. 
+ */ +struct cancel_job_result_t { + bool success; ///< True if cancellation was successful + std::string message; ///< Success/error message + remote_job_status_t job_status; ///< Status of job after cancel attempt +}; + +/** + * @brief Cancel a job on a remote server. + * + * This function can cancel jobs that are queued (waiting for a worker) or + * currently running. For running jobs, the worker process is killed and + * automatically restarted by the server. + * + * @param config Remote server configuration + * @param job_id The job ID to cancel + * @return Result containing success status, message, and job status + */ +cancel_job_result_t cancel_job_remote(const remote_solve_config_t& config, + const std::string& job_id); + +} // namespace cuopt::linear_programming diff --git a/cpp/libmps_parser/CMakeLists.txt b/cpp/libmps_parser/CMakeLists.txt index 4fe497157..3018d9c31 100644 --- a/cpp/libmps_parser/CMakeLists.txt +++ b/cpp/libmps_parser/CMakeLists.txt @@ -67,7 +67,8 @@ if(BUILD_TESTS) include(cmake/thirdparty/get_gtest.cmake) endif() -add_library(mps_parser SHARED +# Source files for mps_parser +set(MPS_PARSER_SOURCES src/data_model_view.cpp src/mps_data_model.cpp src/mps_parser.cpp @@ -77,6 +78,12 @@ add_library(mps_parser SHARED src/utilities/cython_mps_parser.cpp ) +# Shared library for standalone use +add_library(mps_parser SHARED ${MPS_PARSER_SOURCES}) + +# Static library for linking into libcuopt +add_library(mps_parser_static STATIC ${MPS_PARSER_SOURCES}) + set_target_properties(mps_parser PROPERTIES BUILD_RPATH "\$ORIGIN" INSTALL_RPATH "\$ORIGIN" @@ -105,6 +112,7 @@ if(WRITE_FATBIN) endif() add_library(cuopt::mps_parser ALIAS mps_parser) +add_library(cuopt::mps_parser_static ALIAS mps_parser_static) # ################################################################################################## # - include paths --------------------------------------------------------------------------------- @@ -117,6 +125,15 @@ target_include_directories(mps_parser "$" ) +target_include_directories(mps_parser_static + PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/../thirdparty" + "${CMAKE_CURRENT_SOURCE_DIR}/src" + PUBLIC + "$" + "$" +) + if(MPS_PARSER_WITH_BZIP2) target_include_directories(mps_parser PRIVATE BZip2::BZip2) endif(MPS_PARSER_WITH_BZIP2) diff --git a/cpp/libmps_parser/include/mps_parser/data_model_view.hpp b/cpp/libmps_parser/include/mps_parser/data_model_view.hpp index eb34682ce..d92357179 100644 --- a/cpp/libmps_parser/include/mps_parser/data_model_view.hpp +++ b/cpp/libmps_parser/include/mps_parser/data_model_view.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -406,8 +406,24 @@ class data_model_view_t { */ bool has_quadratic_objective() const noexcept; + /** + * @brief Set whether the data pointed to by this view is in device (GPU) memory. + * @note Default is false (CPU memory). Set to true when view points to GPU buffers. + * + * @param is_device true if data is in GPU memory, false if in CPU memory + */ + void set_is_device_memory(bool is_device) noexcept { is_device_memory_ = is_device; } + + /** + * @brief Check if the data pointed to by this view is in device (GPU) memory. 
+ * + * @return true if data is in GPU memory, false if in CPU memory + */ + bool is_device_memory() const noexcept { return is_device_memory_; } + private: bool maximize_{false}; + bool is_device_memory_{false}; // true if spans point to GPU memory, false for CPU span A_; span A_indices_; span A_offsets_; diff --git a/cpp/libmps_parser/src/data_model_view.cpp b/cpp/libmps_parser/src/data_model_view.cpp index 7db2b390c..8be1b899a 100644 --- a/cpp/libmps_parser/src/data_model_view.cpp +++ b/cpp/libmps_parser/src/data_model_view.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -9,6 +9,8 @@ #include #include +#include + namespace cuopt::mps_parser { template @@ -348,7 +350,8 @@ bool data_model_view_t::has_quadratic_objective() const noexcept // NOTE: Explicitly instantiate all types here in order to avoid linker error template class data_model_view_t; - template class data_model_view_t; +template class data_model_view_t; +template class data_model_view_t; } // namespace cuopt::mps_parser diff --git a/cpp/libmps_parser/src/mps_data_model.cpp b/cpp/libmps_parser/src/mps_data_model.cpp index 7d0d44a03..605d5cef6 100644 --- a/cpp/libmps_parser/src/mps_data_model.cpp +++ b/cpp/libmps_parser/src/mps_data_model.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -9,6 +9,7 @@ #include #include +#include namespace cuopt::mps_parser { @@ -462,8 +463,9 @@ bool mps_data_model_t::has_quadratic_objective() const noexcept // NOTE: Explicitly instantiate all types here in order to avoid linker error template class mps_data_model_t; - template class mps_data_model_t; +template class mps_data_model_t; +template class mps_data_model_t; // TODO current raft to cusparse wrappers only support int64_t // can be CUSPARSE_INDEX_16U, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_64I diff --git a/cpp/src/linear_programming/CMakeLists.txt b/cpp/src/linear_programming/CMakeLists.txt index c3f673e03..b64475214 100644 --- a/cpp/src/linear_programming/CMakeLists.txt +++ b/cpp/src/linear_programming/CMakeLists.txt @@ -1,5 +1,5 @@ # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # cmake-format: on @@ -8,6 +8,8 @@ set(LP_CORE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/solver_settings.cu ${CMAKE_CURRENT_SOURCE_DIR}/optimization_problem.cu ${CMAKE_CURRENT_SOURCE_DIR}/utilities/problem_checking.cu + ${CMAKE_CURRENT_SOURCE_DIR}/utilities/protobuf_serializer.cu + ${CMAKE_CURRENT_SOURCE_DIR}/utilities/remote_solve.cu ${CMAKE_CURRENT_SOURCE_DIR}/solve.cu ${CMAKE_CURRENT_SOURCE_DIR}/pdlp.cu ${CMAKE_CURRENT_SOURCE_DIR}/pdhg.cu @@ -26,6 +28,13 @@ set(LP_CORE_FILES ${CMAKE_CURRENT_SOURCE_DIR}/termination_strategy/convergence_information.cu ) +# gRPC remote solve transport (host-only). Only build when gRPC is enabled. 
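+# (Editorial note: configure with -DCUOPT_ENABLE_GRPC=ON to take this branch;
+# the fallback main() in cuopt_grpc_server.cpp prints the same hint when the
+# server is built without gRPC support.)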
+if(CUOPT_ENABLE_GRPC)
+  list(APPEND LP_CORE_FILES
+    ${CMAKE_CURRENT_SOURCE_DIR}/utilities/remote_solve_grpc.cpp
+  )
+endif()
+
 # C and Python adapter files
 set(LP_ADAPTER_FILES
   ${CMAKE_CURRENT_SOURCE_DIR}/utilities/cython_solve.cu
diff --git a/cpp/src/linear_programming/cuopt_c.cpp b/cpp/src/linear_programming/cuopt_c.cpp
index 0772dd14b..717752e70 100644
--- a/cpp/src/linear_programming/cuopt_c.cpp
+++ b/cpp/src/linear_programming/cuopt_c.cpp
@@ -1,15 +1,17 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */
 /* clang-format on */
 #include
+#include
 #include
 #include
 #include
+#include
 #include
 #include
@@ -17,36 +19,263 @@
 #include
+#include
+
 #include
 #include
+#include
 #include
+#include
 using namespace cuopt::mps_parser;
 using namespace cuopt::linear_programming;
+/**
+ * @brief CPU-side storage for problem data.
+ *
+ * This struct stores all problem data in CPU memory. At solve time, a data_model_view_t
+ * is created pointing to this data, and the solve_lp/solve_mip routines handle
+ * local vs remote solve automatically.
+ */
+struct problem_cpu_data_t {
+  // Problem dimensions
+  cuopt_int_t num_constraints = 0;
+  cuopt_int_t num_variables   = 0;
+
+  // Objective
+  bool maximize                  = false;
+  cuopt_float_t objective_offset = 0.0;
+  std::vector<cuopt_float_t> objective_coefficients;
+
+  // Quadratic objective (optional)
+  std::vector<cuopt_float_t> Q_values;
+  std::vector<cuopt_int_t> Q_indices;
+  std::vector<cuopt_int_t> Q_offsets;
+
+  // Constraint matrix (CSR format)
+  std::vector<cuopt_float_t> A_values;
+  std::vector<cuopt_int_t> A_indices;
+  std::vector<cuopt_int_t> A_offsets;
+
+  // Constraint bounds (two representations)
+  std::vector<char> row_types;                            // '<', '>', '=' style
+  std::vector<cuopt_float_t> constraint_bounds;           // single RHS for row_types style
+  std::vector<cuopt_float_t> constraint_lower_bounds;     // ranged style
+  std::vector<cuopt_float_t> constraint_upper_bounds;     // ranged style
+  bool uses_ranged_constraints = false;
+
+  // Variable bounds
+  std::vector<cuopt_float_t> variable_lower_bounds;
+  std::vector<cuopt_float_t> variable_upper_bounds;
+
+  // Variable types
+  std::vector<char> variable_types;  // 'C' for continuous, 'I' for integer
+
+  /**
+   * @brief Create a data_model_view_t pointing to this CPU data.
+   */
+  cuopt::linear_programming::data_model_view_t<cuopt_int_t, cuopt_float_t> create_view() const
+  {
+    cuopt::linear_programming::data_model_view_t<cuopt_int_t, cuopt_float_t> view;
+
+    view.set_maximize(maximize);
+    view.set_objective_offset(objective_offset);
+
+    if (!objective_coefficients.empty()) {
+      view.set_objective_coefficients(objective_coefficients.data(), objective_coefficients.size());
+    }
+
+    if (!Q_values.empty()) {
+      view.set_quadratic_objective_matrix(Q_values.data(),
+                                          Q_values.size(),
+                                          Q_indices.data(),
+                                          Q_indices.size(),
+                                          Q_offsets.data(),
+                                          Q_offsets.size());
+    }
+
+    if (!A_values.empty()) {
+      view.set_csr_constraint_matrix(A_values.data(),
+                                     A_values.size(),
+                                     A_indices.data(),
+                                     A_indices.size(),
+                                     A_offsets.data(),
+                                     A_offsets.size());
+    }
+
+    if (uses_ranged_constraints) {
+      if (!constraint_lower_bounds.empty()) {
+        view.set_constraint_lower_bounds(constraint_lower_bounds.data(),
+                                         constraint_lower_bounds.size());
+      }
+      if (!constraint_upper_bounds.empty()) {
+        view.set_constraint_upper_bounds(constraint_upper_bounds.data(),
+                                         constraint_upper_bounds.size());
+      }
+    } else {
+      if (!row_types.empty()) { view.set_row_types(row_types.data(), row_types.size()); }
+      if (!constraint_bounds.empty()) {
+        view.set_constraint_bounds(constraint_bounds.data(), constraint_bounds.size());
+      }
+    }
+
+    if (!variable_lower_bounds.empty()) {
+      view.set_variable_lower_bounds(variable_lower_bounds.data(), variable_lower_bounds.size());
+    }
+
+    if (!variable_upper_bounds.empty()) {
+      view.set_variable_upper_bounds(variable_upper_bounds.data(), variable_upper_bounds.size());
+    }
+
+    if (!variable_types.empty()) {
+      view.set_variable_types(variable_types.data(), variable_types.size());
+    }
+
+    return view;
+  }
+
+  /**
+   * @brief Check if this is a MIP (has integer variables).
+   */
+  bool is_mip() const
+  {
+    for (char vt : variable_types) {
+      if (vt == CUOPT_INTEGER) { return true; }
+    }
+    return false;
+  }
+};
+
 struct problem_and_stream_view_t {
-  problem_and_stream_view_t()
-    : op_problem(nullptr), stream_view(rmm::cuda_stream_per_thread), handle(stream_view)
+  problem_and_stream_view_t() : cpu_data(nullptr), gpu_problem(nullptr), handle(nullptr) {}
+
+  /**
+   * @brief Ensure CUDA resources are initialized (lazy initialization).
+   * Only call this when local solve is needed.
+   */
+  void ensure_cuda_initialized()
+  {
+    if (!handle) { handle = std::make_unique<raft::handle_t>(); }
+  }
+
+  raft::handle_t* get_handle_ptr() {
+    ensure_cuda_initialized();
+    return handle.get();
+  }
+
+  /**
+   * @brief Check if this is a MIP problem.
+   */
+  bool is_mip() const
+  {
+    if (view.is_device_memory()) {
+      // GPU path: check gpu_problem's problem category
+      if (!gpu_problem) return false;
+      auto cat = gpu_problem->get_problem_category();
+      return (cat == problem_category_t::MIP) || (cat == problem_category_t::IP);
+    } else {
+      // CPU path: check variable types in cpu_data
+      if (!cpu_data) return false;
+      return cpu_data->is_mip();
+    }
+  }
+
+  // Only ONE of these is allocated (optimized memory usage):
+  std::unique_ptr<problem_cpu_data_t> cpu_data;  // for remote solve (CPU memory)
+  std::unique_ptr<optimization_problem_t<cuopt_int_t, cuopt_float_t>>
+    gpu_problem;  // for local solve (GPU memory)
+
+  // Non-owning view pointing to whichever storage is active
+  // Use view.is_device_memory() to check if data is on GPU or CPU
+  cuopt::linear_programming::data_model_view_t<cuopt_int_t, cuopt_float_t> view;
+  std::vector<char> gpu_variable_types;  // host copy for view when GPU data is used
+
+  // Lazy-initialized CUDA handle (only created for local solve)
+  std::unique_ptr<raft::handle_t> handle;
+
+  /**
+   * @brief Create a view pointing to GPU data from the gpu_problem.
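Because `data_model_view_t` stores non-owning spans, `create_view()` is only safe while the owning struct is alive; the wrapper above keeps `cpu_data` and `view` together for exactly this reason. A condensed sketch of the same ownership pattern (the `<int, double>` instantiation is assumed):

```cpp
#include <vector>

// A view is valid only while its backing storage lives; keep them together.
struct owning_problem {
  std::vector<double> objective;                            // backing storage (owns the data)
  cuopt::mps_parser::data_model_view_t<int, double> view;   // non-owning spans

  void build()
  {
    objective = {1.0, -2.0};
    view.set_objective_coefficients(objective.data(), objective.size());
    view.set_is_device_memory(false);
    // Reallocating `objective` after this point would dangle the span.
  }
};
```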
+ * Call this after gpu_problem is fully populated. + */ + void create_view_from_gpu_problem() + { + if (!gpu_problem) return; + auto& gpu = *gpu_problem; + + view.set_maximize(gpu.get_sense()); + view.set_objective_offset(gpu.get_objective_offset()); + view.set_objective_coefficients(gpu.get_objective_coefficients().data(), gpu.get_n_variables()); + view.set_csr_constraint_matrix(gpu.get_constraint_matrix_values().data(), + gpu.get_constraint_matrix_values().size(), + gpu.get_constraint_matrix_indices().data(), + gpu.get_constraint_matrix_indices().size(), + gpu.get_constraint_matrix_offsets().data(), + gpu.get_constraint_matrix_offsets().size()); + + if (!gpu.get_constraint_lower_bounds().is_empty()) { + view.set_constraint_lower_bounds(gpu.get_constraint_lower_bounds().data(), + gpu.get_n_constraints()); + view.set_constraint_upper_bounds(gpu.get_constraint_upper_bounds().data(), + gpu.get_n_constraints()); + } else if (!gpu.get_row_types().is_empty()) { + view.set_row_types(gpu.get_row_types().data(), gpu.get_n_constraints()); + view.set_constraint_bounds(gpu.get_constraint_bounds().data(), gpu.get_n_constraints()); + } + + view.set_variable_lower_bounds(gpu.get_variable_lower_bounds().data(), gpu.get_n_variables()); + view.set_variable_upper_bounds(gpu.get_variable_upper_bounds().data(), gpu.get_n_variables()); + + if (gpu.get_n_variables() > 0) { + std::vector gpu_var_types(gpu.get_n_variables()); + raft::copy(gpu_var_types.data(), + gpu.get_variable_types().data(), + gpu.get_n_variables(), + gpu.get_handle_ptr()->get_stream()); + gpu.get_handle_ptr()->sync_stream(); + + gpu_variable_types.resize(gpu.get_n_variables()); + for (cuopt_int_t i = 0; i < gpu.get_n_variables(); ++i) { + gpu_variable_types[i] = (gpu_var_types[i] == var_t::INTEGER) ? 'I' : 'C'; + } + view.set_variable_types(gpu_variable_types.data(), gpu.get_n_variables()); + } + + if (gpu.has_quadratic_objective()) { + view.set_quadratic_objective_matrix(gpu.get_quadratic_objective_values().data(), + gpu.get_quadratic_objective_values().size(), + gpu.get_quadratic_objective_indices().data(), + gpu.get_quadratic_objective_indices().size(), + gpu.get_quadratic_objective_offsets().data(), + gpu.get_quadratic_objective_offsets().size()); + } + + view.set_is_device_memory(true); + } + + /** + * @brief Create a view pointing to CPU data from cpu_data. + * Call this after cpu_data is fully populated. 
+ */ + void create_view_from_cpu_data() + { + if (!cpu_data) return; + view = cpu_data->create_view(); + view.set_is_device_memory(false); } - raft::handle_t* get_handle_ptr() { return &handle; } - cuopt::linear_programming::optimization_problem_t* op_problem; - rmm::cuda_stream_view stream_view; - raft::handle_t handle; }; struct solution_and_stream_view_t { - solution_and_stream_view_t(bool solution_for_mip, rmm::cuda_stream_view stream_view) - : is_mip(solution_for_mip), - mip_solution_ptr(nullptr), - lp_solution_ptr(nullptr), - stream_view(stream_view) + solution_and_stream_view_t(bool solution_for_mip, raft::handle_t* handle_ptr = nullptr) + : is_mip(solution_for_mip), mip_solution_ptr(nullptr), lp_solution_ptr(nullptr) { + // Store stream only if we have a handle (local solve) + if (handle_ptr) { stream_view = handle_ptr->get_stream(); } } bool is_mip; mip_solution_t* mip_solution_ptr; optimization_problem_solution_t* lp_solution_ptr; - rmm::cuda_stream_view stream_view; + std::optional stream_view; // Only present for local solve }; int8_t cuOptGetFloatSize() { return sizeof(cuopt_float_t); } @@ -77,6 +306,7 @@ cuopt_int_t cuOptReadProblem(const char* filename, cuOptOptimizationProblem* pro parse_mps(filename_str, input_mps_strict)); } catch (const std::exception& e) { CUOPT_LOG_INFO("Error parsing MPS file: %s", e.what()); + delete problem_and_stream; *problem_ptr = nullptr; if (std::string(e.what()).find("Error opening MPS file") != std::string::npos) { return CUOPT_MPS_FILE_ERROR; @@ -84,11 +314,64 @@ cuopt_int_t cuOptReadProblem(const char* filename, cuOptOptimizationProblem* pro return CUOPT_MPS_PARSE_ERROR; } } - optimization_problem_t* op_problem = - new optimization_problem_t(mps_data_model_to_optimization_problem( - problem_and_stream->get_handle_ptr(), *mps_data_model_ptr)); - problem_and_stream->op_problem = op_problem; - *problem_ptr = static_cast(problem_and_stream); + + // Check remote solve configuration at creation time + bool is_remote = is_remote_solve_enabled(); + + if (is_remote) { + // Remote: store in CPU memory + problem_and_stream->cpu_data = std::make_unique(); + auto& cpu_data = *problem_and_stream->cpu_data; + const auto& mps = *mps_data_model_ptr; + + cpu_data.num_constraints = + static_cast(mps.get_constraint_matrix_offsets().size() - 1); + cpu_data.num_variables = static_cast(mps.get_objective_coefficients().size()); + cpu_data.maximize = mps.get_sense(); + cpu_data.objective_offset = mps.get_objective_offset(); + + cpu_data.objective_coefficients = mps.get_objective_coefficients(); + cpu_data.A_values = mps.get_constraint_matrix_values(); + cpu_data.A_indices = mps.get_constraint_matrix_indices(); + cpu_data.A_offsets = mps.get_constraint_matrix_offsets(); + + if (!mps.get_constraint_lower_bounds().empty() || !mps.get_constraint_upper_bounds().empty()) { + cpu_data.uses_ranged_constraints = true; + cpu_data.constraint_lower_bounds = mps.get_constraint_lower_bounds(); + cpu_data.constraint_upper_bounds = mps.get_constraint_upper_bounds(); + } else { + cpu_data.uses_ranged_constraints = false; + cpu_data.constraint_bounds = mps.get_constraint_bounds(); + const auto& mps_row_types = mps.get_row_types(); + cpu_data.row_types.resize(mps_row_types.size()); + for (size_t i = 0; i < mps_row_types.size(); ++i) { + cpu_data.row_types[i] = mps_row_types[i]; + } + } + + cpu_data.variable_lower_bounds = mps.get_variable_lower_bounds(); + cpu_data.variable_upper_bounds = mps.get_variable_upper_bounds(); + + const auto& mps_var_types = mps.get_variable_types(); + 
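The copy above preserves whichever of the two constraint encodings the MPS model used: per-row sense characters with a single right-hand side, or explicit lower/upper bound pairs. The same pair of constraints written both ways:

```cpp
#include <limits>
#include <vector>

// The constraints (x0 + x1 <= 4) and (x0 - x1 >= 1) in both encodings.
void constraint_encodings()
{
  const double inf = std::numeric_limits<double>::infinity();

  // Encoding 1: sense character plus a single RHS per row.
  std::vector<char>   row_types = {'<', '>'};
  std::vector<double> rhs       = {4.0, 1.0};

  // Encoding 2: ranged bounds, lower <= a_i'x <= upper.
  std::vector<double> lower = {-inf, 1.0};
  std::vector<double> upper = {4.0, inf};

  (void)row_types; (void)rhs; (void)lower; (void)upper;
}
```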
cpu_data.variable_types.resize(mps_var_types.size()); + for (size_t i = 0; i < mps_var_types.size(); ++i) { + cpu_data.variable_types[i] = + (mps_var_types[i] == 'I' || mps_var_types[i] == 'B') ? CUOPT_INTEGER : CUOPT_CONTINUOUS; + } + + // Create view pointing to CPU data + problem_and_stream->create_view_from_cpu_data(); + } else { + // Local: store in GPU memory using existing mps_data_model_to_optimization_problem + problem_and_stream->gpu_problem = + std::make_unique>( + mps_data_model_to_optimization_problem(problem_and_stream->get_handle_ptr(), + *mps_data_model_ptr)); + // Create view pointing to GPU data + problem_and_stream->create_view_from_gpu_problem(); + } + + *problem_ptr = static_cast(problem_and_stream); return CUOPT_SUCCESS; } @@ -118,32 +401,76 @@ cuopt_int_t cuOptCreateProblem(cuopt_int_t num_constraints, } problem_and_stream_view_t* problem_and_stream = new problem_and_stream_view_t(); - problem_and_stream->op_problem = - new optimization_problem_t(problem_and_stream->get_handle_ptr()); + bool is_remote = is_remote_solve_enabled(); + try { - problem_and_stream->op_problem->set_maximize(objective_sense == CUOPT_MAXIMIZE); - problem_and_stream->op_problem->set_objective_offset(objective_offset); - problem_and_stream->op_problem->set_objective_coefficients(objective_coefficients, - num_variables); cuopt_int_t nnz = constraint_matrix_row_offsets[num_constraints]; - problem_and_stream->op_problem->set_csr_constraint_matrix(constraint_matrix_coefficent_values, - nnz, - constraint_matrix_column_indices, - nnz, - constraint_matrix_row_offsets, - num_constraints + 1); - problem_and_stream->op_problem->set_row_types(constraint_sense, num_constraints); - problem_and_stream->op_problem->set_constraint_bounds(rhs, num_constraints); - problem_and_stream->op_problem->set_variable_lower_bounds(lower_bounds, num_variables); - problem_and_stream->op_problem->set_variable_upper_bounds(upper_bounds, num_variables); - std::vector variable_types_host(num_variables); - for (int j = 0; j < num_variables; j++) { - variable_types_host[j] = - variable_types[j] == CUOPT_CONTINUOUS ? 
var_t::CONTINUOUS : var_t::INTEGER; + + if (is_remote) { + // Remote: store in CPU memory + problem_and_stream->cpu_data = std::make_unique(); + auto& cpu_data = *problem_and_stream->cpu_data; + + cpu_data.num_constraints = num_constraints; + cpu_data.num_variables = num_variables; + cpu_data.maximize = (objective_sense == CUOPT_MAXIMIZE); + cpu_data.objective_offset = objective_offset; + + cpu_data.objective_coefficients.assign(objective_coefficients, + objective_coefficients + num_variables); + cpu_data.A_values.assign(constraint_matrix_coefficent_values, + constraint_matrix_coefficent_values + nnz); + cpu_data.A_indices.assign(constraint_matrix_column_indices, + constraint_matrix_column_indices + nnz); + cpu_data.A_offsets.assign(constraint_matrix_row_offsets, + constraint_matrix_row_offsets + num_constraints + 1); + + cpu_data.uses_ranged_constraints = false; + cpu_data.row_types.assign(constraint_sense, constraint_sense + num_constraints); + cpu_data.constraint_bounds.assign(rhs, rhs + num_constraints); + + cpu_data.variable_lower_bounds.assign(lower_bounds, lower_bounds + num_variables); + cpu_data.variable_upper_bounds.assign(upper_bounds, upper_bounds + num_variables); + cpu_data.variable_types.assign(variable_types, variable_types + num_variables); + + // Create view pointing to CPU data + problem_and_stream->create_view_from_cpu_data(); + } else { + // Local: store in GPU memory + problem_and_stream->gpu_problem = + std::make_unique>( + problem_and_stream->get_handle_ptr()); + auto& gpu_problem = *problem_and_stream->gpu_problem; + + gpu_problem.set_maximize(objective_sense == CUOPT_MAXIMIZE); + gpu_problem.set_objective_offset(objective_offset); + gpu_problem.set_objective_coefficients(objective_coefficients, num_variables); + gpu_problem.set_csr_constraint_matrix(constraint_matrix_coefficent_values, + nnz, + constraint_matrix_column_indices, + nnz, + constraint_matrix_row_offsets, + num_constraints + 1); + gpu_problem.set_row_types(constraint_sense, num_constraints); + gpu_problem.set_constraint_bounds(rhs, num_constraints); + gpu_problem.set_variable_lower_bounds(lower_bounds, num_variables); + gpu_problem.set_variable_upper_bounds(upper_bounds, num_variables); + + // Convert variable types to enum + std::vector variable_types_host(num_variables); + for (cuopt_int_t j = 0; j < num_variables; j++) { + variable_types_host[j] = + variable_types[j] == CUOPT_CONTINUOUS ? 
var_t::CONTINUOUS : var_t::INTEGER; + } + gpu_problem.set_variable_types(variable_types_host.data(), num_variables); + + // Create view pointing to GPU data + problem_and_stream->create_view_from_gpu_problem(); } - problem_and_stream->op_problem->set_variable_types(variable_types_host.data(), num_variables); + *problem_ptr = static_cast(problem_and_stream); - } catch (const raft::exception& e) { + } catch (const std::exception& e) { + delete problem_and_stream; return CUOPT_INVALID_ARGUMENT; } return CUOPT_SUCCESS; @@ -175,34 +502,79 @@ cuopt_int_t cuOptCreateRangedProblem(cuopt_int_t num_constraints, } problem_and_stream_view_t* problem_and_stream = new problem_and_stream_view_t(); - problem_and_stream->op_problem = - new optimization_problem_t(problem_and_stream->get_handle_ptr()); + bool is_remote = is_remote_solve_enabled(); + try { - problem_and_stream->op_problem->set_maximize(objective_sense == CUOPT_MAXIMIZE); - problem_and_stream->op_problem->set_objective_offset(objective_offset); - problem_and_stream->op_problem->set_objective_coefficients(objective_coefficients, - num_variables); cuopt_int_t nnz = constraint_matrix_row_offsets[num_constraints]; - problem_and_stream->op_problem->set_csr_constraint_matrix(constraint_matrix_coefficent_values, - nnz, - constraint_matrix_column_indices, - nnz, - constraint_matrix_row_offsets, - num_constraints + 1); - problem_and_stream->op_problem->set_constraint_lower_bounds(constraint_lower_bounds, - num_constraints); - problem_and_stream->op_problem->set_constraint_upper_bounds(constraint_upper_bounds, - num_constraints); - problem_and_stream->op_problem->set_variable_lower_bounds(variable_lower_bounds, num_variables); - problem_and_stream->op_problem->set_variable_upper_bounds(variable_upper_bounds, num_variables); - std::vector variable_types_host(num_variables); - for (int j = 0; j < num_variables; j++) { - variable_types_host[j] = - variable_types[j] == CUOPT_CONTINUOUS ? 
var_t::CONTINUOUS : var_t::INTEGER; + + if (is_remote) { + // Remote: store in CPU memory + problem_and_stream->cpu_data = std::make_unique(); + auto& cpu_data = *problem_and_stream->cpu_data; + + cpu_data.num_constraints = num_constraints; + cpu_data.num_variables = num_variables; + cpu_data.maximize = (objective_sense == CUOPT_MAXIMIZE); + cpu_data.objective_offset = objective_offset; + + cpu_data.objective_coefficients.assign(objective_coefficients, + objective_coefficients + num_variables); + cpu_data.A_values.assign(constraint_matrix_coefficent_values, + constraint_matrix_coefficent_values + nnz); + cpu_data.A_indices.assign(constraint_matrix_column_indices, + constraint_matrix_column_indices + nnz); + cpu_data.A_offsets.assign(constraint_matrix_row_offsets, + constraint_matrix_row_offsets + num_constraints + 1); + + cpu_data.uses_ranged_constraints = true; + cpu_data.constraint_lower_bounds.assign(constraint_lower_bounds, + constraint_lower_bounds + num_constraints); + cpu_data.constraint_upper_bounds.assign(constraint_upper_bounds, + constraint_upper_bounds + num_constraints); + + cpu_data.variable_lower_bounds.assign(variable_lower_bounds, + variable_lower_bounds + num_variables); + cpu_data.variable_upper_bounds.assign(variable_upper_bounds, + variable_upper_bounds + num_variables); + cpu_data.variable_types.assign(variable_types, variable_types + num_variables); + + // Create view pointing to CPU data + problem_and_stream->create_view_from_cpu_data(); + } else { + // Local: store in GPU memory + problem_and_stream->gpu_problem = + std::make_unique>( + problem_and_stream->get_handle_ptr()); + auto& gpu_problem = *problem_and_stream->gpu_problem; + + gpu_problem.set_maximize(objective_sense == CUOPT_MAXIMIZE); + gpu_problem.set_objective_offset(objective_offset); + gpu_problem.set_objective_coefficients(objective_coefficients, num_variables); + gpu_problem.set_csr_constraint_matrix(constraint_matrix_coefficent_values, + nnz, + constraint_matrix_column_indices, + nnz, + constraint_matrix_row_offsets, + num_constraints + 1); + gpu_problem.set_constraint_lower_bounds(constraint_lower_bounds, num_constraints); + gpu_problem.set_constraint_upper_bounds(constraint_upper_bounds, num_constraints); + gpu_problem.set_variable_lower_bounds(variable_lower_bounds, num_variables); + gpu_problem.set_variable_upper_bounds(variable_upper_bounds, num_variables); + + std::vector variable_types_host(num_variables); + for (cuopt_int_t j = 0; j < num_variables; j++) { + variable_types_host[j] = + variable_types[j] == CUOPT_CONTINUOUS ? 
var_t::CONTINUOUS : var_t::INTEGER; + } + gpu_problem.set_variable_types(variable_types_host.data(), num_variables); + + // Create view pointing to GPU data + problem_and_stream->create_view_from_gpu_problem(); } - problem_and_stream->op_problem->set_variable_types(variable_types_host.data(), num_variables); + *problem_ptr = static_cast(problem_and_stream); - } catch (const raft::exception& e) { + } catch (const std::exception& e) { + delete problem_and_stream; return CUOPT_INVALID_ARGUMENT; } return CUOPT_SUCCESS; @@ -239,34 +611,83 @@ cuopt_int_t cuOptCreateQuadraticProblem( } problem_and_stream_view_t* problem_and_stream = new problem_and_stream_view_t(); - problem_and_stream->op_problem = - new optimization_problem_t(problem_and_stream->get_handle_ptr()); + bool is_remote = is_remote_solve_enabled(); + try { - problem_and_stream->op_problem->set_maximize(objective_sense == CUOPT_MAXIMIZE); - problem_and_stream->op_problem->set_objective_offset(objective_offset); - problem_and_stream->op_problem->set_objective_coefficients(objective_coefficients, - num_variables); cuopt_int_t Q_nnz = quadratic_objective_matrix_row_offsets[num_variables]; - problem_and_stream->op_problem->set_quadratic_objective_matrix( - quadratic_objective_matrix_coefficent_values, - Q_nnz, - quadratic_objective_matrix_column_indices, - Q_nnz, - quadratic_objective_matrix_row_offsets, - num_variables + 1); - cuopt_int_t nnz = constraint_matrix_row_offsets[num_constraints]; - problem_and_stream->op_problem->set_csr_constraint_matrix(constraint_matrix_coefficent_values, - nnz, - constraint_matrix_column_indices, - nnz, - constraint_matrix_row_offsets, - num_constraints + 1); - problem_and_stream->op_problem->set_row_types(constraint_sense, num_constraints); - problem_and_stream->op_problem->set_constraint_bounds(rhs, num_constraints); - problem_and_stream->op_problem->set_variable_lower_bounds(lower_bounds, num_variables); - problem_and_stream->op_problem->set_variable_upper_bounds(upper_bounds, num_variables); + cuopt_int_t nnz = constraint_matrix_row_offsets[num_constraints]; + + if (is_remote) { + // Remote: store in CPU memory + problem_and_stream->cpu_data = std::make_unique(); + auto& cpu_data = *problem_and_stream->cpu_data; + + cpu_data.num_constraints = num_constraints; + cpu_data.num_variables = num_variables; + cpu_data.maximize = (objective_sense == CUOPT_MAXIMIZE); + cpu_data.objective_offset = objective_offset; + + cpu_data.objective_coefficients.assign(objective_coefficients, + objective_coefficients + num_variables); + + cpu_data.Q_values.assign(quadratic_objective_matrix_coefficent_values, + quadratic_objective_matrix_coefficent_values + Q_nnz); + cpu_data.Q_indices.assign(quadratic_objective_matrix_column_indices, + quadratic_objective_matrix_column_indices + Q_nnz); + cpu_data.Q_offsets.assign(quadratic_objective_matrix_row_offsets, + quadratic_objective_matrix_row_offsets + num_variables + 1); + + cpu_data.A_values.assign(constraint_matrix_coefficent_values, + constraint_matrix_coefficent_values + nnz); + cpu_data.A_indices.assign(constraint_matrix_column_indices, + constraint_matrix_column_indices + nnz); + cpu_data.A_offsets.assign(constraint_matrix_row_offsets, + constraint_matrix_row_offsets + num_constraints + 1); + + cpu_data.uses_ranged_constraints = false; + cpu_data.row_types.assign(constraint_sense, constraint_sense + num_constraints); + cpu_data.constraint_bounds.assign(rhs, rhs + num_constraints); + + cpu_data.variable_lower_bounds.assign(lower_bounds, lower_bounds + num_variables); + 
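As with the constraint matrix, the quadratic objective is CSR-encoded, which is why `Q_nnz` is read from the last row offset. A concrete layout for a dense 2x2 `Q`:

```cpp
#include <vector>

// CSR layout of the dense matrix Q = [[2, 1], [1, 2]] for two variables.
// Offsets hold num_variables + 1 entries, so Q_nnz = offsets[num_variables].
void csr_quadratic_layout()
{
  std::vector<double> Q_values  = {2.0, 1.0, 1.0, 2.0};  // row-major nonzeros
  std::vector<int>    Q_indices = {0, 1, 0, 1};          // column of each nonzero
  std::vector<int>    Q_offsets = {0, 2, 4};             // row i is [offsets[i], offsets[i+1])
  int Q_nnz = Q_offsets.back();                          // == 4
  (void)Q_nnz;
}
```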
cpu_data.variable_upper_bounds.assign(upper_bounds, upper_bounds + num_variables); + cpu_data.variable_types.assign(num_variables, CUOPT_CONTINUOUS); + + // Create view pointing to CPU data + problem_and_stream->create_view_from_cpu_data(); + } else { + // Local: store in GPU memory + problem_and_stream->gpu_problem = + std::make_unique>( + problem_and_stream->get_handle_ptr()); + auto& gpu_problem = *problem_and_stream->gpu_problem; + + gpu_problem.set_maximize(objective_sense == CUOPT_MAXIMIZE); + gpu_problem.set_objective_offset(objective_offset); + gpu_problem.set_objective_coefficients(objective_coefficients, num_variables); + gpu_problem.set_quadratic_objective_matrix(quadratic_objective_matrix_coefficent_values, + Q_nnz, + quadratic_objective_matrix_column_indices, + Q_nnz, + quadratic_objective_matrix_row_offsets, + num_variables + 1); + gpu_problem.set_csr_constraint_matrix(constraint_matrix_coefficent_values, + nnz, + constraint_matrix_column_indices, + nnz, + constraint_matrix_row_offsets, + num_constraints + 1); + gpu_problem.set_row_types(constraint_sense, num_constraints); + gpu_problem.set_constraint_bounds(rhs, num_constraints); + gpu_problem.set_variable_lower_bounds(lower_bounds, num_variables); + gpu_problem.set_variable_upper_bounds(upper_bounds, num_variables); + + // Create view pointing to GPU data + problem_and_stream->create_view_from_gpu_problem(); + } + *problem_ptr = static_cast(problem_and_stream); - } catch (const raft::exception& e) { + } catch (const std::exception& e) { + delete problem_and_stream; return CUOPT_INVALID_ARGUMENT; } return CUOPT_SUCCESS; @@ -304,36 +725,87 @@ cuopt_int_t cuOptCreateQuadraticRangedProblem( } problem_and_stream_view_t* problem_and_stream = new problem_and_stream_view_t(); - problem_and_stream->op_problem = - new optimization_problem_t(problem_and_stream->get_handle_ptr()); + bool is_remote = is_remote_solve_enabled(); + try { - problem_and_stream->op_problem->set_maximize(objective_sense == CUOPT_MAXIMIZE); - problem_and_stream->op_problem->set_objective_offset(objective_offset); - problem_and_stream->op_problem->set_objective_coefficients(objective_coefficients, - num_variables); cuopt_int_t Q_nnz = quadratic_objective_matrix_row_offsets[num_variables]; - problem_and_stream->op_problem->set_quadratic_objective_matrix( - quadratic_objective_matrix_coefficent_values, - Q_nnz, - quadratic_objective_matrix_column_indices, - Q_nnz, - quadratic_objective_matrix_row_offsets, - num_variables + 1); - cuopt_int_t nnz = constraint_matrix_row_offsets[num_constraints]; - problem_and_stream->op_problem->set_csr_constraint_matrix(constraint_matrix_coefficent_values, - nnz, - constraint_matrix_column_indices, - nnz, - constraint_matrix_row_offsets, - num_constraints + 1); - problem_and_stream->op_problem->set_constraint_lower_bounds(constraint_lower_bounds, - num_constraints); - problem_and_stream->op_problem->set_constraint_upper_bounds(constraint_upper_bounds, - num_constraints); - problem_and_stream->op_problem->set_variable_lower_bounds(variable_lower_bounds, num_variables); - problem_and_stream->op_problem->set_variable_upper_bounds(variable_upper_bounds, num_variables); + cuopt_int_t nnz = constraint_matrix_row_offsets[num_constraints]; + + if (is_remote) { + // Remote: store in CPU memory + problem_and_stream->cpu_data = std::make_unique(); + auto& cpu_data = *problem_and_stream->cpu_data; + + cpu_data.num_constraints = num_constraints; + cpu_data.num_variables = num_variables; + cpu_data.maximize = (objective_sense == CUOPT_MAXIMIZE); 
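All of the `cuOptCreate*`/`cuOptRead*` entry points share the same C-style contract: a status code is returned, the problem handle comes back through an out-pointer, and the half-built wrapper is deleted if population throws. From the caller's side that reduces to a sketch like this (the file name is a placeholder):

```cpp
#include <cstdio>

// Caller-side contract shared by the creation functions.
void create_then_destroy()
{
  cuOptOptimizationProblem problem = nullptr;
  cuopt_int_t status = cuOptReadProblem("model.mps", &problem);
  if (status != CUOPT_SUCCESS) {
    std::printf("create failed: %d\n", static_cast<int>(status));
    return;  // `problem` was never set; nothing to free
  }
  // ... configure settings, call cuOptSolve, inspect the solution ...
  cuOptDestroyProblem(&problem);  // also resets `problem` to nullptr
}
```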
+ cpu_data.objective_offset = objective_offset; + + cpu_data.objective_coefficients.assign(objective_coefficients, + objective_coefficients + num_variables); + + cpu_data.Q_values.assign(quadratic_objective_matrix_coefficent_values, + quadratic_objective_matrix_coefficent_values + Q_nnz); + cpu_data.Q_indices.assign(quadratic_objective_matrix_column_indices, + quadratic_objective_matrix_column_indices + Q_nnz); + cpu_data.Q_offsets.assign(quadratic_objective_matrix_row_offsets, + quadratic_objective_matrix_row_offsets + num_variables + 1); + + cpu_data.A_values.assign(constraint_matrix_coefficent_values, + constraint_matrix_coefficent_values + nnz); + cpu_data.A_indices.assign(constraint_matrix_column_indices, + constraint_matrix_column_indices + nnz); + cpu_data.A_offsets.assign(constraint_matrix_row_offsets, + constraint_matrix_row_offsets + num_constraints + 1); + + cpu_data.uses_ranged_constraints = true; + cpu_data.constraint_lower_bounds.assign(constraint_lower_bounds, + constraint_lower_bounds + num_constraints); + cpu_data.constraint_upper_bounds.assign(constraint_upper_bounds, + constraint_upper_bounds + num_constraints); + + cpu_data.variable_lower_bounds.assign(variable_lower_bounds, + variable_lower_bounds + num_variables); + cpu_data.variable_upper_bounds.assign(variable_upper_bounds, + variable_upper_bounds + num_variables); + cpu_data.variable_types.assign(num_variables, CUOPT_CONTINUOUS); + + // Create view pointing to CPU data + problem_and_stream->create_view_from_cpu_data(); + } else { + // Local: store in GPU memory + problem_and_stream->gpu_problem = + std::make_unique>( + problem_and_stream->get_handle_ptr()); + auto& gpu_problem = *problem_and_stream->gpu_problem; + + gpu_problem.set_maximize(objective_sense == CUOPT_MAXIMIZE); + gpu_problem.set_objective_offset(objective_offset); + gpu_problem.set_objective_coefficients(objective_coefficients, num_variables); + gpu_problem.set_quadratic_objective_matrix(quadratic_objective_matrix_coefficent_values, + Q_nnz, + quadratic_objective_matrix_column_indices, + Q_nnz, + quadratic_objective_matrix_row_offsets, + num_variables + 1); + gpu_problem.set_csr_constraint_matrix(constraint_matrix_coefficent_values, + nnz, + constraint_matrix_column_indices, + nnz, + constraint_matrix_row_offsets, + num_constraints + 1); + gpu_problem.set_constraint_lower_bounds(constraint_lower_bounds, num_constraints); + gpu_problem.set_constraint_upper_bounds(constraint_upper_bounds, num_constraints); + gpu_problem.set_variable_lower_bounds(variable_lower_bounds, num_variables); + gpu_problem.set_variable_upper_bounds(variable_upper_bounds, num_variables); + + // Create view pointing to GPU data + problem_and_stream->create_view_from_gpu_problem(); + } + *problem_ptr = static_cast(problem_and_stream); - } catch (const raft::exception& e) { + } catch (const std::exception& e) { + delete problem_and_stream; return CUOPT_INVALID_ARGUMENT; } return CUOPT_SUCCESS; @@ -343,7 +815,9 @@ void cuOptDestroyProblem(cuOptOptimizationProblem* problem_ptr) { if (problem_ptr == nullptr) { return; } if (*problem_ptr == nullptr) { return; } - delete static_cast(*problem_ptr); + problem_and_stream_view_t* problem_and_stream = + static_cast(*problem_ptr); + delete problem_and_stream; *problem_ptr = nullptr; } @@ -354,7 +828,11 @@ cuopt_int_t cuOptGetNumConstraints(cuOptOptimizationProblem problem, if (num_constraints_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - *num_constraints_ptr = 
problem_and_stream_view->op_problem->get_n_constraints(); + if (!problem_and_stream_view->view.is_device_memory()) { + *num_constraints_ptr = problem_and_stream_view->cpu_data->num_constraints; + } else { + *num_constraints_ptr = problem_and_stream_view->gpu_problem->get_n_constraints(); + } return CUOPT_SUCCESS; } @@ -364,7 +842,11 @@ cuopt_int_t cuOptGetNumVariables(cuOptOptimizationProblem problem, cuopt_int_t* if (num_variables_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - *num_variables_ptr = problem_and_stream_view->op_problem->get_n_variables(); + if (!problem_and_stream_view->view.is_device_memory()) { + *num_variables_ptr = problem_and_stream_view->cpu_data->num_variables; + } else { + *num_variables_ptr = problem_and_stream_view->gpu_problem->get_n_variables(); + } return CUOPT_SUCCESS; } @@ -375,8 +857,13 @@ cuopt_int_t cuOptGetObjectiveSense(cuOptOptimizationProblem problem, if (objective_sense_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - *objective_sense_ptr = - problem_and_stream_view->op_problem->get_sense() ? CUOPT_MAXIMIZE : CUOPT_MINIMIZE; + if (!problem_and_stream_view->view.is_device_memory()) { + *objective_sense_ptr = + problem_and_stream_view->cpu_data->maximize ? CUOPT_MAXIMIZE : CUOPT_MINIMIZE; + } else { + *objective_sense_ptr = + problem_and_stream_view->gpu_problem->get_sense() ? CUOPT_MAXIMIZE : CUOPT_MINIMIZE; + } return CUOPT_SUCCESS; } @@ -387,7 +874,11 @@ cuopt_int_t cuOptGetObjectiveOffset(cuOptOptimizationProblem problem, if (objective_offset_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - *objective_offset_ptr = problem_and_stream_view->op_problem->get_objective_offset(); + if (!problem_and_stream_view->view.is_device_memory()) { + *objective_offset_ptr = problem_and_stream_view->cpu_data->objective_offset; + } else { + *objective_offset_ptr = problem_and_stream_view->gpu_problem->get_objective_offset(); + } return CUOPT_SUCCESS; } @@ -398,13 +889,17 @@ cuopt_int_t cuOptGetObjectiveCoefficients(cuOptOptimizationProblem problem, if (objective_coefficients_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& objective_coefficients = - problem_and_stream_view->op_problem->get_objective_coefficients(); - raft::copy(objective_coefficients_ptr, - objective_coefficients.data(), - objective_coefficients.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + if (!problem_and_stream_view->view.is_device_memory()) { + const auto& coeffs = problem_and_stream_view->cpu_data->objective_coefficients; + std::copy(coeffs.begin(), coeffs.end(), objective_coefficients_ptr); + } else { + const auto& gpu_problem = *problem_and_stream_view->gpu_problem; + raft::copy(objective_coefficients_ptr, + gpu_problem.get_objective_coefficients().data(), + gpu_problem.get_n_variables(), + gpu_problem.get_handle_ptr()->get_stream()); + gpu_problem.get_handle_ptr()->sync_stream(); + } return CUOPT_SUCCESS; } @@ -415,7 +910,13 @@ cuopt_int_t cuOptGetNumNonZeros(cuOptOptimizationProblem problem, if (num_non_zero_elements_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - *num_non_zero_elements_ptr = 
problem_and_stream_view->op_problem->get_nnz(); + if (!problem_and_stream_view->view.is_device_memory()) { + *num_non_zero_elements_ptr = + static_cast(problem_and_stream_view->cpu_data->A_values.size()); + } else { + *num_non_zero_elements_ptr = static_cast( + problem_and_stream_view->gpu_problem->get_constraint_matrix_values().size()); + } return CUOPT_SUCCESS; } @@ -430,25 +931,32 @@ cuopt_int_t cuOptGetConstraintMatrix(cuOptOptimizationProblem problem, if (constraint_matrix_coefficients_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& constraint_matrix_coefficients = - problem_and_stream_view->op_problem->get_constraint_matrix_values(); - const rmm::device_uvector& constraint_matrix_column_indices = - problem_and_stream_view->op_problem->get_constraint_matrix_indices(); - const rmm::device_uvector& constraint_matrix_row_offsets = - problem_and_stream_view->op_problem->get_constraint_matrix_offsets(); - raft::copy(constraint_matrix_coefficients_ptr, - constraint_matrix_coefficients.data(), - constraint_matrix_coefficients.size(), - problem_and_stream_view->stream_view); - raft::copy(constraint_matrix_column_indices_ptr, - constraint_matrix_column_indices.data(), - constraint_matrix_column_indices.size(), - problem_and_stream_view->stream_view); - raft::copy(constraint_matrix_row_offsets_ptr, - constraint_matrix_row_offsets.data(), - constraint_matrix_row_offsets.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + + if (!problem_and_stream_view->view.is_device_memory()) { + const auto& cpu_data = *problem_and_stream_view->cpu_data; + std::copy( + cpu_data.A_values.begin(), cpu_data.A_values.end(), constraint_matrix_coefficients_ptr); + std::copy( + cpu_data.A_indices.begin(), cpu_data.A_indices.end(), constraint_matrix_column_indices_ptr); + std::copy( + cpu_data.A_offsets.begin(), cpu_data.A_offsets.end(), constraint_matrix_row_offsets_ptr); + } else { + const auto& gpu_problem = *problem_and_stream_view->gpu_problem; + auto stream = gpu_problem.get_handle_ptr()->get_stream(); + raft::copy(constraint_matrix_coefficients_ptr, + gpu_problem.get_constraint_matrix_values().data(), + gpu_problem.get_constraint_matrix_values().size(), + stream); + raft::copy(constraint_matrix_column_indices_ptr, + gpu_problem.get_constraint_matrix_indices().data(), + gpu_problem.get_constraint_matrix_indices().size(), + stream); + raft::copy(constraint_matrix_row_offsets_ptr, + gpu_problem.get_constraint_matrix_offsets().data(), + gpu_problem.get_constraint_matrix_offsets().size(), + stream); + gpu_problem.get_handle_ptr()->sync_stream(); + } return CUOPT_SUCCESS; } @@ -458,13 +966,18 @@ cuopt_int_t cuOptGetConstraintSense(cuOptOptimizationProblem problem, char* cons if (constraint_sense_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& constraint_sense = - problem_and_stream_view->op_problem->get_row_types(); - raft::copy(constraint_sense_ptr, - constraint_sense.data(), - constraint_sense.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + + if (!problem_and_stream_view->view.is_device_memory()) { + const auto& row_types = problem_and_stream_view->cpu_data->row_types; + std::copy(row_types.begin(), row_types.end(), constraint_sense_ptr); + } else { + const auto& gpu_problem = 
*problem_and_stream_view->gpu_problem; + raft::copy(constraint_sense_ptr, + gpu_problem.get_row_types().data(), + gpu_problem.get_row_types().size(), + gpu_problem.get_handle_ptr()->get_stream()); + gpu_problem.get_handle_ptr()->sync_stream(); + } return CUOPT_SUCCESS; } @@ -475,10 +988,18 @@ cuopt_int_t cuOptGetConstraintRightHandSide(cuOptOptimizationProblem problem, if (rhs_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& rhs = - problem_and_stream_view->op_problem->get_constraint_bounds(); - raft::copy(rhs_ptr, rhs.data(), rhs.size(), problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + + if (!problem_and_stream_view->view.is_device_memory()) { + const auto& bounds = problem_and_stream_view->cpu_data->constraint_bounds; + std::copy(bounds.begin(), bounds.end(), rhs_ptr); + } else { + const auto& gpu_problem = *problem_and_stream_view->gpu_problem; + raft::copy(rhs_ptr, + gpu_problem.get_constraint_bounds().data(), + gpu_problem.get_constraint_bounds().size(), + gpu_problem.get_handle_ptr()->get_stream()); + gpu_problem.get_handle_ptr()->sync_stream(); + } return CUOPT_SUCCESS; } @@ -489,13 +1010,18 @@ cuopt_int_t cuOptGetConstraintLowerBounds(cuOptOptimizationProblem problem, if (lower_bounds_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& lower_bounds = - problem_and_stream_view->op_problem->get_constraint_lower_bounds(); - raft::copy(lower_bounds_ptr, - lower_bounds.data(), - lower_bounds.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + + if (!problem_and_stream_view->view.is_device_memory()) { + const auto& bounds = problem_and_stream_view->cpu_data->constraint_lower_bounds; + std::copy(bounds.begin(), bounds.end(), lower_bounds_ptr); + } else { + const auto& gpu_problem = *problem_and_stream_view->gpu_problem; + raft::copy(lower_bounds_ptr, + gpu_problem.get_constraint_lower_bounds().data(), + gpu_problem.get_constraint_lower_bounds().size(), + gpu_problem.get_handle_ptr()->get_stream()); + gpu_problem.get_handle_ptr()->sync_stream(); + } return CUOPT_SUCCESS; } @@ -506,13 +1032,18 @@ cuopt_int_t cuOptGetConstraintUpperBounds(cuOptOptimizationProblem problem, if (upper_bounds_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& upper_bounds = - problem_and_stream_view->op_problem->get_constraint_upper_bounds(); - raft::copy(upper_bounds_ptr, - upper_bounds.data(), - upper_bounds.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + + if (!problem_and_stream_view->view.is_device_memory()) { + const auto& bounds = problem_and_stream_view->cpu_data->constraint_upper_bounds; + std::copy(bounds.begin(), bounds.end(), upper_bounds_ptr); + } else { + const auto& gpu_problem = *problem_and_stream_view->gpu_problem; + raft::copy(upper_bounds_ptr, + gpu_problem.get_constraint_upper_bounds().data(), + gpu_problem.get_constraint_upper_bounds().size(), + gpu_problem.get_handle_ptr()->get_stream()); + gpu_problem.get_handle_ptr()->sync_stream(); + } return CUOPT_SUCCESS; } @@ -523,13 +1054,18 @@ cuopt_int_t cuOptGetVariableLowerBounds(cuOptOptimizationProblem problem, if (lower_bounds_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } 
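Every getter in this family now dispatches on `view.is_device_memory()`: `std::copy` for host-resident problems, `raft::copy` plus a stream sync for device-resident ones. Callers see no difference either way; they size the output buffer from the dimension getters:

```cpp
#include <vector>

// Getters fill caller-allocated buffers whether the problem lives on the
// host (remote path) or on the GPU (local path).
void read_rhs(cuOptOptimizationProblem problem)
{
  cuopt_int_t m = 0;
  if (cuOptGetNumConstraints(problem, &m) != CUOPT_SUCCESS) { return; }

  std::vector<cuopt_float_t> rhs(m);
  if (cuOptGetConstraintRightHandSide(problem, rhs.data()) == CUOPT_SUCCESS) {
    // rhs[i] is the right-hand side of constraint i, copied from host or device.
  }
}
```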
problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& lower_bounds = - problem_and_stream_view->op_problem->get_variable_lower_bounds(); - raft::copy(lower_bounds_ptr, - lower_bounds.data(), - lower_bounds.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + + if (!problem_and_stream_view->view.is_device_memory()) { + const auto& bounds = problem_and_stream_view->cpu_data->variable_lower_bounds; + std::copy(bounds.begin(), bounds.end(), lower_bounds_ptr); + } else { + const auto& gpu_problem = *problem_and_stream_view->gpu_problem; + raft::copy(lower_bounds_ptr, + gpu_problem.get_variable_lower_bounds().data(), + gpu_problem.get_variable_lower_bounds().size(), + gpu_problem.get_handle_ptr()->get_stream()); + gpu_problem.get_handle_ptr()->sync_stream(); + } return CUOPT_SUCCESS; } @@ -540,13 +1076,18 @@ cuopt_int_t cuOptGetVariableUpperBounds(cuOptOptimizationProblem problem, if (upper_bounds_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& upper_bounds = - problem_and_stream_view->op_problem->get_variable_upper_bounds(); - raft::copy(upper_bounds_ptr, - upper_bounds.data(), - upper_bounds.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); + + if (!problem_and_stream_view->view.is_device_memory()) { + const auto& bounds = problem_and_stream_view->cpu_data->variable_upper_bounds; + std::copy(bounds.begin(), bounds.end(), upper_bounds_ptr); + } else { + const auto& gpu_problem = *problem_and_stream_view->gpu_problem; + raft::copy(upper_bounds_ptr, + gpu_problem.get_variable_upper_bounds().data(), + gpu_problem.get_variable_upper_bounds().size(), + gpu_problem.get_handle_ptr()->get_stream()); + gpu_problem.get_handle_ptr()->sync_stream(); + } return CUOPT_SUCCESS; } @@ -556,17 +1097,24 @@ cuopt_int_t cuOptGetVariableTypes(cuOptOptimizationProblem problem, char* variab if (variable_types_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - const rmm::device_uvector& variable_types = - problem_and_stream_view->op_problem->get_variable_types(); - std::vector variable_types_host(variable_types.size()); - raft::copy(variable_types_host.data(), - variable_types.data(), - variable_types.size(), - problem_and_stream_view->stream_view); - problem_and_stream_view->stream_view.synchronize(); - for (size_t j = 0; j < variable_types_host.size(); j++) { - variable_types_ptr[j] = - variable_types_host[j] == var_t::INTEGER ? CUOPT_INTEGER : CUOPT_CONTINUOUS; + + if (!problem_and_stream_view->view.is_device_memory()) { + const auto& var_types = problem_and_stream_view->cpu_data->variable_types; + std::copy(var_types.begin(), var_types.end(), variable_types_ptr); + } else { + const auto& gpu_problem = *problem_and_stream_view->gpu_problem; + auto num_vars = gpu_problem.get_n_variables(); + std::vector gpu_var_types(num_vars); + raft::copy(gpu_var_types.data(), + gpu_problem.get_variable_types().data(), + num_vars, + gpu_problem.get_handle_ptr()->get_stream()); + gpu_problem.get_handle_ptr()->sync_stream(); + // Convert from var_t enum to char + for (cuopt_int_t i = 0; i < num_vars; ++i) { + variable_types_ptr[i] = + (gpu_var_types[i] == var_t::CONTINUOUS) ? 
CUOPT_CONTINUOUS : CUOPT_INTEGER; + } } return CUOPT_SUCCESS; } @@ -712,10 +1260,7 @@ cuopt_int_t cuOptIsMIP(cuOptOptimizationProblem problem, cuopt_int_t* is_mip_ptr if (is_mip_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - bool is_mip = - (problem_and_stream_view->op_problem->get_problem_category() == problem_category_t::MIP) || - (problem_and_stream_view->op_problem->get_problem_category() == problem_category_t::IP); - *is_mip_ptr = static_cast(is_mip); + *is_mip_ptr = static_cast(problem_and_stream_view->is_mip()); return CUOPT_SUCCESS; } @@ -728,44 +1273,97 @@ cuopt_int_t cuOptSolve(cuOptOptimizationProblem problem, if (problem == nullptr) { return CUOPT_INVALID_ARGUMENT; } if (settings == nullptr) { return CUOPT_INVALID_ARGUMENT; } if (solution_ptr == nullptr) { return CUOPT_INVALID_ARGUMENT; } + problem_and_stream_view_t* problem_and_stream_view = static_cast(problem); - if (problem_and_stream_view->op_problem->get_problem_category() == problem_category_t::MIP || - problem_and_stream_view->op_problem->get_problem_category() == problem_category_t::IP) { - solver_settings_t* solver_settings = - static_cast*>(settings); - mip_solver_settings_t& mip_settings = - solver_settings->get_mip_settings(); - optimization_problem_t* op_problem = - problem_and_stream_view->op_problem; - solution_and_stream_view_t* solution_and_stream_view = - new solution_and_stream_view_t(true, problem_and_stream_view->stream_view); - solution_and_stream_view->mip_solution_ptr = new mip_solution_t( - solve_mip(*op_problem, mip_settings)); - *solution_ptr = static_cast(solution_and_stream_view); - - cuopt::utilities::printTimestamp("CUOPT_SOLVE_RETURN"); - - return static_cast( - solution_and_stream_view->mip_solution_ptr->get_error_status().get_error_type()); + solver_settings_t* solver_settings = + static_cast*>(settings); + + bool is_mip = problem_and_stream_view->is_mip(); + + // Use the view - solve_lp/solve_mip will check is_device_memory() to determine path + const auto& view = problem_and_stream_view->view; + + if (view.is_device_memory()) { + // Local path: data is already on GPU + // Use gpu_problem directly for optimal performance (no extra copy) + auto& gpu_problem = *problem_and_stream_view->gpu_problem; + + if (is_mip) { + mip_solver_settings_t& mip_settings = + solver_settings->get_mip_settings(); + + solution_and_stream_view_t* solution_and_stream_view = + new solution_and_stream_view_t(true, problem_and_stream_view->handle.get()); + + solution_and_stream_view->mip_solution_ptr = new mip_solution_t( + solve_mip(gpu_problem, mip_settings)); + + *solution_ptr = static_cast(solution_and_stream_view); + + cuopt::utilities::printTimestamp("CUOPT_SOLVE_RETURN"); + + return static_cast( + solution_and_stream_view->mip_solution_ptr->get_error_status().get_error_type()); + } else { + pdlp_solver_settings_t& pdlp_settings = + solver_settings->get_pdlp_settings(); + + solution_and_stream_view_t* solution_and_stream_view = + new solution_and_stream_view_t(false, problem_and_stream_view->handle.get()); + + solution_and_stream_view->lp_solution_ptr = + new optimization_problem_solution_t( + solve_lp(gpu_problem, pdlp_settings)); + + *solution_ptr = static_cast(solution_and_stream_view); + + cuopt::utilities::printTimestamp("CUOPT_SOLVE_RETURN"); + + return static_cast( + solution_and_stream_view->lp_solution_ptr->get_error_status().get_error_type()); + } } else { - solver_settings_t* solver_settings = - static_cast*>(settings); - 
pdlp_solver_settings_t& pdlp_settings = - solver_settings->get_pdlp_settings(); - optimization_problem_t* op_problem = - problem_and_stream_view->op_problem; - solution_and_stream_view_t* solution_and_stream_view = - new solution_and_stream_view_t(false, problem_and_stream_view->stream_view); - solution_and_stream_view->lp_solution_ptr = - new optimization_problem_solution_t( - solve_lp(*op_problem, pdlp_settings)); - *solution_ptr = static_cast(solution_and_stream_view); - - cuopt::utilities::printTimestamp("CUOPT_SOLVE_RETURN"); - - return static_cast( - solution_and_stream_view->lp_solution_ptr->get_error_status().get_error_type()); + // CPU path: use view directly - solve_lp/solve_mip handle remote vs local conversion + // For remote solve, handle may be nullptr (no CUDA) + // For local solve with CPU data, handle will be created lazily + raft::handle_t* handle_ptr = + is_remote_solve_enabled() ? nullptr : problem_and_stream_view->get_handle_ptr(); + + if (is_mip) { + mip_solver_settings_t& mip_settings = + solver_settings->get_mip_settings(); + + solution_and_stream_view_t* solution_and_stream_view = + new solution_and_stream_view_t(true, handle_ptr); + + solution_and_stream_view->mip_solution_ptr = new mip_solution_t( + solve_mip(handle_ptr, view, mip_settings)); + + *solution_ptr = static_cast(solution_and_stream_view); + + cuopt::utilities::printTimestamp("CUOPT_SOLVE_RETURN"); + + return static_cast( + solution_and_stream_view->mip_solution_ptr->get_error_status().get_error_type()); + } else { + pdlp_solver_settings_t& pdlp_settings = + solver_settings->get_pdlp_settings(); + + solution_and_stream_view_t* solution_and_stream_view = + new solution_and_stream_view_t(false, handle_ptr); + + solution_and_stream_view->lp_solution_ptr = + new optimization_problem_solution_t( + solve_lp(handle_ptr, view, pdlp_settings)); + + *solution_ptr = static_cast(solution_and_stream_view); + + cuopt::utilities::printTimestamp("CUOPT_SOLVE_RETURN"); + + return static_cast( + solution_and_stream_view->lp_solution_ptr->get_error_status().get_error_type()); + } } } @@ -856,24 +1454,34 @@ cuopt_int_t cuOptGetPrimalSolution(cuOptSolution solution, cuopt_float_t* soluti mip_solution_t* mip_solution = static_cast*>( solution_and_stream_view->mip_solution_ptr); - const rmm::device_uvector& solution_values = mip_solution->get_solution(); - rmm::cuda_stream_view stream_view{}; - raft::copy(solution_values_ptr, - solution_values.data(), - solution_values.size(), - solution_and_stream_view->stream_view); - solution_and_stream_view->stream_view.synchronize(); + if (mip_solution->is_device_memory()) { + const rmm::device_uvector& solution_values = mip_solution->get_solution(); + raft::copy(solution_values_ptr, + solution_values.data(), + solution_values.size(), + solution_and_stream_view->stream_view.value()); + solution_and_stream_view->stream_view->synchronize(); + } else { + const std::vector& solution_values = mip_solution->get_solution_host(); + std::copy(solution_values.begin(), solution_values.end(), solution_values_ptr); + } } else { optimization_problem_solution_t* optimization_problem_solution = static_cast*>( solution_and_stream_view->lp_solution_ptr); - const rmm::device_uvector& solution_values = - optimization_problem_solution->get_primal_solution(); - raft::copy(solution_values_ptr, - solution_values.data(), - solution_values.size(), - solution_and_stream_view->stream_view); - solution_and_stream_view->stream_view.synchronize(); + if (optimization_problem_solution->is_device_memory()) { + const 
rmm::device_uvector& solution_values = + optimization_problem_solution->get_primal_solution(); + raft::copy(solution_values_ptr, + solution_values.data(), + solution_values.size(), + solution_and_stream_view->stream_view.value()); + solution_and_stream_view->stream_view->synchronize(); + } else { + const std::vector& solution_values = + optimization_problem_solution->get_primal_solution_host(); + std::copy(solution_values.begin(), solution_values.end(), solution_values_ptr); + } } return CUOPT_SUCCESS; } @@ -964,13 +1572,19 @@ cuopt_int_t cuOptGetDualSolution(cuOptSolution solution, cuopt_float_t* dual_sol optimization_problem_solution_t* optimization_problem_solution = static_cast*>( solution_and_stream_view->lp_solution_ptr); - const rmm::device_uvector& dual_solution = - optimization_problem_solution->get_dual_solution(); - raft::copy(dual_solution_ptr, - dual_solution.data(), - dual_solution.size(), - solution_and_stream_view->stream_view); - solution_and_stream_view->stream_view.synchronize(); + if (optimization_problem_solution->is_device_memory()) { + const rmm::device_uvector& dual_solution = + optimization_problem_solution->get_dual_solution(); + raft::copy(dual_solution_ptr, + dual_solution.data(), + dual_solution.size(), + solution_and_stream_view->stream_view.value()); + solution_and_stream_view->stream_view->synchronize(); + } else { + const std::vector& dual_solution = + optimization_problem_solution->get_dual_solution_host(); + std::copy(dual_solution.begin(), dual_solution.end(), dual_solution_ptr); + } return CUOPT_SUCCESS; } } @@ -1005,13 +1619,19 @@ cuopt_int_t cuOptGetReducedCosts(cuOptSolution solution, cuopt_float_t* reduced_ optimization_problem_solution_t* optimization_problem_solution = static_cast*>( solution_and_stream_view->lp_solution_ptr); - const rmm::device_uvector& reduced_cost = - optimization_problem_solution->get_reduced_cost(); - raft::copy(reduced_cost_ptr, - reduced_cost.data(), - reduced_cost.size(), - solution_and_stream_view->stream_view); - solution_and_stream_view->stream_view.synchronize(); + if (optimization_problem_solution->is_device_memory()) { + const rmm::device_uvector& reduced_cost = + optimization_problem_solution->get_reduced_cost(); + raft::copy(reduced_cost_ptr, + reduced_cost.data(), + reduced_cost.size(), + solution_and_stream_view->stream_view.value()); + solution_and_stream_view->stream_view->synchronize(); + } else { + const std::vector& reduced_cost = + optimization_problem_solution->get_reduced_cost_host(); + std::copy(reduced_cost.begin(), reduced_cost.end(), reduced_cost_ptr); + } return CUOPT_SUCCESS; } } diff --git a/cpp/src/linear_programming/solve.cu b/cpp/src/linear_programming/solve.cu index 829008651..b3d0e73d0 100644 --- a/cpp/src/linear_programming/solve.cu +++ b/cpp/src/linear_programming/solve.cu @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -40,7 +41,8 @@ #include #include -#include // For std::thread +#include // For std::memcpy +#include // For std::thread #define CUOPT_LOG_CONDITIONAL_INFO(condition, ...) 
\ if ((condition)) { CUOPT_LOG_INFO(__VA_ARGS__); } @@ -1057,6 +1059,100 @@ cuopt::linear_programming::optimization_problem_t mps_data_model_to_op return op_problem; } +// Helper to create a data_model_view_t from mps_data_model_t (for remote solve path) +template +static data_model_view_t create_view_from_mps_data_model( + const cuopt::mps_parser::mps_data_model_t& mps_data_model) +{ + data_model_view_t view; + + view.set_maximize(mps_data_model.get_sense()); + + if (!mps_data_model.get_constraint_matrix_values().empty()) { + view.set_csr_constraint_matrix(mps_data_model.get_constraint_matrix_values().data(), + mps_data_model.get_constraint_matrix_values().size(), + mps_data_model.get_constraint_matrix_indices().data(), + mps_data_model.get_constraint_matrix_indices().size(), + mps_data_model.get_constraint_matrix_offsets().data(), + mps_data_model.get_constraint_matrix_offsets().size()); + } + + if (!mps_data_model.get_constraint_bounds().empty()) { + view.set_constraint_bounds(mps_data_model.get_constraint_bounds().data(), + mps_data_model.get_constraint_bounds().size()); + } + + if (!mps_data_model.get_objective_coefficients().empty()) { + view.set_objective_coefficients(mps_data_model.get_objective_coefficients().data(), + mps_data_model.get_objective_coefficients().size()); + } + + if (mps_data_model.has_quadratic_objective()) { + view.set_quadratic_objective_matrix(mps_data_model.get_quadratic_objective_values().data(), + mps_data_model.get_quadratic_objective_values().size(), + mps_data_model.get_quadratic_objective_indices().data(), + mps_data_model.get_quadratic_objective_indices().size(), + mps_data_model.get_quadratic_objective_offsets().data(), + mps_data_model.get_quadratic_objective_offsets().size()); + } + + view.set_objective_scaling_factor(mps_data_model.get_objective_scaling_factor()); + view.set_objective_offset(mps_data_model.get_objective_offset()); + + if (!mps_data_model.get_variable_lower_bounds().empty()) { + view.set_variable_lower_bounds(mps_data_model.get_variable_lower_bounds().data(), + mps_data_model.get_variable_lower_bounds().size()); + } + + if (!mps_data_model.get_variable_upper_bounds().empty()) { + view.set_variable_upper_bounds(mps_data_model.get_variable_upper_bounds().data(), + mps_data_model.get_variable_upper_bounds().size()); + } + + if (!mps_data_model.get_variable_types().empty()) { + view.set_variable_types(mps_data_model.get_variable_types().data(), + mps_data_model.get_variable_types().size()); + } + + if (!mps_data_model.get_row_types().empty()) { + view.set_row_types(mps_data_model.get_row_types().data(), + mps_data_model.get_row_types().size()); + } + + if (!mps_data_model.get_constraint_lower_bounds().empty()) { + view.set_constraint_lower_bounds(mps_data_model.get_constraint_lower_bounds().data(), + mps_data_model.get_constraint_lower_bounds().size()); + } + + if (!mps_data_model.get_constraint_upper_bounds().empty()) { + view.set_constraint_upper_bounds(mps_data_model.get_constraint_upper_bounds().data(), + mps_data_model.get_constraint_upper_bounds().size()); + } + + view.set_objective_name(mps_data_model.get_objective_name()); + view.set_problem_name(mps_data_model.get_problem_name()); + + if (!mps_data_model.get_variable_names().empty()) { + view.set_variable_names(mps_data_model.get_variable_names()); + } + + if (!mps_data_model.get_row_names().empty()) { + view.set_row_names(mps_data_model.get_row_names()); + } + + if (!mps_data_model.get_initial_primal_solution().empty()) { + 
view.set_initial_primal_solution(mps_data_model.get_initial_primal_solution().data(), + mps_data_model.get_initial_primal_solution().size()); + } + + if (!mps_data_model.get_initial_dual_solution().empty()) { + view.set_initial_dual_solution(mps_data_model.get_initial_dual_solution().data(), + mps_data_model.get_initial_dual_solution().size()); + } + + return view; +} + template optimization_problem_solution_t solve_lp( raft::handle_t const* handle_ptr, @@ -1065,34 +1161,355 @@ optimization_problem_solution_t solve_lp( bool problem_checking, bool use_pdlp_solver_mode) { - auto op_problem = mps_data_model_to_optimization_problem(handle_ptr, mps_data_model); + // Create a view pointing to CPU data and delegate to the view-based overload. + // The view overload handles local vs remote solve automatically. + auto view = create_view_from_mps_data_model(mps_data_model); + view.set_is_device_memory(false); // MPS data is always in CPU memory + return solve_lp(handle_ptr, view, settings, problem_checking, use_pdlp_solver_mode); +} + +template +optimization_problem_t data_model_view_to_optimization_problem( + raft::handle_t const* handle_ptr, const data_model_view_t& view) +{ + optimization_problem_t op_problem(handle_ptr); + op_problem.set_maximize(view.get_sense()); + + op_problem.set_csr_constraint_matrix(view.get_constraint_matrix_values().data(), + view.get_constraint_matrix_values().size(), + view.get_constraint_matrix_indices().data(), + view.get_constraint_matrix_indices().size(), + view.get_constraint_matrix_offsets().data(), + view.get_constraint_matrix_offsets().size()); + + if (view.get_constraint_bounds().size() != 0) { + op_problem.set_constraint_bounds(view.get_constraint_bounds().data(), + view.get_constraint_bounds().size()); + } + if (view.get_objective_coefficients().size() != 0) { + op_problem.set_objective_coefficients(view.get_objective_coefficients().data(), + view.get_objective_coefficients().size()); + } + op_problem.set_objective_scaling_factor(view.get_objective_scaling_factor()); + op_problem.set_objective_offset(view.get_objective_offset()); + if (view.get_variable_lower_bounds().size() != 0) { + op_problem.set_variable_lower_bounds(view.get_variable_lower_bounds().data(), + view.get_variable_lower_bounds().size()); + } + if (view.get_variable_upper_bounds().size() != 0) { + op_problem.set_variable_upper_bounds(view.get_variable_upper_bounds().data(), + view.get_variable_upper_bounds().size()); + } + if (view.get_variable_types().size() != 0) { + auto var_types = view.get_variable_types(); + + // Check if the pointer is on host or device + cudaPointerAttributes attrs; + cudaError_t err = cudaPointerGetAttributes(&attrs, var_types.data()); + + std::vector host_var_types(var_types.size()); + if (err == cudaSuccess && attrs.type == cudaMemoryTypeDevice) { + // Source is on GPU - copy to host + cudaMemcpy(host_var_types.data(), + var_types.data(), + var_types.size() * sizeof(char), + cudaMemcpyDeviceToHost); + } else { + // Source is on host (or unregistered) - direct copy + cudaGetLastError(); // Clear any error from cudaPointerGetAttributes + std::memcpy(host_var_types.data(), var_types.data(), var_types.size() * sizeof(char)); + } + + std::vector enum_variable_types(var_types.size()); + for (std::size_t i = 0; i < var_types.size(); ++i) { + enum_variable_types[i] = host_var_types[i] == 'I' ? 
var_t::INTEGER : var_t::CONTINUOUS; + } + op_problem.set_variable_types(enum_variable_types.data(), enum_variable_types.size()); + } + + if (view.get_row_types().size() != 0) { + op_problem.set_row_types(view.get_row_types().data(), view.get_row_types().size()); + } + if (view.get_constraint_lower_bounds().size() != 0) { + op_problem.set_constraint_lower_bounds(view.get_constraint_lower_bounds().data(), + view.get_constraint_lower_bounds().size()); + } + if (view.get_constraint_upper_bounds().size() != 0) { + op_problem.set_constraint_upper_bounds(view.get_constraint_upper_bounds().data(), + view.get_constraint_upper_bounds().size()); + } + + if (view.get_objective_name().size() != 0) { + op_problem.set_objective_name(view.get_objective_name()); + } + if (view.get_problem_name().size() != 0) { + op_problem.set_problem_name(view.get_problem_name().data()); + } + if (view.get_variable_names().size() != 0) { + op_problem.set_variable_names(view.get_variable_names()); + } + if (view.get_row_names().size() != 0) { op_problem.set_row_names(view.get_row_names()); } + + if (view.has_quadratic_objective()) { + // Copy quadratic objective from view to vectors first since we need host data + std::vector Q_values(view.get_quadratic_objective_values().size()); + std::vector Q_indices(view.get_quadratic_objective_indices().size()); + std::vector Q_offsets(view.get_quadratic_objective_offsets().size()); + + // Check if the pointer is on host or device + cudaPointerAttributes attrs; + cudaError_t err = + cudaPointerGetAttributes(&attrs, view.get_quadratic_objective_values().data()); + + if (err == cudaSuccess && attrs.type == cudaMemoryTypeDevice) { + // Source is on GPU - copy to host + cudaMemcpy(Q_values.data(), + view.get_quadratic_objective_values().data(), + Q_values.size() * sizeof(f_t), + cudaMemcpyDeviceToHost); + cudaMemcpy(Q_indices.data(), + view.get_quadratic_objective_indices().data(), + Q_indices.size() * sizeof(i_t), + cudaMemcpyDeviceToHost); + cudaMemcpy(Q_offsets.data(), + view.get_quadratic_objective_offsets().data(), + Q_offsets.size() * sizeof(i_t), + cudaMemcpyDeviceToHost); + } else { + // Source is on host - direct copy + cudaGetLastError(); // Clear any error from cudaPointerGetAttributes + std::memcpy(Q_values.data(), + view.get_quadratic_objective_values().data(), + Q_values.size() * sizeof(f_t)); + std::memcpy(Q_indices.data(), + view.get_quadratic_objective_indices().data(), + Q_indices.size() * sizeof(i_t)); + std::memcpy(Q_offsets.data(), + view.get_quadratic_objective_offsets().data(), + Q_offsets.size() * sizeof(i_t)); + } + + op_problem.set_quadratic_objective_matrix(Q_values.data(), + Q_values.size(), + Q_indices.data(), + Q_indices.size(), + Q_offsets.data(), + Q_offsets.size()); + } + + return op_problem; +} + +// Helper struct to hold CPU copies of GPU data for remote solve +template +struct cpu_problem_data_t { + std::vector A_values; + std::vector A_indices; + std::vector A_offsets; + std::vector constraint_bounds; + std::vector constraint_lower_bounds; + std::vector constraint_upper_bounds; + std::vector objective_coefficients; + std::vector variable_lower_bounds; + std::vector variable_upper_bounds; + std::vector variable_types; + std::vector quadratic_objective_values; + std::vector quadratic_objective_indices; + std::vector quadratic_objective_offsets; + bool maximize; + f_t objective_scaling_factor; + f_t objective_offset; + + data_model_view_t create_view() const + { + data_model_view_t v; + v.set_maximize(maximize); + 
v.set_objective_scaling_factor(objective_scaling_factor); + v.set_objective_offset(objective_offset); + + if (!A_values.empty()) { + v.set_csr_constraint_matrix(A_values.data(), + A_values.size(), + A_indices.data(), + A_indices.size(), + A_offsets.data(), + A_offsets.size()); + } + if (!constraint_bounds.empty()) { + v.set_constraint_bounds(constraint_bounds.data(), constraint_bounds.size()); + } + if (!constraint_lower_bounds.empty() && !constraint_upper_bounds.empty()) { + v.set_constraint_lower_bounds(constraint_lower_bounds.data(), constraint_lower_bounds.size()); + v.set_constraint_upper_bounds(constraint_upper_bounds.data(), constraint_upper_bounds.size()); + } + if (!objective_coefficients.empty()) { + v.set_objective_coefficients(objective_coefficients.data(), objective_coefficients.size()); + } + if (!variable_lower_bounds.empty()) { + v.set_variable_lower_bounds(variable_lower_bounds.data(), variable_lower_bounds.size()); + } + if (!variable_upper_bounds.empty()) { + v.set_variable_upper_bounds(variable_upper_bounds.data(), variable_upper_bounds.size()); + } + if (!variable_types.empty()) { + v.set_variable_types(variable_types.data(), variable_types.size()); + } + if (!quadratic_objective_values.empty()) { + v.set_quadratic_objective_matrix(quadratic_objective_values.data(), + quadratic_objective_values.size(), + quadratic_objective_indices.data(), + quadratic_objective_indices.size(), + quadratic_objective_offsets.data(), + quadratic_objective_offsets.size()); + } + v.set_is_device_memory(false); + return v; + } +}; + +// Helper to copy GPU view data to CPU +template +cpu_problem_data_t copy_view_to_cpu(raft::handle_t const* handle_ptr, + const data_model_view_t& gpu_view) +{ + cpu_problem_data_t cpu_data; + auto stream = handle_ptr->get_stream(); + + cpu_data.maximize = gpu_view.get_sense(); + cpu_data.objective_scaling_factor = gpu_view.get_objective_scaling_factor(); + cpu_data.objective_offset = gpu_view.get_objective_offset(); + + auto copy_to_host = [stream](auto& dst_vec, auto src_span) { + if (src_span.size() > 0) { + dst_vec.resize(src_span.size()); + raft::copy(dst_vec.data(), src_span.data(), src_span.size(), stream); + } + }; + + copy_to_host(cpu_data.A_values, gpu_view.get_constraint_matrix_values()); + copy_to_host(cpu_data.A_indices, gpu_view.get_constraint_matrix_indices()); + copy_to_host(cpu_data.A_offsets, gpu_view.get_constraint_matrix_offsets()); + copy_to_host(cpu_data.constraint_bounds, gpu_view.get_constraint_bounds()); + copy_to_host(cpu_data.constraint_lower_bounds, gpu_view.get_constraint_lower_bounds()); + copy_to_host(cpu_data.constraint_upper_bounds, gpu_view.get_constraint_upper_bounds()); + copy_to_host(cpu_data.objective_coefficients, gpu_view.get_objective_coefficients()); + copy_to_host(cpu_data.variable_lower_bounds, gpu_view.get_variable_lower_bounds()); + copy_to_host(cpu_data.variable_upper_bounds, gpu_view.get_variable_upper_bounds()); + copy_to_host(cpu_data.quadratic_objective_values, gpu_view.get_quadratic_objective_values()); + copy_to_host(cpu_data.quadratic_objective_indices, gpu_view.get_quadratic_objective_indices()); + copy_to_host(cpu_data.quadratic_objective_offsets, gpu_view.get_quadratic_objective_offsets()); + + // Variable types need special handling (char array) + auto var_types_span = gpu_view.get_variable_types(); + if (var_types_span.size() > 0) { + cpu_data.variable_types.resize(var_types_span.size()); + cudaMemcpyAsync(cpu_data.variable_types.data(), + var_types_span.data(), + var_types_span.size() * sizeof(char), + 
cudaMemcpyDeviceToHost, + stream); + } + + // Synchronize to ensure all copies are complete + cudaStreamSynchronize(stream); + + return cpu_data; +} + +template +optimization_problem_solution_t solve_lp(raft::handle_t const* handle_ptr, + const data_model_view_t& view, + pdlp_solver_settings_t const& settings, + bool problem_checking, + bool use_pdlp_solver_mode) +{ + // Initialize logger for this overload (needed for early returns) + init_logger_t log(settings.log_file, settings.log_to_console); + + // Check for remote solve configuration first + auto remote_config = get_remote_solve_config(); + + if (view.is_device_memory()) { + if (remote_config.has_value()) { + // GPU data + remote solve requested: need valid handle to copy GPU→CPU + if (handle_ptr == nullptr) { + CUOPT_LOG_ERROR( + "[solve_lp] Remote solve requested with GPU data but no CUDA handle. " + "This is an internal error - GPU data should not exist without CUDA initialization."); + return optimization_problem_solution_t(pdlp_termination_status_t::NumericalError); + } + CUOPT_LOG_WARN( + "[solve_lp] Remote solve requested but data is on GPU. " + "Copying to CPU for serialization (performance impact)."); + auto cpu_data = copy_view_to_cpu(handle_ptr, view); + auto cpu_view = cpu_data.create_view(); + + CUOPT_LOG_INFO("[solve_lp] Remote solve detected: CUOPT_REMOTE_HOST=%s, CUOPT_REMOTE_PORT=%d", + remote_config->host.c_str(), + remote_config->port); + // Call the remote solve function with CPU-side view + return solve_lp_remote(*remote_config, cpu_view, settings); + } + + // Local solve: data already on GPU - convert view to optimization_problem_t and solve + auto op_problem = data_model_view_to_optimization_problem(handle_ptr, view); + return solve_lp(op_problem, settings, problem_checking, use_pdlp_solver_mode); + } + + // Data is on CPU + if (remote_config.has_value()) { + CUOPT_LOG_INFO("[solve_lp] Remote solve detected: CUOPT_REMOTE_HOST=%s, CUOPT_REMOTE_PORT=%d", + remote_config->host.c_str(), + remote_config->port); + // Call the remote solve function + return solve_lp_remote(*remote_config, view, settings); + } + + // Local solve with CPU data: copy to GPU and solve + if (handle_ptr == nullptr) { + CUOPT_LOG_ERROR("[solve_lp] Local solve requested but handle_ptr is null."); + return optimization_problem_solution_t( + cuopt::logic_error("No CUDA handle for CPU->GPU copy", cuopt::error_type_t::RuntimeError)); + } + auto op_problem = data_model_view_to_optimization_problem(handle_ptr, view); return solve_lp(op_problem, settings, problem_checking, use_pdlp_solver_mode); } -#define INSTANTIATE(F_TYPE) \ - template optimization_problem_solution_t solve_lp( \ - optimization_problem_t& op_problem, \ - pdlp_solver_settings_t const& settings, \ - bool problem_checking, \ - bool use_pdlp_solver_mode, \ - bool is_batch_mode); \ - \ - template optimization_problem_solution_t solve_lp( \ - raft::handle_t const* handle_ptr, \ - const cuopt::mps_parser::mps_data_model_t& mps_data_model, \ - pdlp_solver_settings_t const& settings, \ - bool problem_checking, \ - bool use_pdlp_solver_mode); \ - \ - template optimization_problem_solution_t solve_lp_with_method( \ - detail::problem_t& problem, \ - pdlp_solver_settings_t const& settings, \ - const timer_t& timer, \ - bool is_batch_mode); \ - \ - template optimization_problem_t mps_data_model_to_optimization_problem( \ - raft::handle_t const* handle_ptr, \ - const cuopt::mps_parser::mps_data_model_t& data_model); \ +#define INSTANTIATE(F_TYPE) \ + template optimization_problem_solution_t 
solve_lp( \ + optimization_problem_t& op_problem, \ + pdlp_solver_settings_t const& settings, \ + bool problem_checking, \ + bool use_pdlp_solver_mode, \ + bool is_batch_mode); \ + \ + template optimization_problem_solution_t solve_lp( \ + raft::handle_t const* handle_ptr, \ + const cuopt::mps_parser::mps_data_model_t& mps_data_model, \ + pdlp_solver_settings_t const& settings, \ + bool problem_checking, \ + bool use_pdlp_solver_mode); \ + \ + template optimization_problem_solution_t solve_lp_with_method( \ + detail::problem_t& problem, \ + pdlp_solver_settings_t const& settings, \ + const timer_t& timer, \ + bool is_batch_mode); \ + \ + template optimization_problem_t mps_data_model_to_optimization_problem( \ + raft::handle_t const* handle_ptr, \ + const cuopt::mps_parser::mps_data_model_t& data_model); \ + \ + template optimization_problem_t data_model_view_to_optimization_problem( \ + raft::handle_t const* handle_ptr, const data_model_view_t& view); \ + \ + template optimization_problem_solution_t solve_lp( \ + raft::handle_t const* handle_ptr, \ + const data_model_view_t& view, \ + pdlp_solver_settings_t const& settings, \ + bool problem_checking, \ + bool use_pdlp_solver_mode); \ + \ template void set_pdlp_solver_mode(pdlp_solver_settings_t const& settings); #if MIP_INSTANTIATE_FLOAT diff --git a/cpp/src/linear_programming/solver_solution.cu b/cpp/src/linear_programming/solver_solution.cu index 1409e7cbf..45ed3dcd3 100644 --- a/cpp/src/linear_programming/solver_solution.cu +++ b/cpp/src/linear_programming/solver_solution.cu @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -21,9 +21,10 @@ namespace cuopt::linear_programming { template optimization_problem_solution_t::optimization_problem_solution_t( pdlp_termination_status_t termination_status, rmm::cuda_stream_view stream_view) - : primal_solution_{0, stream_view}, - dual_solution_{0, stream_view}, - reduced_cost_{0, stream_view}, + : primal_solution_(std::make_unique>(0, stream_view)), + dual_solution_(std::make_unique>(0, stream_view)), + reduced_cost_(std::make_unique>(0, stream_view)), + is_device_memory_(true), termination_status_(termination_status), error_status_(cuopt::logic_error("", cuopt::error_type_t::Success)) { @@ -32,14 +33,41 @@ optimization_problem_solution_t::optimization_problem_solution_t( template optimization_problem_solution_t::optimization_problem_solution_t( cuopt::logic_error error_status_, rmm::cuda_stream_view stream_view) - : primal_solution_{0, stream_view}, - dual_solution_{0, stream_view}, - reduced_cost_{0, stream_view}, + : primal_solution_(std::make_unique>(0, stream_view)), + dual_solution_(std::make_unique>(0, stream_view)), + reduced_cost_(std::make_unique>(0, stream_view)), + is_device_memory_(true), termination_status_(pdlp_termination_status_t::NoTermination), error_status_(error_status_) { } +// CPU-only constructor for remote solve error cases +template +optimization_problem_solution_t::optimization_problem_solution_t( + pdlp_termination_status_t termination_status) + : primal_solution_host_(std::make_unique>()), + dual_solution_host_(std::make_unique>()), + reduced_cost_host_(std::make_unique>()), + is_device_memory_(false), + termination_status_(termination_status), + error_status_(cuopt::logic_error("", cuopt::error_type_t::Success)) +{ +} + +// CPU-only constructor for remote solve error cases +template +optimization_problem_solution_t::optimization_problem_solution_t( + cuopt::logic_error error_status) + : primal_solution_host_(std::make_unique>()), + dual_solution_host_(std::make_unique>()), + reduced_cost_host_(std::make_unique>()), + is_device_memory_(false), + termination_status_(pdlp_termination_status_t::NoTermination), + error_status_(error_status) +{ +} + template optimization_problem_solution_t::optimization_problem_solution_t( rmm::device_uvector& final_primal_solution, @@ -51,15 +79,16 @@ optimization_problem_solution_t::optimization_problem_solution_t( const std::vector& row_names, additional_termination_information_t& termination_stats, pdlp_termination_status_t termination_status) - : primal_solution_(std::move(final_primal_solution)), - dual_solution_(std::move(final_dual_solution)), - reduced_cost_(std::move(final_reduced_cost)), + : primal_solution_(std::make_unique>(std::move(final_primal_solution))), + dual_solution_(std::make_unique>(std::move(final_dual_solution))), + reduced_cost_(std::make_unique>(std::move(final_reduced_cost))), + is_device_memory_(true), pdlp_warm_start_data_(std::move(warm_start_data)), + termination_status_(termination_status), + termination_stats_(std::move(termination_stats)), objective_name_(objective_name), var_names_(std::move(var_names)), row_names_(std::move(row_names)), - termination_stats_(std::move(termination_stats)), - termination_status_(termination_status), error_status_(cuopt::logic_error("", cuopt::error_type_t::Success)) { } @@ -74,14 +103,15 @@ optimization_problem_solution_t::optimization_problem_solution_t( const std::vector& row_names, additional_termination_information_t& termination_stats, pdlp_termination_status_t 
termination_status) - : primal_solution_(std::move(final_primal_solution)), - dual_solution_(std::move(final_dual_solution)), - reduced_cost_(std::move(final_reduced_cost)), + : primal_solution_(std::make_unique>(std::move(final_primal_solution))), + dual_solution_(std::make_unique>(std::move(final_dual_solution))), + reduced_cost_(std::make_unique>(std::move(final_reduced_cost))), + is_device_memory_(true), + termination_status_(termination_status), + termination_stats_(std::move(termination_stats)), objective_name_(objective_name), var_names_(std::move(var_names)), row_names_(std::move(row_names)), - termination_stats_(std::move(termination_stats)), - termination_status_(termination_status), error_status_(cuopt::logic_error("", cuopt::error_type_t::Success)) { } @@ -98,14 +128,42 @@ optimization_problem_solution_t::optimization_problem_solution_t( pdlp_termination_status_t termination_status, const raft::handle_t* handler_ptr, [[maybe_unused]] bool deep_copy) - : primal_solution_(final_primal_solution, handler_ptr->get_stream()), - dual_solution_(final_dual_solution, handler_ptr->get_stream()), - reduced_cost_(final_reduced_cost, handler_ptr->get_stream()), + : primal_solution_( + std::make_unique>(final_primal_solution, handler_ptr->get_stream())), + dual_solution_( + std::make_unique>(final_dual_solution, handler_ptr->get_stream())), + reduced_cost_( + std::make_unique>(final_reduced_cost, handler_ptr->get_stream())), + is_device_memory_(true), + termination_status_(termination_status), + termination_stats_(termination_stats), objective_name_(objective_name), var_names_(var_names), row_names_(row_names), - termination_stats_(termination_stats), + error_status_(cuopt::logic_error("", cuopt::error_type_t::Success)) +{ +} + +// CPU-only constructor for remote solve with solution data +template +optimization_problem_solution_t::optimization_problem_solution_t( + std::vector primal_solution, + std::vector dual_solution, + std::vector reduced_cost, + const std::string objective_name, + const std::vector& var_names, + const std::vector& row_names, + additional_termination_information_t& termination_stats, + pdlp_termination_status_t termination_status) + : primal_solution_host_(std::make_unique>(std::move(primal_solution))), + dual_solution_host_(std::make_unique>(std::move(dual_solution))), + reduced_cost_host_(std::make_unique>(std::move(reduced_cost))), + is_device_memory_(false), termination_status_(termination_status), + termination_stats_(std::move(termination_stats)), + objective_name_(objective_name), + var_names_(var_names), + row_names_(row_names), error_status_(cuopt::logic_error("", cuopt::error_type_t::Success)) { } @@ -114,31 +172,56 @@ template void optimization_problem_solution_t::copy_from( const raft::handle_t* handle_ptr, const optimization_problem_solution_t& other) { - // Resize to make sure they are of same size - primal_solution_.resize(other.primal_solution_.size(), handle_ptr->get_stream()); - dual_solution_.resize(other.dual_solution_.size(), handle_ptr->get_stream()); - reduced_cost_.resize(other.reduced_cost_.size(), handle_ptr->get_stream()); - - // Copy the data - raft::copy(primal_solution_.data(), - other.primal_solution_.data(), - primal_solution_.size(), - handle_ptr->get_stream()); - raft::copy(dual_solution_.data(), - other.dual_solution_.data(), - dual_solution_.size(), - handle_ptr->get_stream()); - raft::copy(reduced_cost_.data(), - other.reduced_cost_.data(), - reduced_cost_.size(), - handle_ptr->get_stream()); + is_device_memory_ = 
other.is_device_memory_; + + if (other.is_device_memory_) { + // Copy GPU data + if (!primal_solution_) { + primal_solution_ = std::make_unique>(0, handle_ptr->get_stream()); + } + if (!dual_solution_) { + dual_solution_ = std::make_unique>(0, handle_ptr->get_stream()); + } + if (!reduced_cost_) { + reduced_cost_ = std::make_unique>(0, handle_ptr->get_stream()); + } + + // Resize to make sure they are of same size + primal_solution_->resize(other.primal_solution_->size(), handle_ptr->get_stream()); + dual_solution_->resize(other.dual_solution_->size(), handle_ptr->get_stream()); + reduced_cost_->resize(other.reduced_cost_->size(), handle_ptr->get_stream()); + + // Copy the data + raft::copy(primal_solution_->data(), + other.primal_solution_->data(), + primal_solution_->size(), + handle_ptr->get_stream()); + raft::copy(dual_solution_->data(), + other.dual_solution_->data(), + dual_solution_->size(), + handle_ptr->get_stream()); + raft::copy(reduced_cost_->data(), + other.reduced_cost_->data(), + reduced_cost_->size(), + handle_ptr->get_stream()); + handle_ptr->sync_stream(); + } else { + // Copy CPU data + if (!primal_solution_host_) { primal_solution_host_ = std::make_unique>(); } + if (!dual_solution_host_) { dual_solution_host_ = std::make_unique>(); } + if (!reduced_cost_host_) { reduced_cost_host_ = std::make_unique>(); } + + *primal_solution_host_ = *other.primal_solution_host_; + *dual_solution_host_ = *other.dual_solution_host_; + *reduced_cost_host_ = *other.reduced_cost_host_; + } + termination_stats_ = other.termination_stats_; termination_status_ = other.termination_status_; objective_name_ = other.objective_name_; var_names_ = other.var_names_; row_names_ = other.row_names_; // We do not copy the warm start info. As it is not needed for this purpose. 
- handle_ptr->sync_stream(); } template @@ -203,18 +286,31 @@ void optimization_problem_solution_t::write_to_file(std::string_view f << std::endl; return; } + std::vector primal_solution; std::vector dual_solution; std::vector reduced_cost; - primal_solution.resize(primal_solution_.size()); - dual_solution.resize(dual_solution_.size()); - reduced_cost.resize(reduced_cost_.size()); - raft::copy( - primal_solution.data(), primal_solution_.data(), primal_solution_.size(), stream_view.value()); - raft::copy( - dual_solution.data(), dual_solution_.data(), dual_solution_.size(), stream_view.value()); - raft::copy(reduced_cost.data(), reduced_cost_.data(), reduced_cost_.size(), stream_view.value()); - RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view.value())); + + if (is_device_memory_) { + // Copy from GPU to CPU + primal_solution.resize(primal_solution_->size()); + dual_solution.resize(dual_solution_->size()); + reduced_cost.resize(reduced_cost_->size()); + raft::copy(primal_solution.data(), + primal_solution_->data(), + primal_solution_->size(), + stream_view.value()); + raft::copy( + dual_solution.data(), dual_solution_->data(), dual_solution_->size(), stream_view.value()); + raft::copy( + reduced_cost.data(), reduced_cost_->data(), reduced_cost_->size(), stream_view.value()); + RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view.value())); + } else { + // Already on CPU + primal_solution = *primal_solution_host_; + dual_solution = *dual_solution_host_; + reduced_cost = *reduced_cost_host_; + } myfile << "{ " << std::endl; myfile << "\t\"Termination reason\" : \"" << get_termination_status_string() << "\"," @@ -305,35 +401,78 @@ f_t optimization_problem_solution_t::get_dual_objective_value() const return termination_stats_.dual_objective; } +template +bool optimization_problem_solution_t::is_device_memory() const +{ + return is_device_memory_; +} + template rmm::device_uvector& optimization_problem_solution_t::get_primal_solution() { - return primal_solution_; + return *primal_solution_; } template const rmm::device_uvector& optimization_problem_solution_t::get_primal_solution() const { - return primal_solution_; + return *primal_solution_; } template rmm::device_uvector& optimization_problem_solution_t::get_dual_solution() { - return dual_solution_; + return *dual_solution_; } template const rmm::device_uvector& optimization_problem_solution_t::get_dual_solution() const { - return dual_solution_; + return *dual_solution_; } template rmm::device_uvector& optimization_problem_solution_t::get_reduced_cost() { - return reduced_cost_; + return *reduced_cost_; +} + +// Host (CPU) getters +template +std::vector& optimization_problem_solution_t::get_primal_solution_host() +{ + return *primal_solution_host_; +} + +template +const std::vector& optimization_problem_solution_t::get_primal_solution_host() const +{ + return *primal_solution_host_; +} + +template +std::vector& optimization_problem_solution_t::get_dual_solution_host() +{ + return *dual_solution_host_; +} + +template +const std::vector& optimization_problem_solution_t::get_dual_solution_host() const +{ + return *dual_solution_host_; +} + +template +std::vector& optimization_problem_solution_t::get_reduced_cost_host() +{ + return *reduced_cost_host_; +} + +template +const std::vector& optimization_problem_solution_t::get_reduced_cost_host() const +{ + return *reduced_cost_host_; } template @@ -362,6 +501,136 @@ optimization_problem_solution_t::get_pdlp_warm_start_data() return pdlp_warm_start_data_; } 
+//============================================================================ +// Setters for host solution data +//============================================================================ + +template +void optimization_problem_solution_t::set_primal_solution_host(std::vector solution) +{ + primal_solution_host_ = std::make_unique>(std::move(solution)); + is_device_memory_ = false; +} + +template +void optimization_problem_solution_t::set_dual_solution_host(std::vector solution) +{ + dual_solution_host_ = std::make_unique>(std::move(solution)); + is_device_memory_ = false; +} + +template +void optimization_problem_solution_t::set_reduced_cost_host(std::vector reduced_cost) +{ + reduced_cost_host_ = std::make_unique>(std::move(reduced_cost)); + is_device_memory_ = false; +} + +template +void optimization_problem_solution_t::set_termination_stats( + const additional_termination_information_t& stats) +{ + termination_stats_ = stats; +} + +//============================================================================ +// Getters for termination statistics +//============================================================================ + +template +f_t optimization_problem_solution_t::get_l2_primal_residual() const +{ + return termination_stats_.l2_primal_residual; +} + +template +f_t optimization_problem_solution_t::get_l2_dual_residual() const +{ + return termination_stats_.l2_dual_residual; +} + +template +f_t optimization_problem_solution_t::get_primal_objective() const +{ + return termination_stats_.primal_objective; +} + +template +f_t optimization_problem_solution_t::get_dual_objective() const +{ + return termination_stats_.dual_objective; +} + +template +f_t optimization_problem_solution_t::get_gap() const +{ + return termination_stats_.gap; +} + +template +i_t optimization_problem_solution_t::get_nb_iterations() const +{ + return termination_stats_.number_of_steps_taken; +} + +template +bool optimization_problem_solution_t::get_solved_by_pdlp() const +{ + return solved_by_pdlp_; +} + +//============================================================================ +// Setters for termination statistics +//============================================================================ + +template +void optimization_problem_solution_t::set_l2_primal_residual(f_t value) +{ + termination_stats_.l2_primal_residual = value; +} + +template +void optimization_problem_solution_t::set_l2_dual_residual(f_t value) +{ + termination_stats_.l2_dual_residual = value; +} + +template +void optimization_problem_solution_t::set_primal_objective(f_t value) +{ + termination_stats_.primal_objective = value; +} + +template +void optimization_problem_solution_t::set_dual_objective(f_t value) +{ + termination_stats_.dual_objective = value; +} + +template +void optimization_problem_solution_t::set_gap(f_t value) +{ + termination_stats_.gap = value; +} + +template +void optimization_problem_solution_t::set_nb_iterations(i_t value) +{ + termination_stats_.number_of_steps_taken = value; +} + +template +void optimization_problem_solution_t::set_solved_by_pdlp(bool value) +{ + solved_by_pdlp_ = value; +} + +template +std::string optimization_problem_solution_t::get_error_string() const +{ + return error_status_.what(); +} + template void optimization_problem_solution_t::write_to_sol_file( std::string_view filename, rmm::cuda_stream_view stream_view) const @@ -374,14 +643,73 @@ void optimization_problem_solution_t::write_to_sol_file( auto objective_value = get_objective_value(); std::vector solution; - 
solution.resize(primal_solution_.size());
-  raft::copy(
-    solution.data(), primal_solution_.data(), primal_solution_.size(), stream_view.value());
-  RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view.value()));
+
+  if (is_device_memory_) {
+    // Copy from GPU to CPU
+    solution.resize(primal_solution_->size());
+    raft::copy(
+      solution.data(), primal_solution_->data(), primal_solution_->size(), stream_view.value());
+    RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view.value()));
+  } else {
+    // Already on CPU
+    solution = *primal_solution_host_;
+  }
+
   solution_writer_t::write_solution_to_sol_file(
     std::string(filename), status, objective_value, var_names_, solution);
 }

+template <typename i_t, typename f_t>
+void optimization_problem_solution_t<i_t, f_t>::to_host(rmm::cuda_stream_view stream_view)
+{
+  if (!is_device_memory_) {
+    // Already on CPU, nothing to do
+    return;
+  }
+
+  // Initialize host storage if needed
+  if (!primal_solution_host_) { primal_solution_host_ = std::make_unique<std::vector<f_t>>(); }
+  if (!dual_solution_host_) { dual_solution_host_ = std::make_unique<std::vector<f_t>>(); }
+  if (!reduced_cost_host_) { reduced_cost_host_ = std::make_unique<std::vector<f_t>>(); }
+
+  // Copy primal solution
+  if (primal_solution_ && primal_solution_->size() > 0) {
+    primal_solution_host_->resize(primal_solution_->size());
+    raft::copy(primal_solution_host_->data(),
+               primal_solution_->data(),
+               primal_solution_->size(),
+               stream_view.value());
+  }
+
+  // Copy dual solution
+  if (dual_solution_ && dual_solution_->size() > 0) {
+    dual_solution_host_->resize(dual_solution_->size());
+    raft::copy(dual_solution_host_->data(),
+               dual_solution_->data(),
+               dual_solution_->size(),
+               stream_view.value());
+  }
+
+  // Copy reduced cost
+  if (reduced_cost_ && reduced_cost_->size() > 0) {
+    reduced_cost_host_->resize(reduced_cost_->size());
+    raft::copy(reduced_cost_host_->data(),
+               reduced_cost_->data(),
+               reduced_cost_->size(),
+               stream_view.value());
+  }
+
+  // Synchronize to ensure copies are complete
+  RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view.value()));
+
+  // Clear GPU storage to free memory
+  primal_solution_.reset();
+  dual_solution_.reset();
+  reduced_cost_.reset();
+
+  is_device_memory_ = false;
+}
+
 #if MIP_INSTANTIATE_FLOAT
 template class optimization_problem_solution_t<int, float>;
 #endif
diff --git a/cpp/src/linear_programming/utilities/cuopt_remote.proto b/cpp/src/linear_programming/utilities/cuopt_remote.proto
new file mode 100644
index 000000000..b2f46a432
--- /dev/null
+++ b/cpp/src/linear_programming/utilities/cuopt_remote.proto
@@ -0,0 +1,317 @@
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+syntax = "proto3";
+
+package cuopt.remote;
+
+// Protocol version and metadata
+message RequestHeader {
+  uint32 version = 1;            // Protocol version (currently 1)
+  ProblemType problem_type = 2;  // LP or MIP
+  IndexType index_type = 3;      // INT32 or INT64
+  FloatType float_type = 4;      // FLOAT32 or DOUBLE
+}
+
+enum ProblemType {
+  LP = 0;
+  MIP = 1;
+}
+
+enum IndexType {
+  INT32 = 0;
+  INT64 = 1;
+}
+
+enum FloatType {
+  FLOAT32 = 0;
+  DOUBLE = 1;
+}
+
+// Optimization problem representation (field names match data_model_view_t)
+message OptimizationProblem {
+  // Problem metadata
+  string problem_name = 1;
+  string objective_name = 2;
+  bool maximize = 3;
+  double objective_scaling_factor = 4;
+  double objective_offset = 5;
+  // Field 6 reserved (was problem_category, now inferred from variable_types)
+
+  // Variable and row names (optional)
+  repeated string variable_names = 7;
+  repeated string row_names = 8;
+
+  // Constraint matrix A in CSR format (names match data_model_view_t: A_, A_indices_, A_offsets_)
+  repeated double A = 10;
+  repeated int32 A_indices = 11;
+  repeated int32 A_offsets = 12;
+
+  // Problem vectors (names match data_model_view_t: c_, b_)
+  repeated double c = 20;  // objective coefficients
+  repeated double b = 21;  // constraint bounds (RHS)
+  repeated double variable_lower_bounds = 22;
+  repeated double variable_upper_bounds = 23;
+
+  // Constraint bounds (alternative to b + row_types)
+  repeated double constraint_lower_bounds = 24;
+  repeated double constraint_upper_bounds = 25;
+  bytes row_types = 26;  // char array: 'E' (=), 'L' (<=), 'G' (>=), 'N' (objective)
+
+  // Variable types (matches data_model_view_t: variable_types_)
+  bytes variable_types = 30;  // char array: 'C' (continuous), 'I' (integer), 'B' (binary)
+
+  // Initial solutions (matches data_model_view_t)
+  repeated double initial_primal_solution = 40;
+  repeated double initial_dual_solution = 41;
+
+  // Quadratic objective matrix Q in CSR format for QPS (matches data_model_view_t: Q_objective_)
+  repeated double Q_values = 50;
+  repeated int32 Q_indices = 51;
+  repeated int32 Q_offsets = 52;
+}
+
+// PDLP solver mode (matches cuOpt pdlp_solver_mode_t enum values)
+enum PDLPSolverMode {
+  Stable1 = 0;
+  Stable2 = 1;
+  Methodical1 = 2;
+  Fast1 = 3;
+  Stable3 = 4;
+}
+
+// Matches cuOpt method_t enum values
+enum LPMethod {
+  Concurrent = 0;
+  PDLP = 1;
+  DualSimplex = 2;
+  Barrier = 3;
+}
+
+// PDLP solver settings (field names match cuOpt Python/C++ API)
+message PDLPSolverSettings {
+  // Termination tolerances
+  double absolute_gap_tolerance = 1;
+  double relative_gap_tolerance = 2;
+  double primal_infeasible_tolerance = 3;
+  double dual_infeasible_tolerance = 4;
+  double absolute_dual_tolerance = 5;
+  double relative_dual_tolerance = 6;
+  double absolute_primal_tolerance = 7;
+  double relative_primal_tolerance = 8;
+
+  // Limits
+  double time_limit = 10;
+  // Iteration limit. Sentinel: set to -1 to mean "unset/use server defaults".
+  // Note: proto3 numeric fields default to 0 when omitted, so clients should
+  // explicitly use -1 (or a positive value) to avoid accidentally requesting 0 iterations.
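+  // For instance, a client that wants the server default should set the field
+  // explicitly. Illustrative C++ sketch using the protobuf-generated setter
+  // (the settings object and values here are examples, not prescribed usage):
+  //   cuopt::remote::PDLPSolverSettings settings;
+  //   settings.set_iteration_limit(-1);     // "unset": let the server pick its default
+  //   settings.set_iteration_limit(10000);  // or: cap the solver at 10000 iterations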
+  int64 iteration_limit = 11;
+
+  // Solver configuration
+  bool log_to_console = 20;
+  bool detect_infeasibility = 21;
+  bool strict_infeasibility = 22;
+  PDLPSolverMode pdlp_solver_mode = 23;
+  LPMethod method = 24;
+  bool presolve = 25;
+  bool dual_postsolve = 26;
+  bool crossover = 27;
+  int32 num_gpus = 28;
+
+  // Advanced options
+  bool per_constraint_residual = 30;
+  bool cudss_deterministic = 31;
+  int32 folding = 32;
+  int32 augmented = 33;
+  int32 dualize = 34;
+  int32 ordering = 35;
+  int32 barrier_dual_initial_point = 36;
+  bool eliminate_dense_columns = 37;
+  bool save_best_primal_so_far = 38;
+  bool first_primal_feasible = 39;
+
+  // Warm start data (if provided)
+  PDLPWarmStartData warm_start_data = 50;
+}
+
+message PDLPWarmStartData {
+  repeated double current_primal_solution = 1;
+  repeated double current_dual_solution = 2;
+  repeated double initial_primal_average = 3;
+  repeated double initial_dual_average = 4;
+  repeated double current_ATY = 5;
+  repeated double sum_primal_solutions = 6;
+  repeated double sum_dual_solutions = 7;
+  repeated double last_restart_duality_gap_primal_solution = 8;
+  repeated double last_restart_duality_gap_dual_solution = 9;
+
+  double initial_primal_weight = 10;
+  double initial_step_size = 11;
+  int32 total_pdlp_iterations = 12;
+  int32 total_pdhg_iterations = 13;
+  double last_candidate_kkt_score = 14;
+  double last_restart_kkt_score = 15;
+  double sum_solution_weight = 16;
+  int32 iterations_since_last_restart = 17;
+}
+
+// MIP solver settings (field names match cuOpt Python/C++ API)
+message MIPSolverSettings {
+  // Limits
+  double time_limit = 1;
+
+  // Tolerances
+  double relative_mip_gap = 2;
+  double absolute_mip_gap = 3;
+  double integrality_tolerance = 4;
+  double absolute_tolerance = 5;
+  double relative_tolerance = 6;
+  double presolve_absolute_tolerance = 7;
+
+  // Solver configuration
+  bool log_to_console = 10;
+  bool heuristics_only = 11;
+  int32 num_cpu_threads = 12;
+  int32 num_gpus = 13;
+  bool presolve = 14;
+  bool mip_scaling = 15;
+}
+
+// LP solve request
+message SolveLPRequest {
+  RequestHeader header = 1;
+  OptimizationProblem problem = 2;
+  PDLPSolverSettings settings = 3;
+}
+
+// MIP solve request
+message SolveMIPRequest {
+  RequestHeader header = 1;
+  OptimizationProblem problem = 2;
+  MIPSolverSettings settings = 3;
+  optional bool enable_incumbents = 4;
+}
+
+// LP solution
+message LPSolution {
+  // Solution vectors
+  repeated double primal_solution = 1;
+  repeated double dual_solution = 2;
+  repeated double reduced_cost = 3;
+
+  // Warm start data for next solve
+  PDLPWarmStartData warm_start_data = 4;
+
+  // Termination information
+  PDLPTerminationStatus termination_status = 10;
+  string error_message = 11;
+
+  // Solution statistics
+  double l2_primal_residual = 20;
+  double l2_dual_residual = 21;
+  double primal_objective = 22;
+  double dual_objective = 23;
+  double gap = 24;
+  int32 nb_iterations = 25;
+  double solve_time = 26;
+  bool solved_by_pdlp = 27;
+}
+
+enum PDLPTerminationStatus {
+  PDLP_NO_TERMINATION = 0;
+  PDLP_NUMERICAL_ERROR = 1;
+  PDLP_OPTIMAL = 2;
+  PDLP_PRIMAL_INFEASIBLE = 3;
+  PDLP_DUAL_INFEASIBLE = 4;
+  PDLP_ITERATION_LIMIT = 5;
+  PDLP_TIME_LIMIT = 6;
+  PDLP_CONCURRENT_LIMIT = 7;
+  PDLP_PRIMAL_FEASIBLE = 8;
+}
+
+// MIP solution
+message MIPSolution {
+  repeated double solution = 1;
+
+  MIPTerminationStatus termination_status = 10;
+  string error_message = 11;
+
+  double objective = 20;
+  double mip_gap = 21;
+  double solution_bound = 22;
+  double total_solve_time = 23;
+  double presolve_time = 24;
+  double max_constraint_violation = 25;
+  double max_int_violation = 26;
+  double max_variable_bound_violation = 27;
+  int32 nodes = 28;
+  int32 simplex_iterations = 29;
+}
+
+enum MIPTerminationStatus {
+  MIP_NO_TERMINATION = 0;
+  MIP_OPTIMAL = 1;
+  MIP_FEASIBLE_FOUND = 2;
+  MIP_INFEASIBLE = 3;
+  MIP_UNBOUNDED = 4;
+  MIP_TIME_LIMIT = 5;
+}
+
+// Job status for async operations
+enum JobStatus {
+  QUEUED = 0;      // Job submitted, waiting in queue
+  PROCESSING = 1;  // Job currently being solved
+  COMPLETED = 2;   // Job completed successfully
+  FAILED = 3;      // Job failed with error
+  NOT_FOUND = 4;   // Job ID not found
+  CANCELLED = 5;   // Job was cancelled by user
+}
+
+// Response for job submission
+message SubmitResponse {
+  ResponseStatus status = 1;
+  bytes job_id = 2;    // Unique job identifier (bytes to avoid UTF-8 validation warnings)
+  string message = 3;  // Success/error message
+}
+
+// Response for status check
+message StatusResponse {
+  JobStatus job_status = 1;
+  string message = 2;
+  double progress = 3;          // 0.0-1.0 (future enhancement)
+  int64 result_size_bytes = 4;  // Size of result payload when COMPLETED (0 if unknown)
+  int64 max_message_bytes = 5;  // Server gRPC max message size (-1 = unlimited)
+}
+
+// Response for get result
+message ResultResponse {
+  ResponseStatus status = 1;
+  string error_message = 2;
+
+  oneof solution {
+    LPSolution lp_solution = 10;
+    MIPSolution mip_solution = 11;
+  }
+}
+
+// Response for delete
+message DeleteResponse {
+  ResponseStatus status = 1;
+  string message = 2;
+}
+
+// Response for cancel job
+message CancelResponse {
+  ResponseStatus status = 1;
+  string message = 2;
+  JobStatus job_status = 3;  // Status of job after cancel attempt
+}
+
+enum ResponseStatus {
+  SUCCESS = 0;
+  ERROR_INVALID_REQUEST = 1;
+  ERROR_SOLVE_FAILED = 2;
+  ERROR_INTERNAL = 3;
+  ERROR_NOT_FOUND = 4;
+}
diff --git a/cpp/src/linear_programming/utilities/cuopt_remote_service.proto b/cpp/src/linear_programming/utilities/cuopt_remote_service.proto
new file mode 100644
index 000000000..3d2d61d15
--- /dev/null
+++ b/cpp/src/linear_programming/utilities/cuopt_remote_service.proto
@@ -0,0 +1,245 @@
+// SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+syntax = "proto3";
+
+package cuopt.remote;
+
+// Import the existing message definitions
+import "cuopt_remote.proto";
+
+// =============================================================================
+// gRPC Service Definition
+// =============================================================================
+
+service CuOptRemoteService {
+  // -------------------------
+  // Async Job Management
+  // -------------------------
+
+  // Submit a new LP or MIP solve job (returns immediately with job_id)
+  rpc SubmitJob(SubmitJobRequest) returns (SubmitJobResponse);
+
+  // Upload a serialized SolveLPRequest / SolveMIPRequest payload in chunks (streaming),
+  // then submit it as a job.
+  //
+  // This avoids gRPC unary message size limits and allows resumable uploads via
+  // explicit offsets/acks (client can reconnect and continue from committed_size).
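+  //
+  // The expected exchange, sketched from the upload messages defined below
+  // (illustrative, not normative; chunk size is the client's choice within
+  // per-message limits):
+  //   client -> UploadStart  {upload_id, problem_type, resume = false}
+  //   server -> UploadAck    {committed_size = 0}
+  //   client -> UploadChunk  {offset = 0, data = ...}  // offset must equal committed_size
+  //   server -> UploadAck    {committed_size = offset + len(data)}  // repeat per chunk
+  //   client -> UploadFinish {upload_id}
+  //   server -> SubmitJobResponse via UploadJobResponse.submit {job_id}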
+  rpc UploadAndSubmit(stream UploadJobRequest) returns (stream UploadJobResponse);
+
+  // Check the status of a submitted job
+  rpc CheckStatus(StatusRequest) returns (StatusResponse);
+
+  // Get the result of a completed job
+  rpc GetResult(GetResultRequest) returns (ResultResponse);
+
+  // Stream the result of a completed job in chunks (effectively unlimited total size).
+  // Each chunk message must still fit within gRPC per-message limits.
+  rpc StreamResult(GetResultRequest) returns (stream ResultChunk);
+
+  // Delete a result from server memory
+  rpc DeleteResult(DeleteRequest) returns (DeleteResponse);
+
+  // Cancel a queued or running job
+  rpc CancelJob(CancelRequest) returns (CancelResponse);
+
+  // Wait for a job to complete and return result (blocking)
+  rpc WaitForResult(WaitRequest) returns (ResultResponse);
+
+  // -------------------------
+  // Log Streaming
+  // -------------------------
+
+  // Stream log messages as they are produced (server-side streaming)
+  // Client receives log lines in real-time until job completes or stream is cancelled
+  rpc StreamLogs(StreamLogsRequest) returns (stream LogMessage);
+
+  // -------------------------
+  // Incumbent Solutions
+  // -------------------------
+
+  // Get any available incumbent solutions since a given index.
+  rpc GetIncumbents(IncumbentRequest) returns (IncumbentResponse);
+
+  // -------------------------
+  // Synchronous Operations
+  // -------------------------
+
+  // (SolveSync removed - use SubmitJob + WaitForResult (+ StreamLogs/CancelJob) instead)
+}
+
+// =============================================================================
+// Request Messages
+// =============================================================================
+
+// Request to submit a new job
+message SubmitJobRequest {
+  oneof job_data {
+    SolveLPRequest lp_request = 1;
+    SolveMIPRequest mip_request = 2;
+  }
+}
+
+// Response when job is submitted
+message SubmitJobResponse {
+  string job_id = 1;   // Unique job identifier
+  string message = 2;  // Optional message
+}
+
+// =============================================================================
+// Streaming Upload (Chunked)
+// =============================================================================
+
+message UploadStart {
+  // Client-provided ID to support resume. If empty, server will generate one.
+  string upload_id = 1;
+
+  // Payload problem type (LP or MIP).
+  ProblemType problem_type = 2;
+
+  // If true, server will attempt to resume an existing upload_id (append).
+  bool resume = 3;
+
+  // Optional total payload size hint (bytes). Can be 0 if unknown.
+  int64 total_size = 4;
+}
+
+message UploadChunk {
+  // Must match upload_id from UploadStart.
+  string upload_id = 1;
+
+  // Byte offset of this chunk in the overall payload. Server currently requires
+  // strictly sequential appends: offset must equal the server's committed_size.
+  int64 offset = 2;
+
+  // Raw bytes of the serialized SolveLPRequest / SolveMIPRequest.
+  bytes data = 3;
+}
+
+message UploadFinish {
+  // Must match upload_id from UploadStart.
+  string upload_id = 1;
+}
+
+message UploadJobRequest {
+  oneof msg {
+    UploadStart start = 1;
+    UploadChunk chunk = 2;
+    UploadFinish finish = 3;
+  }
+}
+
+message UploadAck {
+  string upload_id = 1;
+  int64 committed_size = 2;
+  int64 max_message_bytes = 3;  // Server gRPC max message size (-1 = unlimited)
+}
+
+message UploadError {
+  string upload_id = 1;
+  string message = 2;
+  int64 committed_size = 3;
+  int64 max_message_bytes = 4;  // Server gRPC max message size (-1 = unlimited)
+}
+
+message UploadJobResponse {
+  oneof msg {
+    UploadAck ack = 1;
+    SubmitJobResponse submit = 2;
+    UploadError error = 3;
+  }
+}
+
+// Request to check job status
+message StatusRequest {
+  string job_id = 1;
+}
+
+// Request to get result
+message GetResultRequest {
+  string job_id = 1;
+}
+
+// Chunked result payload (raw serialized solution bytes).
+// Client reassembles all chunks (by offset) and then parses as:
+//   - cuopt.remote.LPSolution (if LP) OR cuopt.remote.MIPSolution (if MIP)
+message ResultChunk {
+  string job_id = 1;
+  int64 offset = 2;
+  bytes data = 3;
+  bool done = 4;
+  string error_message = 5;
+}
+
+// Request to delete result
+message DeleteRequest {
+  string job_id = 1;
+}
+
+// DeleteResponse is defined in cuopt_remote.proto (imported above)
+
+// Request to cancel job
+message CancelRequest {
+  string job_id = 1;
+}
+
+// CancelResponse is defined in cuopt_remote.proto (imported above)
+
+// Request to wait for result (blocking)
+message WaitRequest {
+  string job_id = 1;
+}
+
+// Request to stream logs
+message StreamLogsRequest {
+  string job_id = 1;
+  int64 from_byte = 2;  // Optional: start from this byte offset
+}
+
+// Individual log message (streamed)
+message LogMessage {
+  string line = 1;        // Single log line
+  int64 byte_offset = 2;  // Byte offset of this line in log file
+  bool job_complete = 3;  // True if this is the last message (job done)
+}
+
+// =============================================================================
+// Incumbent Solutions
+// =============================================================================
+
+message IncumbentRequest {
+  bytes job_id = 1;
+  int64 from_index = 2;  // Return incumbents starting from this index
+  int32 max_count = 3;   // Optional limit (0 or negative => no limit)
+}
+
+message Incumbent {
+  int64 index = 1;
+  double objective = 2;
+  repeated double assignment = 3;
+  bytes job_id = 4;
+}
+
+message IncumbentResponse {
+  repeated Incumbent incumbents = 1;
+  int64 next_index = 2;   // Next index the client should request
+  bool job_complete = 3;  // True if job is complete (no more incumbents)
+}
+
+// =============================================================================
+// Notes on gRPC Status Codes
+// =============================================================================
+//
+// gRPC uses standard status codes instead of custom ResponseStatus enum:
+//   OK (0)                 - Success
+//   CANCELLED (1)          - Operation was cancelled
+//   UNKNOWN (2)            - Unknown error
+//   INVALID_ARGUMENT (3)   - Invalid request
+//   DEADLINE_EXCEEDED (4)  - Timeout
+//   NOT_FOUND (5)          - Job ID not found
+//   ALREADY_EXISTS (6)     - Job already exists
+//   RESOURCE_EXHAUSTED (8) - Queue full, out of memory, etc.
+//   INTERNAL (13)          - Internal server error
+//   UNAVAILABLE (14)       - Server unavailable
+//
+// Errors are returned via gRPC Status with a message, not in response message.
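+//
+// Minimal C++ client sketch using the protobuf/gRPC-generated stub (assumed
+// usage; the channel target and the BuildSolveLPRequest() helper are
+// illustrative, not part of this API):
+//
+//   auto channel = grpc::CreateChannel("localhost:50051",
+//                                      grpc::InsecureChannelCredentials());
+//   std::unique_ptr<cuopt::remote::CuOptRemoteService::Stub> stub =
+//     cuopt::remote::CuOptRemoteService::NewStub(channel);
+//
+//   cuopt::remote::SubmitJobRequest req;
+//   *req.mutable_lp_request() = BuildSolveLPRequest();  // hypothetical helper
+//
+//   grpc::ClientContext submit_ctx;
+//   cuopt::remote::SubmitJobResponse submitted;
+//   grpc::Status status = stub->SubmitJob(&submit_ctx, req, &submitted);
+//   if (!status.ok()) { /* map status.error_code() to an application error */ }
+//
+//   grpc::ClientContext wait_ctx;
+//   cuopt::remote::WaitRequest wait;
+//   wait.set_job_id(submitted.job_id());
+//   cuopt::remote::ResultResponse result;
+//   status = stub->WaitForResult(&wait_ctx, wait, &result);
+//   if (status.ok() && result.has_lp_solution()) { /* consume result.lp_solution() */ }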
diff --git a/cpp/src/linear_programming/utilities/cython_solve.cu b/cpp/src/linear_programming/utilities/cython_solve.cu index f49e2057b..ebff422a7 100644 --- a/cpp/src/linear_programming/utilities/cython_solve.cu +++ b/cpp/src/linear_programming/utilities/cython_solve.cu @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -142,47 +143,54 @@ linear_programming_ret_t call_solve_lp( const bool use_pdlp_solver_mode = true; auto solution = cuopt::linear_programming::solve_lp( op_problem, solver_settings, problem_checking, use_pdlp_solver_mode, is_batch_mode); - linear_programming_ret_t lp_ret{ - std::make_unique(solution.get_primal_solution().release()), - std::make_unique(solution.get_dual_solution().release()), - std::make_unique(solution.get_reduced_cost().release()), - std::make_unique( - solution.get_pdlp_warm_start_data().current_primal_solution_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().current_dual_solution_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().initial_primal_average_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().initial_dual_average_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().current_ATY_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().sum_primal_solutions_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().sum_dual_solutions_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().last_restart_duality_gap_primal_solution_.release()), - std::make_unique( - solution.get_pdlp_warm_start_data().last_restart_duality_gap_dual_solution_.release()), - solution.get_pdlp_warm_start_data().initial_primal_weight_, - solution.get_pdlp_warm_start_data().initial_step_size_, - solution.get_pdlp_warm_start_data().total_pdlp_iterations_, - solution.get_pdlp_warm_start_data().total_pdhg_iterations_, - solution.get_pdlp_warm_start_data().last_candidate_kkt_score_, - solution.get_pdlp_warm_start_data().last_restart_kkt_score_, - solution.get_pdlp_warm_start_data().sum_solution_weight_, - solution.get_pdlp_warm_start_data().iterations_since_last_restart_, - solution.get_termination_status(), - solution.get_error_status().get_error_type(), - solution.get_error_status().what(), - solution.get_additional_termination_information().l2_primal_residual, - solution.get_additional_termination_information().l2_dual_residual, - solution.get_additional_termination_information().primal_objective, - solution.get_additional_termination_information().dual_objective, - solution.get_additional_termination_information().gap, - solution.get_additional_termination_information().number_of_steps_taken, - solution.get_additional_termination_information().solve_time, - solution.get_additional_termination_information().solved_by_pdlp}; + linear_programming_ret_t lp_ret; + lp_ret.primal_solution_ = + std::make_unique(solution.get_primal_solution().release()); + lp_ret.dual_solution_ = + std::make_unique(solution.get_dual_solution().release()); + lp_ret.reduced_cost_ = + std::make_unique(solution.get_reduced_cost().release()); + lp_ret.is_device_memory_ = true; + + lp_ret.current_primal_solution_ = std::make_unique( + solution.get_pdlp_warm_start_data().current_primal_solution_.release()); + lp_ret.current_dual_solution_ = std::make_unique( + solution.get_pdlp_warm_start_data().current_dual_solution_.release()); + lp_ret.initial_primal_average_ = std::make_unique( + solution.get_pdlp_warm_start_data().initial_primal_average_.release()); 
+ lp_ret.initial_dual_average_ = std::make_unique( + solution.get_pdlp_warm_start_data().initial_dual_average_.release()); + lp_ret.current_ATY_ = std::make_unique( + solution.get_pdlp_warm_start_data().current_ATY_.release()); + lp_ret.sum_primal_solutions_ = std::make_unique( + solution.get_pdlp_warm_start_data().sum_primal_solutions_.release()); + lp_ret.sum_dual_solutions_ = std::make_unique( + solution.get_pdlp_warm_start_data().sum_dual_solutions_.release()); + lp_ret.last_restart_duality_gap_primal_solution_ = std::make_unique( + solution.get_pdlp_warm_start_data().last_restart_duality_gap_primal_solution_.release()); + lp_ret.last_restart_duality_gap_dual_solution_ = std::make_unique( + solution.get_pdlp_warm_start_data().last_restart_duality_gap_dual_solution_.release()); + lp_ret.initial_primal_weight_ = solution.get_pdlp_warm_start_data().initial_primal_weight_; + lp_ret.initial_step_size_ = solution.get_pdlp_warm_start_data().initial_step_size_; + lp_ret.total_pdlp_iterations_ = solution.get_pdlp_warm_start_data().total_pdlp_iterations_; + lp_ret.total_pdhg_iterations_ = solution.get_pdlp_warm_start_data().total_pdhg_iterations_; + lp_ret.last_candidate_kkt_score_ = solution.get_pdlp_warm_start_data().last_candidate_kkt_score_; + lp_ret.last_restart_kkt_score_ = solution.get_pdlp_warm_start_data().last_restart_kkt_score_; + lp_ret.sum_solution_weight_ = solution.get_pdlp_warm_start_data().sum_solution_weight_; + lp_ret.iterations_since_last_restart_ = + solution.get_pdlp_warm_start_data().iterations_since_last_restart_; + + lp_ret.termination_status_ = solution.get_termination_status(); + lp_ret.error_status_ = solution.get_error_status().get_error_type(); + lp_ret.error_message_ = solution.get_error_status().what(); + lp_ret.l2_primal_residual_ = solution.get_additional_termination_information().l2_primal_residual; + lp_ret.l2_dual_residual_ = solution.get_additional_termination_information().l2_dual_residual; + lp_ret.primal_objective_ = solution.get_additional_termination_information().primal_objective; + lp_ret.dual_objective_ = solution.get_additional_termination_information().dual_objective; + lp_ret.gap_ = solution.get_additional_termination_information().gap; + lp_ret.nb_iterations_ = solution.get_additional_termination_information().number_of_steps_taken; + lp_ret.solve_time_ = solution.get_additional_termination_information().solve_time; + lp_ret.solved_by_pdlp_ = solution.get_additional_termination_information().solved_by_pdlp; return lp_ret; } @@ -205,20 +213,22 @@ mip_ret_t call_solve_mip( error_type_t::ValidationError, "MIP solve cannot be called on an LP problem!"); auto solution = cuopt::linear_programming::solve_mip(op_problem, solver_settings); - mip_ret_t mip_ret{std::make_unique(solution.get_solution().release()), - solution.get_termination_status(), - solution.get_error_status().get_error_type(), - solution.get_error_status().what(), - solution.get_objective_value(), - solution.get_mip_gap(), - solution.get_solution_bound(), - solution.get_total_solve_time(), - solution.get_presolve_time(), - solution.get_max_constraint_violation(), - solution.get_max_int_violation(), - solution.get_max_variable_bound_violation(), - solution.get_num_nodes(), - solution.get_num_simplex_iterations()}; + mip_ret_t mip_ret; + mip_ret.solution_ = std::make_unique(solution.get_solution().release()); + mip_ret.is_device_memory_ = true; + mip_ret.termination_status_ = solution.get_termination_status(); + mip_ret.error_status_ = solution.get_error_status().get_error_type(); + 
mip_ret.error_message_ = solution.get_error_status().what(); + mip_ret.objective_ = solution.get_objective_value(); + mip_ret.mip_gap_ = solution.get_mip_gap(); + mip_ret.solution_bound_ = solution.get_solution_bound(); + mip_ret.total_solve_time_ = solution.get_total_solve_time(); + mip_ret.presolve_time_ = solution.get_presolve_time(); + mip_ret.max_constraint_violation_ = solution.get_max_constraint_violation(); + mip_ret.max_int_violation_ = solution.get_max_int_violation(); + mip_ret.max_variable_bound_violation_ = solution.get_max_variable_bound_violation(); + mip_ret.nodes_ = solution.get_num_nodes(); + mip_ret.simplex_iterations_ = solution.get_num_simplex_iterations(); return mip_ret; } @@ -228,9 +238,113 @@ std::unique_ptr call_solve( unsigned int flags, bool is_batch_mode) { + // Check if remote solve is configured FIRST (before any CUDA operations) + if (linear_programming::is_remote_solve_enabled()) { + // Data coming from Python is in CPU memory - mark it as such + data_model->set_is_device_memory(false); + + solver_ret_t response; + + // Determine if LP or MIP based on variable types + bool is_mip = false; + auto var_types = data_model->get_variable_types(); + for (size_t i = 0; i < var_types.size(); ++i) { + if (var_types.data()[i] != 'C') { + is_mip = true; + break; + } + } + + if (!is_mip) { + // LP: call solve_lp with nullptr handle - remote solve doesn't need GPU + auto solution = + linear_programming::solve_lp(nullptr, *data_model, solver_settings->get_pdlp_settings()); + + // Convert solution to linear_programming_ret_t + auto term_info = solution.get_additional_termination_information(); + linear_programming_ret_t lp_ret; + + if (solution.is_device_memory()) { + // GPU data (shouldn't happen for remote solve, but handle gracefully) + lp_ret.primal_solution_ = + std::make_unique(solution.get_primal_solution().release()); + lp_ret.dual_solution_ = + std::make_unique(solution.get_dual_solution().release()); + lp_ret.reduced_cost_ = + std::make_unique(solution.get_reduced_cost().release()); + lp_ret.is_device_memory_ = true; + } else { + // CPU data from remote solve - avoid device buffer allocations so CPU-only + // clients don't initialize CUDA. 
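The remote path above classifies the problem by scanning variable types: any entry other than `'C'` (continuous) makes it a MIP. The same scan, distilled (`is_mip` is an illustrative helper, not a function in this diff):

```cpp
#include <algorithm>
#include <vector>

// A problem is a MIP as soon as any variable is non-continuous; 'C' marks
// continuous variables in the cuOpt variable-type encoding.
bool is_mip(const std::vector<char>& var_types)
{
  return std::any_of(var_types.begin(), var_types.end(), [](char t) { return t != 'C'; });
}
```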
+ lp_ret.primal_solution_host_ = std::move(solution.get_primal_solution_host()); + lp_ret.dual_solution_host_ = std::move(solution.get_dual_solution_host()); + lp_ret.reduced_cost_host_ = std::move(solution.get_reduced_cost_host()); + lp_ret.is_device_memory_ = false; + } + lp_ret.initial_primal_weight_ = 0.0; + lp_ret.initial_step_size_ = 0.0; + lp_ret.total_pdlp_iterations_ = 0; + lp_ret.total_pdhg_iterations_ = 0; + lp_ret.last_candidate_kkt_score_ = 0.0; + lp_ret.last_restart_kkt_score_ = 0.0; + lp_ret.sum_solution_weight_ = 0.0; + lp_ret.iterations_since_last_restart_ = 0; + + lp_ret.termination_status_ = solution.get_termination_status(); + lp_ret.error_status_ = solution.get_error_status().get_error_type(); + lp_ret.error_message_ = solution.get_error_status().what(); + lp_ret.l2_primal_residual_ = term_info.l2_primal_residual; + lp_ret.l2_dual_residual_ = term_info.l2_dual_residual; + lp_ret.primal_objective_ = term_info.primal_objective; + lp_ret.dual_objective_ = term_info.dual_objective; + lp_ret.gap_ = term_info.gap; + lp_ret.nb_iterations_ = term_info.number_of_steps_taken; + lp_ret.solve_time_ = solution.get_solve_time(); + lp_ret.solved_by_pdlp_ = false; + response.lp_ret = std::move(lp_ret); + response.problem_type = linear_programming::problem_category_t::LP; + } else { + // MIP: call solve_mip with nullptr handle - remote solve doesn't need GPU + auto solution = + linear_programming::solve_mip(nullptr, *data_model, solver_settings->get_mip_settings()); + + mip_ret_t mip_ret; + + if (solution.is_device_memory()) { + // GPU data (shouldn't happen for remote solve, but handle gracefully) + mip_ret.solution_ = std::make_unique(solution.get_solution().release()); + mip_ret.is_device_memory_ = true; + } else { + // CPU data from remote solve - avoid device buffer allocations so CPU-only + // clients don't initialize CUDA. 
+ mip_ret.solution_host_ = std::move(solution.get_solution_host()); + mip_ret.is_device_memory_ = false; + } + + mip_ret.termination_status_ = solution.get_termination_status(); + mip_ret.error_status_ = solution.get_error_status().get_error_type(); + mip_ret.error_message_ = solution.get_error_status().what(); + mip_ret.objective_ = solution.get_objective_value(); + mip_ret.mip_gap_ = solution.get_mip_gap(); + mip_ret.solution_bound_ = solution.get_solution_bound(); + mip_ret.total_solve_time_ = solution.get_total_solve_time(); + mip_ret.presolve_time_ = solution.get_presolve_time(); + mip_ret.max_constraint_violation_ = solution.get_max_constraint_violation(); + mip_ret.max_int_violation_ = solution.get_max_int_violation(); + mip_ret.max_variable_bound_violation_ = solution.get_max_variable_bound_violation(); + mip_ret.nodes_ = solution.get_num_nodes(); + mip_ret.simplex_iterations_ = solution.get_num_simplex_iterations(); + response.mip_ret = std::move(mip_ret); + response.problem_type = linear_programming::problem_category_t::MIP; + } + + return std::make_unique(std::move(response)); + } + + // Local solve: create CUDA resources only when needed raft::common::nvtx::range fun_scope("Call Solve"); - rmm::cuda_stream stream(static_cast(flags)); - const raft::handle_t handle_{stream}; + // FIX: Use default handle constructor like CLI does, instead of explicit stream creation + const raft::handle_t handle_{}; solver_ret_t response; diff --git a/cpp/src/linear_programming/utilities/protobuf_serializer.cu b/cpp/src/linear_programming/utilities/protobuf_serializer.cu new file mode 100644 index 000000000..df68595cf --- /dev/null +++ b/cpp/src/linear_programming/utilities/protobuf_serializer.cu @@ -0,0 +1,1007 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. 
SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include + +#include + +#include +#include +#include + +namespace cuopt::linear_programming { + +namespace { + +// Convert cuOpt termination status to protobuf enum +cuopt::remote::PDLPTerminationStatus to_proto_status(pdlp_termination_status_t status) +{ + switch (status) { + case pdlp_termination_status_t::NoTermination: return cuopt::remote::PDLP_NO_TERMINATION; + case pdlp_termination_status_t::NumericalError: return cuopt::remote::PDLP_NUMERICAL_ERROR; + case pdlp_termination_status_t::Optimal: return cuopt::remote::PDLP_OPTIMAL; + case pdlp_termination_status_t::PrimalInfeasible: return cuopt::remote::PDLP_PRIMAL_INFEASIBLE; + case pdlp_termination_status_t::DualInfeasible: return cuopt::remote::PDLP_DUAL_INFEASIBLE; + case pdlp_termination_status_t::IterationLimit: return cuopt::remote::PDLP_ITERATION_LIMIT; + case pdlp_termination_status_t::TimeLimit: return cuopt::remote::PDLP_TIME_LIMIT; + case pdlp_termination_status_t::ConcurrentLimit: return cuopt::remote::PDLP_CONCURRENT_LIMIT; + case pdlp_termination_status_t::PrimalFeasible: return cuopt::remote::PDLP_PRIMAL_FEASIBLE; + default: return cuopt::remote::PDLP_NO_TERMINATION; + } +} + +// Convert protobuf enum to cuOpt termination status +pdlp_termination_status_t from_proto_status(cuopt::remote::PDLPTerminationStatus status) +{ + switch (status) { + case cuopt::remote::PDLP_NO_TERMINATION: return pdlp_termination_status_t::NoTermination; + case cuopt::remote::PDLP_NUMERICAL_ERROR: return pdlp_termination_status_t::NumericalError; + case cuopt::remote::PDLP_OPTIMAL: return pdlp_termination_status_t::Optimal; + case cuopt::remote::PDLP_PRIMAL_INFEASIBLE: return pdlp_termination_status_t::PrimalInfeasible; + case cuopt::remote::PDLP_DUAL_INFEASIBLE: return pdlp_termination_status_t::DualInfeasible; + case cuopt::remote::PDLP_ITERATION_LIMIT: return pdlp_termination_status_t::IterationLimit; + case cuopt::remote::PDLP_TIME_LIMIT: return pdlp_termination_status_t::TimeLimit; + case cuopt::remote::PDLP_CONCURRENT_LIMIT: return pdlp_termination_status_t::ConcurrentLimit; + case cuopt::remote::PDLP_PRIMAL_FEASIBLE: return pdlp_termination_status_t::PrimalFeasible; + default: return pdlp_termination_status_t::NoTermination; + } +} + +// Convert MIP termination status +cuopt::remote::MIPTerminationStatus to_proto_mip_status(mip_termination_status_t status) +{ + switch (status) { + case mip_termination_status_t::NoTermination: return cuopt::remote::MIP_NO_TERMINATION; + case mip_termination_status_t::Optimal: return cuopt::remote::MIP_OPTIMAL; + case mip_termination_status_t::FeasibleFound: return cuopt::remote::MIP_FEASIBLE_FOUND; + case mip_termination_status_t::Infeasible: return cuopt::remote::MIP_INFEASIBLE; + case mip_termination_status_t::Unbounded: return cuopt::remote::MIP_UNBOUNDED; + case mip_termination_status_t::TimeLimit: return cuopt::remote::MIP_TIME_LIMIT; + default: return cuopt::remote::MIP_NO_TERMINATION; + } +} + +mip_termination_status_t from_proto_mip_status(cuopt::remote::MIPTerminationStatus status) +{ + switch (status) { + case cuopt::remote::MIP_NO_TERMINATION: return mip_termination_status_t::NoTermination; + case cuopt::remote::MIP_OPTIMAL: return mip_termination_status_t::Optimal; + case cuopt::remote::MIP_FEASIBLE_FOUND: return mip_termination_status_t::FeasibleFound; + case cuopt::remote::MIP_INFEASIBLE: return mip_termination_status_t::Infeasible; + case cuopt::remote::MIP_UNBOUNDED: return mip_termination_status_t::Unbounded; + case 
cuopt::remote::MIP_TIME_LIMIT: return mip_termination_status_t::TimeLimit; + default: return mip_termination_status_t::NoTermination; + } +} + +} // namespace + +/** + * @brief Default Protocol Buffers serializer implementation. + */ +template +class protobuf_serializer_t : public remote_serializer_t { + public: + protobuf_serializer_t() = default; + ~protobuf_serializer_t() override = default; + + //============================================================================ + // Problem Serialization + //============================================================================ + + std::vector serialize_lp_request( + const mps_parser::data_model_view_t& view, + const pdlp_solver_settings_t& settings) override + { + cuopt::remote::SolveLPRequest request; + + // Set header + auto* header = request.mutable_header(); + header->set_version(protocol_version()); + header->set_problem_type(cuopt::remote::LP); + header->set_index_type(sizeof(i_t) == 4 ? cuopt::remote::INT32 : cuopt::remote::INT64); + header->set_float_type(sizeof(f_t) == 4 ? cuopt::remote::FLOAT32 : cuopt::remote::DOUBLE); + + // Serialize problem data + serialize_problem_to_proto(view, request.mutable_problem()); + + // Serialize settings + serialize_lp_settings_to_proto(settings, request.mutable_settings()); + + // Serialize to bytes + std::vector result(request.ByteSizeLong()); + request.SerializeToArray(result.data(), result.size()); + return result; + } + + std::vector serialize_mip_request( + const mps_parser::data_model_view_t& view, + const mip_solver_settings_t& settings) override + { + cuopt::remote::SolveMIPRequest request; + + // Set header + auto* header = request.mutable_header(); + header->set_version(protocol_version()); + header->set_problem_type(cuopt::remote::MIP); + header->set_index_type(sizeof(i_t) == 4 ? cuopt::remote::INT32 : cuopt::remote::INT64); + header->set_float_type(sizeof(f_t) == 4 ? 
cuopt::remote::FLOAT32 : cuopt::remote::DOUBLE); + + // Serialize problem data + serialize_problem_to_proto(view, request.mutable_problem()); + + // Serialize all MIP settings (names match cuOpt API) + auto* pb_settings = request.mutable_settings(); + pb_settings->set_time_limit(settings.time_limit); + pb_settings->set_relative_mip_gap(settings.tolerances.relative_mip_gap); + pb_settings->set_absolute_mip_gap(settings.tolerances.absolute_mip_gap); + pb_settings->set_integrality_tolerance(settings.tolerances.integrality_tolerance); + pb_settings->set_absolute_tolerance(settings.tolerances.absolute_tolerance); + pb_settings->set_relative_tolerance(settings.tolerances.relative_tolerance); + pb_settings->set_presolve_absolute_tolerance(settings.tolerances.presolve_absolute_tolerance); + pb_settings->set_log_to_console(settings.log_to_console); + pb_settings->set_heuristics_only(settings.heuristics_only); + pb_settings->set_num_cpu_threads(settings.num_cpu_threads); + pb_settings->set_num_gpus(settings.num_gpus); + pb_settings->set_presolve(settings.presolve); + pb_settings->set_mip_scaling(settings.mip_scaling); + + request.set_enable_incumbents(!settings.get_mip_callbacks().empty()); + + // Serialize to bytes + std::vector result(request.ByteSizeLong()); + request.SerializeToArray(result.data(), result.size()); + return result; + } + + //============================================================================ + // Solution Deserialization + //============================================================================ + + optimization_problem_solution_t deserialize_lp_solution( + const std::vector& data) override + { + cuopt::remote::LPSolution pb_solution; + if (!pb_solution.ParseFromArray(data.data(), data.size())) { + return optimization_problem_solution_t( + cuopt::logic_error("Failed to parse LP solution", cuopt::error_type_t::RuntimeError)); + } + + return proto_to_lp_solution(pb_solution); + } + + mip_solution_t deserialize_mip_solution(const std::vector& data) override + { + cuopt::remote::MIPSolution pb_solution; + if (!pb_solution.ParseFromArray(data.data(), data.size())) { + return mip_solution_t( + cuopt::logic_error("Failed to parse MIP solution", cuopt::error_type_t::RuntimeError)); + } + + return proto_to_mip_solution(pb_solution); + } + + //============================================================================ + // Server-side Operations + //============================================================================ + + bool is_mip_request(const std::vector& data) override + { + // Try to parse as direct request and check the header's problem_type + // MIP request - check if header indicates MIP + cuopt::remote::SolveMIPRequest mip_request; + if (mip_request.ParseFromArray(data.data(), data.size()) && mip_request.has_header()) { + if (mip_request.header().problem_type() == cuopt::remote::MIP) { return true; } + } + + // LP request - check if header indicates LP + cuopt::remote::SolveLPRequest lp_request; + if (lp_request.ParseFromArray(data.data(), data.size()) && lp_request.has_header()) { + if (lp_request.header().problem_type() == cuopt::remote::LP) { return false; } + } + + return false; // Default to LP if can't determine + } + + bool deserialize_lp_request(const std::vector& data, + cuopt::mps_parser::mps_data_model_t& mps_data, + pdlp_solver_settings_t& settings) override + { + // Try direct LP request + cuopt::remote::SolveLPRequest request; + if (!request.ParseFromArray(data.data(), data.size())) { + CUOPT_LOG_ERROR("[protobuf_serializer] Failed to parse LP 
request"); + return false; + } + + proto_to_mps_data(request.problem(), mps_data); + proto_to_lp_settings(request.settings(), settings); + return true; + } + + bool deserialize_mip_request(const std::vector& data, + cuopt::mps_parser::mps_data_model_t& mps_data, + mip_solver_settings_t& settings) override + { + // Try direct MIP request + cuopt::remote::SolveMIPRequest request; + if (!request.ParseFromArray(data.data(), data.size())) { + CUOPT_LOG_ERROR("[protobuf_serializer] Failed to parse MIP request"); + return false; + } + + proto_to_mps_data(request.problem(), mps_data); + proto_to_mip_settings(request.settings(), settings); + return true; + } + + std::vector serialize_lp_solution( + const optimization_problem_solution_t& solution) override + { + cuopt::remote::LPSolution pb_solution; + lp_solution_to_proto(solution, &pb_solution); + + std::vector result(pb_solution.ByteSizeLong()); + pb_solution.SerializeToArray(result.data(), result.size()); + return result; + } + + std::vector serialize_mip_solution(const mip_solution_t& solution) override + { + cuopt::remote::MIPSolution pb_solution; + mip_solution_to_proto(solution, &pb_solution); + + std::vector result(pb_solution.ByteSizeLong()); + pb_solution.SerializeToArray(result.data(), result.size()); + return result; + } + + //============================================================================ + // Metadata + //============================================================================ + + std::string format_name() const override { return "protobuf"; } + + uint32_t protocol_version() const override { return 1; } + + private: + //============================================================================ + // Helper Methods - Problem Serialization + //============================================================================ + + void serialize_problem_to_proto(const mps_parser::data_model_view_t& view, + cuopt::remote::OptimizationProblem* pb_problem) + { + // Note: view must point to CPU memory for serialization + // The solve functions ensure this by copying GPU data to CPU if needed + + pb_problem->set_problem_name(view.get_problem_name()); + pb_problem->set_objective_name(view.get_objective_name()); + pb_problem->set_maximize(view.get_sense()); // get_sense() returns true for maximize + pb_problem->set_objective_scaling_factor(view.get_objective_scaling_factor()); + pb_problem->set_objective_offset(view.get_objective_offset()); + + // Get spans for constraint matrix (CSR format) + auto values_span = view.get_constraint_matrix_values(); + auto indices_span = view.get_constraint_matrix_indices(); + auto offsets_span = view.get_constraint_matrix_offsets(); + + auto nnz = static_cast(values_span.size()); + auto n_rows = static_cast(offsets_span.size()) - 1; + + const f_t* values_ptr = values_span.data(); + const i_t* indices_ptr = indices_span.data(); + const i_t* offsets_ptr = offsets_span.data(); + + // Constraint matrix A in CSR format (field names match data_model_view_t) + for (i_t i = 0; i < nnz; ++i) { + pb_problem->add_a(static_cast(values_ptr[i])); + } + for (i_t i = 0; i < nnz; ++i) { + pb_problem->add_a_indices(static_cast(indices_ptr[i])); + } + for (i_t i = 0; i <= n_rows; ++i) { + pb_problem->add_a_offsets(static_cast(offsets_ptr[i])); + } + + // Objective coefficients c (field name matches data_model_view_t: c_) + auto obj_span = view.get_objective_coefficients(); + auto n_cols = static_cast(obj_span.size()); + const f_t* obj_ptr = obj_span.data(); + for (i_t i = 0; i < n_cols; ++i) { + 
pb_problem->add_c(static_cast(obj_ptr[i])); + } + + // Variable bounds + auto lb_span = view.get_variable_lower_bounds(); + auto ub_span = view.get_variable_upper_bounds(); + const f_t* lb_ptr = lb_span.data(); + const f_t* ub_ptr = ub_span.data(); + for (i_t i = 0; i < n_cols; ++i) { + pb_problem->add_variable_lower_bounds(static_cast(lb_ptr[i])); + pb_problem->add_variable_upper_bounds(static_cast(ub_ptr[i])); + } + + // Constraint bounds - need to handle both formats: + // 1. Direct lower/upper bounds (set_constraint_lower/upper_bounds) + // 2. RHS + row_types format (set_constraint_bounds + set_row_types) + auto c_lb_span = view.get_constraint_lower_bounds(); + auto c_ub_span = view.get_constraint_upper_bounds(); + + if (c_lb_span.size() == static_cast(n_rows) && + c_ub_span.size() == static_cast(n_rows)) { + // Direct format - use as-is + const f_t* c_lb_ptr = c_lb_span.data(); + const f_t* c_ub_ptr = c_ub_span.data(); + for (i_t i = 0; i < n_rows; ++i) { + pb_problem->add_constraint_lower_bounds(static_cast(c_lb_ptr[i])); + pb_problem->add_constraint_upper_bounds(static_cast(c_ub_ptr[i])); + } + } else { + // RHS + row_types format - compute lower/upper bounds + auto b_span = view.get_constraint_bounds(); + auto row_types_span = view.get_row_types(); + const f_t* b_ptr = b_span.data(); + const char* rt_ptr = row_types_span.data(); + + constexpr f_t inf = std::numeric_limits::infinity(); + + for (i_t i = 0; i < n_rows; ++i) { + f_t lb, ub; + char row_type = (rt_ptr && row_types_span.size() > 0) ? rt_ptr[i] : 'E'; + f_t rhs = (b_ptr && b_span.size() > 0) ? b_ptr[i] : 0; + + switch (row_type) { + case 'E': // Equality: lb = ub = rhs + lb = rhs; + ub = rhs; + break; + case 'L': // Less-than-or-equal: -inf <= Ax <= rhs + lb = -inf; + ub = rhs; + break; + case 'G': // Greater-than-or-equal: rhs <= Ax <= inf + lb = rhs; + ub = inf; + break; + case 'N': // Non-constraining (free) + lb = -inf; + ub = inf; + break; + default: // Default to equality + lb = rhs; + ub = rhs; + break; + } + pb_problem->add_constraint_lower_bounds(static_cast(lb)); + pb_problem->add_constraint_upper_bounds(static_cast(ub)); + } + } + + // Variable names (if available) + const auto& var_names = view.get_variable_names(); + for (const auto& name : var_names) { + pb_problem->add_variable_names(name); + } + + // Row names (if available) + const auto& row_names = view.get_row_names(); + for (const auto& name : row_names) { + pb_problem->add_row_names(name); + } + + // Variable types (for MIP problems) - stored as bytes to match data_model_view_t + auto var_types_span = view.get_variable_types(); + if (var_types_span.size() > 0) { + pb_problem->set_variable_types(std::string(var_types_span.data(), var_types_span.size())); + } + + // Row types - store directly as bytes + auto row_types_span = view.get_row_types(); + if (row_types_span.size() > 0) { + pb_problem->set_row_types(std::string(row_types_span.data(), row_types_span.size())); + } + + // Constraint bounds b (RHS) - store directly if available + auto b_span = view.get_constraint_bounds(); + if (b_span.size() > 0) { + const f_t* b_ptr = b_span.data(); + for (size_t i = 0; i < b_span.size(); ++i) { + pb_problem->add_b(static_cast(b_ptr[i])); + } + } + + // Initial solutions (if available) + auto init_primal_span = view.get_initial_primal_solution(); + if (init_primal_span.size() > 0) { + const f_t* init_primal_ptr = init_primal_span.data(); + for (size_t i = 0; i < init_primal_span.size(); ++i) { + 
pb_problem->add_initial_primal_solution(static_cast(init_primal_ptr[i])); + } + } + + auto init_dual_span = view.get_initial_dual_solution(); + if (init_dual_span.size() > 0) { + const f_t* init_dual_ptr = init_dual_span.data(); + for (size_t i = 0; i < init_dual_span.size(); ++i) { + pb_problem->add_initial_dual_solution(static_cast(init_dual_ptr[i])); + } + } + + // Quadratic objective matrix Q (for QPS problems) + if (view.has_quadratic_objective()) { + auto q_values_span = view.get_quadratic_objective_values(); + auto q_indices_span = view.get_quadratic_objective_indices(); + auto q_offsets_span = view.get_quadratic_objective_offsets(); + + const f_t* q_values_ptr = q_values_span.data(); + const i_t* q_indices_ptr = q_indices_span.data(); + const i_t* q_offsets_ptr = q_offsets_span.data(); + + for (size_t i = 0; i < q_values_span.size(); ++i) { + pb_problem->add_q_values(static_cast(q_values_ptr[i])); + } + for (size_t i = 0; i < q_indices_span.size(); ++i) { + pb_problem->add_q_indices(static_cast(q_indices_ptr[i])); + } + for (size_t i = 0; i < q_offsets_span.size(); ++i) { + pb_problem->add_q_offsets(static_cast(q_offsets_ptr[i])); + } + } + } + + // Convert cuOpt pdlp_solver_mode_t to protobuf enum + cuopt::remote::PDLPSolverMode to_proto_pdlp_mode(pdlp_solver_mode_t mode) + { + switch (mode) { + case pdlp_solver_mode_t::Stable1: return cuopt::remote::Stable1; + case pdlp_solver_mode_t::Stable2: return cuopt::remote::Stable2; + case pdlp_solver_mode_t::Methodical1: return cuopt::remote::Methodical1; + case pdlp_solver_mode_t::Fast1: return cuopt::remote::Fast1; + case pdlp_solver_mode_t::Stable3: return cuopt::remote::Stable3; + default: return cuopt::remote::Stable3; + } + } + + // Convert cuOpt method_t to protobuf enum + cuopt::remote::LPMethod to_proto_method(method_t method) + { + switch (method) { + case method_t::Concurrent: return cuopt::remote::Concurrent; + case method_t::PDLP: return cuopt::remote::PDLP; + case method_t::DualSimplex: return cuopt::remote::DualSimplex; + case method_t::Barrier: return cuopt::remote::Barrier; + default: return cuopt::remote::Concurrent; + } + } + + void serialize_lp_settings_to_proto(const pdlp_solver_settings_t& settings, + cuopt::remote::PDLPSolverSettings* pb_settings) + { + // Termination tolerances (all names match cuOpt API) + pb_settings->set_absolute_gap_tolerance(settings.tolerances.absolute_gap_tolerance); + pb_settings->set_relative_gap_tolerance(settings.tolerances.relative_gap_tolerance); + pb_settings->set_primal_infeasible_tolerance(settings.tolerances.primal_infeasible_tolerance); + pb_settings->set_dual_infeasible_tolerance(settings.tolerances.dual_infeasible_tolerance); + pb_settings->set_absolute_dual_tolerance(settings.tolerances.absolute_dual_tolerance); + pb_settings->set_relative_dual_tolerance(settings.tolerances.relative_dual_tolerance); + pb_settings->set_absolute_primal_tolerance(settings.tolerances.absolute_primal_tolerance); + pb_settings->set_relative_primal_tolerance(settings.tolerances.relative_primal_tolerance); + + // Limits + pb_settings->set_time_limit(settings.time_limit); + // Avoid emitting a huge number when the iteration limit is the library default. + // Use -1 sentinel for "unset/use server defaults". 
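The serializer above normalizes MPS row types into explicit constraint-bound pairs whenever direct bounds are absent. That mapping, isolated as a sketch (`row_bounds` is illustrative, not part of this diff):

```cpp
#include <limits>
#include <utility>

// MPS row types: 'E' equality, 'L' at-most, 'G' at-least, 'N' free. Unknown
// types fall back to equality, matching the serializer's default branch.
std::pair<double, double> row_bounds(char row_type, double rhs)
{
  constexpr double inf = std::numeric_limits<double>::infinity();
  switch (row_type) {
    case 'L': return {-inf, rhs};
    case 'G': return {rhs, inf};
    case 'N': return {-inf, inf};
    case 'E':
    default: return {rhs, rhs};
  }
}
```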
+ if (settings.iteration_limit == std::numeric_limits::max()) { + pb_settings->set_iteration_limit(-1); + } else { + pb_settings->set_iteration_limit(static_cast(settings.iteration_limit)); + } + + // Solver configuration + pb_settings->set_log_to_console(settings.log_to_console); + pb_settings->set_detect_infeasibility(settings.detect_infeasibility); + pb_settings->set_strict_infeasibility(settings.strict_infeasibility); + pb_settings->set_pdlp_solver_mode(to_proto_pdlp_mode(settings.pdlp_solver_mode)); + pb_settings->set_method(to_proto_method(settings.method)); + pb_settings->set_presolve(settings.presolve); + pb_settings->set_dual_postsolve(settings.dual_postsolve); + pb_settings->set_crossover(settings.crossover); + pb_settings->set_num_gpus(settings.num_gpus); + + // Advanced options + pb_settings->set_per_constraint_residual(settings.per_constraint_residual); + pb_settings->set_cudss_deterministic(settings.cudss_deterministic); + pb_settings->set_folding(settings.folding); + pb_settings->set_augmented(settings.augmented); + pb_settings->set_dualize(settings.dualize); + pb_settings->set_ordering(settings.ordering); + pb_settings->set_barrier_dual_initial_point(settings.barrier_dual_initial_point); + pb_settings->set_eliminate_dense_columns(settings.eliminate_dense_columns); + pb_settings->set_save_best_primal_so_far(settings.save_best_primal_so_far); + pb_settings->set_first_primal_feasible(settings.first_primal_feasible); + } + + //============================================================================ + // Helper Methods - Problem Deserialization + //============================================================================ + + void proto_to_mps_data(const cuopt::remote::OptimizationProblem& pb_problem, + cuopt::mps_parser::mps_data_model_t& mps_data) + { + mps_data.set_problem_name(pb_problem.problem_name()); + mps_data.set_objective_name(pb_problem.objective_name()); + mps_data.set_maximize(pb_problem.maximize()); + mps_data.set_objective_scaling_factor(pb_problem.objective_scaling_factor()); + mps_data.set_objective_offset(pb_problem.objective_offset()); + + // Constraint matrix A in CSR format (field names match data_model_view_t) + std::vector values(pb_problem.a().begin(), pb_problem.a().end()); + std::vector indices(pb_problem.a_indices().begin(), pb_problem.a_indices().end()); + std::vector offsets(pb_problem.a_offsets().begin(), pb_problem.a_offsets().end()); + + mps_data.set_csr_constraint_matrix(values.data(), + static_cast(values.size()), + indices.data(), + static_cast(indices.size()), + offsets.data(), + static_cast(offsets.size())); + + // Objective coefficients c + std::vector obj(pb_problem.c().begin(), pb_problem.c().end()); + mps_data.set_objective_coefficients(obj.data(), static_cast(obj.size())); + + // Variable bounds + std::vector var_lb(pb_problem.variable_lower_bounds().begin(), + pb_problem.variable_lower_bounds().end()); + std::vector var_ub(pb_problem.variable_upper_bounds().begin(), + pb_problem.variable_upper_bounds().end()); + mps_data.set_variable_lower_bounds(var_lb.data(), static_cast(var_lb.size())); + mps_data.set_variable_upper_bounds(var_ub.data(), static_cast(var_ub.size())); + + // Constraint bounds (prefer lower/upper bounds if available) + if (pb_problem.constraint_lower_bounds_size() > 0) { + std::vector con_lb(pb_problem.constraint_lower_bounds().begin(), + pb_problem.constraint_lower_bounds().end()); + std::vector con_ub(pb_problem.constraint_upper_bounds().begin(), + pb_problem.constraint_upper_bounds().end()); + 
mps_data.set_constraint_lower_bounds(con_lb.data(), static_cast(con_lb.size())); + mps_data.set_constraint_upper_bounds(con_ub.data(), static_cast(con_ub.size())); + } else if (pb_problem.b_size() > 0) { + // Use b (RHS) + row_types format + std::vector b(pb_problem.b().begin(), pb_problem.b().end()); + mps_data.set_constraint_bounds(b.data(), static_cast(b.size())); + + if (!pb_problem.row_types().empty()) { + const std::string& row_types = pb_problem.row_types(); + mps_data.set_row_types(row_types.data(), static_cast(row_types.size())); + } + } + + // Variable names + if (pb_problem.variable_names_size() > 0) { + std::vector var_names(pb_problem.variable_names().begin(), + pb_problem.variable_names().end()); + mps_data.set_variable_names(var_names); + } + + // Row names + if (pb_problem.row_names_size() > 0) { + std::vector row_names(pb_problem.row_names().begin(), + pb_problem.row_names().end()); + mps_data.set_row_names(row_names); + } + + // Variable types (stored as bytes, matching data_model_view_t) + if (!pb_problem.variable_types().empty()) { + const std::string& var_types_str = pb_problem.variable_types(); + std::vector var_types(var_types_str.begin(), var_types_str.end()); + mps_data.set_variable_types(var_types); + } + + // Initial solutions (if provided) + if (pb_problem.initial_primal_solution_size() > 0) { + std::vector init_primal(pb_problem.initial_primal_solution().begin(), + pb_problem.initial_primal_solution().end()); + mps_data.set_initial_primal_solution(init_primal.data(), + static_cast(init_primal.size())); + } + + if (pb_problem.initial_dual_solution_size() > 0) { + std::vector init_dual(pb_problem.initial_dual_solution().begin(), + pb_problem.initial_dual_solution().end()); + mps_data.set_initial_dual_solution(init_dual.data(), static_cast(init_dual.size())); + } + + // Quadratic objective matrix Q (for QPS problems) + if (pb_problem.q_values_size() > 0) { + std::vector q_values(pb_problem.q_values().begin(), pb_problem.q_values().end()); + std::vector q_indices(pb_problem.q_indices().begin(), pb_problem.q_indices().end()); + std::vector q_offsets(pb_problem.q_offsets().begin(), pb_problem.q_offsets().end()); + + mps_data.set_quadratic_objective_matrix(q_values.data(), + static_cast(q_values.size()), + q_indices.data(), + static_cast(q_indices.size()), + q_offsets.data(), + static_cast(q_offsets.size())); + } + } + + // Convert protobuf enum to cuOpt pdlp_solver_mode_t + pdlp_solver_mode_t from_proto_pdlp_mode(cuopt::remote::PDLPSolverMode mode) + { + switch (mode) { + case cuopt::remote::Stable1: return pdlp_solver_mode_t::Stable1; + case cuopt::remote::Stable2: return pdlp_solver_mode_t::Stable2; + case cuopt::remote::Methodical1: return pdlp_solver_mode_t::Methodical1; + case cuopt::remote::Fast1: return pdlp_solver_mode_t::Fast1; + case cuopt::remote::Stable3: return pdlp_solver_mode_t::Stable3; + default: return pdlp_solver_mode_t::Stable3; + } + } + + // Convert protobuf enum to cuOpt method_t + method_t from_proto_method(cuopt::remote::LPMethod method) + { + switch (method) { + case cuopt::remote::Concurrent: return method_t::Concurrent; + case cuopt::remote::PDLP: return method_t::PDLP; + case cuopt::remote::DualSimplex: return method_t::DualSimplex; + case cuopt::remote::Barrier: return method_t::Barrier; + default: return method_t::Concurrent; + } + } + + void proto_to_lp_settings(const cuopt::remote::PDLPSolverSettings& pb_settings, + pdlp_solver_settings_t& settings) + { + // Termination tolerances (all names match cuOpt API) + 
settings.tolerances.absolute_gap_tolerance = pb_settings.absolute_gap_tolerance(); + settings.tolerances.relative_gap_tolerance = pb_settings.relative_gap_tolerance(); + settings.tolerances.primal_infeasible_tolerance = pb_settings.primal_infeasible_tolerance(); + settings.tolerances.dual_infeasible_tolerance = pb_settings.dual_infeasible_tolerance(); + settings.tolerances.absolute_dual_tolerance = pb_settings.absolute_dual_tolerance(); + settings.tolerances.relative_dual_tolerance = pb_settings.relative_dual_tolerance(); + settings.tolerances.absolute_primal_tolerance = pb_settings.absolute_primal_tolerance(); + settings.tolerances.relative_primal_tolerance = pb_settings.relative_primal_tolerance(); + + // Limits + settings.time_limit = pb_settings.time_limit(); + // proto3 defaults numeric fields to 0; treat negative iteration_limit as "unset" + // so the server keeps the library default (typically max()). + if (pb_settings.iteration_limit() >= 0) { + settings.iteration_limit = static_cast(pb_settings.iteration_limit()); + } + + // Solver configuration + settings.log_to_console = pb_settings.log_to_console(); + settings.detect_infeasibility = pb_settings.detect_infeasibility(); + settings.strict_infeasibility = pb_settings.strict_infeasibility(); + settings.pdlp_solver_mode = from_proto_pdlp_mode(pb_settings.pdlp_solver_mode()); + settings.method = from_proto_method(pb_settings.method()); + settings.presolve = pb_settings.presolve(); + settings.dual_postsolve = pb_settings.dual_postsolve(); + settings.crossover = pb_settings.crossover(); + settings.num_gpus = pb_settings.num_gpus(); + + // Advanced options + settings.per_constraint_residual = pb_settings.per_constraint_residual(); + settings.cudss_deterministic = pb_settings.cudss_deterministic(); + settings.folding = pb_settings.folding(); + settings.augmented = pb_settings.augmented(); + settings.dualize = pb_settings.dualize(); + settings.ordering = pb_settings.ordering(); + settings.barrier_dual_initial_point = pb_settings.barrier_dual_initial_point(); + settings.eliminate_dense_columns = pb_settings.eliminate_dense_columns(); + settings.save_best_primal_so_far = pb_settings.save_best_primal_so_far(); + settings.first_primal_feasible = pb_settings.first_primal_feasible(); + } + + void proto_to_mip_settings(const cuopt::remote::MIPSolverSettings& pb_settings, + mip_solver_settings_t& settings) + { + // Limits + settings.time_limit = pb_settings.time_limit(); + + // Tolerances (all names match cuOpt API) + settings.tolerances.relative_mip_gap = pb_settings.relative_mip_gap(); + settings.tolerances.absolute_mip_gap = pb_settings.absolute_mip_gap(); + settings.tolerances.integrality_tolerance = pb_settings.integrality_tolerance(); + settings.tolerances.absolute_tolerance = pb_settings.absolute_tolerance(); + settings.tolerances.relative_tolerance = pb_settings.relative_tolerance(); + settings.tolerances.presolve_absolute_tolerance = pb_settings.presolve_absolute_tolerance(); + + // Solver configuration + settings.log_to_console = pb_settings.log_to_console(); + settings.heuristics_only = pb_settings.heuristics_only(); + settings.num_cpu_threads = pb_settings.num_cpu_threads(); + settings.num_gpus = pb_settings.num_gpus(); + settings.presolve = pb_settings.presolve(); + settings.mip_scaling = pb_settings.mip_scaling(); + } + + //============================================================================ + // Helper Methods - Solution Conversion + //============================================================================ + + 
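Both halves of the iteration-limit sentinel now appear in this file: the client encodes the library-default limit as -1, and the server leaves its own default untouched for any negative wire value, since proto3 scalars carry no presence bit and a plain 0 would be ambiguous. The round-trip in isolation (a sketch assuming the `int32_t` index instantiation; the function names are illustrative):

```cpp
#include <cstdint>
#include <limits>

// Client side: the library default (max int32) travels as -1, meaning "unset".
int64_t encode_iteration_limit(int32_t limit)
{
  return limit == std::numeric_limits<int32_t>::max() ? -1 : static_cast<int64_t>(limit);
}

// Server side: negative wire values leave the library default in place.
void decode_iteration_limit(int64_t wire_value, int32_t& limit)
{
  if (wire_value >= 0) { limit = static_cast<int32_t>(wire_value); }
}
```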
optimization_problem_solution_t proto_to_lp_solution( + const cuopt::remote::LPSolution& pb_solution) + { + // Create CPU-based solution + std::vector primal(pb_solution.primal_solution().begin(), + pb_solution.primal_solution().end()); + std::vector dual(pb_solution.dual_solution().begin(), pb_solution.dual_solution().end()); + std::vector reduced_cost(pb_solution.reduced_cost().begin(), + pb_solution.reduced_cost().end()); + + optimization_problem_solution_t solution( + from_proto_status(pb_solution.termination_status())); + + // Set solution data + solution.set_primal_solution_host(std::move(primal)); + solution.set_dual_solution_host(std::move(dual)); + solution.set_reduced_cost_host(std::move(reduced_cost)); + + // Set statistics + solution.set_l2_primal_residual(pb_solution.l2_primal_residual()); + solution.set_l2_dual_residual(pb_solution.l2_dual_residual()); + solution.set_primal_objective(pb_solution.primal_objective()); + solution.set_dual_objective(pb_solution.dual_objective()); + solution.set_gap(pb_solution.gap()); + solution.set_nb_iterations(pb_solution.nb_iterations()); + solution.set_solve_time(pb_solution.solve_time()); + solution.set_solved_by_pdlp(pb_solution.solved_by_pdlp()); + + return solution; + } + + void lp_solution_to_proto(const optimization_problem_solution_t& solution, + cuopt::remote::LPSolution* pb_solution) + { + pb_solution->set_termination_status(to_proto_status(solution.get_termination_status())); + pb_solution->set_error_message(solution.get_error_string()); + + // Solution vectors - handle both device and host memory + if (solution.is_device_memory()) { + // Copy from device to host + const auto& d_primal = solution.get_primal_solution(); + const auto& d_dual = solution.get_dual_solution(); + // Note: reduced_cost getter is non-const, so we need to work around this + + // Copy primal solution from device + if (d_primal.size() > 0) { + std::vector h_primal(d_primal.size()); + cudaMemcpy( + h_primal.data(), d_primal.data(), d_primal.size() * sizeof(f_t), cudaMemcpyDeviceToHost); + for (const auto& v : h_primal) { + pb_solution->add_primal_solution(static_cast(v)); + } + } + + // Copy dual solution from device + if (d_dual.size() > 0) { + std::vector h_dual(d_dual.size()); + cudaMemcpy( + h_dual.data(), d_dual.data(), d_dual.size() * sizeof(f_t), cudaMemcpyDeviceToHost); + for (const auto& v : h_dual) { + pb_solution->add_dual_solution(static_cast(v)); + } + } + + // For reduced cost, we can access via const cast since we're just reading + auto& nc_solution = const_cast&>(solution); + auto& d_reduced_cost = nc_solution.get_reduced_cost(); + if (d_reduced_cost.size() > 0) { + std::vector h_reduced_cost(d_reduced_cost.size()); + cudaMemcpy(h_reduced_cost.data(), + d_reduced_cost.data(), + d_reduced_cost.size() * sizeof(f_t), + cudaMemcpyDeviceToHost); + for (const auto& v : h_reduced_cost) { + pb_solution->add_reduced_cost(static_cast(v)); + } + } + } else { + // Data is already on host + const auto& primal = solution.get_primal_solution_host(); + const auto& dual = solution.get_dual_solution_host(); + const auto& reduced_cost = solution.get_reduced_cost_host(); + + for (const auto& v : primal) { + pb_solution->add_primal_solution(static_cast(v)); + } + for (const auto& v : dual) { + pb_solution->add_dual_solution(static_cast(v)); + } + for (const auto& v : reduced_cost) { + pb_solution->add_reduced_cost(static_cast(v)); + } + } + + // Statistics + pb_solution->set_l2_primal_residual(solution.get_l2_primal_residual()); + 
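`lp_solution_to_proto` repeats one pattern for each vector: stage the device array into host memory with a synchronous `cudaMemcpy`, then append element-wise to the repeated field. Factored out as a sketch (the helper and its name are illustrative, not part of this diff):

```cpp
#include <cuda_runtime.h>

#include <cstddef>
#include <vector>

// Stage a device array through a host vector, then hand each element to `add`.
// The synchronous cudaMemcpy guarantees the host data is ready for the loop.
template <typename f_t, typename AddFn>
void append_device_array(const f_t* d_ptr, std::size_t n, AddFn add)
{
  if (n == 0) { return; }
  std::vector<f_t> host(n);
  cudaMemcpy(host.data(), d_ptr, n * sizeof(f_t), cudaMemcpyDeviceToHost);
  for (const auto& v : host) {
    add(static_cast<double>(v));
  }
}
```

A call site would look like `append_device_array(d_primal.data(), d_primal.size(), [&](double v) { pb_solution->add_primal_solution(v); });`, and the same helper would cover the dual and reduced-cost copies.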
pb_solution->set_l2_dual_residual(solution.get_l2_dual_residual()); + pb_solution->set_primal_objective(solution.get_primal_objective()); + pb_solution->set_dual_objective(solution.get_dual_objective()); + pb_solution->set_gap(solution.get_gap()); + pb_solution->set_nb_iterations(solution.get_nb_iterations()); + pb_solution->set_solve_time(solution.get_solve_time()); + pb_solution->set_solved_by_pdlp(solution.get_solved_by_pdlp()); + } + + mip_solution_t proto_to_mip_solution(const cuopt::remote::MIPSolution& pb_solution) + { + std::vector solution_vec(pb_solution.solution().begin(), pb_solution.solution().end()); + + // Create stats from protobuf data + solver_stats_t stats; + stats.total_solve_time = pb_solution.total_solve_time(); + stats.presolve_time = pb_solution.presolve_time(); + stats.solution_bound = pb_solution.solution_bound(); + stats.num_nodes = pb_solution.nodes(); + stats.num_simplex_iterations = pb_solution.simplex_iterations(); + + mip_solution_t solution(from_proto_mip_status(pb_solution.termination_status()), + stats); + + solution.set_solution_host(std::move(solution_vec)); + solution.set_objective(pb_solution.objective()); + solution.set_mip_gap(pb_solution.mip_gap()); + solution.set_max_constraint_violation(pb_solution.max_constraint_violation()); + solution.set_max_int_violation(pb_solution.max_int_violation()); + solution.set_max_variable_bound_violation(pb_solution.max_variable_bound_violation()); + + return solution; + } + + void mip_solution_to_proto(const mip_solution_t& solution, + cuopt::remote::MIPSolution* pb_solution) + { + pb_solution->set_termination_status(to_proto_mip_status(solution.get_termination_status())); + pb_solution->set_error_message(solution.get_error_string()); + + // Handle both device and host memory + if (solution.is_device_memory()) { + const auto& d_sol = solution.get_solution(); + if (d_sol.size() > 0) { + std::vector h_sol(d_sol.size()); + cudaMemcpy(h_sol.data(), d_sol.data(), d_sol.size() * sizeof(f_t), cudaMemcpyDeviceToHost); + for (const auto& v : h_sol) { + pb_solution->add_solution(static_cast(v)); + } + } + } else { + const auto& sol_vec = solution.get_solution_host(); + for (const auto& v : sol_vec) { + pb_solution->add_solution(static_cast(v)); + } + } + + pb_solution->set_objective(solution.get_objective_value()); + pb_solution->set_mip_gap(solution.get_mip_gap()); + pb_solution->set_solution_bound(solution.get_solution_bound()); + pb_solution->set_total_solve_time(solution.get_total_solve_time()); + pb_solution->set_presolve_time(solution.get_presolve_time()); + pb_solution->set_max_constraint_violation(solution.get_max_constraint_violation()); + pb_solution->set_max_int_violation(solution.get_max_int_violation()); + pb_solution->set_max_variable_bound_violation(solution.get_max_variable_bound_violation()); + pb_solution->set_nodes(solution.get_num_nodes()); + pb_solution->set_simplex_iterations(solution.get_num_simplex_iterations()); + } +}; + +//============================================================================ +// Template Instantiations +// Note: Only int32_t and double types are instantiated to avoid adding +// int64_t instantiations throughout the codebase +//============================================================================ + +#if CUOPT_INSTANTIATE_FLOAT +template class protobuf_serializer_t; +#endif + +#if CUOPT_INSTANTIATE_DOUBLE +template class protobuf_serializer_t; +#endif + +//============================================================================ +// Factory Functions 
+//============================================================================ + +template +std::shared_ptr> get_default_serializer() +{ + static auto instance = std::make_shared>(); + return instance; +} + +// Explicit instantiations for factory functions +#if CUOPT_INSTANTIATE_FLOAT +template std::shared_ptr> get_default_serializer(); +#endif + +#if CUOPT_INSTANTIATE_DOUBLE +template std::shared_ptr> get_default_serializer(); +#endif + +// Custom serializer loader (lazy-initialized) +template +std::shared_ptr> get_serializer() +{ + static std::shared_ptr> instance; + static std::once_flag init_flag; + + std::call_once(init_flag, []() { + const char* custom_lib = std::getenv("CUOPT_SERIALIZER_LIB"); + + if (custom_lib && custom_lib[0] != '\0') { + // Try to load custom serializer + CUOPT_LOG_INFO(std::string("[remote_solve] Loading custom serializer from: ") + custom_lib); + + // Open the shared library + void* handle = dlopen(custom_lib, RTLD_NOW | RTLD_LOCAL); + if (!handle) { + CUOPT_LOG_ERROR(std::string("[remote_solve] Failed to load serializer library: ") + + dlerror()); + instance = get_default_serializer(); + return; + } + + // Look for the factory function + // The function name includes template types for proper linking + std::string factory_name = "create_cuopt_serializer"; + if constexpr (std::is_same_v && std::is_same_v) { + factory_name = "create_cuopt_serializer_i32_f64"; + } else if constexpr (std::is_same_v && std::is_same_v) { + factory_name = "create_cuopt_serializer_i32_f32"; + } else if constexpr (std::is_same_v && std::is_same_v) { + factory_name = "create_cuopt_serializer_i64_f64"; + } else if constexpr (std::is_same_v && std::is_same_v) { + factory_name = "create_cuopt_serializer_i64_f32"; + } + + using factory_fn_t = std::unique_ptr> (*)(); + auto factory = reinterpret_cast(dlsym(handle, factory_name.c_str())); + + if (!factory) { + CUOPT_LOG_ERROR(std::string("[remote_solve] Factory function '") + factory_name + + "' not found: " + dlerror()); + dlclose(handle); + instance = get_default_serializer(); + return; + } + + auto custom_serializer = factory(); + if (custom_serializer) { + CUOPT_LOG_INFO(std::string("[remote_solve] Using custom serializer: ") + + custom_serializer->format_name()); + instance = std::move(custom_serializer); + } else { + CUOPT_LOG_ERROR("[remote_solve] Factory returned null, using default"); + dlclose(handle); + instance = get_default_serializer(); + } + // Note: We intentionally don't dlclose(handle) here to keep the library loaded + } else { + instance = get_default_serializer(); + } + }); + + return instance; +} + +// Explicit instantiations +#if CUOPT_INSTANTIATE_FLOAT +template std::shared_ptr> get_serializer(); +#endif + +#if CUOPT_INSTANTIATE_DOUBLE +template std::shared_ptr> get_serializer(); +#endif + +} // namespace cuopt::linear_programming diff --git a/cpp/src/linear_programming/utilities/remote_solve.cu b/cpp/src/linear_programming/utilities/remote_solve.cu new file mode 100644 index 000000000..1501b7fe8 --- /dev/null +++ b/cpp/src/linear_programming/utilities/remote_solve.cu @@ -0,0 +1,566 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include +#include + +#if CUOPT_ENABLE_GRPC +#include "remote_solve_grpc.hpp" +#endif + +#include + +#include +#include +#include +#include + +namespace cuopt::linear_programming { + +namespace { + +// gRPC is the only supported remote transport. 
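The `CUOPT_SERIALIZER_LIB` loader above implies a small contract for plugin authors: export a factory under the type-suffixed name the loader passes to `dlsym`, returning an instance of the `remote_serializer_t` interface. A minimal skeleton under those assumptions (`my_serializer_t` is hypothetical, and the `extern "C"` linkage is inferred from the loader's use of a plain, unmangled symbol name):

```cpp
#include <cuopt/linear_programming/utilities/remote_serialization.hpp>

#include <cstdint>
#include <memory>

// Hypothetical implementation of the serializer interface for the
// int32_t/double instantiation; methods omitted in this sketch.
class my_serializer_t;  // would derive from remote_serializer_t<int32_t, double>

// C linkage keeps the symbol unmangled so
// dlsym(handle, "create_cuopt_serializer_i32_f64") can resolve it.
extern "C" std::unique_ptr<cuopt::linear_programming::remote_serializer_t<int32_t, double>>
create_cuopt_serializer_i32_f64()
{
  return nullptr;  // a real plugin returns std::make_unique<my_serializer_t>()
}
```

Built as a shared library, the plugin is activated by pointing `CUOPT_SERIALIZER_LIB` at the resulting `.so`; a null factory return makes the loader fall back to the protobuf default, as the code above shows.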
+template +bool copy_incumbent_to_device(const std::vector& host_assignment, + double host_objective, + f_t** d_assignment_out, + f_t** d_objective_out) +{ + *d_assignment_out = nullptr; + *d_objective_out = nullptr; + if (host_assignment.empty()) { return false; } + + int device_count = 0; + cudaError_t device_err = cudaGetDeviceCount(&device_count); + if (device_err != cudaSuccess || device_count == 0) { + static bool logged_no_device = false; + if (!logged_no_device) { + CUOPT_LOG_INFO("[remote_solve] No CUDA device available; using host incumbents"); + logged_no_device = true; + } + return false; + } + + size_t n = host_assignment.size(); + std::vector assignment(n); + for (size_t i = 0; i < n; ++i) { + assignment[i] = static_cast(host_assignment[i]); + } + f_t objective = static_cast(host_objective); + + if (cudaMalloc(reinterpret_cast(d_assignment_out), n * sizeof(f_t)) != cudaSuccess) { + CUOPT_LOG_WARN("[remote_solve] Failed to cudaMalloc for incumbent assignment"); + return false; + } + if (cudaMalloc(reinterpret_cast(d_objective_out), sizeof(f_t)) != cudaSuccess) { + CUOPT_LOG_WARN("[remote_solve] Failed to cudaMalloc for incumbent objective"); + cudaFree(*d_assignment_out); + *d_assignment_out = nullptr; + return false; + } + + if (cudaMemcpy(*d_assignment_out, assignment.data(), n * sizeof(f_t), cudaMemcpyHostToDevice) != + cudaSuccess) { + CUOPT_LOG_WARN("[remote_solve] Failed to cudaMemcpy incumbent assignment"); + cudaFree(*d_assignment_out); + cudaFree(*d_objective_out); + *d_assignment_out = nullptr; + *d_objective_out = nullptr; + return false; + } + if (cudaMemcpy(*d_objective_out, &objective, sizeof(f_t), cudaMemcpyHostToDevice) != + cudaSuccess) { + CUOPT_LOG_WARN("[remote_solve] Failed to cudaMemcpy incumbent objective"); + cudaFree(*d_assignment_out); + cudaFree(*d_objective_out); + *d_assignment_out = nullptr; + *d_objective_out = nullptr; + return false; + } + + return true; +} + +template +void invoke_incumbent_callbacks( + const std::vector& callbacks, + const std::vector& assignment, + double objective) +{ + f_t* d_assignment = nullptr; + f_t* d_objective = nullptr; + bool on_device = + copy_incumbent_to_device(assignment, objective, &d_assignment, &d_objective); + std::vector h_assignment; + f_t h_objective = static_cast(objective); + f_t* assignment_ptr = nullptr; + f_t* objective_ptr = nullptr; + if (on_device) { + assignment_ptr = d_assignment; + objective_ptr = d_objective; + } else { + if (assignment.empty()) { return; } + h_assignment.resize(assignment.size()); + for (size_t i = 0; i < assignment.size(); ++i) { + h_assignment[i] = static_cast(assignment[i]); + } + assignment_ptr = h_assignment.data(); + objective_ptr = &h_objective; + } + + for (auto* cb : callbacks) { + if (cb == nullptr) { continue; } + if (cb->get_type() != cuopt::internals::base_solution_callback_type::GET_SOLUTION) { continue; } + cb->set_memory_location(on_device ? cuopt::internals::callback_memory_location::DEVICE + : cuopt::internals::callback_memory_location::HOST); + auto* get_cb = static_cast(cb); + get_cb->get_solution(assignment_ptr, objective_ptr); + } + + if (on_device) { + cudaDeviceSynchronize(); + cudaFree(d_assignment); + cudaFree(d_objective); + } +} + +// Socket transport removed. gRPC is the only supported remote transport. 
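`copy_incumbent_to_device` frees both allocations by hand on every failure branch. That is correct but repetitive; for comparison, a small RAII owner (a sketch, not something this diff introduces) would collapse those paths:

```cpp
#include <cuda_runtime.h>

#include <cstddef>

// Owns a single cudaMalloc'd allocation; frees it on destruction unless
// ownership was handed off via release().
struct cuda_buffer_t {
  void* ptr = nullptr;
  ~cuda_buffer_t()
  {
    if (ptr != nullptr) { cudaFree(ptr); }
  }
  bool alloc(std::size_t bytes) { return cudaMalloc(&ptr, bytes) == cudaSuccess; }
  void* release()
  {
    void* p = ptr;
    ptr     = nullptr;
    return p;
  }
};
```

With two such owners, each early `return false;` cleans up automatically, and only the success path calls `release()` to transfer the pointers to the caller.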
+ +} // namespace + +//============================================================================ +// LP Remote Solve +//============================================================================ + +template +optimization_problem_solution_t solve_lp_remote( + const remote_solve_config_t& config, + const cuopt::mps_parser::data_model_view_t& view, + const pdlp_solver_settings_t& settings) +{ + CUOPT_LOG_INFO( + "[remote_solve] Solving LP remotely on %s:%d (gRPC)", config.host.c_str(), config.port); + + // Log problem info (similar to local solve) + if (settings.log_to_console) { + auto n_rows = view.get_constraint_matrix_offsets().size() > 0 + ? static_cast(view.get_constraint_matrix_offsets().size()) - 1 + : 0; + auto n_cols = static_cast(view.get_objective_coefficients().size()); + auto nnz = static_cast(view.get_constraint_matrix_values().size()); + CUOPT_LOG_INFO("Solving a problem with %d constraints, %d variables, and %d nonzeros (remote)", + n_rows, + n_cols, + nnz); + } + + auto serializer = get_serializer(); + +#if CUOPT_ENABLE_GRPC + const std::string address = config.host + ":" + std::to_string(config.port); + + // Serialize as SolveLPRequest for gRPC submission. + std::vector request_data = serializer->serialize_lp_request(view, settings); + CUOPT_LOG_DEBUG(std::string("[remote_solve] Serialized LP request (gRPC): ") + + std::to_string(request_data.size()) + " bytes"); + + std::string job_id; + std::string err; + bool used_upload = false; + int64_t max_bytes = -1; + if (!grpc_remote::submit_or_upload(address, + grpc_remote::ProblemType::LP, + request_data.data(), + request_data.size(), + job_id, + err, + &used_upload, + &max_bytes)) { + std::cerr << "[remote_solve] UploadAndSubmit failed: " << err << "\n"; + std::cerr.flush(); + return optimization_problem_solution_t( + cuopt::logic_error("gRPC UploadAndSubmit failed: " + err, cuopt::error_type_t::RuntimeError)); + } + if (settings.log_to_console) { + CUOPT_LOG_INFO("gRPC submit path for LP (%zu bytes, max=%ld): %s", + request_data.size(), + static_cast(max_bytes), + used_upload ? "UploadAndSubmit" : "SubmitJob"); + CUOPT_LOG_INFO("gRPC job_id: %s", job_id.c_str()); + } + + // Optional realtime logs on client side + volatile bool stop_logs = false; + std::thread log_thread; + if (settings.log_to_console) { + log_thread = + std::thread([&]() { grpc_remote::stream_logs_to_stdout(address, job_id, &stop_logs, ""); }); + } + + // Poll status until terminal, allowing log streaming and cancellation in other threads. 
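Later in this function (and again in the MIP path) the client chooses between unary `GetResult` and streaming `StreamResult` based on the two sizes reported by `CheckStatus`. That selection rule, distilled (`prefer_unary` is an illustrative name):

```cpp
#include <cstdint>

// Unary GetResult is tried when the result fits under the server's max message
// size. A negative max means the server reported no limit, so unary is tried
// first; streaming remains the fallback on any unary failure.
bool prefer_unary(int64_t result_size_bytes, int64_t max_message_bytes)
{
  if (max_message_bytes < 0) { return true; }
  return result_size_bytes > 0 && result_size_bytes <= max_message_bytes;
}
```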
+ std::string status; + int64_t result_size_bytes = 0; + int64_t max_message_bytes = 0; + std::string last_status; + while (true) { + std::string st_err; + if (!grpc_remote::check_status( + address, job_id, status, st_err, &result_size_bytes, &max_message_bytes)) { + stop_logs = true; + if (log_thread.joinable()) { log_thread.join(); } + grpc_remote::delete_result(address, job_id); + return optimization_problem_solution_t(cuopt::logic_error( + "gRPC CheckStatus failed: " + st_err, cuopt::error_type_t::RuntimeError)); + } + + if (settings.log_to_console && status != last_status) { + CUOPT_LOG_INFO("gRPC status for job %s: %s", job_id.c_str(), status.c_str()); + last_status = status; + } + if (status == "COMPLETED") { break; } + if (status == "FAILED" || status == "CANCELLED" || status == "NOT_FOUND") { + stop_logs = true; + if (log_thread.joinable()) { log_thread.join(); } + grpc_remote::delete_result(address, job_id); + return optimization_problem_solution_t( + cuopt::logic_error("Remote job did not complete successfully (status=" + status + ")", + cuopt::error_type_t::RuntimeError)); + } + + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } + + // Retrieve result bytes (unary if it fits, otherwise streaming) + std::vector solution_bytes; + std::string res_err; + bool use_get_result = false; + if (max_message_bytes < 0) { + use_get_result = true; + } else if (result_size_bytes > 0 && result_size_bytes <= max_message_bytes) { + use_get_result = true; + } + if (use_get_result) { + if (!grpc_remote::get_result(address, job_id, solution_bytes, res_err)) { + use_get_result = false; + } + } + if (!use_get_result) { + if (!grpc_remote::stream_result(address, job_id, solution_bytes, res_err)) { + stop_logs = true; + if (log_thread.joinable()) { log_thread.join(); } + grpc_remote::delete_result(address, job_id); + return optimization_problem_solution_t(cuopt::logic_error( + "gRPC StreamResult failed: " + res_err, cuopt::error_type_t::RuntimeError)); + } + } + if (settings.log_to_console) { + CUOPT_LOG_INFO("gRPC result fetch: %s (size=%ld max=%ld)", + use_get_result ? "GetResult" : "StreamResult", + static_cast(result_size_bytes), + static_cast(max_message_bytes)); + } + if (solution_bytes.empty()) { + stop_logs = true; + if (log_thread.joinable()) { log_thread.join(); } + grpc_remote::delete_result(address, job_id); + return optimization_problem_solution_t( + cuopt::logic_error("gRPC result payload empty", cuopt::error_type_t::RuntimeError)); + } + + stop_logs = true; + if (log_thread.joinable()) { log_thread.join(); } + + grpc_remote::delete_result(address, job_id); + return serializer->deserialize_lp_solution(solution_bytes); +#else + (void)serializer; + return optimization_problem_solution_t(cuopt::logic_error( + "gRPC support is disabled; remote solve is unavailable", cuopt::error_type_t::RuntimeError)); +#endif +} + +//============================================================================ +// MIP Remote Solve +//============================================================================ + +template +mip_solution_t solve_mip_remote( + const remote_solve_config_t& config, + const cuopt::mps_parser::data_model_view_t& view, + const mip_solver_settings_t& settings) +{ + CUOPT_LOG_INFO( + "[remote_solve] Solving MIP remotely on %s:%d (gRPC)", config.host.c_str(), config.port); + + // Log problem info + { + auto n_rows = view.get_constraint_matrix_offsets().size() > 0 + ? 
static_cast(view.get_constraint_matrix_offsets().size()) - 1 + : 0; + auto n_cols = static_cast(view.get_objective_coefficients().size()); + auto nnz = static_cast(view.get_constraint_matrix_values().size()); + CUOPT_LOG_INFO( + "Solving a problem with %d constraints, %d variables, and %d nonzeros (remote MIP)", + n_rows, + n_cols, + nnz); + } + + auto serializer = get_serializer(); + +#if CUOPT_ENABLE_GRPC + const std::string address = config.host + ":" + std::to_string(config.port); + + std::vector request_data = serializer->serialize_mip_request(view, settings); + CUOPT_LOG_DEBUG(std::string("[remote_solve] Serialized MIP request (gRPC): ") + + std::to_string(request_data.size()) + " bytes"); + + std::string job_id; + std::string err; + bool used_upload = false; + int64_t max_bytes = -1; + if (!grpc_remote::submit_or_upload(address, + grpc_remote::ProblemType::MIP, + request_data.data(), + request_data.size(), + job_id, + err, + &used_upload, + &max_bytes)) { + std::cerr << "[remote_solve] UploadAndSubmit failed: " << err << "\n"; + std::cerr.flush(); + return mip_solution_t( + cuopt::logic_error("gRPC UploadAndSubmit failed: " + err, cuopt::error_type_t::RuntimeError)); + } + if (settings.log_to_console) { + CUOPT_LOG_INFO("gRPC submit path for MIP (%zu bytes, max=%ld): %s", + request_data.size(), + static_cast(max_bytes), + used_upload ? "UploadAndSubmit" : "SubmitJob"); + CUOPT_LOG_INFO("gRPC job_id: %s", job_id.c_str()); + } + + volatile bool stop_logs = false; + std::thread log_thread; + if (settings.log_to_console) { + log_thread = + std::thread([&]() { grpc_remote::stream_logs_to_stdout(address, job_id, &stop_logs, ""); }); + } + + std::vector callbacks = settings.get_mip_callbacks(); + int64_t incumbent_index = 0; + bool incumbents_done = callbacks.empty(); + CUOPT_LOG_INFO(std::string("[remote_solve] MIP incumbent callbacks: ") + + std::to_string(callbacks.size())); + if (!callbacks.empty()) { + size_t n_vars = view.get_objective_coefficients().size(); + for (auto* cb : callbacks) { + if (cb != nullptr) { cb->setup(n_vars); } + } + } + + std::string status; + int64_t result_size_bytes = 0; + int64_t max_message_bytes = 0; + std::string last_status; + while (true) { + std::string st_err; + if (!grpc_remote::check_status( + address, job_id, status, st_err, &result_size_bytes, &max_message_bytes)) { + stop_logs = true; + if (log_thread.joinable()) { log_thread.join(); } + grpc_remote::delete_result(address, job_id); + return mip_solution_t(cuopt::logic_error("gRPC CheckStatus failed: " + st_err, + cuopt::error_type_t::RuntimeError)); + } + + if (!incumbents_done) { + std::vector incumbents; + int64_t next_index = incumbent_index; + bool job_complete = false; + std::string inc_err; + if (grpc_remote::get_incumbents( + address, job_id, incumbent_index, 32, incumbents, next_index, job_complete, inc_err)) { + if (!incumbents.empty()) { + CUOPT_LOG_INFO(std::string("[remote_solve] Received ") + + std::to_string(incumbents.size()) + " incumbents"); + } else if (next_index != incumbent_index || job_complete) { + CUOPT_LOG_INFO(std::string("[remote_solve] GetIncumbents returned 0 incumbents (from=") + + std::to_string(incumbent_index) + " next=" + std::to_string(next_index) + + " done=" + std::to_string(static_cast(job_complete)) + ")"); + } + for (const auto& inc : incumbents) { + CUOPT_LOG_INFO(std::string("[remote_solve] Incumbent idx=") + std::to_string(inc.index) + + " obj=" + std::to_string(inc.objective) + + " vars=" + std::to_string(inc.assignment.size())); + 
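The incumbent flow above is a cursor-based pull protocol: keep the last-seen index, request a bounded batch starting there, deliver each incumbent to the callbacks, then advance to the server-reported next index until the server flags completion. The contract in isolation (a sketch; `incumbent_t` and the declaration below are inferred from the call sites in this diff, not copied from a header):

```cpp
#include <cstdint>
#include <string>
#include <vector>

// Field names match the loop above; the struct name itself is assumed.
struct incumbent_t {
  int64_t index;
  double objective;
  std::vector<double> assignment;
};

namespace grpc_remote {
// Declaration inferred from the call sites in this diff.
bool get_incumbents(const std::string& address,
                    const std::string& job_id,
                    int64_t from_index,
                    int max_count,
                    std::vector<incumbent_t>& out,
                    int64_t& next_index,
                    bool& job_complete,
                    std::string& error);
}  // namespace grpc_remote

void drain_incumbents(const std::string& address, const std::string& job_id)
{
  int64_t cursor = 0;
  bool done      = false;
  while (!done) {
    std::vector<incumbent_t> batch;
    int64_t next = cursor;
    std::string err;
    if (!grpc_remote::get_incumbents(address, job_id, cursor, 32, batch, next, done, err)) {
      break;  // the real loop logs the error and keeps polling job status
    }
    for (const auto& inc : batch) {
      // hand inc.assignment / inc.objective to the registered callbacks here
      (void)inc;
    }
    cursor = next;
  }
}
```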
invoke_incumbent_callbacks(callbacks, inc.assignment, inc.objective); + } + incumbent_index = next_index; + if (job_complete) { incumbents_done = true; } + } else if (!inc_err.empty()) { + CUOPT_LOG_WARN(std::string("[remote_solve] GetIncumbents failed: ") + inc_err); + } + } + + if (settings.log_to_console && status != last_status) { + CUOPT_LOG_INFO("gRPC status for job %s: %s", job_id.c_str(), status.c_str()); + last_status = status; + } + if (status == "COMPLETED") { break; } + if (status == "FAILED" || status == "CANCELLED" || status == "NOT_FOUND") { + stop_logs = true; + if (log_thread.joinable()) { log_thread.join(); } + grpc_remote::delete_result(address, job_id); + return mip_solution_t( + cuopt::logic_error("Remote job did not complete successfully (status=" + status + ")", + cuopt::error_type_t::RuntimeError)); + } + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } + + if (!incumbents_done) { + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + // Final drain after completion to catch any last incumbents. + for (int i = 0; i < 5; ++i) { + std::vector incumbents; + int64_t next_index = incumbent_index; + bool job_complete = false; + std::string inc_err; + if (!grpc_remote::get_incumbents( + address, job_id, incumbent_index, 0, incumbents, next_index, job_complete, inc_err)) { + break; + } + if (incumbents.empty() && next_index == incumbent_index) { + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + continue; + } + for (const auto& inc : incumbents) { + CUOPT_LOG_INFO(std::string("[remote_solve] Final drain incumbent idx=") + + std::to_string(inc.index) + " obj=" + std::to_string(inc.objective) + + " vars=" + std::to_string(inc.assignment.size())); + invoke_incumbent_callbacks(callbacks, inc.assignment, inc.objective); + } + incumbent_index = next_index; + if (job_complete) { break; } + } + } + + std::vector solution_bytes; + std::string res_err; + bool use_get_result = false; + if (max_message_bytes < 0) { + use_get_result = true; + } else if (result_size_bytes > 0 && result_size_bytes <= max_message_bytes) { + use_get_result = true; + } + if (use_get_result) { + if (!grpc_remote::get_result(address, job_id, solution_bytes, res_err)) { + use_get_result = false; + } + } + if (!use_get_result) { + if (!grpc_remote::stream_result(address, job_id, solution_bytes, res_err)) { + stop_logs = true; + if (log_thread.joinable()) { log_thread.join(); } + grpc_remote::delete_result(address, job_id); + return mip_solution_t(cuopt::logic_error("gRPC StreamResult failed: " + res_err, + cuopt::error_type_t::RuntimeError)); + } + } + if (settings.log_to_console) { + CUOPT_LOG_INFO("gRPC result fetch: %s (size=%ld max=%ld)", + use_get_result ? 
"GetResult" : "StreamResult", + static_cast(result_size_bytes), + static_cast(max_message_bytes)); + } + if (solution_bytes.empty()) { + stop_logs = true; + if (log_thread.joinable()) { log_thread.join(); } + grpc_remote::delete_result(address, job_id); + return mip_solution_t( + cuopt::logic_error("gRPC result payload empty", cuopt::error_type_t::RuntimeError)); + } + + stop_logs = true; + if (log_thread.joinable()) { log_thread.join(); } + + grpc_remote::delete_result(address, job_id); + return serializer->deserialize_mip_solution(solution_bytes); +#else + (void)serializer; + return mip_solution_t(cuopt::logic_error( + "gRPC support is disabled; remote solve is unavailable", cuopt::error_type_t::RuntimeError)); +#endif +} + +//============================================================================ +// Cancel Job Remote +//============================================================================ + +cancel_job_result_t cancel_job_remote(const remote_solve_config_t& config, + const std::string& job_id) +{ + CUOPT_LOG_INFO(std::string("[remote_solve] Cancelling job ") + job_id + " on " + config.host + + ":" + std::to_string(config.port)); + +#if CUOPT_ENABLE_GRPC + const std::string address = config.host + ":" + std::to_string(config.port); + bool ok = false; + std::string status; + std::string msg; + std::string err; + bool rpc_ok = grpc_remote::cancel_job(address, job_id, ok, status, msg, err); + cancel_job_result_t result; + result.success = rpc_ok && ok; + result.message = rpc_ok ? msg : err; + if (status == "QUEUED") + result.job_status = remote_job_status_t::QUEUED; + else if (status == "PROCESSING") + result.job_status = remote_job_status_t::PROCESSING; + else if (status == "COMPLETED") + result.job_status = remote_job_status_t::COMPLETED; + else if (status == "FAILED") + result.job_status = remote_job_status_t::FAILED; + else if (status == "CANCELLED") + result.job_status = remote_job_status_t::CANCELLED; + else + result.job_status = remote_job_status_t::NOT_FOUND; + return result; +#else + return cancel_job_result_t{false, + "gRPC support is disabled; remote cancel is unavailable", + remote_job_status_t::NOT_FOUND}; +#endif +} + +// Explicit instantiations +#if CUOPT_INSTANTIATE_FLOAT +template optimization_problem_solution_t solve_lp_remote( + const remote_solve_config_t& config, + const cuopt::mps_parser::data_model_view_t& view, + const pdlp_solver_settings_t& settings); + +template mip_solution_t solve_mip_remote( + const remote_solve_config_t& config, + const cuopt::mps_parser::data_model_view_t& view, + const mip_solver_settings_t& settings); +#endif + +#if CUOPT_INSTANTIATE_DOUBLE +template optimization_problem_solution_t solve_lp_remote( + const remote_solve_config_t& config, + const cuopt::mps_parser::data_model_view_t& view, + const pdlp_solver_settings_t& settings); + +template mip_solution_t solve_mip_remote( + const remote_solve_config_t& config, + const cuopt::mps_parser::data_model_view_t& view, + const mip_solver_settings_t& settings); +#endif + +} // namespace cuopt::linear_programming diff --git a/cpp/src/linear_programming/utilities/remote_solve_grpc.cpp b/cpp/src/linear_programming/utilities/remote_solve_grpc.cpp new file mode 100644 index 000000000..1c7fc5b02 --- /dev/null +++ b/cpp/src/linear_programming/utilities/remote_solve_grpc.cpp @@ -0,0 +1,669 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. 
diff --git a/cpp/src/linear_programming/utilities/remote_solve_grpc.cpp b/cpp/src/linear_programming/utilities/remote_solve_grpc.cpp
new file mode 100644
index 000000000..1c7fc5b02
--- /dev/null
+++ b/cpp/src/linear_programming/utilities/remote_solve_grpc.cpp
@@ -0,0 +1,669 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights
+ * reserved. SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "remote_solve_grpc.hpp"
+
+#include "cuopt_remote_service.grpc.pb.h"
+
+#include <grpcpp/grpcpp.h>
+
+#include <algorithm>
+#include <chrono>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <limits>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace cuopt::linear_programming::grpc_remote {
+namespace {
+
+constexpr int64_t kMiB = 1024LL * 1024;
+
+int64_t get_submit_max_bytes();
+
+void append_stream_closed_hint(std::string& message, const grpc::Status& status)
+{
+  if (status.ok()) { return; }
+  if (message.find("max message") != std::string::npos) { return; }
+  if (status.error_code() == grpc::StatusCode::RESOURCE_EXHAUSTED ||
+      status.error_code() == grpc::StatusCode::CANCELLED) {
+    message +=
+      " (stream closed; check server --max-message-mb and client "
+      "CUOPT_GRPC_MAX_MESSAGE_MB)";
+  }
+}
+
+std::unique_ptr<cuopt::remote::CuOptRemoteService::Stub> make_stub(const std::string& address)
+{
+  grpc::ChannelArguments args;
+  // Align channel max sizes with the client max message configuration.
+  const int64_t max_bytes = get_submit_max_bytes();
+  const int channel_limit =
+    (max_bytes <= 0)
+      ? -1
+      : static_cast<int>(std::min<int64_t>(max_bytes, std::numeric_limits<int>::max()));
+  args.SetMaxReceiveMessageSize(channel_limit);
+  args.SetMaxSendMessageSize(channel_limit);
+
+  auto channel = grpc::CreateCustomChannel(address, grpc::InsecureChannelCredentials(), args);
+  return cuopt::remote::CuOptRemoteService::NewStub(channel);
+}
+
+int64_t get_submit_max_bytes()
+{
+  constexpr int64_t kDefaultMax = 256LL * kMiB;
+
+  const char* val = std::getenv("CUOPT_GRPC_MAX_MESSAGE_MB");
+  if (!val || val[0] == '\0') { return kDefaultMax; }
+  try {
+    int64_t mb = std::stoll(val);
+    if (mb <= 0) { return std::numeric_limits<int64_t>::max(); }
+    return mb * kMiB;
+  } catch (...) {
+    return kDefaultMax;
+  }
+}
+
+bool submit_job(const std::string& address,
+                ProblemType problem_type,
+                const uint8_t* data,
+                size_t size,
+                std::string& job_id,
+                std::string& error_message,
+                grpc::StatusCode& status_code)
+{
+  job_id.clear();
+  error_message.clear();
+  status_code = grpc::StatusCode::OK;
+
+  auto stub = make_stub(address);
+  grpc::ClientContext ctx;
+  cuopt::remote::SubmitJobRequest req;
+
+  if (problem_type == ProblemType::LP) {
+    auto* lp_req = req.mutable_lp_request();
+    if (!lp_req->ParseFromArray(data, static_cast<int>(size))) {
+      error_message = "SubmitJob: failed to parse LP request";
+      status_code   = grpc::StatusCode::INVALID_ARGUMENT;
+      return false;
+    }
+  } else {
+    auto* mip_req = req.mutable_mip_request();
+    if (!mip_req->ParseFromArray(data, static_cast<int>(size))) {
+      error_message = "SubmitJob: failed to parse MIP request";
+      status_code   = grpc::StatusCode::INVALID_ARGUMENT;
+      return false;
+    }
+  }
+
+  cuopt::remote::SubmitJobResponse resp;
+  grpc::Status st = stub->SubmitJob(&ctx, req, &resp);
+  if (!st.ok()) {
+    status_code   = st.error_code();
+    error_message = "SubmitJob: " + st.error_message();
+    return false;
+  }
+
+  job_id = resp.job_id();
+  if (job_id.empty()) {
+    error_message = "SubmitJob: no job_id returned";
+    status_code   = grpc::StatusCode::INTERNAL;
+    return false;
+  }
+
+  return true;
+}
+
+bool read_upload_start_ack(
+  grpc::ClientReaderWriter<cuopt::remote::UploadJobRequest, cuopt::remote::UploadJobResponse>* stream,
+  std::string& upload_id,
+  int64_t& committed,
+  int64_t* max_message_bytes_out,
+  std::string& error_message)
+{
+  cuopt::remote::UploadJobResponse resp;
+  if (!stream->Read(&resp)) {
+    error_message = "UploadAndSubmit: failed to read response after start";
+    return false;
+  }
+  if (resp.has_error()) {
+    error_message = "UploadAndSubmit: " + resp.error().message();
+    if (max_message_bytes_out) { *max_message_bytes_out = resp.error().max_message_bytes(); }
+    return false;
+  }
+  if (!resp.has_ack()) {
+    error_message = "UploadAndSubmit: expected ack after start";
+    return false;
+  }
+  upload_id = resp.ack().upload_id();
+  committed = resp.ack().committed_size();
+  if (max_message_bytes_out) { *max_message_bytes_out = resp.ack().max_message_bytes(); }
+  return true;
+}
+
+bool write_chunk_and_read_ack(
+  grpc::ClientReaderWriter<cuopt::remote::UploadJobRequest, cuopt::remote::UploadJobResponse>* stream,
+  const std::string& upload_id,
+  int64_t offset,
+  const uint8_t* data,
+  size_t n,
+  int64_t& committed_out,
+  int64_t* max_message_bytes_out,
+  std::string& error_message)
+{
+  cuopt::remote::UploadJobRequest req;
+  auto* chunk = req.mutable_chunk();
+  chunk->set_upload_id(upload_id);
+  chunk->set_offset(offset);
+  chunk->set_data(reinterpret_cast<const char*>(data), n);
+
+  if (!stream->Write(req)) {
+    error_message = "UploadAndSubmit: failed to write chunk";
+    return false;
+  }
+
+  cuopt::remote::UploadJobResponse resp;
+  if (!stream->Read(&resp)) {
+    error_message = "UploadAndSubmit: failed to read response after chunk";
+    return false;
+  }
+  if (resp.has_error()) {
+    error_message = "UploadAndSubmit: " + resp.error().message();
+    committed_out = resp.error().committed_size();
+    if (max_message_bytes_out) { *max_message_bytes_out = resp.error().max_message_bytes(); }
+    return false;
+  }
+  if (!resp.has_ack()) {
+    error_message = "UploadAndSubmit: expected ack after chunk";
+    return false;
+  }
+
+  committed_out = resp.ack().committed_size();
+  if (max_message_bytes_out) { *max_message_bytes_out = resp.ack().max_message_bytes(); }
+  return true;
+}
+
+}  // namespace
+
+bool submit_or_upload(const std::string& address,
+                      ProblemType problem_type,
+                      const uint8_t* data,
+                      size_t size,
+                      std::string& job_id,
+                      std::string& error_message,
+                      bool* used_upload_out,
+                      int64_t* max_bytes_out)
+{
+  try {
+    const int64_t max_bytes = get_submit_max_bytes();
+    if (max_bytes_out) { *max_bytes_out = max_bytes; }
+    if (max_bytes >= 0 && static_cast<int64_t>(size) <= max_bytes) {
+      grpc::StatusCode status_code = grpc::StatusCode::OK;
+      if (submit_job(address, problem_type, data, size, job_id, error_message, status_code)) {
+        if (used_upload_out) { *used_upload_out = false; }
+        return true;
+      }
+      if (status_code != grpc::StatusCode::RESOURCE_EXHAUSTED) { return false; }
+    }
+
+    if (used_upload_out) { *used_upload_out = true; }
+    return upload_and_submit(address, problem_type, data, size, job_id, error_message);
+  } catch (const std::exception& ex) {
+    error_message = std::string("SubmitOrUpload: exception: ") + ex.what();
+    std::cerr << "[remote_solve] SubmitOrUpload exception: " << ex.what() << "\n";
+    std::cerr.flush();
+    return false;
+  } catch (...) {
+    error_message = "SubmitOrUpload: unknown exception";
+    std::cerr << "[remote_solve] SubmitOrUpload unknown exception\n";
+    std::cerr.flush();
+    return false;
+  }
+}
+
+bool upload_and_submit(const std::string& address,
+                       ProblemType problem_type,
+                       const uint8_t* data,
+                       size_t size,
+                       std::string& job_id,
+                       std::string& error_message)
+{
+  try {
+    constexpr size_t kMinChunkSize = 4 * 1024;
+    size_t default_chunk_size      = 1 << 20;  // 1 MiB
+    if (const char* chunk_kb = std::getenv("CUOPT_GRPC_UPLOAD_CHUNK_KB")) {
+      try {
+        auto kb = std::stoll(chunk_kb);
+        if (kb > 0) { default_chunk_size = static_cast<size_t>(kb) * 1024; }
+      } catch (...) {
+      }
+    }
+
+    auto compute_chunk_size = [&](int64_t max_bytes, size_t fallback) -> size_t {
+      size_t chunk_size = fallback;
+      if (max_bytes > 0 && max_bytes < static_cast<int64_t>(chunk_size)) {
+        chunk_size = static_cast<size_t>(max_bytes / 2);
+        if (chunk_size < kMinChunkSize) { chunk_size = kMinChunkSize; }
+      }
+      return chunk_size;
+    };
+
+    int64_t last_max_message_bytes = 0;
+    auto parse_max_mb_from_error   = [](const std::string& msg) -> int64_t {
+      const char* key = "max_message_mb=";
+      auto pos        = msg.find(key);
+      if (pos == std::string::npos) { return 0; }
+      pos += std::strlen(key);
+      const char* start = msg.c_str() + pos;
+      char* end         = nullptr;
+      long long mb      = std::strtoll(start, &end, 10);
+      if (end == start || mb <= 0) { return 0; }
+      return mb * kMiB;
+    };
+
+    auto do_upload = [&](size_t initial_chunk_size, std::string& err_out) -> bool {
+      job_id.clear();
+      err_out.clear();
+
+      auto stub = make_stub(address);
+      grpc::ClientContext ctx;
+      int64_t timeout_ms = 30000;
+      if (const char* timeout_env = std::getenv("CUOPT_GRPC_UPLOAD_TIMEOUT_MS")) {
+        try {
+          auto parsed = std::stoll(timeout_env);
+          if (parsed > 0) { timeout_ms = parsed; }
+        } catch (...) {
+        }
+      }
+      ctx.set_deadline(std::chrono::system_clock::now() + std::chrono::milliseconds(timeout_ms));
+      auto stream = stub->UploadAndSubmit(&ctx);
+
+      cuopt::remote::UploadJobRequest start_req;
+      auto* start = start_req.mutable_start();
+      start->set_problem_type(problem_type == ProblemType::LP ? cuopt::remote::LP
+                                                              : cuopt::remote::MIP);
+      start->set_resume(false);
+      start->set_total_size(static_cast<int64_t>(size));
+
+      if (!stream->Write(start_req)) {
+        err_out = "UploadAndSubmit: failed to write start";
+        return false;
+      }
+
+      std::string upload_id;
+      int64_t committed         = 0;
+      int64_t max_message_bytes = 0;
+      if (!read_upload_start_ack(stream.get(), upload_id, committed, &max_message_bytes, err_out)) {
+        if (max_message_bytes != 0) { last_max_message_bytes = max_message_bytes; }
+        grpc::Status st = stream->Finish();
+        if (!st.ok()) {
+          err_out +=
+            " (grpc_status=" + std::to_string(st.error_code()) + " " + st.error_message() + ")";
+          append_stream_closed_hint(err_out, st);
+        }
+        if (last_max_message_bytes == 0) {
+          last_max_message_bytes = parse_max_mb_from_error(err_out);
+        }
+        return false;
+      }
+
+      int64_t active_max_bytes = get_submit_max_bytes();
+      if (max_message_bytes != 0) {
+        active_max_bytes       = max_message_bytes;
+        last_max_message_bytes = max_message_bytes;
+      }
+      size_t chunk_size = compute_chunk_size(active_max_bytes, initial_chunk_size);
+
+      while (static_cast<size_t>(committed) < size) {
+        size_t off = static_cast<size_t>(committed);
+        size_t n   = std::min(chunk_size, size - off);
+
+        int64_t committed2    = committed;
+        int64_t ack_max_bytes = 0;
+        if (!write_chunk_and_read_ack(stream.get(),
+                                      upload_id,
+                                      committed,
+                                      data + off,
+                                      n,
+                                      committed2,
+                                      &ack_max_bytes,
+                                      err_out)) {
+          if (ack_max_bytes != 0) { last_max_message_bytes = ack_max_bytes; }
+          grpc::Status st = stream->Finish();
+          if (!st.ok()) {
+            err_out +=
+              " (grpc_status=" + std::to_string(st.error_code()) + " " + st.error_message() + ")";
+            append_stream_closed_hint(err_out, st);
+          }
+          if (last_max_message_bytes == 0) {
+            last_max_message_bytes = parse_max_mb_from_error(err_out);
+          }
+          return false;
+        }
+        committed = committed2;
+        if (ack_max_bytes != 0) {
+          active_max_bytes       = ack_max_bytes;
+          last_max_message_bytes = ack_max_bytes;
+          chunk_size             = compute_chunk_size(active_max_bytes, chunk_size);
+        }
+      }
+
+      cuopt::remote::UploadJobRequest finish_req;
+      finish_req.mutable_finish()->set_upload_id(upload_id);
+      stream->Write(finish_req);
+      stream->WritesDone();
+
+      cuopt::remote::UploadJobResponse resp;
+      while (stream->Read(&resp)) {
+        if (resp.has_submit()) {
+          job_id = resp.submit().job_id();
+          break;
+        }
+        if (resp.has_error()) {
+          err_out = "UploadAndSubmit: " + resp.error().message();
+          break;
+        }
+      }
+
+      grpc::Status st = stream->Finish();
+      if (!st.ok()) {
+        if (err_out.empty()) {
+          err_out = "UploadAndSubmit: grpc_status=" + std::to_string(st.error_code()) + " " +
+                    st.error_message();
+        }
+        append_stream_closed_hint(err_out, st);
+        if (last_max_message_bytes == 0) {
+          last_max_message_bytes = parse_max_mb_from_error(err_out);
+        }
+        return false;
+      }
+      if (job_id.empty()) {
+        if (err_out.empty()) { err_out = "UploadAndSubmit: no job_id returned"; }
+        return false;
+      }
+
+      return true;
+    };
+
+    size_t first_chunk = default_chunk_size;
+    if (do_upload(first_chunk, error_message)) { return true; }
+
+    std::cout << "[remote_solve] UploadAndSubmit failed: " << error_message << "\n";
+    if (last_max_message_bytes > 0) {
+      std::cout << "[remote_solve] Server max message MiB: " << (last_max_message_bytes / kMiB)
+                << "\n";
+    }
+    std::cout.flush();
+
+    size_t retry_chunk = first_chunk / 2;
+    if (last_max_message_bytes > 0) {
+      retry_chunk = compute_chunk_size(last_max_message_bytes, first_chunk);
+      if (retry_chunk >= first_chunk) { retry_chunk = first_chunk / 2; }
+    }
+    if (retry_chunk < kMinChunkSize) { return false; }
+    std::cout << "[remote_solve] UploadAndSubmit retry with chunk_size=" << retry_chunk << "\n";
+    std::cout.flush();
+    return do_upload(retry_chunk, error_message);
+  } catch (const std::exception& ex) {
+    error_message = std::string("UploadAndSubmit: exception: ") + ex.what();
+    std::cerr << "[remote_solve] UploadAndSubmit exception: " << ex.what() << "\n";
+    std::cerr.flush();
+    return false;
+  } catch (...) {
+    error_message = "UploadAndSubmit: unknown exception";
+    std::cerr << "[remote_solve] UploadAndSubmit unknown exception\n";
+    std::cerr.flush();
+    return false;
+  }
+}
+
+bool check_status(const std::string& address,
+                  const std::string& job_id,
+                  std::string& status_out,
+                  std::string& error_message,
+                  int64_t* result_size_bytes_out,
+                  int64_t* max_message_bytes_out)
+{
+  status_out.clear();
+  error_message.clear();
+  if (result_size_bytes_out) { *result_size_bytes_out = 0; }
+  if (max_message_bytes_out) { *max_message_bytes_out = 0; }
+
+  auto stub = make_stub(address);
+  grpc::ClientContext ctx;
+  cuopt::remote::StatusRequest req;
+  req.set_job_id(job_id);
+  cuopt::remote::StatusResponse resp;
+
+  grpc::Status st = stub->CheckStatus(&ctx, req, &resp);
+  if (!st.ok()) {
+    error_message = st.error_message();
+    return false;
+  }
+
+  if (result_size_bytes_out) { *result_size_bytes_out = resp.result_size_bytes(); }
+  if (max_message_bytes_out) { *max_message_bytes_out = resp.max_message_bytes(); }
+
+  switch (resp.job_status()) {
+    case cuopt::remote::QUEUED: status_out = "QUEUED"; break;
+    case cuopt::remote::PROCESSING: status_out = "PROCESSING"; break;
+    case cuopt::remote::COMPLETED: status_out = "COMPLETED"; break;
+    case cuopt::remote::FAILED: status_out = "FAILED"; break;
+    case cuopt::remote::CANCELLED: status_out = "CANCELLED"; break;
+    case cuopt::remote::NOT_FOUND: status_out = "NOT_FOUND"; break;
+    default: status_out = "UNKNOWN"; break;
+  }
+
+  return true;
+}
+
+bool stream_result(const std::string& address,
+                   const std::string& job_id,
+                   std::vector<uint8_t>& out,
+                   std::string& error_message)
+{
+  out.clear();
+  error_message.clear();
+
+  auto stub = make_stub(address);
+  grpc::ClientContext ctx;
+  cuopt::remote::GetResultRequest req;
+  req.set_job_id(job_id);
+
+  std::unique_ptr<grpc::ClientReader<cuopt::remote::ResultChunk>> reader =
+    stub->StreamResult(&ctx, req);
+
+  cuopt::remote::ResultChunk chunk;
+  while (reader->Read(&chunk)) {
+    if (!chunk.error_message().empty()) {
+      error_message = chunk.error_message();
+      break;
+    }
+    if (chunk.done()) { break; }
+    const std::string& data = chunk.data();
+    out.insert(out.end(), data.begin(), data.end());
+  }
+
+  grpc::Status st = reader->Finish();
+  if (!st.ok()) {
+    if (error_message.empty()) { error_message = st.error_message(); }
+    return false;
+  }
+  if (!error_message.empty()) { return false; }
+  return true;
+}
+
+bool get_result(const std::string& address,
+                const std::string& job_id,
+                std::vector<uint8_t>& out,
+                std::string& error_message)
+{
+  out.clear();
+  error_message.clear();
+
+  auto stub = make_stub(address);
+  grpc::ClientContext ctx;
+  cuopt::remote::GetResultRequest req;
+  req.set_job_id(job_id);
+
+  cuopt::remote::ResultResponse resp;
+  grpc::Status st = stub->GetResult(&ctx, req, &resp);
+  if (!st.ok()) {
+    error_message = st.error_message();
+    return false;
+  }
+  if (resp.status() != cuopt::remote::SUCCESS) {
+    error_message = resp.error_message().empty() ? "GetResult failed" : resp.error_message();
+    return false;
+  }
+
+  if (resp.has_lp_solution()) {
+    const auto& lp = resp.lp_solution();
+    out.resize(lp.ByteSizeLong());
+    if (!lp.SerializeToArray(out.data(), out.size())) {
+      error_message = "GetResult: failed to serialize LP solution";
+      return false;
+    }
+    return true;
+  }
+  if (resp.has_mip_solution()) {
+    const auto& mip = resp.mip_solution();
+    out.resize(mip.ByteSizeLong());
+    if (!mip.SerializeToArray(out.data(), out.size())) {
+      error_message = "GetResult: failed to serialize MIP solution";
+      return false;
+    }
+    return true;
+  }
+
+  error_message = "GetResult: missing solution payload";
+  return false;
+}
+
+void delete_result(const std::string& address, const std::string& job_id)
+{
+  auto stub = make_stub(address);
+  grpc::ClientContext ctx;
+  cuopt::remote::DeleteRequest req;
+  req.set_job_id(job_id);
+  cuopt::remote::DeleteResponse resp;
+  (void)stub->DeleteResult(&ctx, req, &resp);
+}
+
+bool cancel_job(const std::string& address,
+                const std::string& job_id,
+                bool& success_out,
+                std::string& status_out,
+                std::string& message_out,
+                std::string& error_message)
+{
+  success_out = false;
+  status_out.clear();
+  message_out.clear();
+  error_message.clear();
+
+  auto stub = make_stub(address);
+  grpc::ClientContext ctx;
+  cuopt::remote::CancelRequest req;
+  req.set_job_id(job_id);
+  cuopt::remote::CancelResponse resp;
+
+  grpc::Status st = stub->CancelJob(&ctx, req, &resp);
+  if (!st.ok()) {
+    error_message = st.error_message();
+    return false;
+  }
+
+  success_out = (resp.status() == cuopt::remote::SUCCESS);
+  message_out = resp.message();
+  switch (resp.job_status()) {
+    case cuopt::remote::QUEUED: status_out = "QUEUED"; break;
+    case cuopt::remote::PROCESSING: status_out = "PROCESSING"; break;
+    case cuopt::remote::COMPLETED: status_out = "COMPLETED"; break;
+    case cuopt::remote::FAILED: status_out = "FAILED"; break;
+    case cuopt::remote::CANCELLED: status_out = "CANCELLED"; break;
+    case cuopt::remote::NOT_FOUND: status_out = "NOT_FOUND"; break;
+    default: status_out = "UNKNOWN"; break;
+  }
+
+  return true;
+}
+
+void stream_logs_to_stdout(const std::string& address,
+                           const std::string& job_id,
+                           volatile bool* stop_flag,
+                           const std::string& print_prefix)
+{
+  auto stub = make_stub(address);
+  grpc::ClientContext ctx;
+  cuopt::remote::StreamLogsRequest req;
+  req.set_job_id(job_id);
+  req.set_from_byte(0);
+
+  std::unique_ptr<grpc::ClientReader<cuopt::remote::LogMessage>> reader =
+    stub->StreamLogs(&ctx, req);
+
+  cuopt::remote::LogMessage msg;
+  while (reader->Read(&msg)) {
+    if (stop_flag != nullptr && *stop_flag) { ctx.TryCancel(); }
+    if (!msg.line().empty()) {
+      if (!print_prefix.empty()) { std::cout << print_prefix; }
+      std::cout << msg.line() << "\n";
+      std::cout.flush();
+    }
+    if (msg.job_complete()) { break; }
+  }
+  reader->Finish();
+}
+
+bool get_incumbents(const std::string& address,
+                    const std::string& job_id,
+                    int64_t from_index,
+                    int32_t max_count,
+                    std::vector<Incumbent>& incumbents_out,
+                    int64_t& next_index_out,
+                    bool& job_complete_out,
+                    std::string& error_message)
+{
+  incumbents_out.clear();
+  next_index_out   = from_index;
+  job_complete_out = false;
+  error_message.clear();
+
+  auto stub = make_stub(address);
+  grpc::ClientContext ctx;
+  cuopt::remote::IncumbentRequest req;
+  req.set_job_id(job_id);
+  req.set_from_index(from_index);
+  req.set_max_count(max_count);
+
+  cuopt::remote::IncumbentResponse resp;
+  grpc::Status st = stub->GetIncumbents(&ctx, req, &resp);
+  if (!st.ok()) {
+    error_message = st.error_message();
+    return false;
+  }
+
+  incumbents_out.reserve(resp.incumbents_size());
+  for (const auto& inc : resp.incumbents()) {
+    Incumbent entry;
+    entry.index     = inc.index();
+    entry.objective = inc.objective();
+    entry.assignment.reserve(inc.assignment_size());
+    for (int i = 0; i < inc.assignment_size(); ++i) {
+      entry.assignment.push_back(inc.assignment(i));
+    }
+    incumbents_out.push_back(std::move(entry));
+  }
+
+  next_index_out   = resp.next_index();
+  job_complete_out = resp.job_complete();
+  return true;
+}
+
+}  // namespace cuopt::linear_programming::grpc_remote
diff --git a/cpp/src/linear_programming/utilities/remote_solve_grpc.hpp b/cpp/src/linear_programming/utilities/remote_solve_grpc.hpp
new file mode 100644
index 000000000..286f70a3a
--- /dev/null
+++ b/cpp/src/linear_programming/utilities/remote_solve_grpc.hpp
@@ -0,0 +1,89 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights
+ * reserved. SPDX-License-Identifier: Apache-2.0
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <string>
+#include <vector>
+
+namespace cuopt::linear_programming::grpc_remote {
+
+enum class ProblemType { LP = 0, MIP = 1 };
+
+struct Incumbent {
+  int64_t index    = 0;
+  double objective = 0.0;
+  std::vector<double> assignment;
+};
+
+// Upload serialized SolveLPRequest / SolveMIPRequest bytes and enqueue a job.
+bool upload_and_submit(const std::string& address,
+                       ProblemType problem_type,
+                       const uint8_t* data,
+                       size_t size,
+                       std::string& job_id,
+                       std::string& error_message);
+
+// Submit as unary if the payload fits, otherwise fall back to UploadAndSubmit.
+bool submit_or_upload(const std::string& address,
+                      ProblemType problem_type,
+                      const uint8_t* data,
+                      size_t size,
+                      std::string& job_id,
+                      std::string& error_message,
+                      bool* used_upload_out  = nullptr,
+                      int64_t* max_bytes_out = nullptr);
+
+// Returns one of: "QUEUED", "PROCESSING", "COMPLETED", "FAILED", "CANCELLED", "NOT_FOUND"
+bool check_status(const std::string& address,
+                  const std::string& job_id,
+                  std::string& status_out,
+                  std::string& error_message,
+                  int64_t* result_size_bytes_out = nullptr,
+                  int64_t* max_message_bytes_out = nullptr);
+
+// Stream raw serialized solution bytes (LPSolution or MIPSolution) into out.
+bool stream_result(const std::string& address,
+                   const std::string& job_id,
+                   std::vector<uint8_t>& out,
+                   std::string& error_message);
+
+// Unary GetResult (returns the full serialized solution bytes).
+bool get_result(const std::string& address,
+                const std::string& job_id,
+                std::vector<uint8_t>& out,
+                std::string& error_message);
+
+// Best-effort delete of the server-side stored result for a job.
+void delete_result(const std::string& address, const std::string& job_id);
+
+// Best-effort cancel.
+bool cancel_job(const std::string& address,
+                const std::string& job_id,
+                bool& success_out,
+                std::string& status_out,
+                std::string& message_out,
+                std::string& error_message);
+
+// Stream logs to stdout until stop_flag is true or the server indicates job_complete.
+// If print_prefix is non-empty, it is printed before each log line.
+void stream_logs_to_stdout(const std::string& address,
+                           const std::string& job_id,
+                           volatile bool* stop_flag,
+                           const std::string& print_prefix);
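+
+// Illustrative end-to-end call sequence for this client API (sketch only;
+// `addr` and `bytes` are placeholders and error handling is elided):
+//
+//   std::string job_id, err;
+//   if (grpc_remote::submit_or_upload(addr, ProblemType::LP,
+//                                     bytes.data(), bytes.size(), job_id, err)) {
+//     std::string status, st_err;
+//     do {
+//       grpc_remote::check_status(addr, job_id, status, st_err);
+//     } while (status == "QUEUED" || status == "PROCESSING");
+//     std::vector<uint8_t> out;
+//     grpc_remote::get_result(addr, job_id, out, err);  // or stream_result() for large payloads
+//     grpc_remote::delete_result(addr, job_id);
+//   }
+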
+// Fetch incumbent solutions for a job starting at from_index.
+bool get_incumbents(const std::string& address,
+                    const std::string& job_id,
+                    int64_t from_index,
+                    int32_t max_count,
+                    std::vector<Incumbent>& incumbents_out,
+                    int64_t& next_index_out,
+                    bool& job_complete_out,
+                    std::string& error_message);
+
+}  // namespace cuopt::linear_programming::grpc_remote
diff --git a/cpp/src/mip/diversity/population.cu b/cpp/src/mip/diversity/population.cu
index 766ed09cb..21654fce5 100644
--- a/cpp/src/mip/diversity/population.cu
+++ b/cpp/src/mip/diversity/population.cu
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  */
 /* clang-format on */
@@ -265,6 +265,11 @@ void population_t<i_t, f_t>::run_solution_callbacks(solution_t<i_t, f_t>& sol)
   bool better_solution_found = is_better_than_best_feasible(sol);
   auto user_callbacks        = context.settings.get_mip_callbacks();
   if (better_solution_found) {
+    if (!user_callbacks.empty()) {
+      CUOPT_LOG_INFO("Population: incumbent callbacks=%zu objective=%g",
+                     user_callbacks.size(),
+                     sol.get_user_objective());
+    }
     if (context.settings.benchmark_info_ptr != nullptr) {
       context.settings.benchmark_info_ptr->last_improvement_of_best_feasible = timer.elapsed_time();
     }
@@ -275,6 +280,7 @@
   for (auto callback : user_callbacks) {
     if (callback->get_type() == internals::base_solution_callback_type::GET_SOLUTION) {
+      callback->set_memory_location(internals::callback_memory_location::DEVICE);
       auto get_sol_callback = static_cast<internals::get_solution_callback_t*>(callback);
       solution_t<i_t, f_t> temp_sol(sol);
       problem_ptr->post_process_assignment(temp_sol.assignment);
@@ -298,7 +304,7 @@
       f_t user_objective =
         temp_sol.problem_ptr->get_user_obj_from_solver_obj(temp_sol.get_objective());
       user_objective_vec.set_element_async(0, user_objective, temp_sol.handle_ptr->get_stream());
-      CUOPT_LOG_DEBUG("Returning incumbent solution with objective %g", user_objective);
+      CUOPT_LOG_INFO("Returning incumbent solution with objective %g", user_objective);
       get_sol_callback->get_solution(temp_sol.assignment.data(), user_objective_vec.data());
     }
   }
@@ -311,6 +317,7 @@
   for (auto callback : user_callbacks) {
     if (callback->get_type() == internals::base_solution_callback_type::SET_SOLUTION) {
+      callback->set_memory_location(internals::callback_memory_location::DEVICE);
       auto set_sol_callback = static_cast<internals::set_solution_callback_t*>(callback);
       rmm::device_uvector<f_t> incumbent_assignment(
         problem_ptr->original_problem_ptr->get_n_variables(), sol.handle_ptr->get_stream());
diff --git a/cpp/src/mip/solve.cu b/cpp/src/mip/solve.cu
index e6a392d40..5d1de256b 100644
--- a/cpp/src/mip/solve.cu
+++ b/cpp/src/mip/solve.cu
@@ -31,6 +31,8 @@
 
 #include
 
+#include
+
 #include
 #include
 #include
@@ -293,13 +295,276 @@ mip_solution_t<i_t, f_t> solve_mip(optimization_problem_t<i_t, f_t>& op_problem,
     }
   }
 
+// Helper to create a data_model_view_t from mps_data_model_t (for the remote solve path)
+template <typename i_t, typename f_t>
+static data_model_view_t<i_t, f_t> create_view_from_mps_data_model(
+  const cuopt::mps_parser::mps_data_model_t<i_t, f_t>& mps_data_model)
+{
+  data_model_view_t<i_t, f_t> view;
+
+  view.set_maximize(mps_data_model.get_sense());
+
+  if (!mps_data_model.get_constraint_matrix_values().empty()) {
+    view.set_csr_constraint_matrix(mps_data_model.get_constraint_matrix_values().data(),
+                                   mps_data_model.get_constraint_matrix_values().size(),
+                                   mps_data_model.get_constraint_matrix_indices().data(),
+                                   mps_data_model.get_constraint_matrix_indices().size(),
+                                   mps_data_model.get_constraint_matrix_offsets().data(),
+                                   mps_data_model.get_constraint_matrix_offsets().size());
+  }
+
+  if (!mps_data_model.get_constraint_bounds().empty()) {
+    view.set_constraint_bounds(mps_data_model.get_constraint_bounds().data(),
+                               mps_data_model.get_constraint_bounds().size());
+  }
+
+  if (!mps_data_model.get_objective_coefficients().empty()) {
+    view.set_objective_coefficients(mps_data_model.get_objective_coefficients().data(),
+                                    mps_data_model.get_objective_coefficients().size());
+  }
+
+  view.set_objective_scaling_factor(mps_data_model.get_objective_scaling_factor());
+  view.set_objective_offset(mps_data_model.get_objective_offset());
+
+  if (!mps_data_model.get_variable_lower_bounds().empty()) {
+    view.set_variable_lower_bounds(mps_data_model.get_variable_lower_bounds().data(),
+                                   mps_data_model.get_variable_lower_bounds().size());
+  }
+
+  if (!mps_data_model.get_variable_upper_bounds().empty()) {
+    view.set_variable_upper_bounds(mps_data_model.get_variable_upper_bounds().data(),
+                                   mps_data_model.get_variable_upper_bounds().size());
+  }
+
+  if (!mps_data_model.get_variable_types().empty()) {
+    view.set_variable_types(mps_data_model.get_variable_types().data(),
+                            mps_data_model.get_variable_types().size());
+  }
+
+  if (!mps_data_model.get_row_types().empty()) {
+    view.set_row_types(mps_data_model.get_row_types().data(),
+                       mps_data_model.get_row_types().size());
+  }
+
+  if (!mps_data_model.get_constraint_lower_bounds().empty()) {
+    view.set_constraint_lower_bounds(mps_data_model.get_constraint_lower_bounds().data(),
+                                     mps_data_model.get_constraint_lower_bounds().size());
+  }
+
+  if (!mps_data_model.get_constraint_upper_bounds().empty()) {
+    view.set_constraint_upper_bounds(mps_data_model.get_constraint_upper_bounds().data(),
+                                     mps_data_model.get_constraint_upper_bounds().size());
+  }
+
+  view.set_objective_name(mps_data_model.get_objective_name());
+  view.set_problem_name(mps_data_model.get_problem_name());
+
+  if (!mps_data_model.get_variable_names().empty()) {
+    view.set_variable_names(mps_data_model.get_variable_names());
+  }
+
+  if (!mps_data_model.get_row_names().empty()) {
+    view.set_row_names(mps_data_model.get_row_names());
+  }
+
+  if (!mps_data_model.get_initial_primal_solution().empty()) {
+    view.set_initial_primal_solution(mps_data_model.get_initial_primal_solution().data(),
+                                     mps_data_model.get_initial_primal_solution().size());
+  }
+
+  if (!mps_data_model.get_initial_dual_solution().empty()) {
+    view.set_initial_dual_solution(mps_data_model.get_initial_dual_solution().data(),
+                                   mps_data_model.get_initial_dual_solution().size());
+  }
+
+  return view;
+}
+
+// Helper struct to hold CPU copies of GPU data for remote solve
+template <typename i_t, typename f_t>
+struct cpu_problem_data_t {
+  std::vector<f_t> A_values;
+  std::vector<i_t> A_indices;
+  std::vector<i_t> A_offsets;
+  std::vector<f_t> constraint_bounds;
+  std::vector<f_t> constraint_lower_bounds;
+  std::vector<f_t> constraint_upper_bounds;
+  std::vector<f_t> objective_coefficients;
+  std::vector<f_t> variable_lower_bounds;
+  std::vector<f_t> variable_upper_bounds;
+  std::vector<char> variable_types;
+  std::vector<f_t> quadratic_objective_values;
+  std::vector<i_t> quadratic_objective_indices;
+  std::vector<i_t> quadratic_objective_offsets;
+  bool maximize;
+  f_t objective_scaling_factor;
+  f_t objective_offset;
+
+  data_model_view_t<i_t, f_t> create_view() const
+  {
+    data_model_view_t<i_t, f_t> v;
+    v.set_maximize(maximize);
+    v.set_objective_scaling_factor(objective_scaling_factor);
+    v.set_objective_offset(objective_offset);
+
+    if (!A_values.empty()) {
+      v.set_csr_constraint_matrix(A_values.data(),
+                                  A_values.size(),
+                                  A_indices.data(),
+                                  A_indices.size(),
+                                  A_offsets.data(),
+                                  A_offsets.size());
+    }
+    if (!constraint_bounds.empty()) {
+      v.set_constraint_bounds(constraint_bounds.data(), constraint_bounds.size());
+    }
+    if (!constraint_lower_bounds.empty() && !constraint_upper_bounds.empty()) {
+      v.set_constraint_lower_bounds(constraint_lower_bounds.data(), constraint_lower_bounds.size());
+      v.set_constraint_upper_bounds(constraint_upper_bounds.data(), constraint_upper_bounds.size());
+    }
+    if (!objective_coefficients.empty()) {
+      v.set_objective_coefficients(objective_coefficients.data(), objective_coefficients.size());
+    }
+    if (!variable_lower_bounds.empty()) {
+      v.set_variable_lower_bounds(variable_lower_bounds.data(), variable_lower_bounds.size());
+    }
+    if (!variable_upper_bounds.empty()) {
+      v.set_variable_upper_bounds(variable_upper_bounds.data(), variable_upper_bounds.size());
+    }
+    if (!variable_types.empty()) {
+      v.set_variable_types(variable_types.data(), variable_types.size());
+    }
+    if (!quadratic_objective_values.empty()) {
+      v.set_quadratic_objective_matrix(quadratic_objective_values.data(),
+                                       quadratic_objective_values.size(),
+                                       quadratic_objective_indices.data(),
+                                       quadratic_objective_indices.size(),
+                                       quadratic_objective_offsets.data(),
+                                       quadratic_objective_offsets.size());
+    }
+    v.set_is_device_memory(false);
+    return v;
+  }
+};
+
+// Helper to copy GPU view data to CPU
+template <typename i_t, typename f_t>
+cpu_problem_data_t<i_t, f_t> copy_view_to_cpu(raft::handle_t const* handle_ptr,
+                                              const data_model_view_t<i_t, f_t>& gpu_view)
+{
+  cpu_problem_data_t<i_t, f_t> cpu_data;
+  auto stream = handle_ptr->get_stream();
+
+  cpu_data.maximize                 = gpu_view.get_sense();
+  cpu_data.objective_scaling_factor = gpu_view.get_objective_scaling_factor();
+  cpu_data.objective_offset         = gpu_view.get_objective_offset();
+
+  auto copy_to_host = [stream](auto& dst_vec, auto src_span) {
+    if (src_span.size() > 0) {
+      dst_vec.resize(src_span.size());
+      raft::copy(dst_vec.data(), src_span.data(), src_span.size(), stream);
+    }
+  };
+
+  copy_to_host(cpu_data.A_values, gpu_view.get_constraint_matrix_values());
+  copy_to_host(cpu_data.A_indices, gpu_view.get_constraint_matrix_indices());
+  copy_to_host(cpu_data.A_offsets, gpu_view.get_constraint_matrix_offsets());
+  copy_to_host(cpu_data.constraint_bounds, gpu_view.get_constraint_bounds());
+  copy_to_host(cpu_data.constraint_lower_bounds, gpu_view.get_constraint_lower_bounds());
+  copy_to_host(cpu_data.constraint_upper_bounds, gpu_view.get_constraint_upper_bounds());
+  copy_to_host(cpu_data.objective_coefficients, gpu_view.get_objective_coefficients());
+  copy_to_host(cpu_data.variable_lower_bounds, gpu_view.get_variable_lower_bounds());
+  copy_to_host(cpu_data.variable_upper_bounds, gpu_view.get_variable_upper_bounds());
+  copy_to_host(cpu_data.quadratic_objective_values, gpu_view.get_quadratic_objective_values());
+  copy_to_host(cpu_data.quadratic_objective_indices, gpu_view.get_quadratic_objective_indices());
+  copy_to_host(cpu_data.quadratic_objective_offsets, gpu_view.get_quadratic_objective_offsets());
+
+  // Variable types need special handling (char array)
+  auto var_types_span = gpu_view.get_variable_types();
+  if (var_types_span.size() > 0) {
+    cpu_data.variable_types.resize(var_types_span.size());
+    cudaMemcpyAsync(cpu_data.variable_types.data(),
+                    var_types_span.data(),
+                    var_types_span.size() * sizeof(char),
+                    cudaMemcpyDeviceToHost,
+                    stream);
+  }
+
+  // Synchronize to ensure all copies are complete
+  cudaStreamSynchronize(stream);
+
+  return cpu_data;
+}
+
 template <typename i_t, typename f_t>
 mip_solution_t<i_t, f_t> solve_mip(
   raft::handle_t const* handle_ptr,
   const cuopt::mps_parser::mps_data_model_t<i_t, f_t>& mps_data_model,
   mip_solver_settings_t<i_t, f_t> const& settings)
 {
-  auto op_problem = mps_data_model_to_optimization_problem(handle_ptr, mps_data_model);
+  // Create a view pointing to CPU data and delegate to the view-based overload.
+  // The view overload handles local vs remote solve automatically.
+  auto view = create_view_from_mps_data_model(mps_data_model);
+  view.set_is_device_memory(false);  // MPS data is always in CPU memory
+  return solve_mip(handle_ptr, view, settings);
+}
+
+template <typename i_t, typename f_t>
+mip_solution_t<i_t, f_t> solve_mip(raft::handle_t const* handle_ptr,
+                                   const data_model_view_t<i_t, f_t>& view,
+                                   mip_solver_settings_t<i_t, f_t> const& settings)
+{
+  // Initialize the logger for this overload (needed for early returns)
+  init_logger_t log(settings.log_file, settings.log_to_console);
+
+  // Check for remote solve configuration first
+  auto remote_config = get_remote_solve_config();
+
+  if (view.is_device_memory()) {
+    if (remote_config.has_value()) {
+      // GPU data + remote solve requested: need a valid handle to copy GPU->CPU
+      if (handle_ptr == nullptr) {
+        CUOPT_LOG_ERROR(
+          "[solve_mip] Remote solve requested with GPU data but no CUDA handle. "
+          "This is an internal error - GPU data should not exist without CUDA initialization.");
+        return mip_solution_t<i_t, f_t>(
+          cuopt::logic_error("No CUDA handle for GPU data", cuopt::error_type_t::RuntimeError));
+      }
+      CUOPT_LOG_WARN(
+        "[solve_mip] Remote solve requested but data is on GPU. "
+        "Copying to CPU for serialization (performance impact).");
+      auto cpu_data = copy_view_to_cpu(handle_ptr, view);
+      auto cpu_view = cpu_data.create_view();
+
+      CUOPT_LOG_INFO(
+        "[solve_mip] Remote solve detected: CUOPT_REMOTE_HOST=%s, CUOPT_REMOTE_PORT=%d",
+        remote_config->host.c_str(),
+        remote_config->port);
+      // Remote solve with GPU data: serialize cpu_view and send it to the remote server
+      return solve_mip_remote(*remote_config, cpu_view, settings);
+    }
+
+    // Local solve: data already on GPU - convert the view to an optimization_problem_t and solve
+    auto op_problem = data_model_view_to_optimization_problem(handle_ptr, view);
+    return solve_mip(op_problem, settings);
+  }
+
+  // Data is on CPU
+  if (remote_config.has_value()) {
+    CUOPT_LOG_INFO("[solve_mip] Remote solve detected: CUOPT_REMOTE_HOST=%s, CUOPT_REMOTE_PORT=%d",
+                   remote_config->host.c_str(),
+                   remote_config->port);
+    // Remote solve with CPU data: serialize the view and send it to the remote server
+    return solve_mip_remote(*remote_config, view, settings);
+  }
+
+  // Local solve with CPU data: copy to GPU and solve
+  if (handle_ptr == nullptr) {
+    CUOPT_LOG_ERROR("[solve_mip] Local solve requested but handle_ptr is null.");
+    return mip_solution_t<i_t, f_t>(
+      cuopt::logic_error("No CUDA handle for CPU->GPU copy", cuopt::error_type_t::RuntimeError));
+  }
+  auto op_problem = data_model_view_to_optimization_problem(handle_ptr, view);
   return solve_mip(op_problem, settings);
 }
 
@@ -311,6 +576,11 @@ mip_solution_t<i_t, f_t> solve_mip(
   template mip_solution_t<i_t, f_t> solve_mip(                              \
     raft::handle_t const* handle_ptr,                                       \
     const cuopt::mps_parser::mps_data_model_t<i_t, f_t>& mps_data_model,    \
+    mip_solver_settings_t<i_t, f_t> const& settings);                       \
+                                                                            \
+  template mip_solution_t<i_t, f_t> solve_mip(                              \
+    raft::handle_t const* handle_ptr,                                       \
+    const data_model_view_t<i_t, f_t>& view,                                \
     mip_solver_settings_t<i_t, f_t> const& settings);
 
 #if MIP_INSTANTIATE_FLOAT
diff --git a/cpp/src/mip/solver_solution.cu b/cpp/src/mip/solver_solution.cu
index 2ce6d5700..fd8707732 100644
--- a/cpp/src/mip/solver_solution.cu
+++ b/cpp/src/mip/solver_solution.cu
@@ -1,6 +1,6 @@
 /* clang-format off */
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */
 /* clang-format on */
@@ -28,7 +28,8 @@ mip_solution_t<i_t, f_t>::mip_solution_t(rmm::device_uvector<f_t> solution,
                                          f_t max_variable_bound_violation,
                                          solver_stats_t<i_t, f_t> stats,
                                          std::vector<std::vector<f_t>> solution_pool)
-  : solution_(std::move(solution)),
+  : solution_(std::make_unique<rmm::device_uvector<f_t>>(std::move(solution))),
+    is_device_memory_(true),
     var_names_(std::move(var_names)),
     objective_(objective),
     mip_gap_(mip_gap),
@@ -46,7 +47,8 @@ template <typename i_t, typename f_t>
 mip_solution_t<i_t, f_t>::mip_solution_t(mip_termination_status_t termination_status,
                                          solver_stats_t<i_t, f_t> stats,
                                          rmm::cuda_stream_view stream_view)
-  : solution_(0, stream_view),
+  : solution_(std::make_unique<rmm::device_uvector<f_t>>(0, stream_view)),
+    is_device_memory_(true),
     objective_(0),
     mip_gap_(0),
     termination_status_(termination_status),
@@ -61,7 +63,65 @@ mip_solution_t<i_t, f_t>::mip_solution_t(mip_termination_status_t termination_st
 template <typename i_t, typename f_t>
 mip_solution_t<i_t, f_t>::mip_solution_t(const cuopt::logic_error& error_status,
                                          rmm::cuda_stream_view stream_view)
-  : solution_(0, stream_view),
+  : solution_(std::make_unique<rmm::device_uvector<f_t>>(0, stream_view)),
+    is_device_memory_(true),
+    objective_(0),
+    mip_gap_(0),
+    termination_status_(mip_termination_status_t::NoTermination),
+    max_constraint_violation_(0),
+    max_int_violation_(0),
+    max_variable_bound_violation_(0),
+    error_status_(error_status)
+{
+}
+
+// CPU-only constructor for remote solve with solution data
+template <typename i_t, typename f_t>
+mip_solution_t<i_t, f_t>::mip_solution_t(std::vector<f_t> solution,
+                                         std::vector<std::string> var_names,
+                                         f_t objective,
+                                         f_t mip_gap,
+                                         mip_termination_status_t termination_status,
+                                         f_t max_constraint_violation,
+                                         f_t max_int_violation,
+                                         f_t max_variable_bound_violation,
+                                         solver_stats_t<i_t, f_t> stats)
+  : solution_host_(std::make_unique<std::vector<f_t>>(std::move(solution))),
+    is_device_memory_(false),
+    var_names_(std::move(var_names)),
+    objective_(objective),
+    mip_gap_(mip_gap),
+    termination_status_(termination_status),
+    max_constraint_violation_(max_constraint_violation),
+    max_int_violation_(max_int_violation),
+    max_variable_bound_violation_(max_variable_bound_violation),
+    stats_(stats),
+    error_status_(cuopt::logic_error("", cuopt::error_type_t::Success))
+{
+}
+
+// CPU-only constructor for remote solve error cases
+template <typename i_t, typename f_t>
+mip_solution_t<i_t, f_t>::mip_solution_t(mip_termination_status_t termination_status,
+                                         solver_stats_t<i_t, f_t> stats)
+  : solution_host_(std::make_unique<std::vector<f_t>>()),
+    is_device_memory_(false),
+    objective_(0),
+    mip_gap_(0),
+    termination_status_(termination_status),
+    max_constraint_violation_(0),
+    max_int_violation_(0),
+    max_variable_bound_violation_(0),
+    stats_(stats),
+    error_status_(cuopt::logic_error("", cuopt::error_type_t::Success))
+{
+}
+
+// CPU-only constructor for remote solve error cases
+template <typename i_t, typename f_t>
+mip_solution_t<i_t, f_t>::mip_solution_t(const cuopt::logic_error& error_status)
+  : solution_host_(std::make_unique<std::vector<f_t>>()),
+    is_device_memory_(false),
     objective_(0),
     mip_gap_(0),
     termination_status_(mip_termination_status_t::NoTermination),
@@ -78,16 +138,34 @@ const cuopt::logic_error& mip_solution_t<i_t, f_t>::get_error_status() const
   return error_status_;
 }
 
+template <typename i_t, typename f_t>
+bool mip_solution_t<i_t, f_t>::is_device_memory() const
+{
+  return is_device_memory_;
+}
+
 template <typename i_t, typename f_t>
 const rmm::device_uvector<f_t>& mip_solution_t<i_t, f_t>::get_solution() const
 {
-  return solution_;
+  return *solution_;
 }
 
 template <typename i_t, typename f_t>
 rmm::device_uvector<f_t>& mip_solution_t<i_t, f_t>::get_solution()
 {
-  return solution_;
+  return *solution_;
+}
+
+template <typename i_t, typename f_t>
+std::vector<f_t>& mip_solution_t<i_t, f_t>::get_solution_host()
+{
+  return *solution_host_;
+}
+
+template <typename i_t, typename f_t>
+const std::vector<f_t>& mip_solution_t<i_t, f_t>::get_solution_host() const
+{
+  return *solution_host_;
 }
 
 template <typename i_t, typename f_t>
@@ -211,9 +289,16 @@ void mip_solution_t<i_t, f_t>::write_to_sol_file(std::string_view filename,
   double objective_value = get_objective_value();
   auto& var_names        = get_variable_names();
   std::vector<f_t> solution;
-  solution.resize(solution_.size());
-  raft::copy(solution.data(), solution_.data(), solution_.size(), stream_view.value());
-  RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view.value()));
+
+  if (is_device_memory_) {
+    // Copy from GPU to CPU
+    solution.resize(solution_->size());
+    raft::copy(solution.data(), solution_->data(), solution_->size(), stream_view.value());
+    RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view.value()));
+  } else {
+    // Already on CPU
+    solution = *solution_host_;
+  }
 
   solution_writer_t::write_solution_to_sol_file(
     std::string(filename), status, objective_value, var_names, solution);
@@ -233,6 +318,121 @@ void mip_solution_t<i_t, f_t>::log_summary() const
   CUOPT_LOG_INFO("Total Solve Time: %f", get_total_solve_time());
 }
 
+//============================================================================
+// Setters for remote solve deserialization
+//============================================================================
+
+template <typename i_t, typename f_t>
+void mip_solution_t<i_t, f_t>::set_solution_host(std::vector<f_t> solution)
+{
+  solution_host_    = std::make_unique<std::vector<f_t>>(std::move(solution));
+  is_device_memory_ = false;
+}
+
+template <typename i_t, typename f_t>
+void mip_solution_t<i_t, f_t>::set_objective(f_t value)
+{
+  objective_ = value;
+}
+
+template <typename i_t, typename f_t>
+void mip_solution_t<i_t, f_t>::set_mip_gap(f_t value)
+{
+  mip_gap_ = value;
+}
+
+template <typename i_t, typename f_t>
+void mip_solution_t<i_t, f_t>::set_solution_bound(f_t value)
+{
+  stats_.solution_bound = value;
+}
+
+template <typename i_t, typename f_t>
+void mip_solution_t<i_t, f_t>::set_total_solve_time(double value)
+{
+  stats_.total_solve_time = value;
+}
+
+template <typename i_t, typename f_t>
+void mip_solution_t<i_t, f_t>::set_presolve_time(double value)
+{
+  stats_.presolve_time = value;
+}
+
+template <typename i_t, typename f_t>
+void mip_solution_t<i_t, f_t>::set_max_constraint_violation(f_t value)
+{
+  max_constraint_violation_ = value;
+}
+
+template <typename i_t, typename f_t>
+void mip_solution_t<i_t, f_t>::set_max_int_violation(f_t value)
+{
+  max_int_violation_ = value;
+}
+
+template <typename i_t, typename f_t>
+void mip_solution_t<i_t, f_t>::set_max_variable_bound_violation(f_t value)
+{
+  max_variable_bound_violation_ = value;
+}
+
+template <typename i_t, typename f_t>
+void mip_solution_t<i_t, f_t>::set_nodes(i_t value)
+{
+  stats_.num_nodes = value;
+}
+
+template <typename i_t, typename f_t>
+void mip_solution_t<i_t, f_t>::set_simplex_iterations(i_t value)
+{
+  stats_.num_simplex_iterations = value;
+}
+
+template <typename i_t, typename f_t>
+std::string mip_solution_t<i_t, f_t>::get_error_string() const
+{
+  return error_status_.what();
+}
+
+template <typename i_t, typename f_t>
+i_t mip_solution_t<i_t, f_t>::get_nodes() const
+{
+  return stats_.num_nodes;
+}
+
+template <typename i_t, typename f_t>
+i_t mip_solution_t<i_t, f_t>::get_simplex_iterations() const
+{
+  return stats_.num_simplex_iterations;
+}
+
+template <typename i_t, typename f_t>
+void mip_solution_t<i_t, f_t>::to_host(rmm::cuda_stream_view stream_view)
+{
+  if (!is_device_memory_) {
+    // Already on CPU, nothing to do
+    return;
+  }
+
+  // Initialize host storage if needed
+  if (!solution_host_) { solution_host_ = std::make_unique<std::vector<f_t>>(); }
+
+  // Copy the solution
+  if (solution_ && solution_->size() > 0) {
+    solution_host_->resize(solution_->size());
+    raft::copy(solution_host_->data(), solution_->data(), solution_->size(), stream_view.value());
+
+    // Synchronize to ensure the copy is complete
+    RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view.value()));
+  }
+
+  // Clear GPU storage to free memory
+  solution_.reset();
+
+  is_device_memory_ = false;
+}
+
 #if MIP_INSTANTIATE_FLOAT
 template class mip_solution_t<int, float>;
 #endif
diff --git a/cpp/tests/linear_programming/CMakeLists.txt b/cpp/tests/linear_programming/CMakeLists.txt
index c091751f9..8a8810b63 100644
--- a/cpp/tests/linear_programming/CMakeLists.txt
+++ b/cpp/tests/linear_programming/CMakeLists.txt
@@ -1,5 +1,5 @@
 # cmake-format: off
-# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 # cmake-format: on
diff --git a/dependencies.yaml b/dependencies.yaml
index 7dc6b9490..300e41fd8 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -302,6 +302,8 @@ dependencies:
           - tbb-devel
           - zlib
           - bzip2
+          - grpc-cpp
+          - libprotobuf
   test_cpp:
     common:
       - output_types: [conda]
diff --git a/docs/developer/SERIALIZATION_PLUGIN_GUIDE.md b/docs/developer/SERIALIZATION_PLUGIN_GUIDE.md
new file mode 100644
index 000000000..006ab9422
--- /dev/null
+++ b/docs/developer/SERIALIZATION_PLUGIN_GUIDE.md
@@ -0,0 +1,595 @@
+# cuOpt Remote Solve Serialization Plugin Guide
+
+This guide explains how to develop custom serialization plugins for cuOpt's remote solve feature. Plugins allow you to replace the default Protocol Buffers serialization with alternative formats such as MsgPack, JSON, FlatBuffers, or custom binary protocols.
+
+## Overview
+
+The remote solve feature uses a pluggable serialization interface (`remote_serializer_t`) that handles:
+
+- Serializing optimization problems (LP/MIP) for network transmission
+- Deserializing solver settings
+- Serializing solutions back to the client
+- Message type identification (LP vs MIP)
+
+```
+┌─────────────┐                                 ┌─────────────┐
+│   Client    │                                 │   Server    │
+│             │   serialize_lp_request()        │             │
+│  Problem ───┼────────────────────────────────┼──► Problem  │
+│             │                                 │             │
+│             │   serialize_lp_solution()       │             │
+│  Solution ◄─┼────────────────────────────────┼─── Solution │
+└─────────────┘                                 └─────────────┘
+       ▲                                               ▲
+       │                                               │
+       └───────────── Same Serializer ────────────────┘
+```
+
+**Important**: Both client and server must use the same serializer for communication to work.
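+
+Because the payload bytes are opaque to the transport, a client/server serializer mismatch typically surfaces only as a deserialization failure. One defensive pattern (illustrative only, not required by cuOpt) is to prefix every message with a format tag and protocol version and verify them before parsing; the one-byte type tag and little-endian version layout below are assumptions of this sketch:
+
+```cpp
+#include <cstdint>
+#include <vector>
+
+// Hypothetical guard for a layout of [1-byte message type][4-byte LE protocol version].
+bool header_matches(const std::vector<uint8_t>& data,
+                    uint8_t expected_type,
+                    uint32_t expected_version)
+{
+  if (data.size() < 5 || data[0] != expected_type) { return false; }
+  uint32_t version = 0;
+  for (int i = 0; i < 4; ++i) { version |= static_cast<uint32_t>(data[1 + i]) << (8 * i); }
+  return version == expected_version;  // compare against protocol_version()
+}
+```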
+
+## The Serializer Interface
+
+Your plugin must implement the `remote_serializer_t` interface defined in:
+`cpp/include/cuopt/linear_programming/utilities/remote_serialization.hpp`
+
+### Required Methods
+
+```cpp
+template <typename i_t, typename f_t>
+class remote_serializer_t {
+ public:
+  virtual ~remote_serializer_t() = default;
+
+  // ═══════════════════════════════════════════════════════════════════
+  // CLIENT-SIDE: Serialize requests, deserialize solutions
+  // ═══════════════════════════════════════════════════════════════════
+
+  // Serialize an LP problem and settings into bytes for transmission
+  virtual std::vector<uint8_t> serialize_lp_request(
+    const mps_parser::data_model_view_t<i_t, f_t>& problem,
+    const pdlp_solver_settings_t<i_t, f_t>& settings) = 0;
+
+  // Serialize a MIP problem and settings into bytes
+  virtual std::vector<uint8_t> serialize_mip_request(
+    const mps_parser::data_model_view_t<i_t, f_t>& problem,
+    const mip_solver_settings_t<i_t, f_t>& settings) = 0;
+
+  // Deserialize an LP solution from bytes received from the server
+  virtual optimization_problem_solution_t<i_t, f_t> deserialize_lp_solution(
+    const std::vector<uint8_t>& data) = 0;
+
+  // Deserialize a MIP solution from bytes
+  virtual mip_solution_t<i_t, f_t> deserialize_mip_solution(
+    const std::vector<uint8_t>& data) = 0;
+
+  // ═══════════════════════════════════════════════════════════════════
+  // SERVER-SIDE: Deserialize requests, serialize solutions
+  // ═══════════════════════════════════════════════════════════════════
+
+  // Check whether the received data is a MIP request (vs LP)
+  virtual bool is_mip_request(const std::vector<uint8_t>& data) = 0;
+
+  // Deserialize an LP request into problem data and settings
+  virtual bool deserialize_lp_request(
+    const std::vector<uint8_t>& data,
+    mps_parser::mps_data_model_t<i_t, f_t>& problem_data,
+    pdlp_solver_settings_t<i_t, f_t>& settings) = 0;
+
+  // Deserialize a MIP request into problem data and settings
+  virtual bool deserialize_mip_request(
+    const std::vector<uint8_t>& data,
+    mps_parser::mps_data_model_t<i_t, f_t>& problem_data,
+    mip_solver_settings_t<i_t, f_t>& settings) = 0;
+
+  // Serialize an LP solution for transmission back to the client
+  virtual std::vector<uint8_t> serialize_lp_solution(
+    const optimization_problem_solution_t<i_t, f_t>& solution) = 0;
+
+  // Serialize a MIP solution
+  virtual std::vector<uint8_t> serialize_mip_solution(
+    const mip_solution_t<i_t, f_t>& solution) = 0;
+
+  // ═══════════════════════════════════════════════════════════════════
+  // METADATA
+  // ═══════════════════════════════════════════════════════════════════
+
+  // Human-readable format name (e.g., "msgpack", "json", "flatbuffers")
+  virtual std::string format_name() const = 0;
+
+  // Protocol version for compatibility checking
+  virtual uint32_t protocol_version() const = 0;
+};
+```
+
+### Factory Function
+
+Your plugin must export a factory function that creates the serializer:
+
+```cpp
+extern "C" {
+  // For int32_t indices, double floats (most common)
+  std::unique_ptr<cuopt::linear_programming::remote_serializer_t<int32_t, double>>
+  create_cuopt_serializer_i32_f64();
+
+  // Additional type combinations if needed
+  std::unique_ptr<cuopt::linear_programming::remote_serializer_t<int32_t, float>>
+  create_cuopt_serializer_i32_f32();
+}
+```
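+
+cuOpt resolves these factory symbols at runtime from the shared library named by `CUOPT_SERIALIZER_LIB` (see "Using the Plugin" below) via `dlopen`/`dlsym`. As an orientation sketch only (the actual loader lives inside cuOpt; only the exported symbol name is part of the contract), the loading side amounts to:
+
+```cpp
+#include <dlfcn.h>
+
+#include <memory>
+
+#include <cuopt/linear_programming/utilities/remote_serialization.hpp>
+
+using cuopt::linear_programming::remote_serializer_t;
+using factory_i32_f64 = std::unique_ptr<remote_serializer_t<int32_t, double>> (*)();
+
+// Hypothetical loader: resolve and call the i32/f64 factory from a plugin.
+std::unique_ptr<remote_serializer_t<int32_t, double>> load_plugin(const char* path)
+{
+  // The handle is intentionally not closed here; it must outlive the serializer.
+  void* handle = dlopen(path, RTLD_NOW | RTLD_LOCAL);
+  if (handle == nullptr) { return nullptr; }
+  auto factory =
+    reinterpret_cast<factory_i32_f64>(dlsym(handle, "create_cuopt_serializer_i32_f64"));
+  return factory != nullptr ? factory() : nullptr;
+}
+```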
~my_serializer_t() override = default; + + std::string format_name() const override { return "my_format"; } + uint32_t protocol_version() const override { return 1; } + + //======================================================================== + // CLIENT-SIDE METHODS + //======================================================================== + + std::vector serialize_lp_request( + const mps_parser::data_model_view_t& view, + const pdlp_solver_settings_t& settings) override + { + std::vector buffer; + + // Start with message type + buffer.push_back(MSG_LP_REQUEST); + + // Serialize problem dimensions + i_t n_rows = view.get_constraint_matrix_offsets().size() > 0 + ? view.get_constraint_matrix_offsets().size() - 1 : 0; + i_t n_cols = view.get_objective_coefficients().size(); + i_t nnz = view.get_constraint_matrix_values().size(); + + // ... serialize all problem data ... + // See msgpack_serializer.cpp for complete example + + return buffer; + } + + std::vector serialize_mip_request( + const mps_parser::data_model_view_t& view, + const mip_solver_settings_t& settings) override + { + std::vector buffer; + buffer.push_back(MSG_MIP_REQUEST); + // ... similar to LP but with MIP settings ... + return buffer; + } + + optimization_problem_solution_t deserialize_lp_solution( + const std::vector& data) override + { + // Parse the solution data + // Create and return solution object + + // On error, return error solution: + // return optimization_problem_solution_t( + // cuopt::logic_error("Parse error", cuopt::error_type_t::RuntimeError)); + } + + mip_solution_t deserialize_mip_solution( + const std::vector& data) override + { + // Similar to LP solution + } + + //======================================================================== + // SERVER-SIDE METHODS + //======================================================================== + + bool is_mip_request(const std::vector& data) override + { + if (data.empty()) return false; + return data[0] == MSG_MIP_REQUEST; + } + + bool deserialize_lp_request( + const std::vector& data, + mps_parser::mps_data_model_t& mps_data, + pdlp_solver_settings_t& settings) override + { + try { + // Parse message type + if (data.empty() || data[0] != MSG_LP_REQUEST) return false; + + // Parse problem data and populate mps_data: + // mps_data.set_problem_name("..."); + // mps_data.set_objective_coefficients(coeffs.data(), coeffs.size()); + // mps_data.set_csr_constraint_matrix(...); + // mps_data.set_variable_bounds(...); + // mps_data.set_constraint_bounds(...); + + // Parse settings: + // settings.time_limit = ...; + // settings.iteration_limit = ...; + + return true; + } catch (...) { + return false; + } + } + + bool deserialize_mip_request( + const std::vector& data, + mps_parser::mps_data_model_t& mps_data, + mip_solver_settings_t& settings) override + { + // Similar to LP, also set variable types for integers/binaries: + // mps_data.set_variable_types(var_types); + return true; + } + + std::vector serialize_lp_solution( + const optimization_problem_solution_t& solution) override + { + std::vector buffer; + buffer.push_back(MSG_LP_SOLUTION); + + // NOTE: Server calls solution.to_host() before serialization, + // so solution data is always in CPU memory. Use: + // solution.get_primal_solution_host() + // solution.get_dual_solution_host() + // solution.get_reduced_cost_host() + + // Serialize termination status, objective, solution vectors, etc. 
+
+    return buffer;
+  }
+
+  std::vector<uint8_t> serialize_mip_solution(
+    const mip_solution_t<i_t, f_t>& solution) override
+  {
+    std::vector<uint8_t> buffer;
+    buffer.push_back(MSG_MIP_SOLUTION);
+
+    // Use solution.get_solution_host() for the solution vector
+
+    return buffer;
+  }
+};
+
+//==========================================================================
+// FACTORY FUNCTIONS - Must be exported with C linkage
+//==========================================================================
+
+template <typename i_t, typename f_t>
+std::unique_ptr<remote_serializer_t<i_t, f_t>> create_serializer_impl()
+{
+  return std::make_unique<my_serializer_t<i_t, f_t>>();
+}
+
+}  // namespace cuopt::linear_programming
+
+// Export factory functions with C linkage for dlopen/dlsym
+extern "C" {
+
+std::unique_ptr<cuopt::linear_programming::remote_serializer_t<int32_t, double>>
+create_cuopt_serializer_i32_f64()
+{
+  return cuopt::linear_programming::create_serializer_impl<int32_t, double>();
+}
+
+std::unique_ptr<cuopt::linear_programming::remote_serializer_t<int32_t, float>>
+create_cuopt_serializer_i32_f32()
+{
+  return cuopt::linear_programming::create_serializer_impl<int32_t, float>();
+}
+
+// Add more type combinations as needed
+
+}
+```
+
+### Step 2: Create CMakeLists.txt for the Plugin
+
+Create `cpp/src/linear_programming/utilities/serializers/CMakeLists.txt`:
+
+```cmake
+# Build the custom serializer as a shared library plugin
+add_library(cuopt_my_serializer SHARED my_serializer.cpp)
+
+target_link_libraries(cuopt_my_serializer
+  PRIVATE
+    cuopt  # Link against cuOpt for solution types
+)
+
+target_include_directories(cuopt_my_serializer
+  PRIVATE
+    ${CMAKE_SOURCE_DIR}/include
+)
+
+# Set RPATH so the plugin can find libcuopt.so
+set_target_properties(cuopt_my_serializer PROPERTIES
+  INSTALL_RPATH "$ORIGIN"
+)
+
+install(TARGETS cuopt_my_serializer
+  DESTINATION ${CMAKE_INSTALL_LIBDIR}
+)
+```
+
+### Step 3: Add to Parent CMakeLists.txt
+
+In `cpp/CMakeLists.txt`, add:
+
+```cmake
+add_subdirectory(src/linear_programming/utilities/serializers)
+```
+
+### Step 4: Build the Plugin
+
+```bash
+# Build everything including the plugin
+./build.sh libcuopt cuopt_grpc_server
+
+# Or just the plugin (after initial build)
+cd cpp/build
+ninja cuopt_my_serializer
+```
+
+## Using the Plugin
+
+### Environment Variable
+
+Set `CUOPT_SERIALIZER_LIB` to point to your plugin:
+
+```bash
+export CUOPT_SERIALIZER_LIB=/path/to/libcuopt_my_serializer.so
+```
+
+### Running Server with Custom Serializer
+
+```bash
+# Set the serializer library
+export CUOPT_SERIALIZER_LIB=$CONDA_PREFIX/lib/libcuopt_my_serializer.so
+
+# Start the server
+cuopt_grpc_server -p 8765
+```
+
+Server output will show:
+```
+[remote_solve] Loading custom serializer from: /path/to/libcuopt_my_serializer.so
+[remote_solve] Using custom serializer: my_format
+```
+
+### Running Client with Custom Serializer
+
+```bash
+# Same serializer must be used on client side
+export CUOPT_SERIALIZER_LIB=$CONDA_PREFIX/lib/libcuopt_my_serializer.so
+export CUOPT_REMOTE_HOST=localhost
+export CUOPT_REMOTE_PORT=8765
+
+# Run cuopt_cli
+cuopt_cli problem.mps
+
+# Or Python
+python my_solver_script.py
+```
+
+### Complete Example Session
+
+```bash
+# Terminal 1: Start server with msgpack serializer
+export CUOPT_SERIALIZER_LIB=$CONDA_PREFIX/lib/libcuopt_msgpack_serializer.so
+cuopt_grpc_server -p 8765
+
+# Terminal 2: Run client with same serializer
+export CUOPT_SERIALIZER_LIB=$CONDA_PREFIX/lib/libcuopt_msgpack_serializer.so
+export CUOPT_REMOTE_HOST=localhost
+export CUOPT_REMOTE_PORT=8765
+cuopt_cli /path/to/problem.mps
+```
+
+## Data Model Reference
+
+### Problem Data (`data_model_view_t`)
+
+Key getters for serializing problem data:
+
+```cpp
+// Problem metadata
+view.get_problem_name()              // std::string
+view.get_objective_name()            // std::string
+view.get_sense()                     // bool (true = maximize)
+view.get_objective_scaling_factor()  // f_t
+view.get_objective_offset()          // f_t
+
+// Constraint matrix (CSR format)
+view.get_constraint_matrix_values()  // span<const f_t>
+view.get_constraint_matrix_indices() // span<const i_t>
+view.get_constraint_matrix_offsets() // span<const i_t>
+
+// Objective and bounds
+view.get_objective_coefficients()    // span<const f_t>
+view.get_variable_lower_bounds()     // span<const f_t>
+view.get_variable_upper_bounds()     // span<const f_t>
+view.get_constraint_lower_bounds()   // span<const f_t>
+view.get_constraint_upper_bounds()   // span<const f_t>
+
+// For MIP problems
+view.get_variable_types()            // span<const char> ('C', 'I', 'B')
+
+// Names (optional)
+view.get_variable_names()            // std::vector<std::string>
+view.get_row_names()                 // std::vector<std::string>
+```
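+
+As a rough sketch of how these getters feed a byte buffer (illustrative only;
+`append_bytes` is a hypothetical helper, not part of the cuOpt API, and the
+exact span types come from `remote_serialization.hpp`):
+
+```cpp
+// Hypothetical helper: append n raw bytes to the buffer.
+inline void append_bytes(std::vector<uint8_t>& buf, const void* p, size_t n)
+{
+  const auto* b = static_cast<const uint8_t*>(p);
+  buf.insert(buf.end(), b, b + n);
+}
+
+// Inside serialize_lp_request(): write the three CSR arrays through the view.
+const auto values  = view.get_constraint_matrix_values();
+const auto indices = view.get_constraint_matrix_indices();
+const auto offsets = view.get_constraint_matrix_offsets();
+append_bytes(buffer, values.data(),  values.size()  * sizeof(f_t));
+append_bytes(buffer, indices.data(), indices.size() * sizeof(i_t));
+append_bytes(buffer, offsets.data(), offsets.size() * sizeof(i_t));
+```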
+
+### Problem Data (`mps_data_model_t`) - Server Side
+
+Key setters for deserializing:
+
+```cpp
+mps_data.set_problem_name(name);
+mps_data.set_objective_name(name);
+mps_data.set_maximize(bool);
+mps_data.set_objective_scaling_factor(factor);
+mps_data.set_objective_offset(offset);
+
+mps_data.set_objective_coefficients(ptr, size);
+mps_data.set_csr_constraint_matrix(values, nvals, indices, nidx, offsets, noff);
+mps_data.set_variable_bounds(lower, upper, size);
+mps_data.set_constraint_bounds(lower, upper, size);
+
+// For MIP
+mps_data.set_variable_types(std::vector<char>);
+```
+
+### LP Solution (`optimization_problem_solution_t`)
+
+```cpp
+// Getters (for serialization)
+solution.get_termination_status()   // pdlp_termination_status_t
+solution.get_objective_value()      // f_t
+solution.get_primal_solution_host() // const std::vector<f_t>&
+solution.get_dual_solution_host()   // const std::vector<f_t>&
+solution.get_reduced_cost_host()    // const std::vector<f_t>&
+solution.get_solve_time()           // double
+solution.get_l2_primal_residual()   // f_t
+solution.get_l2_dual_residual()     // f_t
+solution.get_gap()                  // f_t
+solution.get_nb_iterations()        // i_t
+
+// Setters (for deserialization on client)
+solution.set_termination_status(status);
+solution.set_objective_value(value);
+solution.set_primal_solution_host(vector);
+solution.set_dual_solution_host(vector);
+solution.set_reduced_cost_host(vector);
+solution.set_solve_time(time);
+// ... etc
+```
+
+### MIP Solution (`mip_solution_t`)
+
+```cpp
+// Getters
+solution.get_termination_status() // mip_termination_status_t
+solution.get_objective_value()    // f_t
+solution.get_solution_host()      // const std::vector<f_t>&
+solution.get_total_solve_time()   // double
+solution.get_mip_gap()            // f_t
+
+// Setters
+solution.set_solution_host(vector);
+solution.set_objective_value(value);
+solution.set_mip_gap(gap);
+// ... etc
+```
+
+## Tips and Best Practices
+
+### 1. Message Type Identification
+
+Always include a message type identifier as the first byte(s):
+
+```cpp
+constexpr uint8_t MSG_LP_REQUEST = 1;
+constexpr uint8_t MSG_MIP_REQUEST = 2;
+constexpr uint8_t MSG_LP_SOLUTION = 3;
+constexpr uint8_t MSG_MIP_SOLUTION = 4;
+```
+
+### 2. Version Compatibility
+
+Include a protocol version in your messages for future compatibility:
+
+```cpp
+// In serialize:
+buffer.push_back(MSG_LP_REQUEST);
+buffer.push_back(PROTOCOL_VERSION);
+
+// In deserialize:
+uint8_t version = data[1];
+if (version != PROTOCOL_VERSION) {
+  // Handle version mismatch
+}
+```
+
+### 3. Error Handling
+
+Return proper error solutions on parse failures:
+
+```cpp
+optimization_problem_solution_t<i_t, f_t> deserialize_lp_solution(...) {
+  try {
+    // Parse...
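+    // Illustrative only (hypothetical message layout): validate the header
+    // bytes before decoding any payload, e.g.
+    //   if (data.size() < 2 || data[0] != MSG_LP_SOLUTION) {
+    //     throw std::runtime_error("unexpected message type");
+    //   }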
+  } catch (const std::exception& e) {
+    return optimization_problem_solution_t<i_t, f_t>(
+      cuopt::logic_error(
+        std::string("Deserialize error: ") + e.what(),
+        cuopt::error_type_t::RuntimeError));
+  }
+}
+```
+
+### 4. Solution Memory
+
+The server calls `solution.to_host()` before serialization, so:
+- Always use `get_*_host()` methods for solution data
+- No need to handle GPU memory in your serializer
+
+### 5. Testing
+
+Test your serializer with both LP and MIP problems:
+
+```bash
+# LP test
+cuopt_cli /path/to/lp_problem.mps
+
+# MIP test (use a problem with integer variables)
+cuopt_cli /path/to/mip_problem.mps
+```
+
+## Reference Implementation
+
+See the MsgPack serializer for a complete working example:
+- `cpp/src/linear_programming/utilities/serializers/msgpack_serializer.cpp`
+- `cpp/src/linear_programming/utilities/serializers/CMakeLists.txt`
+
+## Troubleshooting
+
+### "Failed to load serializer library"
+
+- Check that the path in `CUOPT_SERIALIZER_LIB` is correct
+- Ensure the library was built: `ls $CONDA_PREFIX/lib/libcuopt_*serializer.so`
+
+### "Factory function not found"
+
+- Ensure factory functions are exported with `extern "C"`
+- Check function names match: `create_cuopt_serializer_i32_f64`, etc.
+
+### "Read failed" / Malformed messages
+
+- Ensure client and server use the **same** serializer
+- Check message framing is consistent
+- Verify all required fields are serialized
+
+### Symbol errors at runtime
+
+- Rebuild and reinstall with `./build.sh libcuopt cuopt_grpc_server`
+- Ensure the plugin links against `cuopt`
diff --git a/docs/developer/SOLUTION_MEMORY_ARCHITECTURE.md b/docs/developer/SOLUTION_MEMORY_ARCHITECTURE.md
new file mode 100644
index 000000000..1180e323b
--- /dev/null
+++ b/docs/developer/SOLUTION_MEMORY_ARCHITECTURE.md
@@ -0,0 +1,236 @@
+# cuOpt Solution Memory Architecture
+
+This document describes how cuOpt manages solution data memory for both local GPU-based solving and remote CPU-only solving.
+
+## Overview
+
+cuOpt solutions can exist in either GPU memory (for local high-performance workflows) or CPU memory (for remote solve and CPU-only clients). The architecture supports both use cases efficiently.
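+
+In practice this means user code can ask a solution where its data lives and
+pick the matching accessor. A minimal sketch, using the accessors documented
+below (the solve call is elided; types follow the `int32_t`/`double`
+instantiation used elsewhere in this guide):
+
+```cpp
+// `solution` is an optimization_problem_solution_t<int32_t, double>
+// returned by a solve call (local or remote).
+if (solution.is_device_memory()) {
+  // Local GPU solve: data lives in rmm::device_uvector<double> buffers.
+  const auto& x_gpu = solution.get_primal_solution();
+} else {
+  // Remote solve: data arrived as host vectors.
+  const std::vector<double>& x_cpu = solution.get_primal_solution_host();
+}
+```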
+
+## Solution Classes
+
+### LP Solution: `optimization_problem_solution_t`
+
+Located in: `cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp`
+
+**Key Data Members:**
+```cpp
+// GPU memory (primary storage for local solve)
+rmm::device_uvector<f_t> primal_solution_;
+rmm::device_uvector<f_t> dual_solution_;
+rmm::device_uvector<f_t> reduced_cost_;
+
+// CPU memory (used for remote solve or explicit host access)
+std::vector<f_t> primal_solution_host_;
+std::vector<f_t> dual_solution_host_;
+std::vector<f_t> reduced_cost_host_;
+
+// Scalars (always on host)
+f_t objective_value_;
+f_t dual_objective_value_;
+f_t l2_primal_residual_;
+f_t l2_dual_residual_;
+f_t gap_;
+i_t nb_iterations_;
+f_t solve_time_;
+pdlp_termination_status_t termination_status_;
+error_type_t error_status_;
+```
+
+### MIP Solution: `mip_solution_t`
+
+Located in: `cpp/include/cuopt/linear_programming/mip/solver_solution.hpp`
+
+**Key Data Members:**
+```cpp
+// GPU memory (primary storage for local solve)
+rmm::device_uvector<f_t> solution_;
+std::vector<rmm::device_uvector<f_t>> solution_pool_;
+
+// CPU memory (used for remote solve)
+std::vector<f_t> solution_host_;
+std::vector<std::vector<f_t>> solution_pool_host_;
+
+// Scalars (always on host)
+f_t objective_;
+f_t mip_gap_;
+f_t max_constraint_violation_;
+f_t max_int_violation_;
+f_t max_variable_bound_violation_;
+mip_termination_status_t termination_status_;
+error_type_t error_status_;
+```
+
+## Memory Management Strategy
+
+### Local Solve (GPU)
+
+When solving locally on a GPU:
+
+1. **Solver computes** → Results in GPU memory (`device_uvector`)
+2. **Solution returned** → Contains GPU buffers
+3. **User accesses** → Can work directly with GPU data or copy to host as needed
+
+```
+┌─────────────┐      ┌─────────────┐      ┌─────────────┐
+│   Solver    │ ──►  │  Solution   │ ──►  │    User     │
+│   (GPU)     │      │  (GPU mem)  │      │  (GPU/CPU)  │
+└─────────────┘      └─────────────┘      └─────────────┘
+```
+
+### Remote Solve (CPU-only client)
+
+When solving remotely from a CPU-only machine:
+
+1. **Client sends** → Problem data serialized and sent to server
+2. **Server solves** → Results computed on GPU
+3. **`to_host()` called** → GPU data copied to CPU memory
+4. **Solution serialized** → CPU data sent back to client
+5. **Client receives** → Solution with CPU memory only
+
+```
+┌──────────┐     ┌──────────────────────────────────────────┐     ┌──────────┐
+│  Client  │ ──► │                  SERVER                  │ ◄── │  Client  │
+│ (no GPU) │     │    GPU solve → to_host() → serialize     │     │(solution)│
+└──────────┘     └──────────────────────────────────────────┘     └──────────┘
+```
+
+## The `to_host()` Method
+
+Both solution classes provide a `to_host()` method that copies GPU data to CPU:
+
+```cpp
+// LP Solution
+void optimization_problem_solution_t<i_t, f_t>::to_host(rmm::cuda_stream_view stream_view)
+{
+  if (primal_solution_.size() > 0) {
+    primal_solution_host_.resize(primal_solution_.size());
+    raft::copy(primal_solution_host_.data(), primal_solution_.data(),
+               primal_solution_.size(), stream_view);
+  }
+  // ... similar for dual_solution_, reduced_cost_
+  stream_view.synchronize();
+}
+
+// MIP Solution
+void mip_solution_t<i_t, f_t>::to_host(rmm::cuda_stream_view stream_view)
+{
+  if (solution_.size() > 0) {
+    solution_host_.resize(solution_.size());
+    raft::copy(solution_host_.data(), solution_.data(),
+               solution_.size(), stream_view);
+  }
+  // ... similar for solution_pool_
+  stream_view.synchronize();
+}
+```
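+
+A typical host-access pattern after a local solve looks like this (sketch
+only; the solve call is elided as in the server example below, and `stream`
+stands for an `rmm::cuda_stream_view`):
+
+```cpp
+auto solution = solve_lp(...);   // runs on the GPU
+solution.to_host(stream);        // one explicit GPU → CPU copy
+const std::vector<double>& x = solution.get_primal_solution_host();
+```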
+
+### When to Call `to_host()`
+
+- **Server-side remote solve**: Called before serializing the solution for network transmission
+- **Client accessing host data**: If the user needs `std::vector` access to solution data
+- **Writing to files**: When saving solutions to disk
+
+### Performance Considerations
+
+The `to_host()` copy adds overhead, but:
+- It is only called when CPU access is actually needed
+- GPU computation dominates solve time for non-trivial problems
+- It is a one-time cost after the solve completes
+
+**Typical overhead**: Negligible for problems with thousands of variables. For a 10,000-variable problem, copying ~80KB takes <1ms.
+
+## Accessor Methods
+
+### GPU Accessors (for local solve)
+
+```cpp
+// LP
+const rmm::device_uvector<f_t>& get_primal_solution() const;
+const rmm::device_uvector<f_t>& get_dual_solution() const;
+const rmm::device_uvector<f_t>& get_reduced_cost() const;
+
+// MIP
+const rmm::device_uvector<f_t>& get_solution() const;
+```
+
+### CPU Accessors (for remote solve)
+
+```cpp
+// LP
+const std::vector<f_t>& get_primal_solution_host() const;
+const std::vector<f_t>& get_dual_solution_host() const;
+const std::vector<f_t>& get_reduced_cost_host() const;
+
+// MIP
+const std::vector<f_t>& get_solution_host() const;
+```
+
+### Checking Memory Location
+
+```cpp
+// Returns true if solution data is on GPU
+bool is_device_memory() const;
+```
+
+## Usage in Remote Solve Server
+
+The server calls `to_host()` before serialization:
+
+```cpp
+// In cuopt_grpc_server.cpp
+if (is_mip) {
+  mip_solution_t<i_t, f_t> solution = solve_mip(...);
+  solution.to_host(stream);  // Copy GPU → CPU
+  result_data = serializer->serialize_mip_solution(solution);
+} else {
+  optimization_problem_solution_t<i_t, f_t> solution = solve_lp(...);
+  solution.to_host(stream);  // Copy GPU → CPU
+  result_data = serializer->serialize_lp_solution(solution);
+}
+```
+
+## Design Rationale
+
+### Why Not Pure CPU Memory?
+
+An earlier design considered using only `std::vector` for solutions. We chose the hybrid approach because:
+
+1. **GPU performance**: Local solves benefit from keeping data on the GPU
+2. **Minimal changes**: Existing GPU-based code continues to work unchanged
+3. **Flexibility**: Users can choose GPU or CPU access as needed
+
+### Why Not Pure GPU Memory?
+
+Pure GPU memory would fail for:
+
+1. **Remote solve**: CPU-only clients need CPU data
+2. **Serialization**: Network transmission requires CPU memory
+3. **File I/O**: Writing to disk typically uses CPU memory
+
+### Hybrid Approach Benefits
+
+- ✅ Local GPU workflows remain efficient
+- ✅ Remote solve works with CPU-only clients
+- ✅ Minimal code changes to existing solvers
+- ✅ On-demand copy (only when needed)
+- ✅ Clear separation of concerns
+
+## Files Involved
+
+| File | Description |
+|------|-------------|
+| `cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp` | LP solution class declaration |
+| `cpp/src/linear_programming/solver_solution.cu` | LP solution implementation + `to_host()` |
+| `cpp/include/cuopt/linear_programming/mip/solver_solution.hpp` | MIP solution class declaration |
+| `cpp/src/mip/solver_solution.cu` | MIP solution implementation + `to_host()` |
+| `cpp/cuopt_grpc_server.cpp` | Server calls `to_host()` before serialization |
+| `cpp/src/linear_programming/utilities/protobuf_serializer.cu` | Uses host accessors for serialization |
+
+## Summary
+
+The cuOpt solution memory architecture uses a **hybrid GPU/CPU approach**:
+
+1. 
**Primary storage**: GPU (`device_uvector`) for local solve performance +2. **Secondary storage**: CPU (`std::vector`) for remote solve and host access +3. **On-demand copying**: `to_host()` method copies GPU → CPU when needed +4. **Transparent to users**: Local users get GPU data, remote users get CPU data automatically diff --git a/python/cuopt/CMakeLists.txt b/python/cuopt/CMakeLists.txt index 905662493..7dd8415da 100644 --- a/python/cuopt/CMakeLists.txt +++ b/python/cuopt/CMakeLists.txt @@ -1,5 +1,5 @@ # cmake-format: off -# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # cmake-format: on @@ -18,7 +18,6 @@ project( # that is fixed we need to keep C. C CXX CUDA) - find_package(cuopt "${RAPIDS_VERSION}") find_package(mps_parser "${RAPIDS_VERSION}") diff --git a/python/cuopt/cuopt/__init__.py b/python/cuopt/cuopt/__init__.py index c6e9150c8..7ea141221 100644 --- a/python/cuopt/cuopt/__init__.py +++ b/python/cuopt/cuopt/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 try: @@ -9,5 +9,23 @@ libcuopt.load_library() del libcuopt -from cuopt import linear_programming, routing from cuopt._version import __git_commit__, __version__, __version_major_minor__ + +# Lazy imports for linear_programming and routing modules +# This allows cuopt to be imported on CPU-only hosts when remote solve is configured +_submodules = ["linear_programming", "routing"] + + +def __getattr__(name): + """Lazy import submodules to support CPU-only hosts with remote solve.""" + if name in _submodules: + import importlib + return importlib.import_module(f"cuopt.{name}") + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + + +def __dir__(): + return __all__ + _submodules + + +__all__ = ["__git_commit__", "__version__", "__version_major_minor__"] diff --git a/python/cuopt/cuopt/linear_programming/__init__.py b/python/cuopt/cuopt/linear_programming/__init__.py index d267c2171..233161865 100644 --- a/python/cuopt/cuopt/linear_programming/__init__.py +++ b/python/cuopt/cuopt/linear_programming/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 from cuopt.linear_programming import internals diff --git a/python/cuopt/cuopt/linear_programming/internals/internals.pyx b/python/cuopt/cuopt/linear_programming/internals/internals.pyx index 0e4342fe1..706f42048 100644 --- a/python/cuopt/cuopt/linear_programming/internals/internals.pyx +++ b/python/cuopt/cuopt/linear_programming/internals/internals.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # noqa +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# noqa # SPDX-License-Identifier: Apache-2.0 # cython: profile=False @@ -10,6 +10,7 @@ from libc.stdint cimport uintptr_t import numpy as np +import ctypes from numba.cuda.api import from_cuda_array_interface @@ -49,18 +50,10 @@ cdef class PyCallback: return data def get_numpy_array(self, data, shape, typestr): - sizeofType = 4 if typestr == "float32" else 8 - desc = { - 'shape': (shape,), - 'strides': None, - 'typestr': typestr, - 'data': (data, False), - 'version': 3 - } - data = desc['data'][0] - shape = desc['shape'] - - numpy_array = np.array([data], dtype=desc['typestr']).reshape(shape) + ctype = ctypes.c_float if typestr == "float32" else ctypes.c_double + buf_type = ctype * shape + buf = buf_type.from_address(data) + numpy_array = np.ctypeslib.as_array(buf) return numpy_array cdef class GetSolutionCallback(PyCallback): diff --git a/python/cuopt/cuopt/linear_programming/solver/solver.pxd b/python/cuopt/cuopt/linear_programming/solver/solver.pxd index c140e3d0c..9e211e212 100644 --- a/python/cuopt/cuopt/linear_programming/solver/solver.pxd +++ b/python/cuopt/cuopt/linear_programming/solver/solver.pxd @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # noqa +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # noqa # SPDX-License-Identifier: Apache-2.0 @@ -120,9 +120,16 @@ cdef extern from "cuopt/linear_programming/pdlp/solver_solution.hpp" namespace " cdef extern from "cuopt/linear_programming/utilities/cython_solve.hpp" namespace "cuopt::cython": # noqa cdef cppclass linear_programming_ret_t: + # GPU (device) storage unique_ptr[device_buffer] primal_solution_ unique_ptr[device_buffer] dual_solution_ unique_ptr[device_buffer] reduced_cost_ + # CPU (host) storage for remote solve + vector[double] primal_solution_host_ + vector[double] dual_solution_host_ + vector[double] reduced_cost_host_ + # Flag indicating where solution data is stored + bool is_device_memory_ # PDLP warm start data unique_ptr[device_buffer] current_primal_solution_ unique_ptr[device_buffer] current_dual_solution_ @@ -155,7 +162,12 @@ cdef extern from "cuopt/linear_programming/utilities/cython_solve.hpp" namespace bool solved_by_pdlp_ cdef cppclass mip_ret_t: + # GPU (device) storage unique_ptr[device_buffer] solution_ + # CPU (host) storage for remote solve + vector[double] solution_host_ + # Flag indicating where solution data is stored + bool is_device_memory_ mip_termination_status_t termination_status_ error_type_t error_status_ string error_message_ diff --git a/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx b/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx index 1991af0d6..5a02e1bfa 100644 --- a/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx +++ b/python/cuopt/cuopt/linear_programming/solver/solver_wrapper.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # noqa +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# noqa # SPDX-License-Identifier: Apache-2.0 @@ -13,8 +13,6 @@ from datetime import date, datetime from dateutil.relativedelta import relativedelta -from cuopt.utilities import type_cast - from libc.stdint cimport uintptr_t from libc.stdlib cimport free, malloc from libc.string cimport memcpy, strcpy, strlen @@ -42,25 +40,56 @@ from cuopt.linear_programming.solver.solver cimport ( ) import math +import os import sys import warnings from enum import IntEnum -import cupy as cp import numpy as np -from numba import cuda - -import cudf from cuopt.linear_programming.solver_settings.solver_settings import ( PDLPSolverMode, SolverSettings, ) -from cuopt.utilities import InputValidationError, series_from_buf +from cuopt.utilities import InputValidationError import pyarrow as pa +def is_remote_solve_enabled(): + """Check if remote solve is enabled via environment variables. + + Remote solve is enabled when both CUOPT_REMOTE_HOST and CUOPT_REMOTE_PORT + environment variables are set and valid. + + Returns + ------- + bool + True if remote solve is enabled, False otherwise. + """ + host = os.environ.get("CUOPT_REMOTE_HOST", "") + port = os.environ.get("CUOPT_REMOTE_PORT", "") + + if host and port: + try: + int(port) # Validate port is a valid integer + return True + except ValueError: + return False + return False + + +def _get_cuda_imports(): + """Lazily import CUDA-dependent modules. + + Only call this when GPU operations are actually needed. + """ + import cupy as cp + import cudf + from cuopt.utilities import series_from_buf + return cp, cudf, series_from_buf + + cdef extern from "cuopt/linear_programming/utilities/internals.hpp" namespace "cuopt::internals": # noqa cdef cppclass base_solution_callback_t @@ -108,39 +137,52 @@ cdef char* c_get_string(string in_str): def get_data_ptr(array): - if isinstance(array, cudf.Series): - return array.__cuda_array_interface__['data'][0] - elif isinstance(array, np.ndarray): + """Get the data pointer from an array. + + Works with both numpy arrays (CPU) and cudf Series (GPU). + """ + if isinstance(array, np.ndarray): return array.__array_interface__['data'][0] + elif hasattr(array, '__cuda_array_interface__'): + # cudf.Series or other CUDA array + return array.__cuda_array_interface__['data'][0] else: raise Exception( "get_data_ptr must be called with cudf.Series or np.ndarray" ) -def type_cast(cudf_obj, np_type, name): - if isinstance(cudf_obj, cudf.Series): - cudf_type = cudf_obj.dtype - elif isinstance(cudf_obj, np.ndarray): - cudf_type = cudf_obj.dtype - elif isinstance(cudf_obj, cudf.DataFrame): - if all([np.issubdtype(dtype, np.number) for dtype in cudf_obj.dtypes]): # noqa - cudf_type = cudf_obj.dtypes[0] +def type_cast(obj, np_type, name): + """Cast array to the specified numpy type. + + Works with both numpy arrays and cudf objects. 
+ """ + if isinstance(obj, np.ndarray): + obj_type = obj.dtype + elif hasattr(obj, 'dtype'): + obj_type = obj.dtype + elif hasattr(obj, 'dtypes'): + # DataFrame-like object + if all([np.issubdtype(dtype, np.number) for dtype in obj.dtypes]): # noqa + obj_type = obj.dtypes[0] else: msg = "All columns in " + name + " should be numeric" raise Exception(msg) + else: + obj_type = type(obj) + if ((np.issubdtype(np_type, np.floating) and - (not np.issubdtype(cudf_type, np.floating))) + (not np.issubdtype(obj_type, np.floating))) or (np.issubdtype(np_type, np.integer) and - (not np.issubdtype(cudf_type, np.integer))) + (not np.issubdtype(obj_type, np.integer))) or (np.issubdtype(np_type, np.bool_) and - (not np.issubdtype(cudf_type, np.bool_))) + (not np.issubdtype(obj_type, np.bool_))) or (np.issubdtype(np_type, np.int8) and - (not np.issubdtype(cudf_type, np.int8)))): - msg = "Casting " + name + " from " + str(cudf_type) + " to " + str(np.dtype(np_type)) # noqa + (not np.issubdtype(obj_type, np.int8)))): + msg = "Casting " + name + " from " + str(obj_type) + " to " + str(np.dtype(np_type)) # noqa warnings.warn(msg) - cudf_obj = cudf_obj.astype(np.dtype(np_type)) - return cudf_obj + obj = obj.astype(np.dtype(np_type)) + return obj cdef set_solver_setting( @@ -291,6 +333,13 @@ cdef set_solver_setting( settings.get_pdlp_warm_start_data().iterations_since_last_restart # noqa ) +cdef _convert_device_buffer_to_numpy(device_buffer, dtype): + """Convert a DeviceBuffer to numpy array using lazy CUDA imports.""" + _, _, series_from_buf = _get_cuda_imports() + series = series_from_buf(device_buffer, dtype) + return series.to_numpy() + + cdef create_solution(unique_ptr[solver_ret_t] sol_ret_ptr, DataModel data_model_obj, is_batch=False): @@ -300,9 +349,17 @@ cdef create_solution(unique_ptr[solver_ret_t] sol_ret_ptr, sol_ret = move(sol_ret_ptr.get()[0]) if sol_ret.problem_type == ProblemCategory.MIP or sol_ret.problem_type == ProblemCategory.IP: # noqa - solution = DeviceBuffer.c_from_unique_ptr( - move(sol_ret.mip_ret.solution_) - ) + # Check if data is on GPU or CPU + if sol_ret.mip_ret.is_device_memory_: + # GPU data - use DeviceBuffer with lazy imports + solution = DeviceBuffer.c_from_unique_ptr( + move(sol_ret.mip_ret.solution_) + ) + solution = _convert_device_buffer_to_numpy(solution, pa.float64()) + else: + # CPU data - convert vector directly to numpy + solution = np.array(sol_ret.mip_ret.solution_host_, dtype=np.float64) + termination_status = sol_ret.mip_ret.termination_status_ error_status = sol_ret.mip_ret.error_status_ error_message = sol_ret.mip_ret.error_message_ @@ -317,8 +374,6 @@ cdef create_solution(unique_ptr[solver_ret_t] sol_ret_ptr, num_nodes = sol_ret.mip_ret.nodes_ num_simplex_iterations = sol_ret.mip_ret.simplex_iterations_ - solution = series_from_buf(solution, pa.float64()).to_numpy() - return Solution( ProblemCategory(sol_ret.problem_type), dict(zip(data_model_obj.get_variable_names(), solution)), @@ -339,15 +394,23 @@ cdef create_solution(unique_ptr[solver_ret_t] sol_ret_ptr, ) else: - primal_solution = DeviceBuffer.c_from_unique_ptr( - move(sol_ret.lp_ret.primal_solution_) - ) - dual_solution = DeviceBuffer.c_from_unique_ptr(move(sol_ret.lp_ret.dual_solution_)) # noqa - reduced_cost = DeviceBuffer.c_from_unique_ptr(move(sol_ret.lp_ret.reduced_cost_)) # noqa + # Check if data is on GPU or CPU + if sol_ret.lp_ret.is_device_memory_: + # GPU data - use DeviceBuffer with lazy imports + primal_solution = DeviceBuffer.c_from_unique_ptr( + move(sol_ret.lp_ret.primal_solution_) + ) + 
dual_solution = DeviceBuffer.c_from_unique_ptr(move(sol_ret.lp_ret.dual_solution_)) # noqa + reduced_cost = DeviceBuffer.c_from_unique_ptr(move(sol_ret.lp_ret.reduced_cost_)) # noqa - primal_solution = series_from_buf(primal_solution, pa.float64()).to_numpy() - dual_solution = series_from_buf(dual_solution, pa.float64()).to_numpy() - reduced_cost = series_from_buf(reduced_cost, pa.float64()).to_numpy() + primal_solution = _convert_device_buffer_to_numpy(primal_solution, pa.float64()) + dual_solution = _convert_device_buffer_to_numpy(dual_solution, pa.float64()) + reduced_cost = _convert_device_buffer_to_numpy(reduced_cost, pa.float64()) + else: + # CPU data - convert vectors directly to numpy + primal_solution = np.array(sol_ret.lp_ret.primal_solution_host_, dtype=np.float64) + dual_solution = np.array(sol_ret.lp_ret.dual_solution_host_, dtype=np.float64) + reduced_cost = np.array(sol_ret.lp_ret.reduced_cost_host_, dtype=np.float64) termination_status = sol_ret.lp_ret.termination_status_ error_status = sol_ret.lp_ret.error_status_ @@ -363,33 +426,77 @@ cdef create_solution(unique_ptr[solver_ret_t] sol_ret_ptr, # In BatchSolve, we don't get the warm start data if not is_batch: - current_primal_solution = DeviceBuffer.c_from_unique_ptr( - move(sol_ret.lp_ret.current_primal_solution_) - ) - current_dual_solution = DeviceBuffer.c_from_unique_ptr( - move(sol_ret.lp_ret.current_dual_solution_) - ) - initial_primal_average = DeviceBuffer.c_from_unique_ptr( - move(sol_ret.lp_ret.initial_primal_average_) - ) - initial_dual_average = DeviceBuffer.c_from_unique_ptr( - move(sol_ret.lp_ret.initial_dual_average_) - ) - current_ATY = DeviceBuffer.c_from_unique_ptr( - move(sol_ret.lp_ret.current_ATY_) - ) - sum_primal_solutions = DeviceBuffer.c_from_unique_ptr( - move(sol_ret.lp_ret.sum_primal_solutions_) - ) - sum_dual_solutions = DeviceBuffer.c_from_unique_ptr( - move(sol_ret.lp_ret.sum_dual_solutions_) - ) - last_restart_duality_gap_primal_solution = DeviceBuffer.c_from_unique_ptr( # noqa - move(sol_ret.lp_ret.last_restart_duality_gap_primal_solution_) - ) - last_restart_duality_gap_dual_solution = DeviceBuffer.c_from_unique_ptr( # noqa - move(sol_ret.lp_ret.last_restart_duality_gap_dual_solution_) - ) + # Warm start data is only available for GPU solves + if sol_ret.lp_ret.is_device_memory_: + current_primal_solution = DeviceBuffer.c_from_unique_ptr( + move(sol_ret.lp_ret.current_primal_solution_) + ) + current_dual_solution = DeviceBuffer.c_from_unique_ptr( + move(sol_ret.lp_ret.current_dual_solution_) + ) + initial_primal_average = DeviceBuffer.c_from_unique_ptr( + move(sol_ret.lp_ret.initial_primal_average_) + ) + initial_dual_average = DeviceBuffer.c_from_unique_ptr( + move(sol_ret.lp_ret.initial_dual_average_) + ) + current_ATY = DeviceBuffer.c_from_unique_ptr( + move(sol_ret.lp_ret.current_ATY_) + ) + sum_primal_solutions = DeviceBuffer.c_from_unique_ptr( + move(sol_ret.lp_ret.sum_primal_solutions_) + ) + sum_dual_solutions = DeviceBuffer.c_from_unique_ptr( + move(sol_ret.lp_ret.sum_dual_solutions_) + ) + last_restart_duality_gap_primal_solution = DeviceBuffer.c_from_unique_ptr( # noqa + move(sol_ret.lp_ret.last_restart_duality_gap_primal_solution_) + ) + last_restart_duality_gap_dual_solution = DeviceBuffer.c_from_unique_ptr( # noqa + move(sol_ret.lp_ret.last_restart_duality_gap_dual_solution_) + ) + + current_primal_solution = _convert_device_buffer_to_numpy( + current_primal_solution, pa.float64() + ) + current_dual_solution = _convert_device_buffer_to_numpy( + current_dual_solution, 
pa.float64() + ) + initial_primal_average = _convert_device_buffer_to_numpy( + initial_primal_average, pa.float64() + ) + initial_dual_average = _convert_device_buffer_to_numpy( + initial_dual_average, pa.float64() + ) + current_ATY = _convert_device_buffer_to_numpy( + current_ATY, pa.float64() + ) + sum_primal_solutions = _convert_device_buffer_to_numpy( + sum_primal_solutions, pa.float64() + ) + sum_dual_solutions = _convert_device_buffer_to_numpy( + sum_dual_solutions, pa.float64() + ) + last_restart_duality_gap_primal_solution = _convert_device_buffer_to_numpy( + last_restart_duality_gap_primal_solution, + pa.float64() + ) + last_restart_duality_gap_dual_solution = _convert_device_buffer_to_numpy( + last_restart_duality_gap_dual_solution, + pa.float64() + ) + else: + # CPU/remote solve - no warm start data available + current_primal_solution = np.array([], dtype=np.float64) + current_dual_solution = np.array([], dtype=np.float64) + initial_primal_average = np.array([], dtype=np.float64) + initial_dual_average = np.array([], dtype=np.float64) + current_ATY = np.array([], dtype=np.float64) + sum_primal_solutions = np.array([], dtype=np.float64) + sum_dual_solutions = np.array([], dtype=np.float64) + last_restart_duality_gap_primal_solution = np.array([], dtype=np.float64) + last_restart_duality_gap_dual_solution = np.array([], dtype=np.float64) + initial_primal_weight = sol_ret.lp_ret.initial_primal_weight_ initial_step_size = sol_ret.lp_ret.initial_step_size_ total_pdlp_iterations = sol_ret.lp_ret.total_pdlp_iterations_ @@ -399,36 +506,6 @@ cdef create_solution(unique_ptr[solver_ret_t] sol_ret_ptr, sum_solution_weight = sol_ret.lp_ret.sum_solution_weight_ iterations_since_last_restart = sol_ret.lp_ret.iterations_since_last_restart_ # noqa - current_primal_solution = series_from_buf( - current_primal_solution, pa.float64() - ).to_numpy() - current_dual_solution = series_from_buf( - current_dual_solution, pa.float64() - ).to_numpy() - initial_primal_average = series_from_buf( - initial_primal_average, pa.float64() - ).to_numpy() - initial_dual_average = series_from_buf( - initial_dual_average, pa.float64() - ).to_numpy() - current_ATY = series_from_buf( - current_ATY, pa.float64() - ).to_numpy() - sum_primal_solutions = series_from_buf( - sum_primal_solutions, pa.float64() - ).to_numpy() - sum_dual_solutions = series_from_buf( - sum_dual_solutions, pa.float64() - ).to_numpy() - last_restart_duality_gap_primal_solution = series_from_buf( - last_restart_duality_gap_primal_solution, - pa.float64() - ).to_numpy() - last_restart_duality_gap_dual_solution = series_from_buf( - last_restart_duality_gap_dual_solution, - pa.float64() - ).to_numpy() - return Solution( ProblemCategory(sol_ret.problem_type), dict(zip(data_model_obj.get_variable_names(), primal_solution)), # noqa diff --git a/python/cuopt/cuopt/utilities/__init__.py b/python/cuopt/cuopt/utilities/__init__.py index 8706b9451..445fe5101 100644 --- a/python/cuopt/cuopt/utilities/__init__.py +++ b/python/cuopt/cuopt/utilities/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 from cuopt.utilities.exception_handler import ( @@ -7,5 +7,20 @@ OutOfMemoryError, catch_cuopt_exception, ) -from cuopt.utilities.type_casting import type_cast -from cuopt.utilities.utils import check_solution, series_from_buf + +# Lazy imports for CUDA-dependent modules to support CPU-only hosts +# These will be imported when first accessed + + +def __getattr__(name): + """Lazy import CUDA-dependent utilities.""" + if name == "type_cast": + from cuopt.utilities.type_casting import type_cast + return type_cast + elif name == "series_from_buf": + from cuopt.utilities.utils import series_from_buf + return series_from_buf + elif name == "check_solution": + from cuopt.utilities.utils import check_solution + return check_solution + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/python/cuopt/cuopt/utilities/utils.py b/python/cuopt/cuopt/utilities/utils.py index b92968d0e..5b8d46b69 100644 --- a/python/cuopt/cuopt/utilities/utils.py +++ b/python/cuopt/cuopt/utilities/utils.py @@ -1,11 +1,8 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 import numpy as np -import cudf -import pylibcudf as plc - from cuopt.linear_programming.solver.solver_parameters import ( CUOPT_ABSOLUTE_PRIMAL_TOLERANCE, CUOPT_MIP_INTEGRALITY_TOLERANCE, @@ -16,6 +13,9 @@ def series_from_buf(buf, dtype): """Helper function to create a cudf series from a buffer. + This function lazily imports cudf and pylibcudf to support + CPU-only execution when remote solve is enabled. + Parameters ---------- buf : cudf.core.buffer.Buffer @@ -28,6 +28,10 @@ def series_from_buf(buf, dtype): cudf.Series A cudf Series built from the buffer """ + # Lazy imports to support CPU-only hosts with remote solve + import cudf + import pylibcudf as plc + col = plc.column.Column.from_rmm_buffer( buf, dtype=plc.types.DataType.from_arrow(dtype), diff --git a/python/libcuopt/CMakeLists.txt b/python/libcuopt/CMakeLists.txt index 7868d6656..eeb36787d 100644 --- a/python/libcuopt/CMakeLists.txt +++ b/python/libcuopt/CMakeLists.txt @@ -39,6 +39,29 @@ FetchContent_Declare( ) FetchContent_MakeAvailable(argparse) +# gRPC (required for this build). Build as an in-tree dependency so we don't rely +# on system grpc-devel packages (not available on RockyLinux 8 images). +# +# NOTE: This will significantly increase build time. Prefer baking gRPC into the +# build image if you need gRPC-enabled wheels regularly. +set(gRPC_BUILD_TESTS OFF CACHE BOOL "" FORCE) +set(gRPC_INSTALL OFF CACHE BOOL "" FORCE) +set(BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE) +set(gRPC_SSL_PROVIDER "package" CACHE STRING "" FORCE) +set(gRPC_ZLIB_PROVIDER "package" CACHE STRING "" FORCE) +set(gRPC_PROTOBUF_PROVIDER "module" CACHE STRING "" FORCE) +set(gRPC_ABSL_PROVIDER "module" CACHE STRING "" FORCE) +set(gRPC_CARES_PROVIDER "module" CACHE STRING "" FORCE) +set(gRPC_RE2_PROVIDER "module" CACHE STRING "" FORCE) +FetchContent_Declare( + grpc + GIT_REPOSITORY https://github.com/grpc/grpc.git + GIT_TAG v1.76.0 + GIT_SHALLOW TRUE + GIT_PROGRESS TRUE + GIT_SUBMODULES_RECURSE TRUE +) +FetchContent_MakeAvailable(grpc) find_package(Boost 1.65 REQUIRED) if(Boost_FOUND)