diff --git a/.github/workflows/linux-x64-gpu.yml b/.github/workflows/linux-x64-gpu.yml index 307c5d2193..4f197579e5 100644 --- a/.github/workflows/linux-x64-gpu.yml +++ b/.github/workflows/linux-x64-gpu.yml @@ -48,13 +48,14 @@ jobs: - name: Checkout repository uses: actions/checkout@v3 - name: Build - uses: addnab/docker-run-action@v3 - with: - image: openmmlab/lmdeploy-builder:cuda${{ matrix.cudaver }} - options: -v ${{ github.workspace }}:/work - run: | - cd /work - source /opt/conda/bin/activate - conda activate py310 - pip install build - python -m build --wheel + run: | + docker run --rm \ + -v ${{ github.workspace }}:/work \ + -w /work \ + openmmlab/lmdeploy-builder:cuda${{ matrix.cudaver }} \ + bash -c " + source /opt/conda/bin/activate && \ + conda activate py310 && \ + pip install build && \ + python -m build --wheel + " diff --git a/lmdeploy/serve/core/async_engine.py b/lmdeploy/serve/core/async_engine.py index b83b13c411..c47e9750cf 100644 --- a/lmdeploy/serve/core/async_engine.py +++ b/lmdeploy/serve/core/async_engine.py @@ -351,6 +351,10 @@ async def generate( # TODO(lvhan) VLM doesn't support input_ids as an argument. 
# Figure out a graceful way to handle the invalid input prompt_input = dict(input_ids=input_ids) + + if gen_config is None: + gen_config = GenerationConfig() + + if gen_config.max_new_tokens is None: max_new_tokens = max(0, self.session_len - session.step - len(input_ids)) if max_new_tokens == 0: diff --git a/src/turbomind/generation/guided_decoding.cc b/src/turbomind/generation/guided_decoding.cc index 8e9fc67e96..e5c39d3e53 100644 --- a/src/turbomind/generation/guided_decoding.cc +++ b/src/turbomind/generation/guided_decoding.cc @@ -62,7 +62,9 @@ void GuidedDecoding::FillMask(int phase, TensorMap& env) matcher->FillNextTokenBitmask(&dlbitmask, i); } else { - std::fill_n(bitmask_buf_.data() + i * bitmask_buf_.stride(0), bitmask_buf_.stride(0), 0); + std::fill_n(bitmask_buf_.data() + i * bitmask_buf_.stride(0), + bitmask_buf_.stride(0), + static_cast<int32_t>(-1)); } } } diff --git a/tests/test_lmdeploy/test_grammar.py b/tests/test_lmdeploy/test_grammar.py index 9bfe03cec4..f88ca455b9 100644 --- a/tests/test_lmdeploy/test_grammar.py +++ b/tests/test_lmdeploy/test_grammar.py @@ -95,3 +95,49 @@ def test_guided_matrix(model_id, backend_name, backend_factory, schema_type): assert re.fullmatch(schema, response[0].text) finally: pipe.close() + + +@pytest.mark.parametrize('model_id', MODEL_IDS) +@pytest.mark.parametrize('backend_name,backend_factory', BACKEND_FACTORIES) +def test_mix_guided_matrix(model_id, backend_name, backend_factory): + pipe = pipeline( + model_id, + backend_config=backend_factory(), + log_level='INFO', + ) + + schema_type = 'json_schema' + response_format = {'type': schema_type} + schema = SCHEMA_MAP[schema_type] + response_format[schema_type] = dict(name='test', schema=schema) + + prompts = ['Make a self introduction please.'] * 4 + try: + config = GenerationConfig(response_format=response_format) + + gen_config = [None if idx % 3 else config for idx in range(4)] + + responses = pipe.batch_infer(prompts, gen_config=gen_config) + + for resp, c in zip(responses, 
gen_config): + if c is None: + # Unguided generation: ensure we get some text, and that it does not + # accidentally produce JSON that conforms to the guided schema. + assert resp and resp.text + try: + data = json.loads(resp.text) + except json.JSONDecodeError: + # Not valid JSON, so it cannot conform to the schema. + continue + else: + try: + validate(instance=data, schema=schema) + except Exception: + # JSON is present but does not satisfy the schema. + continue + else: + pytest.fail('Unguided generation unexpectedly produced schema-conformant JSON') + else: + validate(instance=json.loads(resp.text), schema=schema) + finally: + pipe.close()