diff --git a/.github/workflows/linux-x64-gpu.yml b/.github/workflows/linux-x64-gpu.yml index 307c5d2193..4f197579e5 100644 --- a/.github/workflows/linux-x64-gpu.yml +++ b/.github/workflows/linux-x64-gpu.yml @@ -48,13 +48,14 @@ jobs: - name: Checkout repository uses: actions/checkout@v3 - name: Build - uses: addnab/docker-run-action@v3 - with: - image: openmmlab/lmdeploy-builder:cuda${{ matrix.cudaver }} - options: -v ${{ github.workspace }}:/work - run: | - cd /work - source /opt/conda/bin/activate - conda activate py310 - pip install build - python -m build --wheel + run: | + docker run --rm \ + -v ${{ github.workspace }}:/work \ + -w /work \ + openmmlab/lmdeploy-builder:cuda${{ matrix.cudaver }} \ + bash -c " + source /opt/conda/bin/activate && \ + conda activate py310 && \ + pip install build && \ + python -m build --wheel + " diff --git a/lmdeploy/serve/core/async_engine.py b/lmdeploy/serve/core/async_engine.py index b83b13c411..c47e9750cf 100644 --- a/lmdeploy/serve/core/async_engine.py +++ b/lmdeploy/serve/core/async_engine.py @@ -351,6 +351,10 @@ async def generate( # TODO(lvhan) VLM doesn't support input_ids as an argument. 
# Figure out a graceful way to handle the invalid input prompt_input = dict(input_ids=input_ids) + + if gen_config is None: + gen_config = GenerationConfig() + + if gen_config.max_new_tokens is None: max_new_tokens = max(0, self.session_len - session.step - len(input_ids)) if max_new_tokens == 0: diff --git a/src/turbomind/generation/guided_decoding.cc b/src/turbomind/generation/guided_decoding.cc index 8e9fc67e96..e5c39d3e53 100644 --- a/src/turbomind/generation/guided_decoding.cc +++ b/src/turbomind/generation/guided_decoding.cc @@ -62,7 +62,9 @@ void GuidedDecoding::FillMask(int phase, TensorMap& env) matcher->FillNextTokenBitmask(&dlbitmask, i); } else { - std::fill_n(bitmask_buf_.data() + i * bitmask_buf_.stride(0), bitmask_buf_.stride(0), 0); + std::fill_n(bitmask_buf_.data() + i * bitmask_buf_.stride(0), + bitmask_buf_.stride(0), + static_cast<int32_t>(-1)); } } } diff --git a/tests/test_lmdeploy/test_grammar.py b/tests/test_lmdeploy/test_grammar.py index 9bfe03cec4..f88ca455b9 100644 --- a/tests/test_lmdeploy/test_grammar.py +++ b/tests/test_lmdeploy/test_grammar.py @@ -95,3 +95,49 @@ def test_guided_matrix(model_id, backend_name, backend_factory, schema_type): assert re.fullmatch(schema, response[0].text) finally: pipe.close() + + +@pytest.mark.parametrize('model_id', MODEL_IDS) +@pytest.mark.parametrize('backend_name,backend_factory', BACKEND_FACTORIES) +def test_mix_guided_matrix(model_id, backend_name, backend_factory): + pipe = pipeline( + model_id, + backend_config=backend_factory(), + log_level='INFO', + ) + + schema_type = 'json_schema' + response_format = {'type': schema_type} + schema = SCHEMA_MAP[schema_type] + response_format[schema_type] = dict(name='test', schema=schema) + + prompts = ['Make a self introduction please.'] * 4 + try: + config = GenerationConfig(response_format=response_format) + + gen_config = [None if idx % 3 else config for idx in range(4)] + + responses = pipe.batch_infer(prompts, gen_config=gen_config) + + for resp, c in zip(responses, 
gen_config): + if c is None: + # Unguided generation: ensure we get some text, and that it does not + # accidentally produce JSON that conforms to the guided schema. + assert resp and resp.text + try: + data = json.loads(resp.text) + except json.JSONDecodeError: + # Not valid JSON, so it cannot conform to the schema. + continue + else: + try: + validate(instance=data, schema=schema) + except Exception: + # JSON is present but does not satisfy the schema. + continue + else: + pytest.fail('Unguided generation unexpectedly produced schema-conformant JSON') + else: + validate(instance=json.loads(resp.text), schema=schema) + finally: + pipe.close()