From b73d8e5bfa9b64370fdf5e13c3ddda2faaaacede Mon Sep 17 00:00:00 2001 From: EmmonsCurse <1577972691@qq.com> Date: Sun, 12 Apr 2026 14:55:56 +0800 Subject: [PATCH 1/5] [Cherry-Pick][CI] Sync dev optimizations to 2.4(#7335) --- .github/workflows/_accuracy_test.yml | 36 ++++++++++++++++--- .github/workflows/_base_test.yml | 32 ++++++++++++++--- .github/workflows/_build_linux.yml | 14 ++++++-- .github/workflows/_build_linux_rl.yml | 18 ++++++++-- .github/workflows/_gpu_4cards_case_test.yml | 37 +++++++++++++++---- .github/workflows/_logprob_test_linux.yml | 38 +++++++++++++++++--- .github/workflows/_pre_ce_test.yml | 34 ++++++++++++++---- .github/workflows/_stable_test.yml | 34 +++++++++++++++--- .github/workflows/_unit_test_coverage.yml | 40 ++++++++++++++++----- 9 files changed, 239 insertions(+), 44 deletions(-) diff --git a/.github/workflows/_accuracy_test.yml b/.github/workflows/_accuracy_test.yml index 40c9d5bd98b..2e112f055c0 100644 --- a/.github/workflows/_accuracy_test.yml +++ b/.github/workflows/_accuracy_test.yml @@ -69,12 +69,27 @@ jobs: if ls "${REPO_NAME}"* >/dev/null 2>&1; then echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts" ls -ld "${REPO_NAME}"* - exit 1 + echo "Attempting force cleanup with find..." + find /workspace -mindepth 1 -maxdepth 1 -name "${REPO_NAME}*" -type d -exec chmod -R u+rwx {} \; -exec rm -rf {} + 2>/dev/null || true + if ls "${REPO_NAME}"* >/dev/null 2>&1; then + echo "ERROR: Force cleanup still failed" + exit 1 + else + echo "Force cleanup succeeded" + fi fi ' - wget -q --no-proxy ${fd_archive_url} - tar -xf FastDeploy.tar.gz + wget -q --no-proxy ${fd_archive_url} || { + echo "ERROR: Failed to download archive from ${fd_archive_url}" + exit 1 + } + + tar --no-same-owner -xf FastDeploy.tar.gz || { + echo "ERROR: Failed to extract archive" + exit 1 + } + rm -rf FastDeploy.tar.gz cd FastDeploy git config --global user.name "FastDeployCI" @@ -145,7 +160,10 @@ jobs: docker rm -f ${runner_name} || true fi - docker run --rm --ipc=host --pid=host --net=host \ + docker run --rm --net=host \ + --shm-size=64g \ + --sysctl kernel.msgmax=1048576 \ + --sysctl kernel.msgmnb=268435456 \ --name ${runner_name} \ -v $(pwd):/workspace \ -w /workspace \ @@ -160,10 +178,11 @@ jobs: -v "${CACHE_DIR}/.cache:/root/.cache" \ -v "${CACHE_DIR}/ConfigDir:/root/.config" \ -e TZ="Asia/Shanghai" \ + -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \ --gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc ' # Avoid using pip cache to ensure the wheel is updated to the latest version wget -q --no-proxy https://paddle-qa.bj.bcebos.com/paddle-pipeline/Release-TagBuild-Training-Linux-Gpu-Cuda12.6-Cudnn9.5-Trt10.5-Mkl-Avx-Gcc11-SelfBuiltPypiUse/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl - python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl + python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/ pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple @@ -206,3 +225,10 @@ jobs: fi echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" exit ${TEST_EXIT_CODE} + + - name: Terminate and delete the container + if: always() + run: | + set +e + docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete' + docker rm -f ${{ runner.name }} diff --git a/.github/workflows/_base_test.yml b/.github/workflows/_base_test.yml index 37fd195c179..99813aa4273 100644 --- a/.github/workflows/_base_test.yml +++ b/.github/workflows/_base_test.yml @@ -81,7 +81,14 @@ jobs: if ls "${REPO_NAME}"* >/dev/null 2>&1; then echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts" ls -ld "${REPO_NAME}"* - exit 1 + echo "Attempting force cleanup with find..." + find /workspace -mindepth 1 -maxdepth 1 -name "${REPO_NAME}*" -type d -exec chmod -R u+rwx {} \; -exec rm -rf {} + 2>/dev/null || true + if ls "${REPO_NAME}"* >/dev/null 2>&1; then + echo "ERROR: Force cleanup still failed" + exit 1 + else + echo "Force cleanup succeeded" + fi fi ' @@ -111,7 +118,11 @@ jobs: exit 1 fi - tar -xf FastDeploy.tar.gz + tar --no-same-owner -xf FastDeploy.tar.gz || { + echo "ERROR: Failed to extract archive" + exit 1 + } + rm -rf FastDeploy.tar.gz cd FastDeploy git config --global user.name "FastDeployCI" @@ -182,7 +193,10 @@ jobs: docker rm -f ${runner_name} || true fi - docker run --rm --ipc=host --pid=host --net=host \ + docker run --rm --net=host \ + --shm-size=64g \ + --sysctl kernel.msgmax=1048576 \ + --sysctl kernel.msgmnb=268435456 \ --name ${runner_name} \ -v $(pwd):/workspace \ -w /workspace \ @@ -197,17 +211,18 @@ jobs: -v "${CACHE_DIR}/.cache:/root/.cache" \ -v "${CACHE_DIR}/ConfigDir:/root/.config" \ -e TZ="Asia/Shanghai" \ + -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \ --gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc ' # Avoid using pip cache to ensure the wheel is updated to the latest version wget -q --no-proxy https://paddle-qa.bj.bcebos.com/paddle-pipeline/Release-TagBuild-Training-Linux-Gpu-Cuda12.6-Cudnn9.5-Trt10.5-Mkl-Avx-Gcc11-SelfBuiltPypiUse/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl - python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl + python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/ pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple python -m pip install ${fastdeploy_wheel_url} python -m pip install pytest - wget https://paddle-qa.bj.bcebos.com/zhengtianyu/tools/llm-deploy-linux-amd64 + wget --no-proxy https://paddle-qa.bj.bcebos.com/zhengtianyu/tools/llm-deploy-linux-amd64 chmod +x ./llm-deploy-linux-amd64 ./llm-deploy-linux-amd64 -python python3.10 \ -model_name ERNIE-4.5-0.3B-Paddle \ @@ -279,3 +294,10 @@ jobs: fi echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" exit ${TEST_EXIT_CODE} + + - name: Terminate and delete the container + if: always() + run: | + set +e + docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete' + docker rm -f ${{ runner.name }} diff --git a/.github/workflows/_build_linux.yml b/.github/workflows/_build_linux.yml index a43da2cc8ef..8ce0b3fef64 100644 --- a/.github/workflows/_build_linux.yml +++ b/.github/workflows/_build_linux.yml @@ -120,6 +120,7 @@ jobs: git config --global user.name "FastDeployCI" git config --global user.email "fastdeploy_ci@example.com" git log -n 3 --oneline + - name: FastDeploy Build shell: bash env: @@ -150,7 +151,8 @@ jobs: PARENT_DIR=$(dirname "$WORKSPACE") echo "PARENT_DIR:$PARENT_DIR" docker run --rm --net=host \ - --cap-add=SYS_PTRACE --privileged --shm-size=64G \ + --cap-add=SYS_PTRACE --shm-size=64G \ + --name ${runner_name} \ -v $(pwd):/workspace -w /workspace \ -v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \ -v "${CACHE_DIR}/.cache:/root/.cache" \ @@ -164,6 +166,7 @@ jobs: -e "PADDLE_WHL_URL=${PADDLE_WHL_URL}" \ -e "BRANCH_REF=${BRANCH_REF}" \ -e "CCACHE_MAXSIZE=50G" \ + -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \ --gpus "\"device=${gpu_id}\"" ${docker_image} /bin/bash -c ' if [[ -n "${FD_VERSION}" ]]; then export FASTDEPLOY_VERSION=${FD_VERSION} @@ -188,7 +191,7 @@ jobs: else # Avoid using pip cache to ensure the wheel is updated to the latest version wget -q --no-proxy https://paddle-qa.bj.bcebos.com/paddle-pipeline/Release-TagBuild-Training-Linux-Gpu-Cuda12.6-Cudnn9.5-Trt10.5-Mkl-Avx-Gcc11-SelfBuiltPypiUse/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl - python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl + python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/ fi pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple @@ -237,3 +240,10 @@ jobs: target_path_stripped="${target_path#paddle-github-action/}" WHEEL_PATH=https://paddle-github-action.bj.bcebos.com/${target_path_stripped}/${fd_wheel_name} echo "wheel_path=${WHEEL_PATH}" >> $GITHUB_OUTPUT + + - name: Terminate and delete the container + if: always() + run: | + set +e + docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete' + docker rm -f ${{ runner.name }} diff --git a/.github/workflows/_build_linux_rl.yml b/.github/workflows/_build_linux_rl.yml index 38f052473e8..ac83da907cf 100644 --- a/.github/workflows/_build_linux_rl.yml +++ b/.github/workflows/_build_linux_rl.yml @@ -8,7 +8,7 @@ on: description: "Build Images" required: true type: string - default: "iregistry.baidu-int.com/tiangexiao/base-images:paddlecloud-ubuntu24.04-gcc13.3-cuda12.9-cudnn9.9-bccl1.4.1.4-nccl2.26.5-openmpi4.1.5-FleetY13.0.0-rc2" + default: "iregistry.baidu-int.com/new_rl_infra/base-images:paddlecloud-ubuntu24.04-gcc13.3-cuda12.9-cudnn9.9-bccl1.4.1.4-nccl2.26.5-openmpi4.1.5-FleetY13.0.0-v2.4.0-rc1" FASTDEPLOY_ARCHIVE_URL: description: "URL of the compressed FastDeploy code archive." required: true @@ -52,9 +52,10 @@ on: wheel_path_rl: description: "Output path of the generated wheel" value: ${{ jobs.fd-build-rl.outputs.wheel_path_rl }} + jobs: fd-build-rl: - runs-on: [self-hosted, GPU-Build] + runs-on: [self-hosted, GPU-Build-RL] timeout-minutes: 360 outputs: wheel_path_rl: ${{ steps.set_output.outputs.wheel_path_rl }} @@ -107,6 +108,7 @@ jobs: git config --global user.name "FastDeployCI" git config --global user.email "fastdeploy_ci@example.com" git log -n 3 --oneline + - name: FastDeploy Build shell: bash env: @@ -137,7 +139,8 @@ jobs: PARENT_DIR=$(dirname "$WORKSPACE") echo "PARENT_DIR:$PARENT_DIR" docker run --rm --net=host \ - --cap-add=SYS_PTRACE --privileged --shm-size=64G \ + --cap-add=SYS_PTRACE --shm-size=64G \ + --name ${runner_name} \ -v $(pwd):/workspace -w /workspace \ -v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \ -v "${CACHE_DIR}/.cache_rl:/root/.cache" \ @@ -151,6 +154,7 @@ jobs: -e "PADDLE_WHL_URL=${PADDLE_WHL_URL}" \ -e "BRANCH_REF=${BRANCH_REF}" \ -e "CCACHE_MAXSIZE=50G" \ + -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \ --gpus "\"device=${gpu_id}\"" ${docker_image} /bin/bash -c ' if [[ -n "${FD_VERSION}" ]]; then export FASTDEPLOY_VERSION=${FD_VERSION} @@ -162,6 +166,7 @@ jobs: cd FastDeploy # Avoid using pip cache to ensure the wheel is updated to the latest version + python -m pip uninstall paddlepaddle-gpu -y || true wget -q --no-proxy https://paddle-qa.bj.bcebos.com/paddle-pipeline/Paddle-RL-Compile/release/3.3/latest/paddlepaddle_gpu-3.3.0.dev-cp310-cp310-linux_x86_64.whl python -m pip install paddlepaddle_gpu* @@ -202,3 +207,10 @@ jobs: target_path_stripped="${target_path#paddle-github-action/}" WHEEL_PATH=https://paddle-github-action.bj.bcebos.com/${target_path_stripped}/${fd_wheel_name} echo "wheel_path_rl=${WHEEL_PATH}" >> $GITHUB_OUTPUT + + - name: Terminate and delete the container + if: always() + run: | + set +e + docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete' + docker rm -f ${{ runner.name }} diff --git a/.github/workflows/_gpu_4cards_case_test.yml b/.github/workflows/_gpu_4cards_case_test.yml index d7a095570fc..a393552ddbc 100644 --- a/.github/workflows/_gpu_4cards_case_test.yml +++ b/.github/workflows/_gpu_4cards_case_test.yml @@ -81,12 +81,27 @@ jobs: if ls "${REPO_NAME}"* >/dev/null 2>&1; then echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts" ls -ld "${REPO_NAME}"* - exit 1 + echo "Attempting force cleanup with find..." + find /workspace -mindepth 1 -maxdepth 1 -name "${REPO_NAME}*" -type d -exec chmod -R u+rwx {} \; -exec rm -rf {} + 2>/dev/null || true + if ls "${REPO_NAME}"* >/dev/null 2>&1; then + echo "ERROR: Force cleanup still failed" + exit 1 + else + echo "Force cleanup succeeded" + fi fi ' - wget -q --no-proxy ${fd_archive_url} - tar -xf FastDeploy.tar.gz + wget -q --no-proxy ${fd_archive_url} || { + echo "ERROR: Failed to download archive from ${fd_archive_url}" + exit 1 + } + + tar --no-same-owner -xf FastDeploy.tar.gz || { + echo "ERROR: Failed to extract archive" + exit 1 + } + rm -rf FastDeploy.tar.gz cd FastDeploy git config --global user.name "FastDeployCI" @@ -166,7 +181,10 @@ jobs: docker rm -f ${runner_name} || true fi - docker run --rm --ipc=host --net=host \ + docker run --rm --net=host \ + --shm-size=64g \ + --sysctl kernel.msgmax=1048576 \ + --sysctl kernel.msgmnb=268435456 \ --name ${runner_name} \ -v $(pwd):/workspace -w /workspace \ -v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \ @@ -183,6 +201,7 @@ jobs: -e "fd_wheel_url=${fd_wheel_url}" \ -e "BASE_REF=${BASE_REF}" \ -e "IS_PR=${IS_PR}" \ + -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \ --gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -c ' git config --global --add safe.directory /workspace/FastDeploy @@ -191,8 +210,7 @@ jobs: # Avoid using pip cache to ensure the wheel is updated to the latest version wget -q --no-proxy https://paddle-qa.bj.bcebos.com/paddle-pipeline/Release-TagBuild-Training-Linux-Gpu-Cuda12.6-Cudnn9.5-Trt10.5-Mkl-Avx-Gcc11-SelfBuiltPypiUse/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl - python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl - + python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/ pip config set global.extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple python -m pip install -r scripts/unittest_requirement.txt @@ -204,3 +222,10 @@ jobs: export CUDA_VISIBLE_DEVICES=0,1,2,3 bash scripts/run_gpu_4cards.sh ' + + - name: Terminate and delete the container + if: always() + run: | + set +e + docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete' + docker rm -f ${{ runner.name }} diff --git a/.github/workflows/_logprob_test_linux.yml b/.github/workflows/_logprob_test_linux.yml index 553a84c1006..1de1f93adac 100644 --- a/.github/workflows/_logprob_test_linux.yml +++ b/.github/workflows/_logprob_test_linux.yml @@ -78,11 +78,27 @@ jobs: if ls /workspace/* >/dev/null 2>&1; then echo "ERROR: Failed to clean /workspace/* after multiple attempts" ls -ld /workspace/* - exit 1 + echo "Attempting force cleanup with find..." + find /workspace -mindepth 1 -maxdepth 1 -type d -exec chmod -R u+rwx {} \; -exec rm -rf {} + 2>/dev/null || true + if ls /workspace/* >/dev/null 2>&1; then + echo "ERROR: Force cleanup failed. Exiting..." + exit 1 + else + echo "Force cleanup succeeded." + fi fi ' - wget -q --no-proxy ${paddletest_archive_url} - tar -xf PaddleTest.tar.gz + + wget -q --no-proxy ${paddletest_archive_url} || { + echo "ERROR: Failed to download archive from ${paddletest_archive_url}" + exit 1 + } + + tar --no-same-owner -xf PaddleTest.tar.gz || { + echo "ERROR: Failed to extract archive" + exit 1 + } + rm -rf PaddleTest.tar.gz cd PaddleTest git config --global user.name "FastDeployCI" @@ -152,7 +168,11 @@ jobs: echo "Removing stale container: ${runner_name}" docker rm -f ${runner_name} || true fi - docker run --rm --ipc=host --pid=host --net=host \ + + docker run --rm --net=host \ + --shm-size=64g \ + --sysctl kernel.msgmax=1048576 \ + --sysctl kernel.msgmnb=268435456 \ --name ${runner_name} \ -v $(pwd):/workspace \ -w /workspace \ @@ -167,10 +187,11 @@ jobs: -v "${CACHE_DIR}/.cache:/root/.cache" \ -v "${CACHE_DIR}/ConfigDir:/root/.config" \ -e TZ="Asia/Shanghai" \ + -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \ --gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc ' # Avoid using pip cache to ensure the wheel is updated to the latest version wget -q --no-proxy https://paddle-qa.bj.bcebos.com/paddle-pipeline/Release-TagBuild-Training-Linux-Gpu-Cuda12.6-Cudnn9.5-Trt10.5-Mkl-Avx-Gcc11-SelfBuiltPypiUse/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl - python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl + python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/ pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple @@ -221,3 +242,10 @@ jobs: run: | echo "logprob test failed with exit code ${{ env.LOGPROB_EXIT_CODE }}" exit 8 + + - name: Terminate and delete the container + if: always() + run: | + set +e + docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete' + docker rm -f ${{ runner.name }} diff --git a/.github/workflows/_pre_ce_test.yml b/.github/workflows/_pre_ce_test.yml index f73cb6c1ce1..91ba176bcd0 100644 --- a/.github/workflows/_pre_ce_test.yml +++ b/.github/workflows/_pre_ce_test.yml @@ -83,12 +83,27 @@ jobs: if ls "${REPO_NAME}"* >/dev/null 2>&1; then echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts" ls -ld "${REPO_NAME}"* - exit 1 + echo "Attempting force cleanup with find..." + find /workspace -mindepth 1 -maxdepth 1 -name "${REPO_NAME}*" -type d -exec chmod -R u+rwx {} \; -exec rm -rf {} + 2>/dev/null || true + if ls "${REPO_NAME}"* >/dev/null 2>&1; then + echo "ERROR: Force cleanup still failed" + exit 1 + else + echo "Force cleanup succeeded" + fi fi ' - wget -q --no-proxy ${fd_archive_url} - tar -xf FastDeploy.tar.gz + wget -q --no-proxy ${fd_archive_url} || { + echo "ERROR: Failed to download archive from ${fd_archive_url}" + exit 1 + } + + tar --no-same-owner -xf FastDeploy.tar.gz || { + echo "ERROR: Failed to extract archive" + exit 1 + } + rm -rf FastDeploy.tar.gz cd FastDeploy git config --global user.name "FastDeployCI" @@ -163,6 +178,7 @@ jobs: fi docker run --rm --net=host \ + --shm-size=64G \ --name ${runner_name} \ -v $(pwd):/workspace \ -w /workspace \ @@ -181,14 +197,20 @@ jobs: -e "FD_ZMQ_SEND_RESPONSE_SERVER_PORT=${FD_ZMQ_SEND_RESPONSE_SERVER_PORT}" \ -e "FD_ZMQ_CONTROL_CMD_SERVER_PORTS=${FD_ZMQ_CONTROL_CMD_SERVER_PORTS}" \ -e "fd_wheel_url=${fd_wheel_url}" \ + -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \ --gpus "\"device=${DEVICES}\"" ${docker_image} /bin/bash -c ' git config --global --add safe.directory /workspace/FastDeploy cd FastDeploy # Avoid using pip cache to ensure the wheel is updated to the latest version wget -q --no-proxy https://paddle-qa.bj.bcebos.com/paddle-pipeline/Release-TagBuild-Training-Linux-Gpu-Cuda12.6-Cudnn9.5-Trt10.5-Mkl-Avx-Gcc11-SelfBuiltPypiUse/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl - python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl - - pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple + python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/ python -m pip install ${fd_wheel_url} bash scripts/run_pre_ce.sh ' + + - name: Terminate and delete the container + if: always() + run: | + set +e + docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete' + docker rm -f ${{ runner.name }} diff --git a/.github/workflows/_stable_test.yml b/.github/workflows/_stable_test.yml index e834816100b..b8b544468d1 100644 --- a/.github/workflows/_stable_test.yml +++ b/.github/workflows/_stable_test.yml @@ -81,12 +81,27 @@ jobs: if ls "${REPO_NAME}"* >/dev/null 2>&1; then echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts" ls -ld "${REPO_NAME}"* - exit 1 + echo "Attempting force cleanup with find..." + find /workspace -mindepth 1 -maxdepth 1 -name "${REPO_NAME}*" -type d -exec chmod -R u+rwx {} \; -exec rm -rf {} + 2>/dev/null || true + if ls "${REPO_NAME}"* >/dev/null 2>&1; then + echo "ERROR: Force cleanup still failed" + exit 1 + else + echo "Force cleanup succeeded" + fi fi ' - wget -q --no-proxy ${fd_archive_url} - tar -xf FastDeploy.tar.gz + wget -q --no-proxy ${fd_archive_url} || { + echo "ERROR: Failed to download archive from ${fd_archive_url}" + exit 1 + } + + tar --no-same-owner -xf FastDeploy.tar.gz || { + echo "ERROR: Failed to extract archive" + exit 1 + } + rm -rf FastDeploy.tar.gz cd FastDeploy git config --global user.name "FastDeployCI" @@ -160,6 +175,7 @@ jobs: fi docker run --rm --net=host \ + --shm-size=64G \ --name ${runner_name} \ -v $(pwd):/workspace \ -w /workspace \ @@ -175,10 +191,11 @@ jobs: -v "${CACHE_DIR}/.cache:/root/.cache" \ -v "${CACHE_DIR}/ConfigDir:/root/.config" \ -e TZ="Asia/Shanghai" \ + -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \ --gpus '"device='"${DEVICES}"'"' ${docker_image} /bin/bash -xc ' # Avoid using pip cache to ensure the wheel is updated to the latest version wget -q --no-proxy https://paddle-qa.bj.bcebos.com/paddle-pipeline/Release-TagBuild-Training-Linux-Gpu-Cuda12.6-Cudnn9.5-Trt10.5-Mkl-Avx-Gcc11-SelfBuiltPypiUse/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl - python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl + python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/ pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple @@ -190,6 +207,7 @@ jobs: TEST_EXIT_CODE=0 pushd tests/ce/stable_cases bash launch_model.sh /MODELDATA + TEST_EXIT_CODE=0 bash run.sh || { TEST_EXIT_CODE=1 @@ -211,6 +229,7 @@ jobs: echo "=======================================================" } + popd echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" >> /workspace/FastDeploy/exit_code.env ' @@ -220,3 +239,10 @@ jobs: fi echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" exit ${TEST_EXIT_CODE} + + - name: Terminate and delete the container + if: always() + run: | + set +e + docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete' + docker rm -f ${{ runner.name }} diff --git a/.github/workflows/_unit_test_coverage.yml b/.github/workflows/_unit_test_coverage.yml index 6b553a036bd..4f8c411774f 100644 --- a/.github/workflows/_unit_test_coverage.yml +++ b/.github/workflows/_unit_test_coverage.yml @@ -85,12 +85,27 @@ jobs: if ls "${REPO_NAME}"* >/dev/null 2>&1; then echo "ERROR: Failed to clean ${REPO_NAME}* after multiple attempts" ls -ld "${REPO_NAME}"* - exit 1 + echo "Attempting force cleanup with find..." + find /workspace -mindepth 1 -maxdepth 1 -name "${REPO_NAME}*" -type d -exec chmod -R u+rwx {} \; -exec rm -rf {} + 2>/dev/null || true + if ls "${REPO_NAME}"* >/dev/null 2>&1; then + echo "ERROR: Force cleanup still failed" + exit 1 + else + echo "Force cleanup succeeded" + fi fi ' - wget -q --no-proxy ${fd_archive_url} - tar -xf FastDeploy.tar.gz + wget -q --no-proxy ${fd_archive_url} || { + echo "ERROR: Failed to download archive from ${fd_archive_url}" + exit 1 + } + + tar --no-same-owner -xf FastDeploy.tar.gz || { + echo "ERROR: Failed to extract archive" + exit 1 + } + rm -rf FastDeploy.tar.gz cd FastDeploy git config --global user.name "FastDeployCI" @@ -173,12 +188,16 @@ jobs: export RDMA_DEVICES=$(find /dev/infiniband/uverbs* -maxdepth 1 -not -type d | xargs -I{} echo '--device {}:{}') docker run --rm --net=host \ + --sysctl kernel.msgmax=1048576 \ + --sysctl kernel.msgmnb=268435456 \ --name ${runner_name} \ --cap-add=SYS_PTRACE --cap-add=IPC_LOCK \ - --shm-size=64G \ + --shm-size=128G \ ${RDMA_DEVICES} \ --device=/dev/infiniband/rdma_cm \ --ulimit memlock=-1:-1 \ + --ulimit nofile=65536:65536 \ + --ulimit nproc=8192:8192 \ -v $(pwd):/workspace -w /workspace \ -v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \ -v "${CACHE_DIR}/.cache:/root/.cache" \ @@ -198,6 +217,7 @@ jobs: -e "fd_wheel_url=${fd_wheel_url}" \ -e "BASE_REF=${BASE_REF}" \ -e "IS_PR=${IS_PR}" \ + -e "no_proxy=localhost,127.0.0.1,0.0.0.0,bcebos.com,.bcebos.com,bj.bcebos.com,su.bcebos.com,paddle-ci.gz.bcebos.com,apiin.im.baidu.com,baidu-int.com,.baidu.com,aliyun.com,gitee.com,pypi.tuna.tsinghua.edu.cn,.tuna.tsinghua.edu.cn" \ --gpus "\"device=${DEVICES}\"" ${docker_image} /bin/bash -c ' git config --global --add safe.directory /workspace/FastDeploy @@ -205,7 +225,7 @@ jobs: git diff origin/${BASE_REF}..HEAD --unified=0 > diff.txt # Avoid using pip cache to ensure the wheel is updated to the latest version wget -q --no-proxy https://paddle-qa.bj.bcebos.com/paddle-pipeline/Release-TagBuild-Training-Linux-Gpu-Cuda12.6-Cudnn9.5-Trt10.5-Mkl-Avx-Gcc11-SelfBuiltPypiUse/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl - python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl + python -m pip install paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl --extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/ pip config set global.extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple python -m pip install -r scripts/unittest_requirement.txt @@ -223,9 +243,6 @@ jobs: fi export COVERAGE_FILE=/workspace/FastDeploy/coveragedata/.coverage export COVERAGE_RCFILE=/workspace/FastDeploy/scripts/.coveragerc - # remove flash_mask to avoid conflicts - python -m pip uninstall -y flash_mask || true - TEST_EXIT_CODE=0 bash scripts/coverage_run.sh || TEST_EXIT_CODE=8 echo "TEST_EXIT_CODE=${TEST_EXIT_CODE}" >> exit_code.env @@ -380,6 +397,13 @@ jobs: echo "coverage passed" exit 0 + - name: Terminate and delete the container + if: always() + run: | + set +e + docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete' + docker rm -f ${{ runner.name }} + diff_coverage_report: needs: run_tests_with_coverage if: always() From acd3442184ffb6924b5a655479d81cffd55f18fe Mon Sep 17 00:00:00 2001 From: EmmonsCurse <1577972691@qq.com> Date: Sun, 12 Apr 2026 15:14:02 +0800 Subject: [PATCH 2/5] remove useless code in test --- .../workflows/cancel_pr_build_and_test.yml | 19 + .github/workflows/ci_hpu.yml | 1 - tests/ci_use/EB_Lite/test_EB_Lite_serving.py | 958 +----------------- .../EB_VL_Lite/test_EB_VL_Lite_serving.py | 485 +-------- 4 files changed, 30 insertions(+), 1433 deletions(-) create mode 100644 .github/workflows/cancel_pr_build_and_test.yml diff --git a/.github/workflows/cancel_pr_build_and_test.yml b/.github/workflows/cancel_pr_build_and_test.yml new file mode 100644 index 00000000000..bb488a529ea --- /dev/null +++ b/.github/workflows/cancel_pr_build_and_test.yml @@ -0,0 +1,19 @@ +name: PR Build and Test +on: + pull_request: + types: [closed] + branches: [develop, release/**] +permissions: read-all + +concurrency: + group: ${{ github.event.pull_request.number }}-${{ github.workflow }} + cancel-in-progress: true + +jobs: + cancel: + name: Cancel PR Build and Test for ${{ github.event.pull_request.number }} + runs-on: ubuntu-latest + steps: + - name: Cancel PR Build and Test + run: | + exit 0 diff --git a/.github/workflows/ci_hpu.yml b/.github/workflows/ci_hpu.yml index 18c9333e321..857442abf42 100644 --- a/.github/workflows/ci_hpu.yml +++ b/.github/workflows/ci_hpu.yml @@ -4,7 +4,6 @@ on: pull_request: branches: - develop - - 'release/*' workflow_dispatch: concurrency: diff --git a/tests/ci_use/EB_Lite/test_EB_Lite_serving.py b/tests/ci_use/EB_Lite/test_EB_Lite_serving.py index ec97fbf8ab7..ea70a7e270b 100644 --- a/tests/ci_use/EB_Lite/test_EB_Lite_serving.py +++ b/tests/ci_use/EB_Lite/test_EB_Lite_serving.py @@ -23,7 +23,6 @@ import openai import pytest -import requests tests_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) sys.path.insert(0, tests_dir) @@ -159,66 +158,6 @@ def consistent_payload(): } -# ========================== -# Helper function to calculate difference rate between two texts -# ========================== -def calculate_diff_rate(text1, text2): - """ - Calculate the difference rate between two strings - based on the normalized Levenshtein edit distance. - Returns a float in [0,1], where 0 means identical. - """ - if text1 == text2: - return 0.0 - - len1, len2 = len(text1), len(text2) - dp = [[0] * (len2 + 1) for _ in range(len1 + 1)] - - for i in range(len1 + 1): - for j in range(len2 + 1): - if i == 0 or j == 0: - dp[i][j] = i + j - elif text1[i - 1] == text2[j - 1]: - dp[i][j] = dp[i - 1][j - 1] - else: - dp[i][j] = 1 + min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1]) - - edit_distance = dp[len1][len2] - max_len = max(len1, len2) - return edit_distance / max_len if max_len > 0 else 0.0 - - -# ========================== -# Consistency test for repeated runs with fixed payload -# ========================== -def test_consistency_between_runs(api_url, headers, consistent_payload): - """ - Test that two runs with the same fixed input produce similar outputs. - """ - # First request - resp1 = requests.post(api_url, headers=headers, json=consistent_payload) - assert resp1.status_code == 200 - result1 = resp1.json() - content1 = result1["choices"][0]["message"]["content"] - - # Second request - resp2 = requests.post(api_url, headers=headers, json=consistent_payload) - assert resp2.status_code == 200 - result2 = resp2.json() - content2 = result2["choices"][0]["message"]["content"] - - # Calculate difference rate - diff_rate = calculate_diff_rate(content1, content2) - - # Verify that the difference rate is below the threshold - assert diff_rate < 0.05, f"Output difference too large ({diff_rate:.4%})" - - -# ========================== -# OpenAI Client chat.completions Test -# ========================== - - @pytest.fixture def openai_client(): ip = "0.0.0.0" @@ -230,896 +169,9 @@ def openai_client(): return client -# Non-streaming test -def test_non_streaming_chat(openai_client): - """ - Test non-streaming chat functionality with the local service - """ - response = openai_client.chat.completions.create( - model="default", - messages=[ - {"role": "system", "content": "You are a helpful AI assistant."}, - {"role": "user", "content": "List 3 countries and their capitals."}, - ], - temperature=1, - max_tokens=1024, - stream=False, - ) - - assert hasattr(response, "choices") - assert len(response.choices) > 0 - assert hasattr(response.choices[0], "message") - assert hasattr(response.choices[0].message, "content") - - -# Streaming test -def test_streaming_chat(openai_client, capsys): - """ - Test streaming chat functionality with the local service - """ - response = openai_client.chat.completions.create( - model="default", - messages=[ - {"role": "system", "content": "You are a helpful AI assistant."}, - {"role": "user", "content": "List 3 countries and their capitals."}, - { - "role": "assistant", - "content": "China(Beijing), France(Paris), Australia(Canberra).", - }, - {"role": "user", "content": "OK, tell more."}, - ], - temperature=1, - max_tokens=1024, - stream=True, - ) - - output = [] - for chunk in response: - if hasattr(chunk.choices[0], "delta") and hasattr(chunk.choices[0].delta, "content"): - output.append(chunk.choices[0].delta.content) - assert len(output) > 2 - - -# ========================== -# OpenAI Client completions Test -# ========================== - - -def test_non_streaming(openai_client): - """ - Test non-streaming chat functionality with the local service - """ - response = openai_client.completions.create( - model="default", - prompt="Hello, how are you?", - temperature=1, - max_tokens=1024, - stream=False, - ) - - # Assertions to check the response structure - assert hasattr(response, "choices") - assert len(response.choices) > 0 - - -def test_streaming(openai_client, capsys): - """ - Test streaming functionality with the local service - """ - response = openai_client.completions.create( - model="default", - prompt="Hello, how are you?", - temperature=1, - max_tokens=1024, - stream=True, - ) - - # Collect streaming output - output = [] - for chunk in response: - output.append(chunk.choices[0].text) - assert len(output) > 0 - - # ========================== -# OpenAI Client additional chat/completions test +# Helper functions for structured outputs testing # ========================== - - -def test_non_streaming_with_stop_str(openai_client): - """ - Test non-streaming chat functionality with the local service - """ - response = openai_client.chat.completions.create( - model="default", - messages=[{"role": "user", "content": "Hello, how are you?"}], - temperature=1, - max_tokens=5, - extra_body={"include_stop_str_in_output": True}, - stream=False, - ) - # Assertions to check the response structure - assert hasattr(response, "choices") - assert len(response.choices) > 0 - assert response.choices[0].message.content.endswith("") - - response = openai_client.chat.completions.create( - model="default", - messages=[{"role": "user", "content": "Hello, how are you?"}], - temperature=1, - max_tokens=5, - extra_body={"include_stop_str_in_output": False}, - stream=False, - ) - # Assertions to check the response structure - assert hasattr(response, "choices") - assert len(response.choices) > 0 - assert not response.choices[0].message.content.endswith("") - - response = openai_client.completions.create( - model="default", - prompt="Hello, how are you?", - temperature=1, - max_tokens=1024, - stream=False, - ) - assert not response.choices[0].text.endswith("") - - response = openai_client.completions.create( - model="default", - prompt="Hello, how are you?", - temperature=1, - max_tokens=1024, - extra_body={"include_stop_str_in_output": True}, - stream=False, - ) - assert response.choices[0].text.endswith("") - - -def test_streaming_with_stop_str(openai_client): - """ - Test non-streaming chat functionality with the local service - """ - response = openai_client.chat.completions.create( - model="default", - messages=[{"role": "user", "content": "Hello, how are you?"}], - temperature=1, - max_tokens=5, - extra_body={"min_tokens": 1, "include_stop_str_in_output": True}, - stream=True, - ) - # Assertions to check the response structure - last_token = "" - for chunk in response: - last_token = chunk.choices[0].delta.content - if last_token: - assert last_token.endswith(""), f"last_token did not end with '': {last_token!r}" - else: - print("Warning: empty output received, skipping test_streaming_with_stop_str.") - - response = openai_client.chat.completions.create( - model="default", - messages=[{"role": "user", "content": "Hello, how are you?"}], - temperature=1, - max_tokens=5, - extra_body={"include_stop_str_in_output": False}, - stream=True, - ) - # Assertions to check the response structure - last_token = "" - for chunk in response: - last_token = chunk.choices[0].delta.content - assert last_token != "" - - response_1 = openai_client.completions.create( - model="default", - prompt="Hello, how are you?", - max_tokens=10, - stream=True, - ) - last_token = "" - for chunk in response_1: - last_token = chunk.choices[0].text - assert not last_token.endswith("") - - response_1 = openai_client.completions.create( - model="default", - prompt="Hello, how are you?", - max_tokens=10, - extra_body={"include_stop_str_in_output": True}, - stream=True, - ) - last_token = "" - for chunk in response_1: - last_token = chunk.choices[0].text - assert last_token.endswith("") - - -def test_non_streaming_chat_with_return_token_ids(openai_client, capsys): - """ - Test return_token_ids option in non-streaming chat functionality with the local service - """ - # enable return_token_ids - response = openai_client.chat.completions.create( - model="default", - messages=[{"role": "user", "content": "Hello, how are you?"}], - temperature=1, - max_tokens=5, - extra_body={"return_token_ids": True}, - stream=False, - ) - assert hasattr(response, "choices") - assert len(response.choices) > 0 - assert hasattr(response.choices[0], "message") - assert hasattr(response.choices[0].message, "prompt_token_ids") - assert isinstance(response.choices[0].message.prompt_token_ids, list) - assert hasattr(response.choices[0].message, "completion_token_ids") - assert isinstance(response.choices[0].message.completion_token_ids, list) - - # disable return_token_ids - response = openai_client.chat.completions.create( - model="default", - messages=[{"role": "user", "content": "Hello, how are you?"}], - temperature=1, - max_tokens=5, - extra_body={"return_token_ids": False}, - stream=False, - ) - assert hasattr(response, "choices") - assert len(response.choices) > 0 - assert hasattr(response.choices[0], "message") - assert hasattr(response.choices[0].message, "prompt_token_ids") - assert response.choices[0].message.prompt_token_ids is None - assert hasattr(response.choices[0].message, "completion_token_ids") - assert response.choices[0].message.completion_token_ids is None - - -def test_streaming_chat_with_return_token_ids(openai_client, capsys): - """ - Test return_token_ids option in streaming chat functionality with the local service - """ - # enable return_token_ids - response = openai_client.chat.completions.create( - model="default", - messages=[{"role": "user", "content": "Hello, how are you?"}], - temperature=1, - max_tokens=5, - extra_body={"return_token_ids": True}, - stream=True, - ) - is_first_chunk = True - for chunk in response: - assert hasattr(chunk, "choices") - assert len(chunk.choices) > 0 - assert hasattr(chunk.choices[0], "delta") - assert hasattr(chunk.choices[0].delta, "prompt_token_ids") - assert hasattr(chunk.choices[0].delta, "completion_token_ids") - if is_first_chunk: - is_first_chunk = False - assert isinstance(chunk.choices[0].delta.prompt_token_ids, list) - assert chunk.choices[0].delta.completion_token_ids is None - else: - assert chunk.choices[0].delta.prompt_token_ids is None - assert isinstance(chunk.choices[0].delta.completion_token_ids, list) - - # disable return_token_ids - response = openai_client.chat.completions.create( - model="default", - messages=[{"role": "user", "content": "Hello, how are you?"}], - temperature=1, - max_tokens=5, - extra_body={"return_token_ids": False}, - stream=True, - ) - for chunk in response: - assert hasattr(chunk, "choices") - assert len(chunk.choices) > 0 - assert hasattr(chunk.choices[0], "delta") - assert hasattr(chunk.choices[0].delta, "prompt_token_ids") - assert chunk.choices[0].delta.prompt_token_ids is None - assert hasattr(chunk.choices[0].delta, "completion_token_ids") - assert chunk.choices[0].delta.completion_token_ids is None - - -def test_non_streaming_completion_with_return_token_ids(openai_client, capsys): - """ - Test return_token_ids option in non-streaming completion functionality with the local service - """ - # enable return_token_ids - response = openai_client.completions.create( - model="default", - prompt="Hello, how are you?", - temperature=1, - max_tokens=5, - extra_body={"return_token_ids": True}, - stream=False, - ) - assert hasattr(response, "choices") - assert len(response.choices) > 0 - assert hasattr(response.choices[0], "prompt_token_ids") - assert isinstance(response.choices[0].prompt_token_ids, list) - assert hasattr(response.choices[0], "completion_token_ids") - assert isinstance(response.choices[0].completion_token_ids, list) - - # disable return_token_ids - response = openai_client.completions.create( - model="default", - prompt="Hello, how are you?", - temperature=1, - max_tokens=5, - extra_body={"return_token_ids": False}, - stream=False, - ) - assert hasattr(response, "choices") - assert len(response.choices) > 0 - assert hasattr(response.choices[0], "prompt_token_ids") - assert response.choices[0].prompt_token_ids is None - assert hasattr(response.choices[0], "completion_token_ids") - assert response.choices[0].completion_token_ids is None - - -def test_streaming_completion_with_return_token_ids(openai_client, capsys): - """ - Test return_token_ids option in streaming completion functionality with the local service - """ - # enable return_token_ids - response = openai_client.completions.create( - model="default", - prompt="Hello, how are you?", - temperature=1, - max_tokens=5, - extra_body={"return_token_ids": True}, - stream=True, - ) - is_first_chunk = True - for chunk in response: - assert hasattr(chunk, "choices") - assert len(chunk.choices) > 0 - assert hasattr(chunk.choices[0], "prompt_token_ids") - assert hasattr(chunk.choices[0], "completion_token_ids") - if is_first_chunk: - is_first_chunk = False - assert isinstance(chunk.choices[0].prompt_token_ids, list) - assert chunk.choices[0].completion_token_ids is None - else: - assert chunk.choices[0].prompt_token_ids is None - assert isinstance(chunk.choices[0].completion_token_ids, list) - - # disable return_token_ids - response = openai_client.completions.create( - model="default", - prompt="Hello, how are you?", - temperature=1, - max_tokens=5, - extra_body={"return_token_ids": False}, - stream=True, - ) - for chunk in response: - assert hasattr(chunk, "choices") - assert len(chunk.choices) > 0 - assert hasattr(chunk.choices[0], "prompt_token_ids") - assert chunk.choices[0].prompt_token_ids is None - assert hasattr(chunk.choices[0], "completion_token_ids") - assert chunk.choices[0].completion_token_ids is None - - -def test_non_streaming_chat_with_prompt_token_ids(openai_client, capsys): - """ - Test prompt_token_ids option in non-streaming chat functionality with the local service - """ - response = openai_client.chat.completions.create( - model="default", - messages=[], - temperature=1, - max_tokens=5, - extra_body={"prompt_token_ids": [5209, 626, 274, 45954, 1071, 3265, 3934, 1869, 93937]}, - stream=False, - ) - assert hasattr(response, "choices") - assert len(response.choices) > 0 - assert hasattr(response, "usage") - assert hasattr(response.usage, "prompt_tokens") - assert response.usage.prompt_tokens == 9 - - -def test_streaming_chat_with_prompt_token_ids(openai_client, capsys): - """ - Test prompt_token_ids option in streaming chat functionality with the local service - """ - response = openai_client.chat.completions.create( - model="default", - messages=[], - temperature=1, - max_tokens=5, - extra_body={"prompt_token_ids": [5209, 626, 274, 45954, 1071, 3265, 3934, 1869, 93937]}, - stream=True, - stream_options={"include_usage": True}, - ) - for chunk in response: - assert hasattr(chunk, "choices") - assert hasattr(chunk, "usage") - if len(chunk.choices) > 0: - assert chunk.usage is None - else: - assert hasattr(chunk.usage, "prompt_tokens") - assert chunk.usage.prompt_tokens == 9 - - -def test_non_streaming_completion_with_prompt_token_ids(openai_client, capsys): - """ - Test prompt_token_ids option in streaming completion functionality with the local service - """ - response = openai_client.completions.create( - model="default", - prompt="", - temperature=1, - max_tokens=5, - extra_body={"prompt_token_ids": [5209, 626, 274, 45954, 1071, 3265, 3934, 1869, 93937]}, - stream=False, - ) - assert hasattr(response, "choices") - assert len(response.choices) > 0 - assert hasattr(response, "usage") - assert hasattr(response.usage, "prompt_tokens") - assert response.usage.prompt_tokens == 9 - - -def test_streaming_completion_with_prompt_token_ids(openai_client, capsys): - """ - Test prompt_token_ids option in non-streaming completion functionality with the local service - """ - response = openai_client.completions.create( - model="default", - prompt="", - temperature=1, - max_tokens=5, - extra_body={"prompt_token_ids": [5209, 626, 274, 45954, 1071, 3265, 3934, 1869, 93937]}, - stream=True, - stream_options={"include_usage": True}, - ) - for chunk in response: - assert hasattr(chunk, "choices") - assert hasattr(chunk, "usage") - if len(chunk.choices) > 0: - assert chunk.usage is None - else: - assert hasattr(chunk.usage, "prompt_tokens") - assert chunk.usage.prompt_tokens == 9 - - -def test_non_streaming_chat_with_disable_chat_template(openai_client, capsys): - """ - Test disable_chat_template option in chat functionality with the local service. - """ - enabled_response = openai_client.chat.completions.create( - model="default", - messages=[], - max_tokens=10, - temperature=0.0, - top_p=0, - extra_body={ - "disable_chat_template": True, - "prompt_token_ids": [5209, 626, 274, 45954, 1071, 3265, 3934, 1869, 93937], - }, - stream=False, - ) - assert hasattr(enabled_response, "choices") - assert len(enabled_response.choices) > 0 - - enabled_response = openai_client.chat.completions.create( - model="default", - messages=[{"role": "user", "content": "Hello, how are you?"}], - max_tokens=10, - temperature=0.0, - top_p=0, - extra_body={"disable_chat_template": False}, - stream=False, - ) - assert hasattr(enabled_response, "choices") - assert len(enabled_response.choices) > 0 - - # from fastdeploy.input.ernie4_5_tokenizer import Ernie4_5Tokenizer - # tokenizer = Ernie4_5Tokenizer.from_pretrained("PaddlePaddle/ERNIE-4.5-0.3B-Paddle", trust_remote_code=True) - # prompt = tokenizer.apply_chat_template([{"role": "user", "content": "Hello, how are you?"}], tokenize=False) - prompt = "<|begin_of_sentence|>User: Hello, how are you?\nAssistant: " - disabled_response = openai_client.chat.completions.create( - model="default", - messages=[{"role": "user", "content": prompt}], - max_tokens=10, - temperature=0, - top_p=0, - extra_body={"disable_chat_template": True}, - stream=False, - ) - assert hasattr(disabled_response, "choices") - assert len(disabled_response.choices) > 0 - assert enabled_response.choices[0].message.content == disabled_response.choices[0].message.content - - -def test_non_streaming_chat_with_min_tokens(openai_client, capsys): - """ - Test min_tokens option in non-streaming chat functionality with the local service - """ - min_tokens = 1000 - response = openai_client.chat.completions.create( - model="default", - messages=[{"role": "user", "content": "Hello, how are you?"}], - temperature=1, - max_tokens=1010, - extra_body={"min_tokens": min_tokens}, - stream=False, - ) - assert hasattr(response, "usage") - assert hasattr(response.usage, "completion_tokens") - assert response.usage.completion_tokens >= min_tokens - - -def test_non_streaming_min_max_token_equals_one(openai_client, capsys): - """ - Test chat/completion when min_tokens equals max_tokens equals 1. - Verify it returns exactly one token. - """ - # Test non-streaming chat - response = openai_client.chat.completions.create( - model="default", - messages=[{"role": "user", "content": "Hello"}], - max_tokens=1, - temperature=0.0, - stream=False, - ) - assert hasattr(response, "choices") - assert len(response.choices) > 0 - assert hasattr(response.choices[0], "message") - assert hasattr(response.choices[0].message, "content") - # Verify usage shows exactly 1 completion token - assert hasattr(response, "usage") - assert response.usage.completion_tokens == 1 - - -def test_non_streaming_chat_with_bad_words(openai_client, capsys): - """ - Test bad_words option in non-streaming chat functionality with the local service - """ - base_path = os.getenv("MODEL_PATH") - if base_path: - model_path = os.path.join(base_path, "ernie-4_5-21b-a3b-bf16-paddle") - else: - model_path = "./ernie-4_5-21b-a3b-bf16-paddle" - response_0 = openai_client.chat.completions.create( - model="default", - messages=[{"role": "user", "content": "Hello, how are you?"}], - temperature=1, - top_p=0.0, - max_tokens=20, - stream=False, - extra_body={"return_token_ids": True}, - ) - - assert hasattr(response_0, "choices") - assert len(response_0.choices) > 0 - assert hasattr(response_0.choices[0], "message") - assert hasattr(response_0.choices[0].message, "completion_token_ids") - assert isinstance(response_0.choices[0].message.completion_token_ids, list) - - from fastdeploy.input.ernie4_5_tokenizer import Ernie4_5Tokenizer - - tokenizer = Ernie4_5Tokenizer.from_pretrained(model_path, trust_remote_code=True) - output_tokens_0 = [] - output_ids_0 = [] - for ids in response_0.choices[0].message.completion_token_ids: - output_tokens_0.append(tokenizer.decode(ids)) - output_ids_0.append(ids) - - # add bad words - bad_tokens = output_tokens_0[6:10] - bad_token_ids = output_ids_0[6:10] - response_1 = openai_client.chat.completions.create( - model="default", - messages=[{"role": "user", "content": "Hello, how are you?"}], - temperature=1, - top_p=0.0, - max_tokens=20, - extra_body={"bad_words": bad_tokens, "return_token_ids": True}, - stream=False, - ) - assert hasattr(response_1, "choices") - assert len(response_1.choices) > 0 - assert hasattr(response_1.choices[0], "message") - assert hasattr(response_1.choices[0].message, "completion_token_ids") - assert isinstance(response_1.choices[0].message.completion_token_ids, list) - - response_2 = openai_client.chat.completions.create( - model="default", - messages=[{"role": "user", "content": "Hello, how are you?"}], - temperature=1, - top_p=0.0, - max_tokens=20, - extra_body={"bad_words_token_ids": bad_token_ids, "return_token_ids": True}, - stream=False, - ) - assert hasattr(response_2, "choices") - assert len(response_2.choices) > 0 - assert hasattr(response_2.choices[0], "message") - assert hasattr(response_2.choices[0].message, "completion_token_ids") - assert isinstance(response_2.choices[0].message.completion_token_ids, list) - - assert not any(ids in response_1.choices[0].message.completion_token_ids for ids in bad_token_ids) - assert not any(ids in response_2.choices[0].message.completion_token_ids for ids in bad_token_ids) - - -def test_streaming_chat_with_bad_words(openai_client, capsys): - """ - Test bad_words option in streaming chat functionality with the local service - """ - response_0 = openai_client.chat.completions.create( - model="default", - messages=[{"role": "user", "content": "Hello, how are you?"}], - temperature=1, - top_p=0.0, - max_tokens=20, - stream=True, - extra_body={"return_token_ids": True}, - ) - output_tokens_0 = [] - output_ids_0 = [] - is_first_chunk = True - for chunk in response_0: - assert hasattr(chunk, "choices") - assert len(chunk.choices) > 0 - assert hasattr(chunk.choices[0], "delta") - assert hasattr(chunk.choices[0].delta, "content") - assert hasattr(chunk.choices[0].delta, "completion_token_ids") - if is_first_chunk: - is_first_chunk = False - else: - assert isinstance(chunk.choices[0].delta.completion_token_ids, list) - output_tokens_0.append(chunk.choices[0].delta.content) - output_ids_0.extend(chunk.choices[0].delta.completion_token_ids) - - # add bad words - bad_tokens = output_tokens_0[6:10] - bad_token_ids = output_ids_0[6:10] - response_1 = openai_client.chat.completions.create( - model="default", - messages=[{"role": "user", "content": "Hello, how are you?"}], - temperature=1, - top_p=0.0, - max_tokens=20, - extra_body={"bad_words": bad_tokens, "return_token_ids": True}, - stream=True, - ) - output_tokens_1 = [] - output_ids_1 = [] - is_first_chunk = True - for chunk in response_1: - assert hasattr(chunk, "choices") - assert len(chunk.choices) > 0 - assert hasattr(chunk.choices[0], "delta") - assert hasattr(chunk.choices[0].delta, "content") - assert hasattr(chunk.choices[0].delta, "completion_token_ids") - if is_first_chunk: - is_first_chunk = False - else: - assert isinstance(chunk.choices[0].delta.completion_token_ids, list) - output_tokens_1.append(chunk.choices[0].delta.content) - output_ids_1.extend(chunk.choices[0].delta.completion_token_ids) - - response_2 = openai_client.chat.completions.create( - model="default", - messages=[{"role": "user", "content": "Hello, how are you?"}], - temperature=1, - top_p=0.0, - max_tokens=20, - extra_body={"bad_words_token_ids": bad_token_ids, "return_token_ids": True}, - stream=True, - ) - output_tokens_2 = [] - output_ids_2 = [] - is_first_chunk = True - for chunk in response_2: - assert hasattr(chunk, "choices") - assert len(chunk.choices) > 0 - assert hasattr(chunk.choices[0], "delta") - assert hasattr(chunk.choices[0].delta, "content") - assert hasattr(chunk.choices[0].delta, "completion_token_ids") - if is_first_chunk: - is_first_chunk = False - else: - assert isinstance(chunk.choices[0].delta.completion_token_ids, list) - output_tokens_2.append(chunk.choices[0].delta.content) - output_ids_2.extend(chunk.choices[0].delta.completion_token_ids) - - assert not any(ids in output_ids_1 for ids in bad_token_ids) - assert not any(ids in output_ids_2 for ids in bad_token_ids) - - -def test_non_streaming_completion_with_bad_words(openai_client, capsys): - """ - Test bad_words option in non-streaming completion functionality with the local service - """ - base_path = os.getenv("MODEL_PATH") - if base_path: - model_path = os.path.join(base_path, "ernie-4_5-21b-a3b-bf16-paddle") - else: - model_path = "./ernie-4_5-21b-a3b-bf16-paddle" - - response_0 = openai_client.completions.create( - model="default", - prompt="Hello, how are you?", - temperature=1, - top_p=0.0, - max_tokens=20, - stream=False, - extra_body={"return_token_ids": True}, - ) - assert hasattr(response_0, "choices") - assert len(response_0.choices) > 0 - assert hasattr(response_0.choices[0], "completion_token_ids") - assert isinstance(response_0.choices[0].completion_token_ids, list) - - from fastdeploy.input.ernie4_5_tokenizer import Ernie4_5Tokenizer - - tokenizer = Ernie4_5Tokenizer.from_pretrained(model_path, trust_remote_code=True) - output_tokens_0 = [] - output_ids_0 = [] - for ids in response_0.choices[0].completion_token_ids: - output_tokens_0.append(tokenizer.decode(ids)) - output_ids_0.append(ids) - - # add bad words - bad_tokens = output_tokens_0[6:10] - bad_token_ids = output_ids_0[6:10] - response_1 = openai_client.completions.create( - model="default", - prompt="Hello, how are you?", - temperature=1, - top_p=0.0, - max_tokens=20, - extra_body={"bad_words": bad_tokens, "return_token_ids": True}, - stream=False, - ) - assert hasattr(response_1, "choices") - assert len(response_1.choices) > 0 - assert hasattr(response_1.choices[0], "completion_token_ids") - assert isinstance(response_1.choices[0].completion_token_ids, list) - - response_2 = openai_client.completions.create( - model="default", - prompt="Hello, how are you?", - temperature=1, - top_p=0.0, - max_tokens=20, - extra_body={"bad_words_token_ids": bad_token_ids, "return_token_ids": True}, - stream=False, - ) - assert hasattr(response_2, "choices") - assert len(response_2.choices) > 0 - assert hasattr(response_2.choices[0], "completion_token_ids") - assert isinstance(response_2.choices[0].completion_token_ids, list) - - assert not any(ids in response_1.choices[0].completion_token_ids for ids in bad_token_ids) - assert not any(ids in response_2.choices[0].completion_token_ids for ids in bad_token_ids) - - -def test_streaming_completion_with_bad_words(openai_client, capsys): - """ - Test bad_words option in streaming completion functionality with the local service - """ - response_0 = openai_client.completions.create( - model="default", - prompt="Hello, how are you?", - temperature=1, - top_p=0.0, - max_tokens=20, - stream=True, - extra_body={"return_token_ids": True}, - ) - output_tokens_0 = [] - output_ids_0 = [] - is_first_chunk = True - for chunk in response_0: - if is_first_chunk: - is_first_chunk = False - else: - assert hasattr(chunk, "choices") - assert len(chunk.choices) > 0 - assert hasattr(chunk.choices[0], "text") - assert hasattr(chunk.choices[0], "completion_token_ids") - output_tokens_0.append(chunk.choices[0].text) - output_ids_0.extend(chunk.choices[0].completion_token_ids) - - # add bad words - bad_token_ids = output_ids_0[6:10] - bad_tokens = output_tokens_0[6:10] - response_1 = openai_client.completions.create( - model="default", - prompt="Hello, how are you?", - temperature=1, - top_p=0.0, - max_tokens=20, - extra_body={"bad_words": bad_tokens, "return_token_ids": True}, - stream=True, - ) - output_tokens_1 = [] - output_ids_1 = [] - is_first_chunk = True - for chunk in response_1: - if is_first_chunk: - is_first_chunk = False - else: - assert hasattr(chunk, "choices") - assert len(chunk.choices) > 0 - assert hasattr(chunk.choices[0], "text") - assert hasattr(chunk.choices[0], "completion_token_ids") - output_tokens_1.append(chunk.choices[0].text) - output_ids_1.extend(chunk.choices[0].completion_token_ids) - # add bad words token ids - response_2 = openai_client.completions.create( - model="default", - prompt="Hello, how are you?", - temperature=1, - top_p=0.0, - max_tokens=20, - extra_body={"bad_words_token_ids": bad_token_ids, "return_token_ids": True}, - stream=True, - ) - output_tokens_2 = [] - output_ids_2 = [] - is_first_chunk = True - for chunk in response_2: - if is_first_chunk: - is_first_chunk = False - else: - assert hasattr(chunk, "choices") - assert len(chunk.choices) > 0 - assert hasattr(chunk.choices[0], "text") - assert hasattr(chunk.choices[0], "completion_token_ids") - output_tokens_2.append(chunk.choices[0].text) - output_ids_2.extend(chunk.choices[0].completion_token_ids) - - assert not any(ids in output_ids_1 for ids in bad_token_ids) - assert not any(ids in output_ids_2 for ids in bad_token_ids) - - -def test_profile_reset_block_num(): - """测试profile reset_block_num功能,与baseline diff不能超过5%""" - log_file = "./log/config.log" - baseline = 31446 - - if not os.path.exists(log_file): - pytest.fail(f"Log file not found: {log_file}") - - with open(log_file, "r") as f: - log_lines = f.readlines() - - target_line = None - for line in log_lines: - if "Reset block num" in line: - target_line = line.strip() - break - - if target_line is None: - pytest.fail("日志中没有Reset block num信息") - - match = re.search(r"total_block_num:(\d+)", target_line) - if not match: - pytest.fail(f"Failed to extract total_block_num from line: {target_line}") - - try: - actual_value = int(match.group(1)) - except ValueError: - pytest.fail(f"Invalid number format: {match.group(1)}") - - lower_bound = baseline * (1 - 0.05) - upper_bound = baseline * (1 + 0.05) - print(f"Reset total_block_num: {actual_value}. baseline: {baseline}") - - assert lower_bound <= actual_value <= upper_bound, ( - f"Reset total_block_num {actual_value} 与 baseline {baseline} diff需要在5%以内" - f"Allowed range: [{lower_bound:.1f}, {upper_bound:.1f}]" - ) - - def streaming_chat_base(openai_client, chat_param): """ Test streaming chat base functionality with the local service @@ -1161,6 +213,9 @@ def non_streaming_chat_base(openai_client, chat_param): return response.choices[0].message.content +# ========================== +# Structured outputs tests +# ========================== @pytest.mark.skip(reason="Temporarily skip this case due to unstable execution") def test_structured_outputs_json_schema(openai_client): """ @@ -1396,8 +451,6 @@ def test_structured_outputs_regex(openai_client): "extra_body": {"guided_regex": r"^https:\/\/www\.[a-zA-Z]+\.com\/?$\n"}, } - import re - response = streaming_chat_base(openai_client, regex_param) assert re.fullmatch( r"^https:\/\/www\.[a-zA-Z]+\.com\/?$\n", response @@ -1436,6 +489,7 @@ def test_structured_outputs_grammar(openai_client): grammar_param = { "temperature": 1, + "top_p": 0.0, "max_tokens": 1024, "messages": [ { @@ -1446,8 +500,6 @@ def test_structured_outputs_grammar(openai_client): "extra_body": {"guided_grammar": html_h1_grammar}, } - import re - pattern = r'^