From c56ce10de7f55bdeec7f25519c477267c759c5df Mon Sep 17 00:00:00 2001 From: EmmonsCurse <1577972691@qq.com> Date: Sat, 11 Apr 2026 11:25:29 +0800 Subject: [PATCH 1/2] [CI] Fix nightly test error and add container cleanup in build_rl --- .github/workflows/_accuracy_test.yml | 4 +- .github/workflows/_build_linux_rl.yml | 12 +- .github/workflows/pr_build_and_test.yml | 152 ++++++++++++++---------- 3 files changed, 101 insertions(+), 67 deletions(-) diff --git a/.github/workflows/_accuracy_test.yml b/.github/workflows/_accuracy_test.yml index c62a522e98d..87994625c58 100644 --- a/.github/workflows/_accuracy_test.yml +++ b/.github/workflows/_accuracy_test.yml @@ -161,7 +161,9 @@ jobs: fi docker run --rm --net=host \ - --shm-size=64G \ + --shm-size=64g \ + --sysctl kernel.msgmax=1048576 \ + --sysctl kernel.msgmnb=268435456 \ --name ${runner_name} \ -v $(pwd):/workspace \ -w /workspace \ diff --git a/.github/workflows/_build_linux_rl.yml b/.github/workflows/_build_linux_rl.yml index dacf374133f..1a131adb1a1 100644 --- a/.github/workflows/_build_linux_rl.yml +++ b/.github/workflows/_build_linux_rl.yml @@ -52,6 +52,7 @@ on: wheel_path_rl: description: "Output path of the generated wheel" value: ${{ jobs.fd-build-rl.outputs.wheel_path_rl }} + jobs: fd-build-rl: runs-on: [self-hosted, GPU-Build-RL] @@ -107,6 +108,7 @@ jobs: git config --global user.name "FastDeployCI" git config --global user.email "fastdeploy_ci@example.com" git log -n 3 --oneline + - name: FastDeploy Build shell: bash env: @@ -137,7 +139,8 @@ jobs: PARENT_DIR=$(dirname "$WORKSPACE") echo "PARENT_DIR:$PARENT_DIR" docker run --rm --net=host \ - --cap-add=SYS_PTRACE --privileged --shm-size=64G \ + --cap-add=SYS_PTRACE --shm-size=64G \ + --name ${runner_name} \ -v $(pwd):/workspace -w /workspace \ -v "${CACHE_DIR}/gitconfig:/etc/gitconfig:ro" \ -v "${CACHE_DIR}/.cache_rl:/root/.cache" \ @@ -203,3 +206,10 @@ jobs: target_path_stripped="${target_path#paddle-github-action/}" WHEEL_PATH=https://paddle-github-action.bj.bcebos.com/${target_path_stripped}/${fd_wheel_name} echo "wheel_path_rl=${WHEEL_PATH}" >> $GITHUB_OUTPUT + + - name: Terminate and delete the container + if: always() + run: | + set +e + docker exec -t ${{ runner.name }} /bin/bash -c 'find /workspace -mindepth 1 -delete' + docker rm -f ${{ runner.name }} diff --git a/.github/workflows/pr_build_and_test.yml b/.github/workflows/pr_build_and_test.yml index 9ffcd75ee5c..709583dc85e 100644 --- a/.github/workflows/pr_build_and_test.yml +++ b/.github/workflows/pr_build_and_test.yml @@ -29,6 +29,18 @@ jobs: secrets: github-token: ${{ secrets.GITHUB_TOKEN }} + build_sm8090_rl: + name: BUILD_SM8090_RL + needs: clone + uses: ./.github/workflows/_build_linux_rl.yml + with: + DOCKER_IMAGE: iregistry.baidu-int.com/new_rl_infra/base-images:paddlecloud-ubuntu24.04-gcc13.3-cuda12.9-cudnn9.9-bccl1.4.1.4-nccl2.26.5-openmpi4.1.5-FleetY13.0.0-v2.4.0-rc1 + FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} + COMPILE_ARCH: "80,90" + WITH_NIGHTLY_BUILD: OFF + FD_VERSION: 0.0.0 + PADDLE_WHL_URL: https://paddle-qa.bj.bcebos.com/paddle-pipeline/Develop-TagBuild-Training-Linux-Gpu-Cuda12.9-Cudnn9.9-Trt10.5-Mkl-Avx-Gcc11-SelfBuiltPypiUse/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl + resultshow: name: Use Build Output needs: build @@ -37,75 +49,85 @@ jobs: - name: Print wheel path run: | echo "The built wheel is located at: ${{ needs.build.outputs.wheel_path }}" +# +# unittest_coverage: +# name: Run FastDeploy Unit Tests and Coverage +# needs: [clone,build] +# uses: ./.github/workflows/_unit_test_coverage.yml +# with: +# DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-paddle-dev +# FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} +# FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} +# MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" +# secrets: +# github-token: ${{ secrets.GITHUB_TOKEN }} +# +# four_cards_test: +# name: Run Four Cards Tests +# needs: [clone,build] +# uses: ./.github/workflows/_gpu_4cards_case_test.yml +# with: +# DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-paddle-dev +# FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} +# FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} +# MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" +# secrets: +# github-token: ${{ secrets.GITHUB_TOKEN }} +# +# logprob_test: +# name: Run FastDeploy LogProb Tests +# needs: [build] +# uses: ./.github/workflows/_logprob_test_linux.yml +# with: +# DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-paddle-dev +# PADDLETEST_ARCHIVE_URL: "https://xly-devops.bj.bcebos.com/PaddleTest/PaddleTest.tar.gz" +# FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} +# MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" +# secrets: +# github-token: ${{ secrets.GITHUB_TOKEN }} +# +# pre_ce_test: +# name: Extracted partial CE model tasks to run in CI. +# needs: [clone,build] +# uses: ./.github/workflows/_pre_ce_test.yml +# with: +# DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate +# FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} +# FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} +# MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" +# secrets: +# github-token: ${{ secrets.GITHUB_TOKEN }} +# +# base_test: +# name: Run Base Tests +# needs: [clone,build] +# uses: ./.github/workflows/_base_test.yml +# with: +# DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-paddle-dev +# FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} +# FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} +# MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" +# secrets: +# github-token: ${{ secrets.GITHUB_TOKEN }} +# +# stable_test: +# name: Run Stable Tests +# needs: [clone,build] +# uses: ./.github/workflows/_stable_test.yml +# with: +# DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate +# FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} +# FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} +# MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" +# secrets: +# github-token: ${{ secrets.GITHUB_TOKEN }} - unittest_coverage: - name: Run FastDeploy Unit Tests and Coverage - needs: [clone,build] - uses: ./.github/workflows/_unit_test_coverage.yml - with: - DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-paddle-dev - FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} - FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} - MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - - four_cards_test: - name: Run Four Cards Tests + accuracy_test: + name: Run Accuracy Tests needs: [clone,build] - uses: ./.github/workflows/_gpu_4cards_case_test.yml - with: - DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-paddle-dev - FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} - FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} - MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - - logprob_test: - name: Run FastDeploy LogProb Tests - needs: [build] - uses: ./.github/workflows/_logprob_test_linux.yml - with: - DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-paddle-dev - PADDLETEST_ARCHIVE_URL: "https://xly-devops.bj.bcebos.com/PaddleTest/PaddleTest.tar.gz" - FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} - MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - - pre_ce_test: - name: Extracted partial CE model tasks to run in CI. - needs: [clone,build] - uses: ./.github/workflows/_pre_ce_test.yml - with: - DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate - FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} - FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} - MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - - base_test: - name: Run Base Tests - needs: [clone,build] - uses: ./.github/workflows/_base_test.yml - with: - DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-paddle-dev - FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} - FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} - MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} - - stable_test: - name: Run Stable Tests - needs: [clone,build] - uses: ./.github/workflows/_stable_test.yml + uses: ./.github/workflows/_accuracy_test.yml with: DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" - secrets: - github-token: ${{ secrets.GITHUB_TOKEN }} From 684ed924599409050102eafb44c5d16139fc0d50 Mon Sep 17 00:00:00 2001 From: EmmonsCurse <1577972691@qq.com> Date: Sat, 11 Apr 2026 11:57:55 +0800 Subject: [PATCH 2/2] remove debug code --- .github/workflows/pr_build_and_test.yml | 152 ++++++++++-------------- 1 file changed, 65 insertions(+), 87 deletions(-) diff --git a/.github/workflows/pr_build_and_test.yml b/.github/workflows/pr_build_and_test.yml index 709583dc85e..9ffcd75ee5c 100644 --- a/.github/workflows/pr_build_and_test.yml +++ b/.github/workflows/pr_build_and_test.yml @@ -29,18 +29,6 @@ jobs: secrets: github-token: ${{ secrets.GITHUB_TOKEN }} - build_sm8090_rl: - name: BUILD_SM8090_RL - needs: clone - uses: ./.github/workflows/_build_linux_rl.yml - with: - DOCKER_IMAGE: iregistry.baidu-int.com/new_rl_infra/base-images:paddlecloud-ubuntu24.04-gcc13.3-cuda12.9-cudnn9.9-bccl1.4.1.4-nccl2.26.5-openmpi4.1.5-FleetY13.0.0-v2.4.0-rc1 - FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} - COMPILE_ARCH: "80,90" - WITH_NIGHTLY_BUILD: OFF - FD_VERSION: 0.0.0 - PADDLE_WHL_URL: https://paddle-qa.bj.bcebos.com/paddle-pipeline/Develop-TagBuild-Training-Linux-Gpu-Cuda12.9-Cudnn9.9-Trt10.5-Mkl-Avx-Gcc11-SelfBuiltPypiUse/latest/paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl - resultshow: name: Use Build Output needs: build @@ -49,85 +37,75 @@ jobs: - name: Print wheel path run: | echo "The built wheel is located at: ${{ needs.build.outputs.wheel_path }}" -# -# unittest_coverage: -# name: Run FastDeploy Unit Tests and Coverage -# needs: [clone,build] -# uses: ./.github/workflows/_unit_test_coverage.yml -# with: -# DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-paddle-dev -# FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} -# FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} -# MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" -# secrets: -# github-token: ${{ secrets.GITHUB_TOKEN }} -# -# four_cards_test: -# name: Run Four Cards Tests -# needs: [clone,build] -# uses: ./.github/workflows/_gpu_4cards_case_test.yml -# with: -# DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-paddle-dev -# FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} -# FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} -# MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" -# secrets: -# github-token: ${{ secrets.GITHUB_TOKEN }} -# -# logprob_test: -# name: Run FastDeploy LogProb Tests -# needs: [build] -# uses: ./.github/workflows/_logprob_test_linux.yml -# with: -# DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-paddle-dev -# PADDLETEST_ARCHIVE_URL: "https://xly-devops.bj.bcebos.com/PaddleTest/PaddleTest.tar.gz" -# FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} -# MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" -# secrets: -# github-token: ${{ secrets.GITHUB_TOKEN }} -# -# pre_ce_test: -# name: Extracted partial CE model tasks to run in CI. -# needs: [clone,build] -# uses: ./.github/workflows/_pre_ce_test.yml -# with: -# DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate -# FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} -# FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} -# MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" -# secrets: -# github-token: ${{ secrets.GITHUB_TOKEN }} -# -# base_test: -# name: Run Base Tests -# needs: [clone,build] -# uses: ./.github/workflows/_base_test.yml -# with: -# DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-paddle-dev -# FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} -# FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} -# MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" -# secrets: -# github-token: ${{ secrets.GITHUB_TOKEN }} -# -# stable_test: -# name: Run Stable Tests -# needs: [clone,build] -# uses: ./.github/workflows/_stable_test.yml -# with: -# DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate -# FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} -# FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} -# MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" -# secrets: -# github-token: ${{ secrets.GITHUB_TOKEN }} - accuracy_test: - name: Run Accuracy Tests + unittest_coverage: + name: Run FastDeploy Unit Tests and Coverage + needs: [clone,build] + uses: ./.github/workflows/_unit_test_coverage.yml + with: + DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-paddle-dev + FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} + FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} + MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + four_cards_test: + name: Run Four Cards Tests needs: [clone,build] - uses: ./.github/workflows/_accuracy_test.yml + uses: ./.github/workflows/_gpu_4cards_case_test.yml + with: + DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-paddle-dev + FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} + FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} + MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + logprob_test: + name: Run FastDeploy LogProb Tests + needs: [build] + uses: ./.github/workflows/_logprob_test_linux.yml + with: + DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-paddle-dev + PADDLETEST_ARCHIVE_URL: "https://xly-devops.bj.bcebos.com/PaddleTest/PaddleTest.tar.gz" + FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} + MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + pre_ce_test: + name: Extracted partial CE model tasks to run in CI. + needs: [clone,build] + uses: ./.github/workflows/_pre_ce_test.yml + with: + DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate + FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} + FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} + MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + base_test: + name: Run Base Tests + needs: [clone,build] + uses: ./.github/workflows/_base_test.yml + with: + DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-paddle-dev + FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} + FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} + MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + + stable_test: + name: Run Stable Tests + needs: [clone,build] + uses: ./.github/workflows/_stable_test.yml with: DOCKER_IMAGE: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddleqa:fastdeploy-ciuse-cuda126-dailyupdate FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }} FASTDEPLOY_WHEEL_URL: ${{ needs.build.outputs.wheel_path }} MODEL_CACHE_DIR: "/ssd2/actions-runner/ModelData" + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }}