Fix DeepCompile for PyTorch 2.8/2.9 compatibility (#7755) #19

################################################################################
# DeepSpeed CI - AWS L40S GPU Tests (PyTorch Latest)
#
# Runs the same tests as modal-torch-latest.yml but on AWS self-hosted runners.
# Uses 4x NVIDIA L40S GPUs on g6e.12xlarge instances.
################################################################################
name: aws-torch-latest

on:
  workflow_dispatch:
  push:
    branches:
      - master
  pull_request:
    paths-ignore:
      - 'docs/**'
      - 'blogs/**'
      - 'deepspeed/inference/v2/**'
      - 'tests/unit/inference/v2/**'
    branches:
      - master
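
# Cancel superseded in-progress runs for the same workflow and ref.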
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  unit-tests:
    name: Unit Tests (V1)
    runs-on: [self-hosted, gpu-ci, gpu-l40s, l40s-4gpu, aws]
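    # CUDA 12.6 devel image with all GPUs exposed; enlarged /dev/shm (used by NCCL's shared-memory transport and PyTorch dataloader workers).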
    container:
      image: nvidia/cuda:12.6.3-devel-ubuntu22.04
      options: --gpus all --shm-size "32G"
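    # Expected versions, consumed by the --torch_ver/--cuda_ver pytest flags in the final step.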
    env:
      TORCH_VER: "2.7"
      CUDA_VER: "12.6"
    steps:
      - name: Install system dependencies
        run: |
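          # libaio-dev is needed to build DeepSpeed's async I/O (AIO) extension.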
          apt-get update && apt-get install -y git git-lfs libaio-dev python3 python3-pip
          git lfs install
          ln -sf /usr/bin/python3 /usr/bin/python
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          lfs: true
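      # cu126 wheels match the CUDA 12.6 toolkit in the container image.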
      - name: Install PyTorch
        run: |
          pip install torch==2.7.1 torchvision==0.22.1 torchaudio==2.7.1 --index-url https://download.pytorch.org/whl/cu126
      - name: Install Python dependencies
        run: |
          pip install --upgrade pip
          pip install -r requirements/requirements.txt
          pip install -r requirements/requirements-dev.txt
          pip install -r requirements/requirements-deepcompile.txt
      - name: Check environment
        run: |
          echo "=== GPU Information ==="
          nvidia-smi
          echo ""
          echo "=== CUDA Version ==="
          nvcc --version
          echo ""
          echo "=== Python/PyTorch Info ==="
          python --version
          python -c "import torch; print(f'PyTorch: {torch.__version__}')"
          python -c "import torch; print(f'CUDA available: {torch.cuda.is_available()}')"
          python -c "import torch; print(f'CUDA devices: {torch.cuda.device_count()}')"
          python -c "import torch; print(f'BF16 support: {torch.cuda.is_bf16_supported()}')"
      - name: Install DeepSpeed
        run: |
          # Initialize CUDA before install so setup.py can detect the NCCL version
          python -c "import torch; torch.cuda.init(); print(f'NCCL version: {torch.cuda.nccl.version()}')"
          # Use --no-build-isolation so setup.py can access the pre-installed PyTorch
          pip install --no-build-isolation .
          ds_report
          # Debug: check captured torch_info values
          python -c "from deepspeed.git_version_info import torch_info; print(f'torch_info: {torch_info}')"
      - name: Run unit tests
        run: |
          pytest -n 4 --forked --verbose tests/unit/v1/ --torch_ver=${{ env.TORCH_VER }} --cuda_ver=${{ env.CUDA_VER }}
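
Since the workflow declares a workflow_dispatch trigger, it can also be started manually. A hypothetical invocation with the GitHub CLI, assuming the file is committed as .github/workflows/aws-torch-latest.yml on the default branch:

  gh workflow run aws-torch-latest.yml --ref master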