Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
21b2926
update use of CUDA images (#21062)
Borda Aug 13, 2025
ad366e8
Clarification of docs on schedulers (#21061)
SkafteNicki Aug 13, 2025
1d9449d
Adding test for legacy checkpoint created with 2.5.3 (#21066)
pl-ghost Aug 13, 2025
39d1a9e
let `_get_default_process_group_backend_for_device` support more har…
taozhiwei Aug 15, 2025
524a3e7
Fix LR not being correctly set after using `LearningRateFinder` callb…
littlebullGit Aug 15, 2025
79beae9
add GPU tests with minimal req. to be required (#21075)
Borda Aug 15, 2025
f47d409
Couple of `Trainer.fit` typos (#21080)
matsumotosan Aug 18, 2025
89db2e6
build(deps): bump actions/checkout from 4 to 5 (#21091)
dependabot[bot] Aug 18, 2025
b9e3ae3
build(deps): bump coverage from 7.10.3 to 7.10.4 in /requirements (#2…
dependabot[bot] Aug 18, 2025
520ff60
build(deps): update matplotlib requirement from <3.10.0,>3.1 to >3.1,…
dependabot[bot] Aug 18, 2025
91ca390
build(deps): update ipython[notebook] requirement from <8.19.0 to <9.…
dependabot[bot] Aug 18, 2025
9fbea49
build(deps): update onnxruntime requirement from <1.21.0,>=1.12.0 to …
dependabot[bot] Aug 18, 2025
fdd4798
build(deps): bump torch from 2.7.1 to 2.8.0 in /requirements (#21085)
dependabot[bot] Aug 18, 2025
543d5cd
build(deps): update scikit-learn requirement from <1.7.0,>0.22.1 to >…
dependabot[bot] Aug 18, 2025
743297f
debug failing tests for Fabric with `ddp_fork` on PT 2.8 -> revert #2…
Borda Aug 18, 2025
d9f918d
build(deps): bump click from 8.1.8 to 8.2.1 in /requirements (#21088)
dependabot[bot] Aug 18, 2025
e9e3aad
Fix: AsyncCheckpointIO snapshots tensors to avoid race with parameter…
littlebullGit Aug 18, 2025
e7b4945
switch to lightning_utilities.cli requirements set-oldest (#21077)
Borda Aug 19, 2025
5ac8273
bump: try `deepspeed >=0.14.1,<=0.15.0` (#21076)
Borda Aug 19, 2025
b468885
Make asyncio checkpointing work if validate/fit is called more than o…
jjh42 Aug 19, 2025
601a3ff
docs: add note on TorchMetrics integration for logging best practices…
bhimrazy Aug 21, 2025
088e1a5
Fix: TorchMetrics documentation source link (#21104)
bhimrazy Aug 21, 2025
da37ff2
fix mis-alignment column while using rich model summary in `DeepSpeed…
GdoongMathew Aug 21, 2025
3b0eaa8
ci: pin also test requirements for minimal setup (#21102)
Borda Aug 21, 2025
c110661
Fix rich progress bar crashing on empty val dataloader sanity checkin…
SkafteNicki Aug 22, 2025
d640c89
docs: fix `log_metrics` step description (#21109)
clumsy Aug 23, 2025
20f0cc8
build(deps): update onnxscript requirement from <0.4.0,>=0.2.2 to >=0…
dependabot[bot] Aug 25, 2025
94a0e1f
build(deps): bump coverage from 7.10.4 to 7.10.5 in /requirements (#2…
dependabot[bot] Aug 25, 2025
9a56df8
build(deps): update myst-parser requirement from <4.0.0,>=0.18.1 to >…
dependabot[bot] Aug 25, 2025
ec253dc
docs: replace broken link to Torch 2.x (#21121)
Borda Aug 27, 2025
6d1d601
Update versioning governance document (#21107)
speediedan Aug 27, 2025
faaa3f5
Update throughput table to include H200 stats (#21119)
SkafteNicki Aug 27, 2025
fae5ca7
releasing `2.5.4`
Borda Aug 29, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 0 additions & 41 deletions .actions/assistant.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,47 +341,6 @@ def create_mirror_package(source_dir: str, package_mapping: dict[str, str]) -> N


class AssistantCLI:
@staticmethod
def requirements_prune_pkgs(packages: Sequence[str], req_files: Sequence[str] = REQUIREMENT_FILES_ALL) -> None:
"""Remove some packages from given requirement files."""
if isinstance(req_files, str):
req_files = [req_files]
for req in req_files:
AssistantCLI._prune_packages(req, packages)

@staticmethod
def _prune_packages(req_file: str, packages: Sequence[str]) -> None:
"""Remove some packages from given requirement files."""
path = Path(req_file)
assert path.exists()
text = path.read_text()
lines = text.splitlines()
final = []
for line in lines:
ln_ = line.strip()
if not ln_ or ln_.startswith("#"):
final.append(line)
continue
req = list(_parse_requirements([ln_]))[0]
if req.name not in packages:
final.append(line)
print(final)
path.write_text("\n".join(final) + "\n")

@staticmethod
def _replace_min(fname: str) -> None:
with open(fname, encoding="utf-8") as fopen:
req = fopen.read().replace(">=", "==")
with open(fname, "w", encoding="utf-8") as fwrite:
fwrite.write(req)

@staticmethod
def replace_oldest_ver(requirement_fnames: Sequence[str] = REQUIREMENT_FILES_ALL) -> None:
"""Replace the min package version by fixed one."""
for fname in requirement_fnames:
print(fname)
AssistantCLI._replace_min(fname)

@staticmethod
def copy_replace_imports(
source_dir: str,
Expand Down
2 changes: 1 addition & 1 deletion .azure/gpu-benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:
variables:
DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' )
container:
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.5-cuda12.1.0"
image: "pytorchlightning/pytorch_lightning:base-cuda12.6.3-py3.12-torch2.8"
options: "--gpus=all --shm-size=32g"
strategy:
matrix:
Expand Down
18 changes: 13 additions & 5 deletions .azure/gpu-tests-fabric.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,16 +57,16 @@ jobs:
strategy:
matrix:
"Fabric | oldest":
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.1"
image: "pytorchlightning/pytorch_lightning:base-cuda12.1.1-py3.10-torch2.1"
PACKAGE_NAME: "fabric"
"Fabric | latest":
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.8-cuda12.6.3"
image: "pytorchlightning/pytorch_lightning:base-cuda12.6.3-py3.12-torch2.8"
PACKAGE_NAME: "fabric"
#"Fabric | future":
# image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.7-cuda12.6.3"
# image: "pytorchlightning/pytorch_lightning:base-cuda12.6.3-py3.12-torch2.7"
# PACKAGE_NAME: "fabric"
"Lightning | latest":
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.8-cuda12.6.3"
image: "pytorchlightning/pytorch_lightning:base-cuda12.6.3-py3.12-torch2.8"
PACKAGE_NAME: "lightning"
workspace:
clean: all
Expand Down Expand Up @@ -99,8 +99,16 @@ jobs:
displayName: "Image info & NVIDIA"

- bash: |
python .actions/assistant.py replace_oldest_ver
set -ex
pip install "cython<3.0" wheel # for compatibility
pip install -U "lightning-utilities[cli]"
cd requirements/fabric
# replace range by pin minimal requirements
python -m lightning_utilities.cli requirements set-oldest --req_files "['base.txt', 'strategies.txt']"
# drop deepspeed since it is not supported by our minimal Torch requirements
python -m lightning_utilities.cli requirements prune-pkgs --packages deepspeed --req_files strategies.txt
# uninstall deepspeed since some older docker images have it pre-installed
pip uninstall -y deepspeed
condition: contains(variables['Agent.JobName'], 'oldest')
displayName: "setting oldest dependencies"

Expand Down
18 changes: 13 additions & 5 deletions .azure/gpu-tests-pytorch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,16 +50,16 @@ jobs:
strategy:
matrix:
"PyTorch | oldest":
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.1"
image: "pytorchlightning/pytorch_lightning:base-cuda12.1.1-py3.10-torch2.1"
PACKAGE_NAME: "pytorch"
"PyTorch | latest":
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.8-cuda12.6.3"
image: "pytorchlightning/pytorch_lightning:base-cuda12.6.3-py3.12-torch2.8"
PACKAGE_NAME: "pytorch"
#"PyTorch | future":
# image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.7-cuda12.6.3"
# image: "pytorchlightning/pytorch_lightning:base-cuda12.6.3-py3.12-torch2.7"
# PACKAGE_NAME: "pytorch"
"Lightning | latest":
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.8-cuda12.6.3"
image: "pytorchlightning/pytorch_lightning:base-cuda12.6.3-py3.12-torch2.8"
PACKAGE_NAME: "lightning"
pool: lit-rtx-3090
variables:
Expand Down Expand Up @@ -103,8 +103,16 @@ jobs:
displayName: "Image info & NVIDIA"

- bash: |
python .actions/assistant.py replace_oldest_ver
set -ex
pip install "cython<3.0" wheel # for compatibility
pip install -U "lightning-utilities[cli]"
cd requirements/pytorch
# replace range by pin minimal requirements
python -m lightning_utilities.cli requirements set-oldest --req_files "['base.txt', 'extra.txt', 'strategies.txt', 'examples.txt']"
# drop deepspeed since it is not supported by our minimal Torch requirements
python -m lightning_utilities.cli requirements prune-pkgs --packages deepspeed --req_files strategies.txt
# uninstall deepspeed since some older docker images have it pre-installed
pip uninstall -y deepspeed
condition: contains(variables['Agent.JobName'], 'oldest')
displayName: "setting oldest dependencies"

Expand Down
2 changes: 2 additions & 0 deletions .github/checkgroup.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ subprojects:
- "!**/*.md"
checks:
- "pytorch-lightning (GPUs) (testing Lightning | latest)"
- "pytorch-lightning (GPUs) (testing PyTorch | oldest)"
- "pytorch-lightning (GPUs) (testing PyTorch | latest)"

- id: "pytorch_lightning: Benchmarks"
Expand Down Expand Up @@ -174,6 +175,7 @@ subprojects:
- "!*.md"
- "!**/*.md"
checks:
- "lightning-fabric (GPUs) (testing Fabric | oldest)"
- "lightning-fabric (GPUs) (testing Fabric | latest)"
- "lightning-fabric (GPUs) (testing Lightning | latest)"

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/_build-packages.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
matrix:
pkg-name: ${{ fromJSON(inputs.pkg-names) }}
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5
- uses: actions/setup-python@v5
with:
python-version: "3.x"
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/_legacy-checkpoints.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ jobs:
outputs:
pl-version: ${{ steps.decide-version.outputs.pl-version }}
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5

- uses: actions/setup-python@v5
with:
Expand Down Expand Up @@ -135,7 +135,7 @@ jobs:
env:
PL_VERSION: ${{ needs.create-legacy-ckpts.outputs.pl-version }}
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5
with:
ref: master

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci-pkg-install.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:
pkg-name: ["fabric", "pytorch", "lightning", "notset"]
python-version: ["3.9", "3.11"]
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
Expand Down
9 changes: 6 additions & 3 deletions .github/workflows/ci-tests-fabric.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ jobs:
# TODO: Remove this - Enable running MPS tests on this platform
DISABLE_MPS: ${{ matrix.os == 'macOS-14' && '1' || '0' }}
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
Expand All @@ -94,7 +94,9 @@ jobs:
- name: Set min. dependencies
if: ${{ matrix.requires == 'oldest' }}
run: |
python .actions/assistant.py replace_oldest_ver
cd requirements/fabric
pip install -U "lightning-utilities[cli]"
python -m lightning_utilities.cli requirements set-oldest --req_files "['base.txt', 'strategies.txt', 'test.txt']"
pip install "cython<3.0" wheel
pip install "pyyaml==5.4" --no-build-isolation

Expand Down Expand Up @@ -140,7 +142,8 @@ jobs:
run: |
pip install -e ".[${EXTRA_PREFIX}test,${EXTRA_PREFIX}strategies]" \
-U --upgrade-strategy=eager --prefer-binary \
--extra-index-url="${TORCH_URL}" --find-links="${PYPI_CACHE_DIR}"
--extra-index-url="${TORCH_URL}" \
--find-links="${PYPI_CACHE_DIR}"
pip list
- name: Dump handy wheels
if: github.event_name == 'push' && github.ref == 'refs/heads/master'
Expand Down
9 changes: 6 additions & 3 deletions .github/workflows/ci-tests-pytorch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ jobs:
# TODO: Remove this - Enable running MPS tests on this platform
DISABLE_MPS: ${{ matrix.os == 'macOS-14' && '1' || '0' }}
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
Expand All @@ -99,7 +99,9 @@ jobs:
- name: Set min. dependencies
if: ${{ matrix.requires == 'oldest' }}
run: |
python .actions/assistant.py replace_oldest_ver
cd requirements/pytorch
pip install -U "lightning-utilities[cli]"
python -m lightning_utilities.cli requirements set-oldest --req_files "['base.txt', 'extra.txt', 'strategies.txt', 'examples.txt', 'test.txt']"
pip install "cython<3.0" wheel
pip install "pyyaml==5.4" --no-build-isolation

Expand Down Expand Up @@ -139,7 +141,8 @@ jobs:
pip install ".[${EXTRA_PREFIX}extra,${EXTRA_PREFIX}test,${EXTRA_PREFIX}strategies]" \
-U --upgrade-strategy=eager --prefer-binary \
-r requirements/_integrations/accelerators.txt \
--extra-index-url="${TORCH_URL}" --find-links="${PYPI_CACHE_DIR}"
--extra-index-url="${TORCH_URL}" \
--find-links="${PYPI_CACHE_DIR}"
pip list
- name: Drop LAI from extensions
if: ${{ matrix.pkg-name != 'lightning' }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/cleanup-caches.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Check out code
uses: actions/checkout@v4
uses: actions/checkout@v5

- name: Cleanup
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/code-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ jobs:
mypy:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5
- uses: actions/setup-python@v5
with:
python-version: "3.11"
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/docker-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ jobs:
- { python_version: "3.12", pytorch_version: "2.7", cuda_version: "12.6.3" }
- { python_version: "3.12", pytorch_version: "2.8", cuda_version: "12.6.3", latest: "true" }
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5
with:
submodules: true
- uses: docker/setup-buildx-action@v3
Expand Down Expand Up @@ -112,7 +112,7 @@ jobs:
- { python_version: "3.12", pytorch_version: "2.7.1", cuda_version: "12.6.3" }
- { python_version: "3.12", pytorch_version: "2.8.0", cuda_version: "12.6.3" }
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5
- uses: docker/setup-buildx-action@v3
- uses: docker/login-action@v3
if: env.PUSH_NIGHTLY == 'true' && github.repository_owner == 'Lightning-AI'
Expand Down Expand Up @@ -154,7 +154,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
uses: actions/checkout@v5
- name: Build Conda Docker
# publish master/release
continue-on-error: true
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/docs-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ jobs:
PIN_RELEASE_VERSIONS: 1
ARTIFACT_DAYS: 0
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5
with:
ref: ${{ inputs.checkout }}
# only Pytorch has/uses notebooks
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/docs-tutorials.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
docs-update:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5
with:
submodules: true
fetch-depth: 0
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/labeler-issue.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
contents: read

steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5

- name: Parse issue form
uses: stefanbuck/github-issue-parser@v3
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/release-nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:
env:
PKG_NAME: "lightning"
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5
- uses: actions/setup-python@v5
with:
python-version: 3.9
Expand Down Expand Up @@ -53,7 +53,7 @@ jobs:
env:
PKG_NAME: "lightning"
steps:
- uses: actions/checkout@v4 # needed to use local composite action
- uses: actions/checkout@v5 # needed to use local composite action
- uses: actions/download-artifact@v5
with:
name: nightly-packages-${{ github.sha }}
Expand Down
10 changes: 5 additions & 5 deletions .github/workflows/release-pkg.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ jobs:
needs: build-packages
if: github.event_name == 'release'
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5
- uses: actions/download-artifact@v5
with:
name: dist-packages-${{ github.sha }}
Expand All @@ -54,7 +54,7 @@ jobs:
outputs:
tag: ${{ steps.lai-package.outputs.version }}
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5
- uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VER }}
Expand All @@ -74,7 +74,7 @@ jobs:
TAG: ${{ needs.release-version.outputs.tag }}
BRANCH_NAME: "trigger/lightning-${{ needs.release-version.outputs.tag }}"
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v5
with:
repository: gridai/base-images
token: ${{ secrets.PAT_GHOST }}
Expand Down Expand Up @@ -139,7 +139,7 @@ jobs:
matrix:
name: ["FABRIC", "PYTORCH", "LIGHTNING"]
steps:
- uses: actions/checkout@v4 # needed for local action below
- uses: actions/checkout@v5 # needed for local action below
- uses: actions/download-artifact@v5
with:
name: dist-packages-${{ github.sha }}
Expand All @@ -164,7 +164,7 @@ jobs:
matrix:
name: ["FABRIC", "PYTORCH", "LIGHTNING"]
steps:
- uses: actions/checkout@v4 # needed for local action below
- uses: actions/checkout@v5 # needed for local action below
- uses: actions/download-artifact@v5
with:
name: dist-packages-${{ github.sha }}
Expand Down
Loading
Loading