Skip to content

Commit 6fa9db8

Browse files
committed
Add the pytorch-mnist with GPU support container image
1 parent 8dcc7d3 commit 6fa9db8

File tree

15 files changed

+61
-25
lines changed

15 files changed

+61
-25
lines changed

.github/workflows/publish-trial-images.yaml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,10 @@ jobs:
3131
include:
3232
- trial-name: mxnet-mnist
3333
dockerfile: examples/v1beta1/trial-images/mxnet-mnist/Dockerfile
34-
- trial-name: pytorch-mnist
35-
dockerfile: examples/v1beta1/trial-images/pytorch-mnist/Dockerfile
34+
- trial-name: pytorch-mnist-cpu
35+
dockerfile: examples/v1beta1/trial-images/pytorch-mnist/Dockerfile.cpu
36+
- trial-name: pytorch-mnist-gpu
37+
dockerfile: examples/v1beta1/trial-images/pytorch-mnist/Dockerfile.gpu
3638
- trial-name: tf-mnist-with-summaries
3739
dockerfile: examples/v1beta1/trial-images/tf-mnist-with-summaries/Dockerfile
3840
- trial-name: enas-cnn-cifar10-gpu

.github/workflows/pytorch-mnist-e2e-test.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ jobs:
2424
experiments: ${{ matrix.experiments }}
2525
training-operator: true
2626
# Comma Delimited
27-
trial-images: pytorch-mnist
27+
trial-images: pytorch-mnist-cpu
2828

2929
strategy:
3030
fail-fast: false

docs/images-location.md

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -273,13 +273,24 @@ The following table shows images for training containers which are used in the
273273
</tr>
274274
<tr align="center">
275275
<td>
276-
<code>docker.io/kubeflowkatib/pytorch-mnist</code>
276+
<code>docker.io/kubeflowkatib/pytorch-mnist-cpu</code>
277277
</td>
278278
<td>
279-
PyTorch MNIST example with printing metrics to the file or StdOut
279+
PyTorch MNIST example with CPU support that prints metrics to a file or StdOut
280280
</td>
281281
<td>
282-
<a href="https://github.com/kubeflow/katib/blob/master/examples/v1beta1/trial-images/pytorch-mnist/Dockerfile">Dockerfile</a>
282+
<a href="https://github.com/kubeflow/katib/blob/master/examples/v1beta1/trial-images/pytorch-mnist/Dockerfile.cpu">Dockerfile</a>
283+
</td>
284+
</tr>
285+
<tr align="center">
286+
<td>
287+
<code>docker.io/kubeflowkatib/pytorch-mnist-gpu</code>
288+
</td>
289+
<td>
290+
PyTorch MNIST example with GPU support that prints metrics to a file or StdOut
291+
</td>
292+
<td>
293+
<a href="https://github.com/kubeflow/katib/blob/master/examples/v1beta1/trial-images/pytorch-mnist/Dockerfile.gpu">Dockerfile</a>
283294
</td>
284295
</tr>
285296
<tr align="center">

examples/v1beta1/early-stopping/median-stop-with-json-format.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ spec:
6262
spec:
6363
containers:
6464
- name: training-container
65-
image: docker.io/kubeflowkatib/pytorch-mnist:v0.14.0-rc.0
65+
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.14.0-rc.0
6666
command:
6767
- "python3"
6868
- "/opt/pytorch-mnist/mnist.py"

examples/v1beta1/kubeflow-training-operator/pytorchjob-mnist.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ spec:
4646
spec:
4747
containers:
4848
- name: pytorch
49-
image: docker.io/kubeflowkatib/pytorch-mnist:v0.14.0-rc.0
49+
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.14.0-rc.0
5050
command:
5151
- "python3"
5252
- "/opt/pytorch-mnist/mnist.py"
@@ -61,7 +61,7 @@ spec:
6161
spec:
6262
containers:
6363
- name: pytorch
64-
image: docker.io/kubeflowkatib/pytorch-mnist:v0.14.0-rc.0
64+
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.14.0-rc.0
6565
command:
6666
- "python3"
6767
- "/opt/pytorch-mnist/mnist.py"

examples/v1beta1/metrics-collector/custom-metrics-collector.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ spec:
6767
spec:
6868
containers:
6969
- name: training-container
70-
image: docker.io/kubeflowkatib/pytorch-mnist:v0.14.0-rc.0
70+
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.14.0-rc.0
7171
command:
7272
- "python3"
7373
- "/opt/pytorch-mnist/mnist.py"

examples/v1beta1/metrics-collector/file-metrics-collector-with-json-format.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ spec:
5252
spec:
5353
containers:
5454
- name: training-container
55-
image: docker.io/kubeflowkatib/pytorch-mnist:v0.14.0-rc.0
55+
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.14.0-rc.0
5656
command:
5757
- "python3"
5858
- "/opt/pytorch-mnist/mnist.py"

examples/v1beta1/metrics-collector/file-metrics-collector.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ spec:
5454
spec:
5555
containers:
5656
- name: training-container
57-
image: docker.io/kubeflowkatib/pytorch-mnist:v0.14.0-rc.0
57+
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.14.0-rc.0
5858
command:
5959
- "python3"
6060
- "/opt/pytorch-mnist/mnist.py"
File renamed without changes.
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Trial image for the PyTorch MNIST example, built on the PyTorch 1.11.0
# runtime base image with CUDA 11.3 and cuDNN 8.
FROM pytorch/pytorch:1.11.0-cuda11.3-cudnn8-runtime

# The source path is relative to the build context.
# NOTE(review): presumably the image is built from the repository root — confirm
# against the publishing workflow.
# COPY rather than ADD: this is a plain local directory, so ADD's archive
# extraction and remote-URL handling are neither needed nor wanted.
COPY examples/v1beta1/trial-images/pytorch-mnist /opt/pytorch-mnist
WORKDIR /opt/pytorch-mnist

# requirements.txt is resolved relative to WORKDIR; --no-cache-dir keeps pip's
# download cache out of the image layer.
RUN pip install --no-cache-dir -r requirements.txt

# Add folder for the logs, then hand both the application directory and the log
# folder to group 0 with group read/write (and execute where already
# executable or a directory).
# NOTE(review): this looks like the usual setup for running under an arbitrary
# non-root UID that belongs to group 0 — confirm.
RUN mkdir /katib \
  && chgrp -R 0 /opt/pytorch-mnist /katib \
  && chmod -R g+rwX /opt/pytorch-mnist /katib

# Exec form so python3 runs as PID 1 and receives signals directly; arguments
# passed at run time are appended to this command.
ENTRYPOINT ["python3", "/opt/pytorch-mnist/mnist.py"]

0 commit comments

Comments
 (0)