Skip to content

Commit cdcf523

Browse files
committed
[docker] bump neuron to 2.19 SDK
1 parent 04445f8 commit cdcf523

File tree

7 files changed

+21
-24
lines changed

7 files changed

+21
-24
lines changed

engines/python/setup/djl_python/neuron_utils/model_loader.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
from transformers import AutoModelForCausalLM, GenerationConfig
2424
from transformers_neuronx import NeuronAutoModelForCausalLM
2525
from transformers_neuronx.config import NeuronConfig, QuantizationConfig, ContinuousBatchingConfig, GenerationConfig as NeuronGenerationConfig
26-
from djl_python.properties_manager.tnx_properties import TnXGenerationStrategy, TnXModelSchema
26+
from djl_python.properties_manager.tnx_properties import TnXGenerationStrategy, TnXModelSchema, TnXMemoryLayout
2727
from transformers_neuronx.module import save_pretrained_split
2828
from djl_python.neuron_utils.utils import NeuronXModelAdapter, get_neuronxcc_version
2929
from huggingface_hub import hf_hub_download
@@ -228,11 +228,12 @@ def get_model_specific_kwargs(self) -> dict:
228228
]
229229
elif self.config.context_length_estimate != [
230230
self.config.n_positions
231-
]:
231+
] and self.config.cache_layout == TnXMemoryLayout.LAYOUT_BSH:
232232
raise RuntimeError(
233233
f"context_length_estimate {self.config.context_length_estimate}"
234234
f" need to be the same as n_positions {self.config.n_positions}"
235-
f" You can also unset option.context_length_estimate to make continuous batching to work"
235+
f" when using alternative cache layouts,"
236+
f" you can always unset cache_layout to support multi bucketing w/ continuous batching."
236237
)
237238
return model_kwargs
238239

engines/python/setup/djl_python/transformers_neuronx_scheduler/optimum_modeling.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -346,8 +346,4 @@ def generate_tokens(
346346
if unfinished_sequences.max() == 0:
347347
break
348348

349-
# stop if we exceed the maximum length
350-
if selector.stopping_criteria(input_ids, None):
351-
break
352-
353349
return input_ids

engines/python/setup/djl_python/transformers_neuronx_scheduler/optimum_neuron_scheduler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,7 @@ def _preprocess_prefill(self,
276276

277277
prefill_slots = []
278278
for request in new_requests:
279-
slot = empty_slots.pop()
279+
slot = empty_slots.pop(0)
280280
slot.assign(request, self.model.generation_config, self.tokenizer,
281281
self.acceptor)
282282
prefill_slots.append(slot)

engines/python/setup/djl_python/transformers_neuronx_scheduler/slot.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ def accept_speculated_tokens(self, *args, **kwargs):
250250

251251
@property
252252
def stopped(self) -> bool:
253-
return self._selector.stopping_criteria(self._tokens, None)
253+
return self._selector.stopping_criteria(self._tokens.view(1,-1), None)
254254

255255
@property
256256
def tokens(self) -> torch.LongTensor:

engines/python/setup/djl_python/transformers_neuronx_scheduler/token_selector.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ def create(
161161
generation_config.pad_token_id = eos_token_id if isinstance(
162162
eos_token_id, int) else eos_token_id[0]
163163

164-
generation_mode = model._get_generation_mode(generation_config, None)
164+
generation_mode = generation_config.get_generation_mode()
165165
if generation_mode not in [
166166
GenerationMode.GREEDY_SEARCH, GenerationMode.SAMPLE
167167
]:

serving/docker/pytorch-inf2.Dockerfile

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,21 +10,21 @@
1010
# BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for
1111
# the specific language governing permissions and limitations under the License.
1212
FROM ubuntu:22.04
13-
ARG djl_version=0.28.0~SNAPSHOT
13+
ARG djl_version=0.29.0~SNAPSHOT
1414
ARG torch_version=2.1.2
1515
ARG torchvision_version=0.16.2
1616
ARG python_version=3.10
17-
ARG neuronsdk_version=2.18.2
18-
ARG torch_neuronx_version=2.1.2.2.1.0
19-
ARG transformers_neuronx_version=0.10.0.360
20-
ARG neuronx_distributed_version=0.7.0
21-
ARG neuronx_cc_version=2.13.72.0
17+
ARG neuronsdk_version=2.19.0
18+
ARG torch_neuronx_version=2.1.2.2.2.0
19+
ARG transformers_neuronx_version=0.11.351
20+
ARG neuronx_distributed_version=0.8.0
21+
ARG neuronx_cc_version=2.14.213.0
2222
ARG protobuf_version=3.19.6
23-
ARG transformers_version=4.36.2
24-
ARG accelerate_version=0.23.0
25-
ARG diffusers_version=0.26.1
23+
ARG transformers_version=4.41.1
24+
ARG accelerate_version=0.29.2
25+
ARG diffusers_version=0.28.2
2626
ARG pydantic_version=2.6.1
27-
ARG optimum_neuron_version=0.0.22
27+
ARG optimum_neuron_version=0.0.23
2828
# %2B is the url escape for the '+' character
2929
ARG vllm_wheel="https://publish.djl.ai/neuron_vllm/vllm-0.4.2%2Bnightly-py3-none-any.whl"
3030
EXPOSE 8080

serving/docker/scripts/install_inferentia2.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,12 @@ echo "deb https://apt.repos.neuron.amazonaws.com ${VERSION_CODENAME} main" >/etc
1515
curl -L https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add -
1616

1717
# https://awsdocs-neuron.readthedocs-hosted.com/en/latest/release-notes/releasecontent.html#inf2-packages
18-
apt-get update -y && apt-get install -y aws-neuronx-collectives=2.20.22.0* \
19-
aws-neuronx-runtime-lib=2.20.22.0* \
20-
aws-neuronx-tools=2.17.1.0
18+
apt-get update -y && apt-get install -y aws-neuronx-collectives=2.21.46.0* \
19+
aws-neuronx-runtime-lib=2.21.41.0* \
20+
aws-neuronx-tools=2.18.3.0
2121

2222
# TODO: Remove this hack after aws-neuronx-dkms install no longer throws an error, this bypasses the `set -ex`
2323
# exit criteria. The package is installed and functional after running, just throws an error on install.
24-
apt-get install -y aws-neuronx-dkms=2.16.7.0 || echo "Installed aws-neuronx-dkms with errors"
24+
apt-get install -y aws-neuronx-dkms=2.17.17.0 || echo "Installed aws-neuronx-dkms with errors"
2525

2626
export PATH=/opt/aws/neuron/bin:$PATH

0 commit comments

Comments (0)