[Question]: ERNIE-ViL 2.0微调报错

### 请提出你的问题

按照示例的方式进行微调时报错。
环境：Ubuntu 20.04，Python 3.9，已安装的依赖包如下：
aiohttp                  3.9.5
aiosignal                1.3.1
aistudio-sdk             0.2.4
annotated-types          0.7.0
anyio                    3.7.1
asgiref                  3.8.1
astor                    0.8.1
async-timeout            4.0.3
attrs                    23.2.0
Babel                    2.15.0
bce-python-sdk           0.9.17
blinker                  1.8.2
cachetools               6.0.0
certifi                  2024.8.30
chardet                  5.2.0
charset-normalizer       3.3.2
click                    8.1.7
colorama                 0.4.6
coloredlogs              15.0.1
colorlog                 6.8.2
contourpy                1.2.1
cssselect                1.3.0
cssutils                 2.11.1
cycler                   0.12.1
dataclasses-json         0.6.7
datasets                 2.20.0
decorator                5.1.1
dill                     0.3.4
distro                   1.9.0
dnspython                2.6.1
easydict                 1.13
einops                   0.8.1
email_validator          2.2.0
et_xmlfile               2.0.0
exceptiongroup           1.2.1
faiss-cpu                1.8.0.post1
fastapi                  0.111.0
fastapi-cli              0.0.4
fastdeploy-gpu-python    1.0.7
fastdeploy-tools         0.0.5
filelock                 3.15.4
Flask                    3.0.3
flask-babel              4.0.0
flatbuffers              24.3.25
fonttools                4.53.0
frozenlist               1.4.1
fsspec                   2024.5.0
ftfy                     6.3.1
future                   1.0.0
gast                     0.3.3
gevent                   24.2.1
GPUtil                   1.4.0
greenlet                 3.0.3
gunicorn                 22.0.0
h11                      0.14.0
hnswlib                  0.8.0
httpcore                 1.0.5
httptools                0.3.0
httpx                    0.27.0
huggingface-hub          0.23.4
humanfriendly            10.0
idna                     3.7
imagesize                1.4.1
importlib_metadata       8.0.0
importlib_resources      6.4.0
itsdangerous             2.2.0
jieba                    0.42.1
Jinja2                   3.1.4
jiter                    0.10.0
joblib                   1.4.2
jsonlines                4.0.0
jsonpatch                1.33
jsonpointer              3.0.0
kiwisolver               1.4.5
langchain                0.2.17
langchain-community      0.2.17
langchain-core           0.2.43
langchain-openai         0.1.25
langchain-text-splitters 0.2.4
langsmith                0.1.147
lxml                     5.4.0
markdown-it-py           3.0.0
MarkupSafe               2.1.5
marshmallow              3.26.1
matplotlib               3.9.0
mdurl                    0.1.2
more-itertools           10.7.0
mpmath                   1.3.0
multidict                6.0.5
multiprocess             0.70.12.2
mypy_extensions          1.1.0
numpy                    1.24.4
onnx                     1.16.1
onnxruntime              1.18.1
openai                   1.63.2
opencv-contrib-python    4.10.0.84
opencv-python            4.6.0.66
openpyxl                 3.1.5
opt-einsum               3.3.0
orjson                   3.10.6
packaging                24.1
paddle2onnx              1.3.1
paddleclas               2.6.0
paddlefsl                1.1.0
paddlenlp                2.8.1
paddleocr                3.0.0
paddlepaddle-gpu         2.6.1
paddleslim               2.6.0
paddlex                  3.0.0
pandas                   1.5.3
pillow                   10.4.0
pip                      24.2
premailer                3.10.0
prettytable              3.10.0
protobuf                 5.27.2
psutil                   6.0.0
py-cpuinfo               9.0.0
pyarrow                  16.1.0
pyarrow-hotfix           0.6
pybind11                 2.13.1
pyclipper                1.3.0.post6
pycryptodome             3.20.0
pydantic                 2.8.0
pydantic_core            2.20.0
Pygments                 2.18.0
pyparsing                3.1.2
pypdfium2                4.30.1
python-dateutil          2.9.0.post0
python-dotenv            1.0.1
python-multipart         0.0.9
pytz                     2024.1
PyYAML                   6.0.2
pyzmq                    26.2.0
rarfile                  4.2
regex                    2024.5.15
requests                 2.32.3
requests-toolbelt        1.0.0
rich                     13.7.1
ruamel.yaml              0.18.11
ruamel.yaml.clib         0.2.12
safetensors              0.4.3
scikit-learn             1.5.0
scipy                    1.13.1
sentencepiece            0.2.0
seqeval                  1.2.2
setuptools               75.1.0
shapely                  2.0.7
shellingham              1.5.4
six                      1.16.0
sniffio                  1.3.1
SQLAlchemy               2.0.41
starlette                0.37.2
swig                     4.3.0
sympy                    1.12.1
tenacity                 8.5.0
threadpoolctl            3.5.0
tiktoken                 0.9.0
timeout-decorator        0.5.0
tokenizers               0.19.1
tool-helpers             0.1.1
tqdm                     4.66.4
typer                    0.12.3
typing_extensions        4.12.2
typing-inspect           0.9.0
tzdata                   2024.1
ujson                    5.10.0
urllib3                  2.2.2
uvicorn                  0.16.0
uvloop                   0.19.0
visualdl                 2.5.3
watchfiles               0.22.0
watchgod                 0.8.2
wcwidth                  0.2.13
websockets               12.0
Werkzeug                 3.0.3
wheel                    0.44.0
xxhash                   3.4.1
yarl                     1.9.4
zipp                     3.19.2
zope.event               5.0
zope.interface           6.4.post2

```bash
DATAPATH=..//traindata

# data options
train_data=${DATAPATH}/arrow
val_data=${DATAPATH}/arrow
python run_finetune.py \
    --device gpu \
    --output_dir ./output/output_0731 \
    --model_name_or_path ../baseModel/ernie_vil-2.0-base-zh \
    --train_data=${train_data} \
    --val_data=${val_data} \
    --do_train \
    --learning_rate 5e-5 \
    --warmup_steps 100 \
    --logging_steps 50 \
    --per_device_train_batch_size 128 \
    --dataloader_num_workers 8 \
    --save_steps 50 \
    --num_train_epochs 5 \
    --weight_decay 0.001 \
    --save_total_limit 50 \
    --seed 1 \
    --label_names index \
    --data_root $DATAPATH \
    --lr_scheduler_type cosine \
    --recompute
```


报错信息如下：
```
Traceback (most recent call last):
  File "/appslog/homezwj/pycharmProject/clsLab/ERNIE_VIL/trainScript/run_finetune.py", line 117, in <module>
    do_train()
  File "/appslog/homezwj/pycharmProject/clsLab/ERNIE_VIL/trainScript/run_finetune.py", line 107, in do_train
    train_result = trainer.train(resume_from_checkpoint=checkpoint)
  File "/appslog/miniconda3/envs/pEnv39/lib/python3.9/site-packages/paddlenlp/trainer/trainer.py", line 668, in train
    train_dataloader = self.get_train_dataloader()
  File "/appslog/miniconda3/envs/pEnv39/lib/python3.9/site-packages/paddlenlp/trainer/trainer.py", line 1387, in get_train_dataloader
    train_sampler = self._get_train_sampler()
  File "/appslog/miniconda3/envs/pEnv39/lib/python3.9/site-packages/paddlenlp/trainer/trainer.py", line 1206, in _get_train_sampler
    return paddle.io.BatchSampler(
  File "/appslog/miniconda3/envs/pEnv39/lib/python3.9/site-packages/paddle/io/dataloader/batch_sampler.py", line 128, in __init__
    self.sampler = RandomSampler(dataset)
  File "/appslog/miniconda3/envs/pEnv39/lib/python3.9/site-packages/paddle/io/dataloader/sampler.py", line 223, in __init__
    raise ValueError(
ValueError: num_samples should be a positive integer, but got num_samples=0
```


微调数据共 16 类图片，共计 3600 多个文件（每个文件一行）。请问这个报错该如何解决？



Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[Question]: ERNIE-ViL 2.0微调报错 #10903

请提出你的问题

data options

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

[Question]: ERNIE-ViL 2.0微调报错 #10903

Description

请提出你的问题

data options

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions