You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
I need the steps for registering a custom dataset. Query:
bash scripts/NVILA-Lite/sft.sh runs/train/NVILA-Lite-8B-stage2 "alias to data"
where "alias to data" is
/home/sample_ft/M3IT/data/captioning/coco/captioning_coco_train.pkl
Error:
2024-12-30 11:13:46.201 | INFO | llava.data.builder:register_datasets:39 - Registering datasets from environment: 'default'.
2024-12-30 11:13:46.202 | INFO | llava.data.builder:register_datasets:44 - Registering datasets from: '/home/user/VILA/llava/data/registry/datasets/default.yaml'.
Traceback (most recent call last):
  File "/home/user/VILA/llava/train/train_mem.py", line 22, in <module>
    from llava.train.train import train
  File "/home/user/VILA/llava/train/train.py", line 31, in <module>
    import llava.data.dataset as dataset
  File "/home/user/VILA/llava/data/__init__.py", line 1, in <module>
    from .builder import *
  File "/home/user/VILA/llava/data/builder.py", line 54, in <module>
    DATASETS = register_datasets()
  File "/home/user/VILA/llava/data/builder.py", line 46, in register_datasets
    dataset_meta.update(meta)
TypeError: 'NoneType' object is not iterable
E1230 11:13:47.318000 128108121974592 torch/distributed/elastic/multiprocessing/api.py:826] failed (exitcode: 1) local_rank: 0 (pid: 185298) of binary: /root/anaconda3/envs/vila_adv/bin/python
Traceback (most recent call last):
  File "/root/anaconda3/envs/vila_adv/bin/torchrun", line 8, in <module>
    sys.exit(main())
  File "/root/anaconda3/envs/vila_adv/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 347, in wrapper
    return f(*args, **kwargs)
  File "/root/anaconda3/envs/vila_adv/lib/python3.10/site-packages/torch/distributed/run.py", line 879, in main
    run(args)
  File "/root/anaconda3/envs/vila_adv/lib/python3.10/site-packages/torch/distributed/run.py", line 870, in run
    elastic_launch(
  File "/root/anaconda3/envs/vila_adv/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 132, in __call__
    return launch_agent(self._config, self._entrypoint, list(args))
  File "/root/anaconda3/envs/vila_adv/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 263, in launch_agent
    raise ChildFailedError(
torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
llava/train/train_mem.py FAILED
The text was updated successfully, but these errors were encountered:
I need the steps for registering a custom dataset.
Query:
bash scripts/NVILA-Lite/sft.sh runs/train/NVILA-Lite-8B-stage2 "alias to data"
where "alias to data" is
/home/sample_ft/M3IT/data/captioning/coco/captioning_coco_train.pkl
Error:
2024-12-30 11:13:46.201 | INFO | llava.data.builder:register_datasets:39 - Registering datasets from environment: 'default'.
2024-12-30 11:13:46.202 | INFO | llava.data.builder:register_datasets:44 - Registering datasets from: '/home/user/VILA/llava/data/registry/datasets/default.yaml'.
Traceback (most recent call last):
  File "/home/user/VILA/llava/train/train_mem.py", line 22, in <module>
    from llava.train.train import train
  File "/home/user/VILA/llava/train/train.py", line 31, in <module>
    import llava.data.dataset as dataset
  File "/home/user/VILA/llava/data/__init__.py", line 1, in <module>
    from .builder import *
  File "/home/user/VILA/llava/data/builder.py", line 54, in <module>
    DATASETS = register_datasets()
  File "/home/user/VILA/llava/data/builder.py", line 46, in register_datasets
    dataset_meta.update(meta)
TypeError: 'NoneType' object is not iterable
E1230 11:13:47.318000 128108121974592 torch/distributed/elastic/multiprocessing/api.py:826] failed (exitcode: 1) local_rank: 0 (pid: 185298) of binary: /root/anaconda3/envs/vila_adv/bin/python
Traceback (most recent call last):
  File "/root/anaconda3/envs/vila_adv/bin/torchrun", line 8, in <module>
    sys.exit(main())
  File "/root/anaconda3/envs/vila_adv/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 347, in wrapper
    return f(*args, **kwargs)
  File "/root/anaconda3/envs/vila_adv/lib/python3.10/site-packages/torch/distributed/run.py", line 879, in main
    run(args)
  File "/root/anaconda3/envs/vila_adv/lib/python3.10/site-packages/torch/distributed/run.py", line 870, in run
    elastic_launch(
  File "/root/anaconda3/envs/vila_adv/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 132, in __call__
    return launch_agent(self._config, self._entrypoint, list(args))
  File "/root/anaconda3/envs/vila_adv/lib/python3.10/site-packages/torch/distributed/launcher/api.py", line 263, in launch_agent
    raise ChildFailedError(
torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
llava/train/train_mem.py FAILED
The text was updated successfully, but these errors were encountered: