| ### Best practice to generate data list |
| Users can use MONAI to generate the 5-fold data lists. Full examples can be found in the VISTA3D open-source [codebase](https://github.com/Project-MONAI/VISTA/blob/main/vista3d/data/make_datalists.py). |
| ```python |
| from monai.data.utils import partition_dataset |
| from monai.bundle import ConfigParser |
| base_url = "/path_to_your_folder/" |
| json_name = "./your_5_folds.json" |
| # create matching image and label lists. |
| # The code to generate the lists is based on your local data structure. |
| # You can use glob.glob("**.nii.gz") e.t.c. |
| image_list = ['images/1.nii.gz', 'images/2.nii.gz', ...] |
| label_list = ['labels/1.nii.gz', 'labels/2.nii.gz', ...] |
| items = [{"image": img, "label": lab} for img, lab in zip(image_list, label_list)] |
| # 80% for training 20% for testing. |
| train_test = partition_dataset(items, ratios=[0.8, 0.2], shuffle=True, seed=0) |
| print(f"training: {len(train_test[0])}, testing: {len(train_test[1])}") |
| # num_partitions-fold split for the training set. |
| train_val = partition_dataset(train_test[0], num_partitions=5, shuffle=True, seed=0) |
| print(f"training validation folds sizes: {[len(x) for x in train_val]}") |
| # add the fold index to each training data. |
| training = [] |
| for f, x in enumerate(train_val): |
| for item in x: |
| item["fold"] = f |
| training.append(item) |
| # save json file |
| parser = ConfigParser({}) |
| parser["training"] = training |
| parser["testing"] = train_test[1] |
| print(f"writing {json_name}\n\n") |
| if os.path.exists(json_name): |
| logger.warning(f"rewrite existing datalist file: {json_name}") |
| ConfigParser.export_config_file(parser.config, json_name, indent=4) |
| ``` |
|
|