
Commit

Fix things.
tsalo committed Sep 1, 2024
1 parent 8f5c011 commit 25428f3
Showing 2 changed files with 385 additions and 2 deletions.
385 changes: 385 additions & 0 deletions src/fmripost_phase/utils/bids.py
@@ -0,0 +1,385 @@
"""Utilities to handle BIDS inputs."""

from __future__ import annotations

import json
from collections import defaultdict
from pathlib import Path

from bids.layout import BIDSLayout
from bids.utils import listify
from niworkflows.utils.spaces import SpatialReferences

from fmripost_phase.data import load as load_data


def extract_entities(file_list: str | list[str]) -> dict:
"""Return a dictionary of common entities given a list of files.
Parameters
----------
file_list : str | list[str]
        File path or list of file paths.

    Returns
-------
entities : dict
        Dictionary of entities.

    Examples
--------
>>> extract_entities("sub-01/anat/sub-01_T1w.nii.gz")
{'subject': '01', 'suffix': 'T1w', 'datatype': 'anat', 'extension': '.nii.gz'}
>>> extract_entities(["sub-01/anat/sub-01_T1w.nii.gz"] * 2)
{'subject': '01', 'suffix': 'T1w', 'datatype': 'anat', 'extension': '.nii.gz'}
>>> extract_entities(["sub-01/anat/sub-01_run-1_T1w.nii.gz",
... "sub-01/anat/sub-01_run-2_T1w.nii.gz"])
{'subject': '01', 'run': [1, 2], 'suffix': 'T1w', 'datatype': 'anat', 'extension': '.nii.gz'}
"""
from collections import defaultdict

from bids.layout import parse_file_entities

entities = defaultdict(list)
for e, v in [
ev_pair for f in listify(file_list) for ev_pair in parse_file_entities(f).items()
]:
entities[e].append(v)

def _unique(inlist):
inlist = sorted(set(inlist))
if len(inlist) == 1:
return inlist[0]
return inlist

return {k: _unique(v) for k, v in entities.items()}


def collect_derivatives(
raw_dataset: Path | BIDSLayout | None,
derivatives_dataset: Path | BIDSLayout | None,
entities: dict | None,
fieldmap_id: str | None,
spec: dict | None = None,
patterns: list[str] | None = None,
allow_multiple: bool = False,
spaces: SpatialReferences | None = None,
) -> dict:
"""Gather existing derivatives and compose a cache.
TODO: Ingress 'spaces' and search for BOLD+mask in the spaces *or* xfms.
Parameters
----------
raw_dataset : Path | BIDSLayout | None
Path to the raw dataset or a BIDSLayout instance.
    derivatives_dataset : Path | BIDSLayout | None
Path to the derivatives dataset or a BIDSLayout instance.
    entities : dict | None
Dictionary of entities to use for filtering.
fieldmap_id : str | None
Fieldmap ID to use for filtering.
spec : dict | None
Specification dictionary.
patterns : list[str] | None
List of patterns to use for filtering.
allow_multiple : bool
Allow multiple files to be returned for a given query.
spaces : SpatialReferences | None
        Spatial references to select for.

    Returns
-------
derivs_cache : dict
Dictionary with keys corresponding to the derivatives and values
corresponding to the file paths.
"""
if not entities:
entities = {}

if spec is None or patterns is None:
_spec = json.loads(load_data.readable('io_spec.json').read_text())

if spec is None:
spec = _spec['queries']

if patterns is None:
patterns = _spec['patterns']

# Search for derivatives data
derivs_cache = defaultdict(list, {})
if derivatives_dataset is not None:
layout = derivatives_dataset
if isinstance(layout, Path):
layout = BIDSLayout(
layout,
config=['bids', 'derivatives'],
validate=False,
)

for k, q in spec['derivatives'].items():
# Combine entities with query. Query values override file entities.
query = {**entities, **q}
item = layout.get(return_type='filename', **query)
if not item:
derivs_cache[k] = None
elif not allow_multiple and len(item) > 1:
raise ValueError(f'Multiple files found for {k}: {item}')
else:
derivs_cache[k] = item[0] if len(item) == 1 else item

for k, q in spec['transforms'].items():
# Combine entities with query. Query values override file entities.
# TODO: Drop functional entities (task, run, etc.) from anat transforms.
query = {**entities, **q}
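            # The boldref-to-fieldmap transform targets a specific fieldmap,
            # so pin the 'to' entity to the requested fieldmap ID.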
if k == 'boldref2fmap':
query['to'] = fieldmap_id

item = layout.get(return_type='filename', **query)
if not item:
derivs_cache[k] = None
elif not allow_multiple and len(item) > 1:
raise ValueError(f'Multiple files found for {k}: {item}')
else:
derivs_cache[k] = item[0] if len(item) == 1 else item

# Search for requested output spaces
if spaces is not None:
# Put the output-space files/transforms in lists so they can be parallelized with
# template_iterator_wf.
spaces_found, bold_outputspaces, bold_mask_outputspaces = [], [], []
for space in spaces.references:
# First try to find processed BOLD+mask files in the requested space
bold_query = {**entities, **spec['derivatives']['bold_mni152nlin6asym']}
bold_query['space'] = space.space
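                # space.spec holds any modifiers attached to this reference
                # (e.g., resolution or cohort), so fold them into the query too.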
bold_query = {**bold_query, **space.spec}
bold_item = layout.get(return_type='filename', **bold_query)
bold_outputspaces.append(bold_item[0] if bold_item else None)

mask_query = {**entities, **spec['derivatives']['bold_mask_mni152nlin6asym']}
mask_query['space'] = space.space
mask_query = {**mask_query, **space.spec}
mask_item = layout.get(return_type='filename', **mask_query)
bold_mask_outputspaces.append(mask_item[0] if mask_item else None)

spaces_found.append(bool(bold_item) and bool(mask_item))

if all(spaces_found):
derivs_cache['bold_outputspaces'] = bold_outputspaces
derivs_cache['bold_mask_outputspaces'] = bold_mask_outputspaces
else:
# The requested spaces were not found, try to find transforms
print(
'Not all requested output spaces were found. '
'We will try to find transforms to these spaces and apply them to the BOLD data.',
flush=True,
)

spaces_found, anat2outputspaces_xfm = [], []
for space in spaces.references:
                # Fall back to an anatomical-to-requested-space transform for this space
anat2space_query = {**entities, **spec['transforms']['anat2mni152nlin6asym']}
anat2space_query['to'] = space.space
item = layout.get(return_type='filename', **anat2space_query)
anat2outputspaces_xfm.append(item[0] if item else None)
spaces_found.append(bool(item))

if all(spaces_found):
derivs_cache['anat2outputspaces_xfm'] = anat2outputspaces_xfm
else:
missing_spaces = ', '.join(
[s.space for s, found in zip(spaces.references, spaces_found) if not found]
)
raise ValueError(
f'Transforms to the following requested spaces not found: {missing_spaces}.'
)

# Search for raw BOLD data
if not derivs_cache and raw_dataset is not None:
if isinstance(raw_dataset, Path):
raw_layout = BIDSLayout(raw_dataset, config=['bids'], validate=False)
else:
raw_layout = raw_dataset

for k, q in spec['raw'].items():
# Combine entities with query. Query values override file entities.
query = {**entities, **q}
item = raw_layout.get(return_type='filename', **query)
if not item:
derivs_cache[k] = None
elif not allow_multiple and len(item) > 1:
raise ValueError(f'Multiple files found for {k}: {item}')
else:
derivs_cache[k] = item[0] if len(item) == 1 else item

return derivs_cache

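# Minimal usage sketch (hypothetical paths and entities, for illustration only):
# the returned cache maps io_spec query names to resolved filenames (or None).
#
#     cache = collect_derivatives(
#         raw_dataset=Path('/data/bids'),
#         derivatives_dataset=Path('/data/derivatives/fmriprep'),
#         entities={'subject': '01', 'task': 'rest'},
#         fieldmap_id=None,
#     )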

def write_bidsignore(deriv_dir):
bids_ignore = (
'*.html',
'logs/',
'figures/', # Reports
'*_xfm.*', # Unspecified transform files
'*.surf.gii', # Unspecified structural outputs
# Unspecified functional outputs
'*_boldref.nii.gz',
'*_bold.func.gii',
'*_mixing.tsv',
'*_timeseries.tsv',
)
ignore_file = Path(deriv_dir) / '.bidsignore'

ignore_file.write_text('\n'.join(bids_ignore) + '\n')


def write_derivative_description(bids_dir, deriv_dir, dataset_links=None):
import os

from fmripost_phase import __version__

DOWNLOAD_URL = f'https://github.com/nipreps/fmripost_phase/archive/{__version__}.tar.gz'

bids_dir = Path(bids_dir)
deriv_dir = Path(deriv_dir)
desc = {
        'Name': 'fMRIPost-Phase: Phase Postprocessing Outputs',
'BIDSVersion': '1.9.0dev',
'DatasetType': 'derivative',
'GeneratedBy': [
{
                'Name': 'fMRIPost-Phase',
'Version': __version__,
'CodeURL': DOWNLOAD_URL,
}
],
        'HowToAcknowledge': 'Please cite fMRIPost-Phase when using these results.',
}

# Keys that can only be set by environment
    if 'FMRIPOST_PHASE_DOCKER_TAG' in os.environ:
        desc['GeneratedBy'][0]['Container'] = {
            'Type': 'docker',
            'Tag': f"nipreps/fmripost_phase:{os.environ['FMRIPOST_PHASE_DOCKER_TAG']}",
        }
    if 'FMRIPOST_PHASE_SINGULARITY_URL' in os.environ:
        desc['GeneratedBy'][0]['Container'] = {
            'Type': 'singularity',
            'URI': os.getenv('FMRIPOST_PHASE_SINGULARITY_URL'),
        }

# Keys deriving from source dataset
orig_desc = {}
fname = bids_dir / 'dataset_description.json'
if fname.exists():
orig_desc = json.loads(fname.read_text())

if 'DatasetDOI' in orig_desc:
desc['SourceDatasets'] = [
{'URL': f'https://doi.org/{orig_desc["DatasetDOI"]}', 'DOI': orig_desc['DatasetDOI']}
]
if 'License' in orig_desc:
desc['License'] = orig_desc['License']

# Add DatasetLinks
if dataset_links:
desc['DatasetLinks'] = {k: str(v) for k, v in dataset_links.items()}
if 'templateflow' in dataset_links:
desc['DatasetLinks']['templateflow'] = 'https://github.com/templateflow/templateflow'

    (deriv_dir / 'dataset_description.json').write_text(json.dumps(desc, indent=4))

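# Illustrative call (hypothetical paths and dataset links, for illustration only):
#
#     write_derivative_description(
#         bids_dir='/data/bids',
#         deriv_dir='/data/derivatives/fmripost_phase',
#         dataset_links={'raw': '/data/bids', 'templateflow': '~/.cache/templateflow'},
#     )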

def validate_input_dir(exec_env, bids_dir, participant_label, need_T1w=True):
    # Ignore issues and warnings that should not influence fMRIPost-Phase
import subprocess
import sys
import tempfile

validator_config_dict = {
'ignore': [
'EVENTS_COLUMN_ONSET',
'EVENTS_COLUMN_DURATION',
'TSV_EQUAL_ROWS',
'TSV_EMPTY_CELL',
'TSV_IMPROPER_NA',
'VOLUME_COUNT_MISMATCH',
'BVAL_MULTIPLE_ROWS',
'BVEC_NUMBER_ROWS',
'DWI_MISSING_BVAL',
'INCONSISTENT_SUBJECTS',
'INCONSISTENT_PARAMETERS',
'BVEC_ROW_LENGTH',
'B_FILE',
'PARTICIPANT_ID_COLUMN',
'PARTICIPANT_ID_MISMATCH',
'TASK_NAME_MUST_DEFINE',
'PHENOTYPE_SUBJECTS_MISSING',
'STIMULUS_FILE_MISSING',
'DWI_MISSING_BVEC',
'EVENTS_TSV_MISSING',
'ACQTIME_FMT',
'Participants age 89 or higher',
'DATASET_DESCRIPTION_JSON_MISSING',
'FILENAME_COLUMN',
'WRONG_NEW_LINE',
'MISSING_TSV_COLUMN_CHANNELS',
'MISSING_TSV_COLUMN_IEEG_CHANNELS',
'MISSING_TSV_COLUMN_IEEG_ELECTRODES',
'UNUSED_STIMULUS',
'CHANNELS_COLUMN_SFREQ',
'CHANNELS_COLUMN_LOWCUT',
'CHANNELS_COLUMN_HIGHCUT',
'CHANNELS_COLUMN_NOTCH',
'CUSTOM_COLUMN_WITHOUT_DESCRIPTION',
'SUSPICIOUSLY_LONG_EVENT_DESIGN',
'SUSPICIOUSLY_SHORT_EVENT_DESIGN',
'MALFORMED_BVEC',
'MALFORMED_BVAL',
'MISSING_TSV_COLUMN_EEG_ELECTRODES',
'MISSING_SESSION',
],
'error': ['NO_T1W'] if need_T1w else [],
'ignoredFiles': ['/dataset_description.json', '/participants.tsv'],
}
# Limit validation only to data from requested participants
if participant_label:
all_subs = {s.name[4:] for s in bids_dir.glob('sub-*')}
selected_subs = {s[4:] if s.startswith('sub-') else s for s in participant_label}
bad_labels = selected_subs.difference(all_subs)
if bad_labels:
            error_msg = (
                'Data for the requested participant label(s) was not found. Could '
                'not find data for participant(s): %s. Please verify the requested '
                'participant labels.'
            )
if exec_env == 'docker':
error_msg += (
' This error can be caused by the input data not being '
'accessible inside the docker container. Please make sure all '
'volumes are mounted properly (see https://docs.docker.com/'
'engine/reference/commandline/run/#mount-volume--v---read-only)'
)
if exec_env == 'singularity':
error_msg += (
' This error can be caused by the input data not being '
'accessible inside the singularity container. Please make sure '
'all paths are mapped properly (see https://www.sylabs.io/'
'guides/3.0/user-guide/bind_paths_and_mounts.html)'
)
raise RuntimeError(error_msg % ','.join(bad_labels))

ignored_subs = all_subs.difference(selected_subs)
if ignored_subs:
for sub in ignored_subs:
validator_config_dict['ignoredFiles'].append(f'/sub-{sub}/**')
with tempfile.NamedTemporaryFile(mode='w+', suffix='.json') as temp:
temp.write(json.dumps(validator_config_dict))
temp.flush()
try:
subprocess.check_call(['bids-validator', str(bids_dir), '-c', temp.name]) # noqa: S607
except FileNotFoundError:
print('bids-validator does not appear to be installed', file=sys.stderr)
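
# Sketch of a typical call from a CLI entry point (argument values are hypothetical):
#
#     validate_input_dir(
#         exec_env='docker',
#         bids_dir=Path('/data/bids'),
#         participant_label=['01', '02'],
#         need_T1w=False,
#     )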
2 changes: 0 additions & 2 deletions src/fmripost_phase/workflows/base.py
@@ -285,9 +285,7 @@ def init_single_run_wf(bold_file):
from fmriprep.workflows.bold.stc import init_bold_stc_wf
from nipype.interfaces import utility as niu
from niworkflows.engine.workflows import LiterateWorkflow as Workflow
from niworkflows.interfaces.fixes import FixHeaderApplyTransforms as ApplyTransforms
from niworkflows.interfaces.header import ValidateImage
from templateflow.api import get as get_template

from fmripost_phase.interfaces.bids import DerivativesDataSink
from fmripost_phase.interfaces.laynii import LayNiiPhaseJolt
