Skip to content

Commit

Permalink
Make anonymization of uid optional (#6)
Browse files Browse the repository at this point in the history
  • Loading branch information
joeranbosma authored Oct 8, 2024
1 parent a5e8c18 commit d2f3903
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 7 deletions.
2 changes: 1 addition & 1 deletion build.sh
Original file line number Diff line number Diff line change
@@ -1 +1 @@
docker build . --tag dragon_prep:latest --tag dragon_prep:v0.2.3
docker build . --tag dragon_prep:latest --tag dragon_prep:v0.2.4
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
long_description = fh.read()

setuptools.setup(
version='0.2.3',
version='0.2.4',
author_email='[email protected]',
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down
10 changes: 6 additions & 4 deletions src/dragon_prep/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@ def split_and_save_data(
df_test: pd.DataFrame | None = None,
split_by: str = "patient_id",
recommended_truncation_side: str = "left",
anonymize_uid: bool = True,
) -> Tuple[Dict[str, pd.DataFrame], Dict[str, int], Dict[str, Any]]:
"""Make train, val and test splits.
Expand Down Expand Up @@ -343,10 +344,11 @@ def split_and_save_data(
label_names=label_names,
)

# override the uid
df["uid"] = [f"{task_name}_case{idx}" for idx in range(len(df))]
if df_test is not None:
df_test["uid"] = [f"{task_name}_test_case{idx}" for idx in range(len(df_test))]
if anonymize_uid:
# override the uid
df["uid"] = [f"{task_name}_case{idx}" for idx in range(len(df))]
if df_test is not None:
df_test["uid"] = [f"{task_name}_test_case{idx}" for idx in range(len(df_test))]

# make the splits
dataframes = make_cv_splits(
Expand Down
2 changes: 1 addition & 1 deletion tests/development-README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,4 @@ AutoPEP8 for formatting (this can be done automatically on save, see e.g. https:
# Push release to PyPI
1. Increase the version in setup.py, and update the version in the upload command below
2. Build: `python -m build`
3. Distribute package to PyPI: `python -m twine upload dist/*0.2.3*`
3. Distribute package to PyPI: `python -m twine upload dist/*0.2.4*`

0 comments on commit d2f3903

Please sign in to comment.