Skip to content

Commit

Permalink
Merge branch 'master' into smaller-font
Browse files Browse the repository at this point in the history
  • Loading branch information
jnothman authored Dec 28, 2023
2 parents adacd66 + ffd4aad commit 7961b04
Show file tree
Hide file tree
Showing 7 changed files with 99 additions and 5 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ What's new in version 0.9
introduced in version 0.7. (:issue:`248`)
- Ability to disable totals plot with `totals_plot_elements=0`. (:issue:`246`)
- Ability to set totals y axis label (:issue:`243`)
- Added ``max_subset_rank`` to show only the n most populous subsets
  (all subsets tied at the cutoff rank are included).

What's new in version 0.8
-------------------------
Expand Down
8 changes: 8 additions & 0 deletions examples/plot_customize_after_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,11 @@
plot_result["totals"].set_ylabel("Category size")
plot_result["matrix"].set_xlabel("Subsets between categories")
plt.show()


##########################################################################
# Or we can place the totals label on the x axis

plot_result = plot(example)
plot_result["totals"].set_xlabel("Category size")
plt.show()
4 changes: 2 additions & 2 deletions examples/plot_generated.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@

##########################################################################

plot(example, show_counts="{:d}")
plt.suptitle("With counts shown")
plot(example, show_counts="{:,}")
plt.suptitle("With counts shown, using a thousands separator")
plt.show()

##########################################################################
Expand Down
13 changes: 12 additions & 1 deletion examples/plot_highlight.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
plt.show()

##########################################################################
# ... or their size or degree.
# ... or their size.

upset = UpSet(example)
upset.style_subsets(
Expand All @@ -44,6 +44,17 @@
plt.suptitle("Hatch subsets with size >1000")
plt.show()

##########################################################################
# ... or degree.

upset = UpSet(example)
upset.style_subsets(min_degree=1, facecolor="blue")
upset.style_subsets(min_degree=2, facecolor="purple")
upset.style_subsets(min_degree=3, facecolor="red")
upset.plot()
plt.suptitle("Coloring by degree")
plt.show()

##########################################################################
# Multiple stylings can be applied with different criteria in the same
# plot.
Expand Down
16 changes: 16 additions & 0 deletions upsetplot/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def _process_data(
sum_over,
min_subset_size=None,
max_subset_size=None,
max_subset_rank=None,
min_degree=None,
max_degree=None,
reverse=False,
Expand All @@ -41,6 +42,7 @@ def _process_data(
sum_over=sum_over,
min_subset_size=min_subset_size,
max_subset_size=max_subset_size,
max_subset_rank=max_subset_rank,
min_degree=min_degree,
max_degree=max_degree,
include_empty_subsets=include_empty_subsets,
Expand Down Expand Up @@ -200,6 +202,11 @@ class UpSet:
a size greater than this threshold will be omitted from plotting.
.. versionadded:: 0.5
max_subset_rank : int, optional
Limit to the top N ranked subsets in descending order of size.
All tied subsets are included.
.. versionadded:: 0.9
min_degree : int, optional
Minimum degree of a subset to be shown in the plot.
Expand Down Expand Up @@ -270,6 +277,7 @@ def __init__(
sum_over=None,
min_subset_size=None,
max_subset_size=None,
max_subset_rank=None,
min_degree=None,
max_degree=None,
facecolor="auto",
Expand Down Expand Up @@ -324,6 +332,7 @@ def __init__(
sum_over=sum_over,
min_subset_size=min_subset_size,
max_subset_size=max_subset_size,
max_subset_rank=max_subset_rank,
min_degree=min_degree,
max_degree=max_degree,
reverse=not self._horizontal,
Expand All @@ -345,6 +354,7 @@ def style_subsets(
absent=None,
min_subset_size=None,
max_subset_size=None,
max_subset_rank=None,
min_degree=None,
max_degree=None,
facecolor=None,
Expand All @@ -371,6 +381,11 @@ def style_subsets(
Minimum size of a subset to be styled.
max_subset_size : int, optional
Maximum size of a subset to be styled.
max_subset_rank : int, optional
Limit to the top N ranked subsets in descending order of size.
All tied subsets are included.
.. versionadded:: 0.9
min_degree : int, optional
Minimum degree of a subset to be styled.
max_degree : int, optional
Expand Down Expand Up @@ -405,6 +420,7 @@ def style_subsets(
absent=absent,
min_subset_size=min_subset_size,
max_subset_size=max_subset_size,
max_subset_rank=max_subset_rank,
min_degree=min_degree,
max_degree=max_degree,
)
Expand Down
31 changes: 29 additions & 2 deletions upsetplot/reformat.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,14 +94,25 @@ def _scalar_to_list(val):


def _get_subset_mask(
agg, min_subset_size, max_subset_size, min_degree, max_degree, present, absent
agg,
min_subset_size,
max_subset_size,
max_subset_rank,
min_degree,
max_degree,
present,
absent,
):
"""Get a mask over subsets based on size, degree or category presence"""
subset_mask = True
if min_subset_size is not None:
subset_mask = np.logical_and(subset_mask, agg >= min_subset_size)
if max_subset_size is not None:
subset_mask = np.logical_and(subset_mask, agg <= max_subset_size)
if max_subset_rank is not None:
subset_mask = np.logical_and(
subset_mask, agg.rank(method="min", ascending=False) <= max_subset_rank
)
if (min_degree is not None and min_degree >= 0) or max_degree is not None:
degree = agg.index.to_frame().sum(axis=1)
if min_degree is not None:
Expand All @@ -121,12 +132,21 @@ def _get_subset_mask(


def _filter_subsets(
df, agg, min_subset_size, max_subset_size, min_degree, max_degree, present, absent
df,
agg,
min_subset_size,
max_subset_size,
max_subset_rank,
min_degree,
max_degree,
present,
absent,
):
subset_mask = _get_subset_mask(
agg,
min_subset_size=min_subset_size,
max_subset_size=max_subset_size,
max_subset_rank=max_subset_rank,
min_degree=min_degree,
max_degree=max_degree,
present=present,
Expand Down Expand Up @@ -189,6 +209,7 @@ def query(
absent=None,
min_subset_size=None,
max_subset_size=None,
max_subset_rank=None,
min_degree=None,
max_degree=None,
sort_by="degree",
Expand Down Expand Up @@ -221,6 +242,11 @@ def query(
Size may be a sum of values, see `subset_size`.
max_subset_size : int, optional
Maximum size of a subset to be reported.
max_subset_rank : int, optional
Limit to the top N ranked subsets in descending order of size.
All tied subsets are included.
.. versionadded:: 0.9
min_degree : int, optional
Minimum degree of a subset to be reported.
max_degree : int, optional
Expand Down Expand Up @@ -348,6 +374,7 @@ def query(
agg,
min_subset_size=min_subset_size,
max_subset_size=max_subset_size,
max_subset_rank=max_subset_rank,
min_degree=min_degree,
max_degree=max_degree,
present=present,
Expand Down
31 changes: 31 additions & 0 deletions upsetplot/tests/test_upsetplot.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,6 +753,14 @@ def test_index_must_be_bool(x):
(True, True, True): 990,
},
),
(
{"max_subset_rank": 3},
{
(True, False, False): 884,
(True, True, False): 1547,
(True, True, True): 990,
},
),
(
{"min_subset_size": 800, "max_subset_size": 990},
{
Expand Down Expand Up @@ -822,6 +830,29 @@ def test_filter_subsets(filter_params, expected, sort_by):
assert upset_full.total == pytest.approx(upset_filtered.total)


def test_filter_subsets_max_subset_rank_tie():
    """Check how ``max_subset_rank`` treats subsets tied on size.

    The rank limit is raised from 1 to 4 and the resulting intersections are
    compared with those of the previous limit:

    * if the result grew, the newly admitted subsets must be strictly
      smaller than everything kept before (non-tie case);
    * if it did not grow and is not yet the full set of subsets, more than
      one kept subset must share the smallest size (tie case).

    Both cases have to be hit at least once, and the largest limit must
    return every subset.
    """
    data = generate_samples(seed=0, n_samples=5, n_categories=3)
    # Both flags start False so the closing assertions genuinely prove that
    # each branch was exercised. (``tested_tie`` used to start as True,
    # which made ``assert tested_tie`` vacuous.)
    tested_non_tie = False
    tested_tie = False
    full = UpSet(data, subset_size="count").intersections
    prev = None
    for max_rank in range(1, 5):
        cur = UpSet(
            data, subset_size="count", max_subset_rank=max_rank
        ).intersections
        if prev is not None:
            if cur.shape[0] > prev.shape[0]:
                # Rows are added only when they are smaller than every
                # subset that was already included.
                assert cur.min() < prev.min()
                tested_non_tie = True
            elif cur.shape[0] != full.shape[0]:
                # Raising the rank added nothing although subsets remain:
                # several kept subsets must be tied at the smallest size.
                assert (cur == cur.min()).sum() > 1
                tested_tie = True

        prev = cur
    assert tested_non_tie
    assert tested_tie
    assert cur.shape[0] == full.shape[0]


@pytest.mark.parametrize(
"x",
[
Expand Down

0 comments on commit 7961b04

Please sign in to comment.