From 5378208e6d1cfa854d5c12c6f8d60f70bc1e14c3 Mon Sep 17 00:00:00 2001 From: Thiago Curvelo Date: Sun, 20 Oct 2019 19:27:27 -0300 Subject: [PATCH] Wrap DataFrames in order to allow custom styling (closes #76) --- src/arche/readers/items.py | 9 +++++---- src/arche/tools/dataframe.py | 15 +++++++++++++++ tests/tools/test_stylizeddataframe.py | 18 ++++++++++++++++++ 3 files changed, 38 insertions(+), 4 deletions(-) create mode 100644 src/arche/tools/dataframe.py create mode 100644 tests/tools/test_stylizeddataframe.py diff --git a/src/arche/readers/items.py b/src/arche/readers/items.py index 93453a9..4353466 100755 --- a/src/arche/readers/items.py +++ b/src/arche/readers/items.py @@ -4,6 +4,7 @@ from arche import SH_URL from arche.tools import api +from arche.tools.dataframe import StylizedDataFrame import numpy as np import pandas as pd from scrapinghub import ScrapinghubClient @@ -22,11 +23,11 @@ def __len__(self) -> int: return len(self.df) @staticmethod - def process_df(df: pd.DataFrame) -> pd.DataFrame: + def process_df(df: pd.DataFrame) -> StylizedDataFrame: # clean empty objects - mainly lists and dicts, but keep everything else df = df.applymap(lambda x: x if x or isinstance(x, numbers.Real) else np.nan) Items.categorize(df) - return df + return StylizedDataFrame(df, copy=True) @staticmethod def categorize(df: pd.DataFrame) -> pd.DataFrame: @@ -47,7 +48,7 @@ def from_df(cls, df: pd.DataFrame): @classmethod def from_array(cls, iterable: RawItems): - return cls(raw=iterable, df=pd.DataFrame(list(iterable))) + return cls(raw=iterable, df=StylizedDataFrame(list(iterable))) class CloudItems(Items): @@ -62,7 +63,7 @@ def __init__( self._limit: int = 0 self.filters = filters raw = self.fetch_data() - df = pd.DataFrame(list(raw)) + df = StylizedDataFrame(list(raw)) df.index = self.format_keys(df["_key"]) df.index.name = None df = df.drop(columns=["_key", "_type"], errors="ignore") diff --git a/src/arche/tools/dataframe.py b/src/arche/tools/dataframe.py new file mode 100644 index 0000000..2893307 --- /dev/null +++ b/src/arche/tools/dataframe.py @@ -0,0 +1,15 @@ +import re + +import pandas as pd + + +class StylizedDataFrame(pd.DataFrame): + def _make_urls_clickable(self, val): + if isinstance(val, str) and re.search("^https?://", val): + return f'{val}' + else: + return val + + def to_html(self, *args, **kwargs): + styler = self.style.format(self._make_urls_clickable) + return styler.render(*args, **kwargs) diff --git a/tests/tools/test_stylizeddataframe.py b/tests/tools/test_stylizeddataframe.py new file mode 100644 index 0000000..d9dc767 --- /dev/null +++ b/tests/tools/test_stylizeddataframe.py @@ -0,0 +1,18 @@ +from arche.tools.dataframe import StylizedDataFrame +import pandas as pd + + +def test_stylized_df_make_urls_clickable(): + data = {"col1": [1, 2], "col2": ["http://foo.com", "https://bar.com"]} + df1 = pd.DataFrame(data) + assert "http://foo.com' in html + assert 'https://bar.com' in html + + +def test_stylized_df_does_not_add_links_if_no_url_found(): + data = {"col1": [1, 2], "col2": ["foo", "bar"]} + html = pd.DataFrame(data).to_html() + assert "