From 21c64e232a0051c2c6f6019dc54be673cd982e82 Mon Sep 17 00:00:00 2001 From: paul Date: Tue, 15 Aug 2023 17:16:11 +0200 Subject: [PATCH 1/2] fix issue 414. Replace broken link --- category_encoders/backward_difference.py | 2 +- category_encoders/base_contrast_encoder.py | 2 +- category_encoders/helmert.py | 4 ++-- category_encoders/one_hot.py | 4 ++-- category_encoders/ordinal.py | 3 +-- category_encoders/polynomial.py | 4 ++-- category_encoders/sum_coding.py | 2 +- 7 files changed, 10 insertions(+), 11 deletions(-) diff --git a/category_encoders/backward_difference.py b/category_encoders/backward_difference.py index 385c93a0..7757a0e7 100644 --- a/category_encoders/backward_difference.py +++ b/category_encoders/backward_difference.py @@ -71,7 +71,7 @@ class BackwardDifferenceEncoder(BaseContrastEncoder): https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/ .. [2] Gregory Carey (2003). Coding Categorical Variables, from - http://psych.colorado.edu/~carey/Courses/PSYC5741/handouts/Coding%20Categorical%20Variables%202006-03-03.pdf + http://ibgwww.colorado.edu/~carey/p5741ndir/Coding_Categorical_Variables.pdf """ diff --git a/category_encoders/base_contrast_encoder.py b/category_encoders/base_contrast_encoder.py index 7f1c83b3..c90cd72b 100644 --- a/category_encoders/base_contrast_encoder.py +++ b/category_encoders/base_contrast_encoder.py @@ -41,7 +41,7 @@ class BaseContrastEncoder(util.BaseEncoder, util.UnsupervisedTransformerMixin): https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/ .. [2] Gregory Carey (2003). Coding Categorical Variables, from - http://psych.colorado.edu/~carey/Courses/PSYC5741/handouts/Coding%20Categorical%20Variables%202006-03-03.pdf + http://ibgwww.colorado.edu/~carey/p5741ndir/Coding_Categorical_Variables.pdf """ prefit_ordinal = True diff --git a/category_encoders/helmert.py b/category_encoders/helmert.py index 198c5725..42ae7c7e 100644 --- a/category_encoders/helmert.py +++ b/category_encoders/helmert.py @@ -72,8 +72,8 @@ class HelmertEncoder(BaseContrastEncoder): https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/ .. [2] Gregory Carey (2003). Coding Categorical Variables, from - http://psych.colorado.edu/~carey/Courses/PSYC5741/handouts/Coding%20Categorical%20Variables%202006-03-03.pdf - + http://ibgwww.colorado.edu/~carey/p5741ndir/Coding_Categorical_Variables.pdf + """ def get_contrast_matrix(self, values_to_encode: np.array) -> ContrastMatrix: return Helmert().code_without_intercept(values_to_encode) diff --git a/category_encoders/one_hot.py b/category_encoders/one_hot.py index f1ee2c24..b923686f 100644 --- a/category_encoders/one_hot.py +++ b/category_encoders/one_hot.py @@ -86,8 +86,8 @@ class OneHotEncoder(util.BaseEncoder, util.UnsupervisedTransformerMixin): https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/ .. [2] Gregory Carey (2003). Coding Categorical Variables, from - http://psych.colorado.edu/~carey/Courses/PSYC5741/handouts/Coding%20Categorical%20Variables%202006-03-03.pdf - + http://ibgwww.colorado.edu/~carey/p5741ndir/Coding_Categorical_Variables.pdf + """ prefit_ordinal = True encoding_relation = util.EncodingRelation.ONE_TO_N_UNIQUE diff --git a/category_encoders/ordinal.py b/category_encoders/ordinal.py index 3c8d1813..d8f08c33 100644 --- a/category_encoders/ordinal.py +++ b/category_encoders/ordinal.py @@ -77,8 +77,7 @@ class OrdinalEncoder(util.BaseEncoder, util.UnsupervisedTransformerMixin): https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/ .. [2] Gregory Carey (2003). Coding Categorical Variables, from - http://psych.colorado.edu/~carey/Courses/PSYC5741/handouts/Coding%20Categorical%20Variables%202006-03-03.pdf - + http://ibgwww.colorado.edu/~carey/p5741ndir/Coding_Categorical_Variables.pdf """ prefit_ordinal = False encoding_relation = util.EncodingRelation.ONE_TO_ONE diff --git a/category_encoders/polynomial.py b/category_encoders/polynomial.py index a32e149e..bcef1228 100644 --- a/category_encoders/polynomial.py +++ b/category_encoders/polynomial.py @@ -71,8 +71,8 @@ class PolynomialEncoder(BaseContrastEncoder): https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/ .. [2] Gregory Carey (2003). Coding Categorical Variables, from - http://psych.colorado.edu/~carey/Courses/PSYC5741/handouts/Coding%20Categorical%20Variables%202006-03-03.pdf - + http://ibgwww.colorado.edu/~carey/p5741ndir/Coding_Categorical_Variables.pdf + """ def get_contrast_matrix(self, values_to_encode: np.array) -> ContrastMatrix: return Poly().code_without_intercept(values_to_encode) diff --git a/category_encoders/sum_coding.py b/category_encoders/sum_coding.py index db07b46e..27e40411 100644 --- a/category_encoders/sum_coding.py +++ b/category_encoders/sum_coding.py @@ -71,7 +71,7 @@ class SumEncoder(BaseContrastEncoder): https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/ .. [2] Gregory Carey (2003). Coding Categorical Variables, from - http://psych.colorado.edu/~carey/Courses/PSYC5741/handouts/Coding%20Categorical%20Variables%202006-03-03.pdf + http://ibgwww.colorado.edu/~carey/p5741ndir/Coding_Categorical_Variables.pdf """ From 6f412ccf7f93c6a7f23f93aaaaef19d60eb9c1cd Mon Sep 17 00:00:00 2001 From: paul Date: Tue, 15 Aug 2023 17:54:37 +0200 Subject: [PATCH 2/2] fixed issue 412, ordinal encoder converting timestamps --- category_encoders/ordinal.py | 2 +- tests/test_ordinal.py | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/category_encoders/ordinal.py b/category_encoders/ordinal.py index d8f08c33..d704e033 100644 --- a/category_encoders/ordinal.py +++ b/category_encoders/ordinal.py @@ -224,7 +224,7 @@ def ordinal_encoding(X_in, mapping=None, cols=None, handle_unknown='value', hand if pd.isna(categories).any(): categories = [c for c in categories if not pd.isna(c)] + [nan_identity] else: - categories = categories.tolist() + categories = list(categories) if util.is_category(X[col].dtype): # Avoid using pandas category dtype meta-data if possible, see #235, #238. if X[col].dtype.ordered: diff --git a/tests/test_ordinal.py b/tests/test_ordinal.py index 00787f94..e62b10f2 100644 --- a/tests/test_ordinal.py +++ b/tests/test_ordinal.py @@ -177,6 +177,33 @@ def test_HaveNaNInTrain_ExpectCodedAsOne(self): self.assertEqual(expected, result) + def test_Timestamp(self): + df = pd.DataFrame( + { + "timestamps": { + 0: pd.Timestamp("1997-09-03 00:00:00"), + 1: pd.Timestamp("1997-09-03 00:00:00"), + 2: pd.Timestamp("2000-09-03 00:00:00"), + 3: pd.Timestamp("1997-09-03 00:00:00"), + 4: pd.Timestamp("1999-09-04 00:00:00"), + 5: pd.Timestamp("2001-09-03 00:00:00"), + }, + } + ) + enc = encoders.OrdinalEncoder(cols=["timestamps"]) + encoded_df = enc.fit_transform(df) + expected_index = [pd.Timestamp("1997-09-03 00:00:00"), + pd.Timestamp("2000-09-03 00:00:00"), + pd.Timestamp("1999-09-04 00:00:00"), + pd.Timestamp("2001-09-03 00:00:00"), + pd.NaT + ] + expected_mapping = pd.Series([1, 2, 3, 4, -2], index=expected_index) + expected_values = [1, 1, 2, 1, 3, 4] + + pd.testing.assert_series_equal(expected_mapping, enc.mapping[0]["mapping"]) + self.assertListEqual(expected_values, encoded_df["timestamps"].tolist()) + def test_NoGaps(self): train = pd.DataFrame({"city": ["New York", np.nan, "Rio", None, "Rosenheim"]}) expected_mapping_value = pd.Series([1, 2, 3, 4], index=["New York", "Rio", "Rosenheim", np.nan])