From 34a2445ffb1397e417f442a3aeae9398ae01be1d Mon Sep 17 00:00:00 2001 From: "Joses W. Ho" Date: Mon, 6 May 2019 18:19:22 +0800 Subject: [PATCH 1/5] version bump --- dabest/__init__.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dabest/__init__.py b/dabest/__init__.py index 9a58f706..6edf9a78 100644 --- a/dabest/__init__.py +++ b/dabest/__init__.py @@ -23,4 +23,4 @@ from ._stats_tools import effsize as effsize from ._classes import TwoGroupsEffectSize -__version__ = "0.2.2" +__version__ = "0.2.3" diff --git a/setup.py b/setup.py index eaa189ef..375b4699 100644 --- a/setup.py +++ b/setup.py @@ -89,7 +89,7 @@ def check_dependencies(): author_email='joseshowh@gmail.com', maintainer='Joses W. Ho', maintainer_email='joseshowh@gmail.com', - version='0.2.2', + version='0.2.3', description=DESCRIPTION, long_description=LONG_DESCRIPTION, packages=find_packages(), From b81e016d550ddbbb8e4f60949a09151ad86cb0f9 Mon Sep 17 00:00:00 2001 From: "Joses W. Ho" Date: Mon, 6 May 2019 18:19:38 +0800 Subject: [PATCH 2/5] fix handling of Categorical x-columns --- dabest/_classes.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/dabest/_classes.py b/dabest/_classes.py index c81e47b1..da40b868 100644 --- a/dabest/_classes.py +++ b/dabest/_classes.py @@ -124,15 +124,27 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, resamples, value_vars=all_plot_groups, value_name=self.__yvar, var_name=self.__xvar) + + # Lines 131 to 140 added in v0.2.3. + # Fixes a bug that jammed up when the xvar column was already + # a pandas Categorical. Now we check for this and act appropriately. 
+ if isinstance(plot_data[self.__xvar].dtype, + pd.CategoricalDtype) is True: + plot_data[self.__xvar].cat.remove_unused_categories(inplace=True) + plot_data[self.__xvar].cat.reorder_categories(all_plot_groups, + ordered=True, + inplace=True) + else: + plot_data.loc[:, self.__xvar] = pd.Categorical(plot_data[self.__xvar], + categories=all_plot_groups, + ordered=True) - plot_data.loc[:, self.__xvar] = pd.Categorical(plot_data[self.__xvar], - categories=all_plot_groups, - ordered=True) self.__plot_data = plot_data - + self.__all_plot_groups = all_plot_groups + # Sanity check that all idxs are paired, if so desired. if paired is True: if id_col is None: From 32e9a5600fb3662dffa8dc15b04b949165094ff1 Mon Sep 17 00:00:00 2001 From: "Joses W. Ho" Date: Mon, 6 May 2019 18:22:59 +0800 Subject: [PATCH 3/5] update docs --- docs/source/index.rst | 31 ++++++++----------------------- docs/source/release-notes.rst | 6 ++++++ 2 files changed, 14 insertions(+), 23 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index 72c28a34..f79535d2 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -9,7 +9,7 @@ DABEST ----------------------------------------------- Data Analysis with Bootstrap-coupled ESTimation ----------------------------------------------- -*version 0.2.2* +*version 0.2.3* Analyze your data with estimation statistics! --------------------------------------------- @@ -19,31 +19,16 @@ Analyze your data with estimation statistics! News ---- -April 2019: - - v0.2.2 released. This is a minor bugfix that addressed an issue for an edge case where the mean or median difference was exactly zero. See the :doc:`release-notes`. +May 2019: + - v0.2.3 released. This is a fix for a bug that did not properly handle x-columns which were pandas Categorical objects. See the :doc:`release-notes`. -March 2019: - - v0.2.1 released. This is a minor bugfix that addressed an issue in gapped line plotting. See the :doc:`release-notes`. 
+April 2019: + - v0.2.2 released. This is a minor bugfix that addressed an issue for an edge case where the mean or median difference was exactly zero. - - Release of v0.2.0. This is a major update that makes several breaking changes to the API. +March 2019: + - v0.2.1 released. This is a minor bugfix that addressed an issue in gapped line plotting. + - v0.2.0 released. This is a major update that makes several breaking changes to the API. -January 2019: - - Release of v0.1.7. Added `cumming_vertical_spacing` option. - -October 2018: - - Release of v0.1.6. Added more keywords for control of plot elements. - -July 2018: - - Release of v0.1.5. *bugfix for setup and package management* - - Release of v0.1.4. - -June 2018: - - Release of v0.1.3. Also added a short tutorial for dabest in R. - -December 2017: - - We have made a `webapp `_ that produces Gardner-Altman and Cumming plots! - - Contents -------- diff --git a/docs/source/release-notes.rst b/docs/source/release-notes.rst index 4c34ab10..821e2a83 100644 --- a/docs/source/release-notes.rst +++ b/docs/source/release-notes.rst @@ -4,6 +4,12 @@ Release Notes ============= +v0.2.3 +------ + +This release fixes a bug where the supplied ``x`` was not handled correctly when it was a :py:mod:`pandas` :py:class:`Categorical` object and the ``idx`` did not include all the original categories. + + v0.2.2 ------ From e97b27bf20c83d5105f09e67336b153d5465327c Mon Sep 17 00:00:00 2001 From: "Joses W. 
Ho" Date: Tue, 7 May 2019 10:50:45 +0800 Subject: [PATCH 4/5] tweak ylim setting when base_mean is 0 --- dabest/tests/test_02_plotting.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/dabest/tests/test_02_plotting.py b/dabest/tests/test_02_plotting.py index 99ab8eb8..6459e583 100644 --- a/dabest/tests/test_02_plotting.py +++ b/dabest/tests/test_02_plotting.py @@ -79,7 +79,13 @@ def test_cummings_unpaired(): rand_swarm_ylim = (np.random.uniform(base_mean-10, base_mean, 1), np.random.uniform(base_mean, base_mean+10, 1)) - rand_contrast_ylim = (-base_mean/3, base_mean/3) + + if base_mean == 0: + # Have to set the contrast ylim, because the way I dynamically generate + # the contrast ylims will flunk out with base_mean = 0. + rand_contrast_ylim = (-0.5, 0.5) + else: + rand_contrast_ylim = (-base_mean/3, base_mean/3) f1 = multi_2group_unpaired.mean_diff.plot(swarm_ylim=rand_swarm_ylim, contrast_ylim=rand_contrast_ylim, @@ -89,18 +95,12 @@ def test_cummings_unpaired(): rawswarm_axes = f1.axes[0] contrast_axes = f1.axes[1] - # Check ylims match the desired ones. + # Check swarm ylims match the desired ones. assert rawswarm_axes.get_ylim()[0] == pytest.approx(rand_swarm_ylim[0]) assert rawswarm_axes.get_ylim()[1] == pytest.approx(rand_swarm_ylim[1]) - - # This needs to be rounded, because if the base mean is 0, - # the ylim might be -0.001, which will not match 0. - if base_mean == 0: - ylim_low = np.round(contrast_axes.get_ylim()[0]) - else: - ylim_low = contrast_axes.get_ylim()[0] - assert ylim_low == pytest.approx(rand_contrast_ylim[0]) - + + # Check contrast ylims match the desired ones. + assert contrast_axes.get_ylim()[0] == pytest.approx(rand_contrast_ylim[0]) assert contrast_axes.get_ylim()[1] == pytest.approx(rand_contrast_ylim[1]) # Check xtick labels. From a4693f5098d43c9fd3a3a1c5bbaadb9b5edf155d Mon Sep 17 00:00:00 2001 From: "Joses W. 
Ho" Date: Tue, 7 May 2019 10:57:59 +0800 Subject: [PATCH 5/5] improve variable names --- dabest/tests/test_03_confint.py | 39 +++++++++++++++------------------ 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/dabest/tests/test_03_confint.py b/dabest/tests/test_03_confint.py index bbdc5404..0d47d939 100644 --- a/dabest/tests/test_03_confint.py +++ b/dabest/tests/test_03_confint.py @@ -12,21 +12,20 @@ -def test_unpaired_ci(reps=50, ci=95): - n = 10 - N = 10000 - - - - # Create data for hedges g and cohens d +def test_unpaired_ci(reps=40, ci=95): + + POPULATION_N = 10000 + SAMPLE_N = 10 + + # Create data for hedges g and cohens d. CONTROL_MEAN = np.random.randint(1, 1000) POP_SD = np.random.randint(1, 15) POP_D = np.round(np.random.uniform(-2, 2, 1)[0], 2) TRUE_STD_DIFFERENCE = CONTROL_MEAN + (POP_D * POP_SD) - norm_rvs_kwargs = dict(scale=POP_SD, size=n) - c1 = norm.rvs(loc=CONTROL_MEAN, **norm_rvs_kwargs) - t1 = norm.rvs(loc=CONTROL_MEAN+TRUE_STD_DIFFERENCE, **norm_rvs_kwargs) + norm_sample_kwargs = dict(scale=POP_SD, size=SAMPLE_N) + c1 = norm.rvs(loc=CONTROL_MEAN, **norm_sample_kwargs) + t1 = norm.rvs(loc=CONTROL_MEAN+TRUE_STD_DIFFERENCE, **norm_sample_kwargs) std_diff_df = pd.DataFrame({'Control' : c1, 'Test': t1}) @@ -36,10 +35,9 @@ def test_unpaired_ci(reps=50, ci=95): CONTROL_MEAN = np.random.randint(1, 1000) POP_SD = np.random.randint(1, 15) TRUE_DIFFERENCE = np.random.randint(-POP_SD*5, POP_SD*5) - - norm_rvs_kwargs = dict(scale=POP_SD, size=n) - c1 = norm.rvs(loc=CONTROL_MEAN, **norm_rvs_kwargs) - t1 = norm.rvs(loc=CONTROL_MEAN+TRUE_DIFFERENCE, **norm_rvs_kwargs) + + c1 = norm.rvs(loc=CONTROL_MEAN, **norm_sample_kwargs) + t1 = norm.rvs(loc=CONTROL_MEAN+TRUE_DIFFERENCE, **norm_sample_kwargs) mean_df = pd.DataFrame({'Control' : c1, 'Test': t1}) @@ -49,11 +47,11 @@ def test_unpaired_ci(reps=50, ci=95): MEDIAN_DIFFERENCE = np.random.randint(-5, 5) A = np.random.randint(-7, 7) - skew_kwargs = dict(a=A, scale=5, size=N) + skew_kwargs = dict(a=A, 
scale=5, size=POPULATION_N) skewpop1 = skewnorm.rvs(**skew_kwargs, loc=100) skewpop2 = skewnorm.rvs(**skew_kwargs, loc=100+MEDIAN_DIFFERENCE) - sample_kwargs = dict(size=n, replace=False) + sample_kwargs = dict(replace=False, size=SAMPLE_N) skewsample1 = np.random.choice(skewpop1, **sample_kwargs) skewsample2 = np.random.choice(skewpop2, **sample_kwargs) @@ -65,13 +63,11 @@ def test_unpaired_ci(reps=50, ci=95): CD_DIFFERENCE = np.random.randint(1, 10) SD = np.abs(CD_DIFFERENCE) - N = 10000 - pop_kwargs = dict(scale=SD, size=N) + pop_kwargs = dict(scale=SD, size=POPULATION_N) pop1 = norm.rvs(loc=100, **pop_kwargs) pop2 = norm.rvs(loc=100+CD_DIFFERENCE, **pop_kwargs) - n = 20 - sample_kwargs = dict(size=n, replace=False) + sample_kwargs = dict(replace=False, size=SAMPLE_N) sample1 = np.random.choice(pop1, **sample_kwargs) sample2 = np.random.choice(pop2, **sample_kwargs) @@ -129,7 +125,8 @@ def test_unpaired_ci(reps=50, ci=95): error_count_cliffs_delta += 1 - max_errors = reps * (100 - ci) / 100 + max_errors = int(np.ceil(reps * (100 - ci) / 100)) + assert error_count_cohens_d <= max_errors assert error_count_hedges_g <= max_errors assert error_count_mean_diff <= max_errors