From 34a2445ffb1397e417f442a3aeae9398ae01be1d Mon Sep 17 00:00:00 2001
From: "Joses W. Ho" <joseshowh@gmail.com>
Date: Mon, 6 May 2019 18:19:22 +0800
Subject: [PATCH 1/5] version bump

---
 dabest/__init__.py | 2 +-
 setup.py           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/dabest/__init__.py b/dabest/__init__.py
index 9a58f706..6edf9a78 100644
--- a/dabest/__init__.py
+++ b/dabest/__init__.py
@@ -23,4 +23,4 @@
 from ._stats_tools import effsize as effsize
 from ._classes import TwoGroupsEffectSize 
 
-__version__ = "0.2.2"
+__version__ = "0.2.3"
diff --git a/setup.py b/setup.py
index eaa189ef..375b4699 100644
--- a/setup.py
+++ b/setup.py
@@ -89,7 +89,7 @@ def check_dependencies():
         author_email='joseshowh@gmail.com',
         maintainer='Joses W. Ho',
         maintainer_email='joseshowh@gmail.com',
-        version='0.2.2',
+        version='0.2.3',
         description=DESCRIPTION,
         long_description=LONG_DESCRIPTION,
         packages=find_packages(),

From b81e016d550ddbbb8e4f60949a09151ad86cb0f9 Mon Sep 17 00:00:00 2001
From: "Joses W. Ho" <joseshowh@gmail.com>
Date: Mon, 6 May 2019 18:19:38 +0800
Subject: [PATCH 2/5] fix handling of Categorical x-columns

---
 dabest/_classes.py | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/dabest/_classes.py b/dabest/_classes.py
index c81e47b1..da40b868 100644
--- a/dabest/_classes.py
+++ b/dabest/_classes.py
@@ -124,15 +124,27 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, resamples,
                                 value_vars=all_plot_groups,
                                 value_name=self.__yvar,
                                 var_name=self.__xvar)
+        
+        # Added in v0.2.3.
+        # Fixes a bug that occurred when the xvar column was already
+        # a pandas Categorical. Now we check for this and act appropriately.
+        if isinstance(plot_data[self.__xvar].dtype, 
+                      pd.CategoricalDtype) is True:
+            plot_data[self.__xvar].cat.remove_unused_categories(inplace=True)
+            plot_data[self.__xvar].cat.reorder_categories(all_plot_groups, 
+                                                          ordered=True, 
+                                                          inplace=True)
+        else:
+            plot_data.loc[:, self.__xvar] = pd.Categorical(plot_data[self.__xvar],
+                                               categories=all_plot_groups,
+                                               ordered=True)
 
-        plot_data.loc[:, self.__xvar] = pd.Categorical(plot_data[self.__xvar],
-                                           categories=all_plot_groups,
-                                           ordered=True)
 
         self.__plot_data = plot_data
-
+        
         self.__all_plot_groups = all_plot_groups
 
+
         # Sanity check that all idxs are paired, if so desired.
         if paired is True:
             if id_col is None:

From 32e9a5600fb3662dffa8dc15b04b949165094ff1 Mon Sep 17 00:00:00 2001
From: "Joses W. Ho" <joseshowh@gmail.com>
Date: Mon, 6 May 2019 18:22:59 +0800
Subject: [PATCH 3/5] update docs

---
 docs/source/index.rst         | 31 ++++++++-----------------------
 docs/source/release-notes.rst |  6 ++++++
 2 files changed, 14 insertions(+), 23 deletions(-)

diff --git a/docs/source/index.rst b/docs/source/index.rst
index 72c28a34..f79535d2 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -9,7 +9,7 @@ DABEST
 -----------------------------------------------
 Data Analysis with Bootstrap-coupled ESTimation
 -----------------------------------------------
-*version 0.2.2*
+*version 0.2.3*
 
 Analyze your data with estimation statistics!
 ---------------------------------------------
@@ -19,31 +19,16 @@ Analyze your data with estimation statistics!
 
 News
 ----
-April 2019:
-  - v0.2.2 released. This is a minor bugfix that addressed an issue for an edge case where the mean or median difference was exactly zero. See the :doc:`release-notes`.
+May 2019:
+  - v0.2.3 released. This release fixes a bug in the handling of x-columns that are pandas Categorical objects. See the :doc:`release-notes`.
 
-March 2019:
-  - v0.2.1 released. This is a minor bugfix that addressed an issue in gapped line plotting. See the :doc:`release-notes`.
+April 2019:
+  - v0.2.2 released. This is a minor bugfix that addressed an issue for an edge case where the mean or median difference was exactly zero.
   
-  - Release of v0.2.0. This is a major update that makes several breaking changes to the API. 
+March 2019:
+  - v0.2.1 released. This is a minor bugfix that addressed an issue in gapped line plotting.
+  - v0.2.0 released. This is a major update that makes several breaking changes to the API. 
   
-January 2019:
-  - Release of v0.1.7. Added `cumming_vertical_spacing` option.
-
-October 2018:
-  - Release of v0.1.6. Added more keywords for control of plot elements.
-
-July 2018:
- - Release of v0.1.5. *bugfix for setup and package management*
- - Release of v0.1.4.
-
-June 2018:
-  - Release of v0.1.3. Also added a short tutorial for dabest in R.
-
-December 2017:
-  - We have made a `webapp <https://www.estimationstats.com>`_ that produces Gardner-Altman and Cumming plots!
-
-
 Contents
 --------
 
diff --git a/docs/source/release-notes.rst b/docs/source/release-notes.rst
index 4c34ab10..821e2a83 100644
--- a/docs/source/release-notes.rst
+++ b/docs/source/release-notes.rst
@@ -4,6 +4,12 @@
 Release Notes
 =============
 
+v0.2.3
+------
+
+This release fixes a bug that occurred when the supplied ``x`` was a :py:mod:`pandas` :py:class:`Categorical` object, but the ``idx`` did not include all the original categories.
+
+
 v0.2.2
 ------
 

From e97b27bf20c83d5105f09e67336b153d5465327c Mon Sep 17 00:00:00 2001
From: "Joses W. Ho" <joseshowh@gmail.com>
Date: Tue, 7 May 2019 10:50:45 +0800
Subject: [PATCH 4/5] tweak ylim setting when base_mean is 0

---
 dabest/tests/test_02_plotting.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/dabest/tests/test_02_plotting.py b/dabest/tests/test_02_plotting.py
index 99ab8eb8..6459e583 100644
--- a/dabest/tests/test_02_plotting.py
+++ b/dabest/tests/test_02_plotting.py
@@ -79,7 +79,13 @@ def test_cummings_unpaired():
 
     rand_swarm_ylim = (np.random.uniform(base_mean-10, base_mean, 1),
                        np.random.uniform(base_mean, base_mean+10, 1))
-    rand_contrast_ylim = (-base_mean/3, base_mean/3)
+                       
+    if base_mean == 0:
+        # Set the contrast ylim explicitly: the dynamically generated limits
+        # (-base_mean/3, base_mean/3) collapse to (0, 0) when base_mean = 0.
+        rand_contrast_ylim = (-0.5, 0.5)
+    else:
+        rand_contrast_ylim = (-base_mean/3, base_mean/3)
 
     f1 = multi_2group_unpaired.mean_diff.plot(swarm_ylim=rand_swarm_ylim,
                                               contrast_ylim=rand_contrast_ylim,
@@ -89,18 +95,12 @@ def test_cummings_unpaired():
     rawswarm_axes = f1.axes[0]
     contrast_axes = f1.axes[1]
 
-    # Check ylims match the desired ones.
+    # Check swarm ylims match the desired ones.
     assert rawswarm_axes.get_ylim()[0] == pytest.approx(rand_swarm_ylim[0])
     assert rawswarm_axes.get_ylim()[1] == pytest.approx(rand_swarm_ylim[1])
-    
-    # This needs to be rounded, because if the base mean is 0,
-    # the ylim might be -0.001, which will not match 0.
-    if base_mean == 0:
-        ylim_low = np.round(contrast_axes.get_ylim()[0])
-    else:
-        ylim_low = contrast_axes.get_ylim()[0]
-    assert ylim_low == pytest.approx(rand_contrast_ylim[0])
-    
+
+    # Check contrast ylims match the desired ones.
+    assert contrast_axes.get_ylim()[0] == pytest.approx(rand_contrast_ylim[0])
     assert contrast_axes.get_ylim()[1] == pytest.approx(rand_contrast_ylim[1])
 
     # Check xtick labels.

From a4693f5098d43c9fd3a3a1c5bbaadb9b5edf155d Mon Sep 17 00:00:00 2001
From: "Joses W. Ho" <joseshowh@gmail.com>
Date: Tue, 7 May 2019 10:57:59 +0800
Subject: [PATCH 5/5] improve variable names

---
 dabest/tests/test_03_confint.py | 39 +++++++++++++++------------------
 1 file changed, 18 insertions(+), 21 deletions(-)

diff --git a/dabest/tests/test_03_confint.py b/dabest/tests/test_03_confint.py
index bbdc5404..0d47d939 100644
--- a/dabest/tests/test_03_confint.py
+++ b/dabest/tests/test_03_confint.py
@@ -12,21 +12,20 @@
 
 
 
-def test_unpaired_ci(reps=50, ci=95):
-    n = 10
-    N = 10000
-
-
-
-    # Create data for hedges g and cohens d
+def test_unpaired_ci(reps=40, ci=95):
+    
+    POPULATION_N = 10000
+    SAMPLE_N = 10
+    
+    # Create data for hedges g and cohens d.
     CONTROL_MEAN = np.random.randint(1, 1000)
     POP_SD       = np.random.randint(1, 15)
     POP_D        = np.round(np.random.uniform(-2, 2, 1)[0], 2)
 
     TRUE_STD_DIFFERENCE = CONTROL_MEAN + (POP_D * POP_SD)
-    norm_rvs_kwargs = dict(scale=POP_SD, size=n)
-    c1 = norm.rvs(loc=CONTROL_MEAN, **norm_rvs_kwargs)
-    t1 = norm.rvs(loc=CONTROL_MEAN+TRUE_STD_DIFFERENCE, **norm_rvs_kwargs)
+    norm_sample_kwargs = dict(scale=POP_SD, size=SAMPLE_N)
+    c1 = norm.rvs(loc=CONTROL_MEAN, **norm_sample_kwargs)
+    t1 = norm.rvs(loc=CONTROL_MEAN+TRUE_STD_DIFFERENCE, **norm_sample_kwargs)
 
     std_diff_df = pd.DataFrame({'Control' : c1, 'Test': t1})
 
@@ -36,10 +35,9 @@ def test_unpaired_ci(reps=50, ci=95):
     CONTROL_MEAN = np.random.randint(1, 1000)
     POP_SD       = np.random.randint(1, 15)
     TRUE_DIFFERENCE = np.random.randint(-POP_SD*5, POP_SD*5)
-
-    norm_rvs_kwargs = dict(scale=POP_SD, size=n)
-    c1 = norm.rvs(loc=CONTROL_MEAN, **norm_rvs_kwargs)
-    t1 = norm.rvs(loc=CONTROL_MEAN+TRUE_DIFFERENCE, **norm_rvs_kwargs)
+    
+    c1 = norm.rvs(loc=CONTROL_MEAN, **norm_sample_kwargs)
+    t1 = norm.rvs(loc=CONTROL_MEAN+TRUE_DIFFERENCE, **norm_sample_kwargs)
 
     mean_df = pd.DataFrame({'Control' : c1, 'Test': t1})
 
@@ -49,11 +47,11 @@ def test_unpaired_ci(reps=50, ci=95):
     MEDIAN_DIFFERENCE = np.random.randint(-5, 5)
     A = np.random.randint(-7, 7)
 
-    skew_kwargs = dict(a=A, scale=5, size=N)
+    skew_kwargs = dict(a=A, scale=5, size=POPULATION_N)
     skewpop1 = skewnorm.rvs(**skew_kwargs, loc=100)
     skewpop2 = skewnorm.rvs(**skew_kwargs, loc=100+MEDIAN_DIFFERENCE)
 
-    sample_kwargs = dict(size=n, replace=False)
+    sample_kwargs = dict(replace=False, size=SAMPLE_N)
     skewsample1 = np.random.choice(skewpop1, **sample_kwargs)
     skewsample2 = np.random.choice(skewpop2, **sample_kwargs)
 
@@ -65,13 +63,11 @@ def test_unpaired_ci(reps=50, ci=95):
     CD_DIFFERENCE = np.random.randint(1, 10)
     SD = np.abs(CD_DIFFERENCE)
 
-    N = 10000
-    pop_kwargs = dict(scale=SD, size=N)
+    pop_kwargs = dict(scale=SD, size=POPULATION_N)
     pop1 = norm.rvs(loc=100, **pop_kwargs)
     pop2 = norm.rvs(loc=100+CD_DIFFERENCE, **pop_kwargs)
 
-    n = 20
-    sample_kwargs = dict(size=n, replace=False)
+    sample_kwargs = dict(replace=False, size=SAMPLE_N)
     sample1 = np.random.choice(pop1, **sample_kwargs)
     sample2 = np.random.choice(pop2, **sample_kwargs)
 
@@ -129,7 +125,8 @@ def test_unpaired_ci(reps=50, ci=95):
             error_count_cliffs_delta += 1
 
 
-    max_errors = reps * (100 - ci) / 100
+    max_errors = int(np.ceil(reps * (100 - ci) / 100))
+
     assert error_count_cohens_d     <= max_errors
     assert error_count_hedges_g     <= max_errors
     assert error_count_mean_diff    <= max_errors