Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dabest/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@
from ._stats_tools import effsize as effsize
from ._classes import TwoGroupsEffectSize

__version__ = "0.2.2"
__version__ = "0.2.3"
20 changes: 16 additions & 4 deletions dabest/_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,15 +124,27 @@ def __init__(self, data, idx, x, y, paired, id_col, ci, resamples,
value_vars=all_plot_groups,
value_name=self.__yvar,
var_name=self.__xvar)

# Lines 131 to 140 added in v0.2.3.
# Fixes a bug that jammed up when the xvar column was already
# a pandas Categorical. Now we check for this and act appropriately.
if isinstance(plot_data[self.__xvar].dtype,
pd.CategoricalDtype) is True:
plot_data[self.__xvar].cat.remove_unused_categories(inplace=True)
plot_data[self.__xvar].cat.reorder_categories(all_plot_groups,
ordered=True,
inplace=True)
else:
plot_data.loc[:, self.__xvar] = pd.Categorical(plot_data[self.__xvar],
categories=all_plot_groups,
ordered=True)

plot_data.loc[:, self.__xvar] = pd.Categorical(plot_data[self.__xvar],
categories=all_plot_groups,
ordered=True)

self.__plot_data = plot_data

self.__all_plot_groups = all_plot_groups


# Sanity check that all idxs are paired, if so desired.
if paired is True:
if id_col is None:
Expand Down
22 changes: 11 additions & 11 deletions dabest/tests/test_02_plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,13 @@ def test_cummings_unpaired():

rand_swarm_ylim = (np.random.uniform(base_mean-10, base_mean, 1),
np.random.uniform(base_mean, base_mean+10, 1))
rand_contrast_ylim = (-base_mean/3, base_mean/3)

if base_mean == 0:
# Have to set the contrast ylim, because the way I dynamically generate
# the contrast ylims will flunk out with base_mean = 0.
rand_contrast_ylim = (-0.5, 0.5)
else:
rand_contrast_ylim = (-base_mean/3, base_mean/3)

f1 = multi_2group_unpaired.mean_diff.plot(swarm_ylim=rand_swarm_ylim,
contrast_ylim=rand_contrast_ylim,
Expand All @@ -89,18 +95,12 @@ def test_cummings_unpaired():
rawswarm_axes = f1.axes[0]
contrast_axes = f1.axes[1]

# Check ylims match the desired ones.
# Check swarm ylims match the desired ones.
assert rawswarm_axes.get_ylim()[0] == pytest.approx(rand_swarm_ylim[0])
assert rawswarm_axes.get_ylim()[1] == pytest.approx(rand_swarm_ylim[1])

# This needs to be rounded, because if the base mean is 0,
# the ylim might be -0.001, which will not match 0.
if base_mean == 0:
ylim_low = np.round(contrast_axes.get_ylim()[0])
else:
ylim_low = contrast_axes.get_ylim()[0]
assert ylim_low == pytest.approx(rand_contrast_ylim[0])


# Check contrast ylims match the desired ones.
assert contrast_axes.get_ylim()[0] == pytest.approx(rand_contrast_ylim[0])
assert contrast_axes.get_ylim()[1] == pytest.approx(rand_contrast_ylim[1])

# Check xtick labels.
Expand Down
39 changes: 18 additions & 21 deletions dabest/tests/test_03_confint.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,20 @@



def test_unpaired_ci(reps=50, ci=95):
n = 10
N = 10000



# Create data for hedges g and cohens d
def test_unpaired_ci(reps=40, ci=95):

POPULATION_N = 10000
SAMPLE_N = 10

# Create data for hedges g and cohens d.
CONTROL_MEAN = np.random.randint(1, 1000)
POP_SD = np.random.randint(1, 15)
POP_D = np.round(np.random.uniform(-2, 2, 1)[0], 2)

TRUE_STD_DIFFERENCE = CONTROL_MEAN + (POP_D * POP_SD)
norm_rvs_kwargs = dict(scale=POP_SD, size=n)
c1 = norm.rvs(loc=CONTROL_MEAN, **norm_rvs_kwargs)
t1 = norm.rvs(loc=CONTROL_MEAN+TRUE_STD_DIFFERENCE, **norm_rvs_kwargs)
norm_sample_kwargs = dict(scale=POP_SD, size=SAMPLE_N)
c1 = norm.rvs(loc=CONTROL_MEAN, **norm_sample_kwargs)
t1 = norm.rvs(loc=CONTROL_MEAN+TRUE_STD_DIFFERENCE, **norm_sample_kwargs)

std_diff_df = pd.DataFrame({'Control' : c1, 'Test': t1})

Expand All @@ -36,10 +35,9 @@ def test_unpaired_ci(reps=50, ci=95):
CONTROL_MEAN = np.random.randint(1, 1000)
POP_SD = np.random.randint(1, 15)
TRUE_DIFFERENCE = np.random.randint(-POP_SD*5, POP_SD*5)

norm_rvs_kwargs = dict(scale=POP_SD, size=n)
c1 = norm.rvs(loc=CONTROL_MEAN, **norm_rvs_kwargs)
t1 = norm.rvs(loc=CONTROL_MEAN+TRUE_DIFFERENCE, **norm_rvs_kwargs)

c1 = norm.rvs(loc=CONTROL_MEAN, **norm_sample_kwargs)
t1 = norm.rvs(loc=CONTROL_MEAN+TRUE_DIFFERENCE, **norm_sample_kwargs)

mean_df = pd.DataFrame({'Control' : c1, 'Test': t1})

Expand All @@ -49,11 +47,11 @@ def test_unpaired_ci(reps=50, ci=95):
MEDIAN_DIFFERENCE = np.random.randint(-5, 5)
A = np.random.randint(-7, 7)

skew_kwargs = dict(a=A, scale=5, size=N)
skew_kwargs = dict(a=A, scale=5, size=POPULATION_N)
skewpop1 = skewnorm.rvs(**skew_kwargs, loc=100)
skewpop2 = skewnorm.rvs(**skew_kwargs, loc=100+MEDIAN_DIFFERENCE)

sample_kwargs = dict(size=n, replace=False)
sample_kwargs = dict(replace=False, size=SAMPLE_N)
skewsample1 = np.random.choice(skewpop1, **sample_kwargs)
skewsample2 = np.random.choice(skewpop2, **sample_kwargs)

Expand All @@ -65,13 +63,11 @@ def test_unpaired_ci(reps=50, ci=95):
CD_DIFFERENCE = np.random.randint(1, 10)
SD = np.abs(CD_DIFFERENCE)

N = 10000
pop_kwargs = dict(scale=SD, size=N)
pop_kwargs = dict(scale=SD, size=POPULATION_N)
pop1 = norm.rvs(loc=100, **pop_kwargs)
pop2 = norm.rvs(loc=100+CD_DIFFERENCE, **pop_kwargs)

n = 20
sample_kwargs = dict(size=n, replace=False)
sample_kwargs = dict(replace=False, size=SAMPLE_N)
sample1 = np.random.choice(pop1, **sample_kwargs)
sample2 = np.random.choice(pop2, **sample_kwargs)

Expand Down Expand Up @@ -129,7 +125,8 @@ def test_unpaired_ci(reps=50, ci=95):
error_count_cliffs_delta += 1


max_errors = reps * (100 - ci) / 100
max_errors = int(np.ceil(reps * (100 - ci) / 100))

assert error_count_cohens_d <= max_errors
assert error_count_hedges_g <= max_errors
assert error_count_mean_diff <= max_errors
Expand Down
31 changes: 8 additions & 23 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ DABEST
-----------------------------------------------
Data Analysis with Bootstrap-coupled ESTimation
-----------------------------------------------
*version 0.2.2*
*version 0.2.3*

Analyze your data with estimation statistics!
---------------------------------------------
Expand All @@ -19,31 +19,16 @@ Analyze your data with estimation statistics!

News
----
April 2019:
- v0.2.2 released. This is a minor bugfix that addressed an issue for an edge case where the mean or median difference was exactly zero. See the :doc:`release-notes`.
May 2019:
- v0.2.3 released. This release fixes a bug where x-columns that were already pandas Categorical objects were not handled properly. See the :doc:`release-notes`.

March 2019:
- v0.2.1 released. This is a minor bugfix that addressed an issue in gapped line plotting. See the :doc:`release-notes`.
April 2019:
- v0.2.2 released. This is a minor bugfix that addressed an issue for an edge case where the mean or median difference was exactly zero.

- Release of v0.2.0. This is a major update that makes several breaking changes to the API.
March 2019:
- v0.2.1 released. This is a minor bugfix that addressed an issue in gapped line plotting.
- v0.2.0 released. This is a major update that makes several breaking changes to the API.

January 2019:
- Release of v0.1.7. Added `cumming_vertical_spacing` option.

October 2018:
- Release of v0.1.6. Added more keywords for control of plot elements.

July 2018:
- Release of v0.1.5. *bugfix for setup and package management*
- Release of v0.1.4.

June 2018:
- Release of v0.1.3. Also added a short tutorial for dabest in R.

December 2017:
- We have made a `webapp <https://www.estimationstats.com>`_ that produces Gardner-Altman and Cumming plots!


Contents
--------

Expand Down
6 changes: 6 additions & 0 deletions docs/source/release-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@
Release Notes
=============

v0.2.3
------

This release fixes a bug where a supplied ``x`` column that was a :py:mod:`pandas` :py:class:`Categorical` object was not handled correctly when the ``idx`` did not include all of the original categories.


v0.2.2
------

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def check_dependencies():
author_email='[email protected]',
maintainer='Joses W. Ho',
maintainer_email='[email protected]',
version='0.2.2',
version='0.2.3',
description=DESCRIPTION,
long_description=LONG_DESCRIPTION,
packages=find_packages(),
Expand Down