pymc/gp/gp.py (89 changes: 59 additions & 30 deletions)
@@ -37,6 +37,26 @@
__all__ = ["Latent", "Marginal", "TP", "MarginalApprox", "LatentKron", "MarginalKron"]


_noise_deprecation_warning = (
"The 'noise' parameter has been been changed to 'sigma' "
"in order to standardize the GP API and will be "
"deprecated in future releases."
)


def _handle_sigma_noise_parameters(sigma, noise):
"""Helper function for transition of 'noise' parameter to be named 'sigma'."""

if (sigma is None and noise is None) or (sigma is not None and noise is not None):
raise ValueError("'sigma' argument must be specified.")
Comment on lines +50 to +51:

Member: Hi, just want to understand the logic: if sigma is not None and noise is not None, should we raise this error, as we have already specified sigma in this case?

Member: Maybe the logic here could be:

    if sigma is not None: good
    else:
        if noise is not None: raise warning
        else: raise error

OR

    if sigma is None:
        if noise is None: raise error
        else: raise warning

Member: If both sigma and noise are specified, it is not really wrong, right? :) The user just provides more inputs than needed.

Contributor Author: I hear what you are saying, but I thought it might be fine to nudge them toward the new input. It is not likely for both to be used, but being explicit about the intended behavior seemed better than silently throwing away or reconciling the two parameters.

Contributor: If the user passes in sigma=0.1 and noise=0.2, it's not really possible from the outside to know which value is going to be used in the model. The user must be a bit confused then, and an error message would help them out. Both your alternatives for doing the logic make sense to me too though @danhphan, but I think it also makes sense how it is.

Contributor Author: Well said @bwengals, that's what I had in mind. I was trying to make it easier from the developer's point of view.

Member: Hi, thanks @wd60622 and @bwengals for the clarification. Totally agree with "nudge users toward the new sigma" :)


if sigma is None:
warnings.warn(_noise_deprecation_warning, FutureWarning)
return noise

return sigma
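
For reference, a minimal standalone sketch of the three input combinations this helper handles; the definition is reproduced from the diff above so the snippet runs on its own:

```python
import warnings

_noise_deprecation_warning = (
    "The 'noise' parameter has been changed to 'sigma' "
    "in order to standardize the GP API and will be "
    "deprecated in future releases."
)

def _handle_sigma_noise_parameters(sigma, noise):
    """Mirror of the helper above, for demonstration only."""
    if (sigma is None and noise is None) or (sigma is not None and noise is not None):
        raise ValueError("'sigma' argument must be specified.")
    if sigma is None:
        warnings.warn(_noise_deprecation_warning, FutureWarning)
        return noise
    return sigma

print(_handle_sigma_noise_parameters(sigma=0.1, noise=None))  # 0.1, no warning

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    print(_handle_sigma_noise_parameters(sigma=None, noise=0.1))  # 0.1
    assert issubclass(caught[-1].category, FutureWarning)  # old name warns

try:
    _handle_sigma_noise_parameters(sigma=0.1, noise=0.2)  # both given -> error
except ValueError as err:
    print(err)
```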


class Base:
R"""
Base class.
@@ -218,7 +238,7 @@ def conditional(self, name, Xnew, given=None, jitter=JITTER_DEFAULT, **kwargs):
Xnew: array-like
Function input values.
given: dict
Can optionally take as key value pairs: `X`, `y`, `noise`,
Can optionally take as key value pairs: `X`, `y`,
Member: Do we need to replace noise with sigma here?

Contributor Author: This was for the Latent class, which doesn't use noise. I believe this was a mistake to begin with. Let me know if I am wrong.

Contributor: Yes @wd60622, you are right! Thanks to you both for catching this; I didn't notice it.

Member: Great, thanks for pointing it out :)

and `gp`. See the section in the documentation on additive GP
models in PyMC for more information.
jitter: scalar
Expand Down Expand Up @@ -359,7 +379,7 @@ def conditional(self, name, Xnew, jitter=JITTER_DEFAULT, **kwargs):
return pm.MvStudentT(name, nu=nu2, mu=mu, cov=cov, **kwargs)


@conditioned_vars(["X", "y", "noise"])
@conditioned_vars(["X", "y", "sigma"])
class Marginal(Base):
R"""
Marginal Gaussian process.
Expand Down Expand Up @@ -393,7 +413,7 @@ class Marginal(Base):

# Place a GP prior over the function f.
sigma = pm.HalfCauchy("sigma", beta=3)
y_ = gp.marginal_likelihood("y", X=X, y=y, noise=sigma)
y_ = gp.marginal_likelihood("y", X=X, y=y, sigma=sigma)

...

@@ -405,15 +425,15 @@
fcond = gp.conditional("fcond", Xnew=Xnew)
"""

def _build_marginal_likelihood(self, X, noise, jitter):
def _build_marginal_likelihood(self, X, noise_func, jitter):
mu = self.mean_func(X)
Kxx = self.cov_func(X)
Knx = noise(X)
Knx = noise_func(X)
cov = Kxx + Knx
return mu, stabilize(cov, jitter)

def marginal_likelihood(
self, name, X, y, noise, jitter=JITTER_DEFAULT, is_observed=True, **kwargs
self, name, X, y, sigma=None, noise=None, jitter=JITTER_DEFAULT, is_observed=True, **kwargs
):
R"""
Returns the marginal likelihood distribution, given the input
@@ -435,23 +455,25 @@ def marginal_likelihood(
y: array-like
Data that is the sum of the function with the GP prior and Gaussian
noise. Must have shape `(n, )`.
noise: scalar, Variable, or Covariance
sigma: scalar, Variable, or Covariance
Standard deviation of the Gaussian noise. Can also be a Covariance for
non-white noise.
noise: scalar, Variable, or Covariance
Previous parameterization of `sigma`.
jitter: scalar
A small correction added to the diagonal of positive semi-definite
covariance matrices to ensure numerical stability.
**kwargs
Extra keyword arguments that are passed to `MvNormal` distribution
constructor.
"""
sigma = _handle_sigma_noise_parameters(sigma=sigma, noise=noise)

if not isinstance(noise, Covariance):
noise = pm.gp.cov.WhiteNoise(noise)
mu, cov = self._build_marginal_likelihood(X, noise, jitter)
noise_func = sigma if isinstance(sigma, Covariance) else pm.gp.cov.WhiteNoise(sigma)
mu, cov = self._build_marginal_likelihood(X=X, noise_func=noise_func, jitter=jitter)
self.X = X
self.y = y
self.noise = noise
self.sigma = noise_func
if is_observed:
return pm.MvNormal(name, mu=mu, cov=cov, observed=y, **kwargs)
else:
@@ -472,20 +494,24 @@ def _get_given_vals(self, given):
else:
cov_total = self.cov_func
mean_total = self.mean_func
if all(val in given for val in ["X", "y", "noise"]):
X, y, noise = given["X"], given["y"], given["noise"]
if not isinstance(noise, Covariance):
noise = pm.gp.cov.WhiteNoise(noise)

if "noise" in given:
warnings.warn(_noise_deprecation_warning, FutureWarning)
given["sigma"] = given["noise"]

if all(val in given for val in ["X", "y", "sigma"]):
X, y, sigma = given["X"], given["y"], given["sigma"]
noise_func = sigma if isinstance(sigma, Covariance) else pm.gp.cov.WhiteNoise(sigma)
else:
X, y, noise = self.X, self.y, self.noise
return X, y, noise, cov_total, mean_total
X, y, noise_func = self.X, self.y, self.sigma
return X, y, noise_func, cov_total, mean_total
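
A usage sketch of the new keyword on both entry points, assuming synthetic `X`, `y`, and `Xnew` arrays (not from the PR itself): `sigma` passed directly to `marginal_likelihood`, and passed through `given` for an explicit conditional.

```python
import numpy as np
import pymc as pm

X = np.linspace(0, 10, 50)[:, None]
y = np.random.normal(np.sin(X).ravel(), 0.1)
Xnew = np.linspace(0, 12, 20)[:, None]

with pm.Model():
    gp = pm.gp.Marginal(cov_func=pm.gp.cov.ExpQuad(1, ls=1.0))
    # New, standardized keyword:
    y_ = gp.marginal_likelihood("y", X=X, y=y, sigma=0.1)
    # `given` accepts "sigma" too; "noise" still works but emits a FutureWarning.
    # pred_noise=True would add observation noise to the predictive covariance.
    fcond = gp.conditional(
        "fcond", Xnew=Xnew, given={"X": X, "y": y, "sigma": 0.1, "gp": gp}
    )
    # Deprecated spelling, warns:
    # y_old = gp.marginal_likelihood("y_old", X=X, y=y, noise=0.1)
```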

def _build_conditional(
self, Xnew, pred_noise, diag, X, y, noise, cov_total, mean_total, jitter
self, Xnew, pred_noise, diag, X, y, noise_func, cov_total, mean_total, jitter
):
Kxx = cov_total(X)
Kxs = self.cov_func(X, Xnew)
Knx = noise(X)
Knx = noise_func(X)
rxx = y - mean_total(X)
L = cholesky(stabilize(Kxx, jitter) + Knx)
A = solve_lower(L, Kxs)
@@ -495,13 +521,13 @@ def _build_conditional(
Kss = self.cov_func(Xnew, diag=True)
var = Kss - at.sum(at.square(A), 0)
if pred_noise:
var += noise(Xnew, diag=True)
var += noise_func(Xnew, diag=True)
return mu, var
else:
Kss = self.cov_func(Xnew)
cov = Kss - at.dot(at.transpose(A), A)
if pred_noise:
cov += noise(Xnew)
cov += noise_func(Xnew)
return mu, cov if pred_noise else stabilize(cov, jitter)

def conditional(
@@ -531,7 +557,7 @@ def conditional(
Whether or not observation noise is included in the conditional.
Default is `False`.
given: dict
Can optionally take as key value pairs: `X`, `y`, `noise`,
Can optionally take as key value pairs: `X`, `y`, `sigma`,
and `gp`. See the section in the documentation on additive GP
models in PyMC for more information.
jitter: scalar
@@ -720,7 +746,9 @@ def _build_marginal_likelihood_loglik(self, y, X, Xu, sigma, jitter):
quadratic = 0.5 * (at.dot(r, r_l) - at.dot(c, c))
return -1.0 * (constant + logdet + quadratic + trace)

def marginal_likelihood(self, name, X, Xu, y, noise=None, jitter=JITTER_DEFAULT, **kwargs):
def marginal_likelihood(
self, name, X, Xu, y, sigma=None, noise=None, jitter=JITTER_DEFAULT, **kwargs
):
R"""
Returns the approximate marginal likelihood distribution, given the input
locations `X`, inducing point locations `Xu`, data `y`, and white noise
@@ -738,8 +766,10 @@ def marginal_likelihood(self, name, X, Xu, y, noise=None, jitter=JITTER_DEFAULT,
y: array-like
Data that is the sum of the function with the GP prior and Gaussian
noise. Must have shape `(n, )`.
noise: scalar, Variable
sigma: scalar, Variable
Standard deviation of the Gaussian noise.
noise: scalar, Variable
Previous parameterization of `sigma`.
jitter: scalar
A small correction added to the diagonal of positive semi-definite
covariance matrices to ensure numerical stability.
@@ -752,12 +782,11 @@ def marginal_likelihood(self, name, X, Xu, y, noise=None, jitter=JITTER_DEFAULT,
self.Xu = Xu
self.y = y

if noise is None:
raise ValueError("noise argument must be specified")
else:
self.sigma = noise
self.sigma = _handle_sigma_noise_parameters(sigma=sigma, noise=noise)

approx_loglik = self._build_marginal_likelihood_loglik(y, X, Xu, noise, jitter)
approx_loglik = self._build_marginal_likelihood_loglik(
y=self.y, X=self.X, Xu=self.Xu, sigma=self.sigma, jitter=jitter
)
pm.Potential(f"marginalapprox_loglik_{name}", approx_loglik, **kwargs)
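
The corresponding sketch for the sparse approximation, with `Xu` assumed as inducing point locations on the same synthetic data as above; note that `marginal_likelihood` here adds a Potential rather than returning an observed variable:

```python
Xu = np.linspace(0, 10, 10)[:, None]  # assumed inducing point locations

with pm.Model():
    gp = pm.gp.MarginalApprox(cov_func=pm.gp.cov.ExpQuad(1, ls=1.0), approx="VFE")
    # New keyword; noise=0.1 would still work but raise a FutureWarning:
    gp.marginal_likelihood("lik", X=X, Xu=Xu, y=y, sigma=0.1)
```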

def _build_conditional(
@@ -828,7 +857,7 @@ def conditional(
Whether or not observation noise is included in the conditional.
Default is `False`.
given: dict
Can optionally take as key value pairs: `X`, `Xu`, `y`, `noise`,
Can optionally take as key value pairs: `X`, `Xu`, `y`, `sigma`,
and `gp`. See the section in the documentation on additive GP
models in PyMC for more information.
jitter: scalar
pymc/tests/gp/test_gp.py (88 changes: 80 additions & 8 deletions)
@@ -24,6 +24,78 @@
from pymc.math import cartesian


class TestSigmaParams:
def setup_method(self):
"""Common setup."""
self.x = np.linspace(-5, 5, 30)[:, None]
self.xu = np.linspace(-5, 5, 10)[:, None]
self.y = np.random.normal(0.25 * self.x, 0.1)

with pm.Model() as self.model:
cov_func = pm.gp.cov.Linear(1, c=0.0)
c = pm.Normal("c", mu=20.0, sigma=100.0)
mean_func = pm.gp.mean.Constant(c)
self.gp = self.gp_implementation(mean_func=mean_func, cov_func=cov_func)
self.sigma = pm.HalfNormal("sigma", sigma=100)


class TestMarginalSigmaParams(TestSigmaParams):
R"""Tests for the deprecation warnings and raising ValueError."""

gp_implementation = pm.gp.Marginal

def test_catch_warnings(self):
"""Warning from using the old noise parameter."""
with self.model:
with pytest.warns(FutureWarning):
self.gp.marginal_likelihood("lik_noise", X=self.x, y=self.y, noise=self.sigma)

with pytest.warns(FutureWarning):
self.gp.conditional(
"cond_noise",
Xnew=self.x,
given={
"noise": self.sigma,
},
)

def test_raise_value_error(self):
"""Either both or neither parameter is specified."""
with self.model:
with pytest.raises(ValueError):
self.gp.marginal_likelihood(
"like_both", X=self.x, y=self.y, noise=self.sigma, sigma=self.sigma
)

with pytest.raises(ValueError):
self.gp.marginal_likelihood("like_neither", X=self.x, y=self.y)


class TestMarginalApproxSigmaParams(TestSigmaParams):
R"""Tests for the deprecation warnings and raising ValueError"""

gp_implementation = pm.gp.MarginalApprox

def test_catch_warnings(self):
"""Warning from using the old noise parameter."""
with self.model:
with pytest.warns(FutureWarning):
self.gp.marginal_likelihood(
"lik_noise", X=self.x, Xu=self.xu, y=self.y, noise=self.sigma
)

def test_raise_value_error(self):
"""Either both or neither parameter is specified."""
with self.model:
with pytest.raises(ValueError):
self.gp.marginal_likelihood(
"like_both", X=self.x, Xu=self.xu, y=self.y, noise=self.sigma, sigma=self.sigma
)

with pytest.raises(ValueError):
self.gp.marginal_likelihood("like_neither", X=self.x, Xu=self.xu, y=self.y)


class TestMarginalVsMarginalApprox:
R"""
Compare test fits of models Marginal and MarginalApprox.
@@ -113,20 +185,20 @@ def testAdditiveMarginal(self):
gp3 = pm.gp.Marginal(mean_func=self.means[2], cov_func=self.covs[2])

gpsum = gp1 + gp2 + gp3
fsum = gpsum.marginal_likelihood("f", self.X, self.y, noise=self.noise)
fsum = gpsum.marginal_likelihood("f", self.X, self.y, sigma=self.noise)
model1_logp = model1.compile_logp()({})

with pm.Model() as model2:
gptot = pm.gp.Marginal(
mean_func=reduce(add, self.means), cov_func=reduce(add, self.covs)
)
fsum = gptot.marginal_likelihood("f", self.X, self.y, noise=self.noise)
fsum = gptot.marginal_likelihood("f", self.X, self.y, sigma=self.noise)
model2_logp = model2.compile_logp()({})
npt.assert_allclose(model1_logp, model2_logp, atol=0, rtol=1e-2)

with model1:
fp1 = gpsum.conditional(
"fp1", self.Xnew, given={"X": self.X, "y": self.y, "noise": self.noise, "gp": gpsum}
"fp1", self.Xnew, given={"X": self.X, "y": self.y, "sigma": self.noise, "gp": gpsum}
)
with model2:
fp2 = gptot.conditional("fp2", self.Xnew)
@@ -152,14 +224,14 @@ def testAdditiveMarginalApprox(self, approx):
)

gpsum = gp1 + gp2 + gp3
fsum = gpsum.marginal_likelihood("f", self.X, Xu, self.y, noise=sigma)
fsum = gpsum.marginal_likelihood("f", self.X, Xu, self.y, sigma=sigma)
model1_logp = model1.compile_logp()({})

with pm.Model() as model2:
gptot = pm.gp.MarginalApprox(
mean_func=reduce(add, self.means), cov_func=reduce(add, self.covs), approx=approx
)
fsum = gptot.marginal_likelihood("f", self.X, Xu, self.y, noise=sigma)
fsum = gptot.marginal_likelihood("f", self.X, Xu, self.y, sigma=sigma)
model2_logp = model2.compile_logp()({})
npt.assert_allclose(model1_logp, model2_logp, atol=0, rtol=1e-2)

@@ -233,7 +305,7 @@ def testAdditiveTypeRaises2(self):

class TestMarginalVsLatent:
R"""
Compare the logp of models Marginal, noise=0 and Latent.
Compare the logp of models Marginal, sigma=0 and Latent.
"""

def setup_method(self):
@@ -245,7 +317,7 @@ def setup_method(self):
cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3])
mean_func = pm.gp.mean.Constant(0.5)
gp = pm.gp.Marginal(mean_func=mean_func, cov_func=cov_func)
f = gp.marginal_likelihood("f", X, y, noise=0.0)
f = gp.marginal_likelihood("f", X, y, sigma=0.0)
p = gp.conditional("p", Xnew)
self.logp = model.compile_logp()({"p": pnew})
self.X = X
@@ -422,7 +494,7 @@ def setup_method(self):
cov_func = pm.gp.cov.Kron(self.cov_funcs)
self.mean = pm.gp.mean.Constant(0.5)
gp = pm.gp.Marginal(mean_func=self.mean, cov_func=cov_func)
f = gp.marginal_likelihood("f", self.X, self.y, noise=self.sigma)
f = gp.marginal_likelihood("f", self.X, self.y, sigma=self.sigma)
p = gp.conditional("p", self.Xnew)
self.mu, self.cov = gp.predict(self.Xnew)
self.logp = model.compile_logp()({"p": self.pnew})