Module ehfheatwaves.qtiler
Contains functions that calculate quantiles using various interpolation methods.
@author: Tammas Loughran
Expand source code
#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""
Contains functions that calculate quantiles using various interpolation methods.
@author: Tammas Loughran
"""
import math
import sys
import numpy as np
class InvalidPercentileError(Exception):
"""Exception to be raised if the provided percentile is invalid."""
def __init__(self, p):
print(p, " is not a valid fraction value.")
def quantile_R(
x:np.ndarray,
p:float,
itype:int=7,
fraction:bool=False,
rmnans:bool=False,
)->float:
"""Quantile function used in R.
Calculates quantiles in the same way as the quantile function in R.
There are nine interpolation methods. Type 1-3 are discrete methods and
4-9 are continuous methods. 4 is a basic linear interpolation. 5 is the
default method used in Matlab. 7 is the default used in R and Python.
See: Hyndman, R. J. and Fan, Y. (1996) Sample quantiles in statistical
packages, American Statistician, 50, 361-365.
## Arguments:
- x : Input value.
- p : Percentile.
- itype : interpolation method to use.
- fraction : boolean indicates if percentile is a fraction 0<p<1.0.
- rmnans : boolean indicates whether or not to remove nans.
## Returns:
- q : quantile at pth percentile
"""
# Convert the percentile to a fraction
if not fraction:
p = p/100.0
if (p>1) or (p<0): raise InvalidPercentileError(p)
# Flatten the array
x = x.flatten()
# remove nans if desired
if (np.isnan(np.min(x)))&(rmnans==True):
x = x[np.logical_not(np.isnan(x))]
elif (np.isnan(np.min(x)))&(rmnans==False):
raise Exception('You must not have nans in percentile calculation')
if (p==1): return max(x)
# Get number of samples
n = len(x)
# Sort
x = np.sort(x)
# Switch case functions for interpolation type
def one(x, p, n):
m = 0.0
j = int(np.floor(p*n + m))
g = n*p + m - j
if g==0:
gamma = 0.
else:
gamma = 1.
return (1. - gamma)*x[j-1] + gamma*x[j]
def two(x, p, n):
m = 0.0
j = int(np.floor(p*n + m))
g = n*p + m - j
if g==0:
gamma = 0.5
else:
gamma = 1.
return (1. - gamma)*x[j-1] + gamma*x[j]
def three(x, p, n):
m = -0.5
j = int(np.floor(p*n + m))
g = n*p + m - j
if (g==0)&(j%2==0):
gamma = 0.
else:
gamma = 1.
return (1. - gamma)*x[j-1] + gamma*x[j]
def four(x, p, n):
m = 0.0
j = int(np.floor(p*n + m))
gamma = n*p + m - j
return (1. - gamma)*x[j-1] + gamma*x[j]
def five(x, p, n):
m = 0.5
j = int(np.floor(p*n + m))
gamma = n*p + m - j
return (1. - gamma)*x[j-1] + gamma*x[j]
def six(x, p, n):
m = p
j = int(np.floor(p*n + m))
gamma = n*p + m - j
return (1. - gamma)*x[j-1] + gamma*x[j]
def seven(x, p, n):
m = 1.0 - p
j = int(np.floor(p*n + m))
gamma = n*p + m - j
return (1. - gamma)*x[j-1] + gamma*x[j]
def eight(x, p, n):
m = (p + 1)/3.
j = int(np.floor(p*n + m))
gamma = n*p + m - j
return (1. - gamma)*x[j-1] + gamma*x[j]
def nine(x, p, n):
m = p/4. + 3./8.
j = int(np.floor(p*n + m))
gamma = n*p + m - j
return (1. - gamma)*x[j-1] + gamma*x[j]
switcher = {
1: one,
2: two,
3: three,
4: four,
5: five,
6: six,
7: seven,
8: eight,
9: nine,
}
return switcher[itype](x, p, n)
def quantile_zhang(y:np.ndarray, p:float, fraction:bool=False, rmnans:bool=False)->float:
"""Caclulate the `p`th percentile value of an array `y` using the Zhang method.
The linear interpolation method used is outlined in Zhang et al., 2005,
Avoiding Inhomogeneity in Percentile-Based Indices of Temperature Extremes,
Journal of Climate, vol. 18.
The interpolation is given as:
$$ Q_p = (1 - f)*y_j + f*y_j + 1 $$
$$ p<1/(n + 1) $$ are set to the smallest value in `y`.
$$ p>n/(n + 1) $$ are set to the largest value in `y`.
## Arguments:
- y : Input array.
- p : pth percentile.
- fraction : boolean indicates if percentile is a fraction 0<p<1.
- rmnans : boolean indicates whether or not to remove nans
## Returns:
- Qp : qualtile of pth percentile
"""
# Convert the percentile to a fraction
if not fraction:
p = p/100.0
if (p>1) or (p<0): raise InvalidPercentileError(p)
# Flatten array
#y = y.flatten()
if (np.isnan(np.min(y)))&(rmnans==True):
y = y[np.logical_not(np.isnan(y))]
elif (np.isnan(np.min(y)))&(rmnans==False):
raise Exception('You must not have nans in percentile calculation')
n = y.shape[0]
# j is the largest integer no greater than (p*(n+1))
j = math.floor(p*(n + 1))
# f is the interpolation factor
f = p*(n + 1) - j
def intrpl(y, j):
if j>=n:
return y[-1]
elif j<1:
return y[0]
else:
yj = y[j-1]
yjp = y[j]
return (1 - f)*yj + f*yjp
if y.ndim==1:
y = np.sort(y)
Qp = intrpl(y, j)
else:
spacedim = np.array(y.shape)[1:].prod()
oldshape = y.shape[1:]
y = y.reshape(y.shape[0], spacedim)
Qp = np.ones(spacedim)*np.nan
for lat in range(spacedim):
array = np.sort(y[:,lat])
Qp[lat] = intrpl(array, j)
Qp = Qp.reshape(oldshape)
return Qp
def quantile_zhang_fast(x:np.ndarray, q:float, fraction:bool=False, rmnans:bool=False)->float:
"""A faster version of quantile_zhang. Thanks goes to Mathias Hauser for this.
## Arguments:
- y : Input array.
- p : pth percentile.
- fraction : boolean indicates if percentile is a fraction 0<p<1.
- rmnans : boolean indicates whether or not to remove nans
## Returns:
- Qp : qualtile of pth percentile
"""
n = x.shape[0]
# Convert the percentile to a fraction
if fraction:
q = q*100.0
if (q<0) or (q>100): raise InvalidPercentileError(q)
if q==100.0: return max(x)
elif q==0.0: return min(x)
# adjust p such that np.percentile returns the same as quantile_zhang
# numpy uses (n -1); quantile_zhang uses (n + 1)
q_adj = q*(n + 1.0)/(n - 1.0)
# numpy uses indices j and j+1; quantile_zhang uses j-1 and j
q_adj -= 1.0*100.0/(n - 1)
# ensure q_adj is in 0..100.
q_adj = np.clip(q_adj, 0.0, 100.0)
return np.percentile(x, q_adj, axis=0)
def quantile_climpact(y:np.ndarray, p:float, fraction:bool=False)->float:
"""Quantile function used by Climpact.
Copy of the c_quantile function used in climpact.
I have no idea where the interpolation is from.
"""
if not fraction:
p = p/100.
if (p>1) or (p<0): raise InvalidPercentileError(p)
y = np.array(y)
if y.ndim==1:
return qclimpact(y, p)
else:
spacedim = y.shape[1]
nodims = y.ndim
if nodims>2:
spacedim = np.array(y.shape)[1:].prod()
oldshape = y.shape[1:]
y = y.reshape(y.shape[0], spacedim)
Qp = np.ones(spacedim)*np.nan
for lat in range(spacedim):
if np.isnan(y[:,lat]).all(): continue
Qp[lat] = qclimpact(y[:,lat],p)
if nodims>2: Qp = Qp.reshape(oldshape)
return Qp
def qclimpact(y:np.ndarray, p:float)->float:
y = y[np.logical_not(np.isnan(y))]
n = y.shape[0]
a, b = 1.0/3.0, 1.0/3.0
nppm = a + p*(n + 1 - a - b) - 1
fuzz = 4*sys.float_info.epsilon
j = math.floor(nppm + fuzz)
if (abs(nppm - j)<=fuzz):
h = 0
else:
h = nppm - j
right_elem = max(0, min(int(j) + 1, n - 1))
left_elem = max(0, min(int(j), n - 1))
y = np.sort(y)
if h==1:
Qp = y[right_elem]
elif h==0:
Qp = y[left_elem]
else:
Qp = (1 - h)*y[left_elem] + h*y[right_elem]
return Qp
Functions
def qclimpact(y: numpy.ndarray, p: float) ‑> float
-
Expand source code
def qclimpact(y:np.ndarray, p:float)->float: y = y[np.logical_not(np.isnan(y))] n = y.shape[0] a, b = 1.0/3.0, 1.0/3.0 nppm = a + p*(n + 1 - a - b) - 1 fuzz = 4*sys.float_info.epsilon j = math.floor(nppm + fuzz) if (abs(nppm - j)<=fuzz): h = 0 else: h = nppm - j right_elem = max(0, min(int(j) + 1, n - 1)) left_elem = max(0, min(int(j), n - 1)) y = np.sort(y) if h==1: Qp = y[right_elem] elif h==0: Qp = y[left_elem] else: Qp = (1 - h)*y[left_elem] + h*y[right_elem] return Qp
def quantile_R(x: numpy.ndarray, p: float, itype: int = 7, fraction: bool = False, rmnans: bool = False) ‑> float
-
Quantile function used in R.
Calculates quantiles in the same way as the quantile function in R. There are nine interpolation methods. Type 1-3 are discrete methods and 4-9 are continuous methods. 4 is a basic linear interpolation. 5 is the default method used in Matlab. 7 is the default used in R and Python. See: Hyndman, R. J. and Fan, Y. (1996) Sample quantiles in statistical packages, American Statistician, 50, 361-365.
Arguments:
- x : Input value.
- p : Percentile.
- itype : interpolation method to use.
- fraction : boolean indicates if percentile is a fraction 0<p<1.0.
- rmnans : boolean indicates whether or not to remove nans.
Returns:
- q : quantile at pth percentile
Expand source code
def quantile_R( x:np.ndarray, p:float, itype:int=7, fraction:bool=False, rmnans:bool=False, )->float: """Quantile function used in R. Calculates quantiles in the same way as the quantile function in R. There are nine interpolation methods. Type 1-3 are discrete methods and 4-9 are continuous methods. 4 is a basic linear interpolation. 5 is the default method used in Matlab. 7 is the default used in R and Python. See: Hyndman, R. J. and Fan, Y. (1996) Sample quantiles in statistical packages, American Statistician, 50, 361-365. ## Arguments: - x : Input value. - p : Percentile. - itype : interpolation method to use. - fraction : boolean indicates if percentile is a fraction 0<p<1.0. - rmnans : boolean indicates whether or not to remove nans. ## Returns: - q : quantile at pth percentile """ # Convert the percentile to a fraction if not fraction: p = p/100.0 if (p>1) or (p<0): raise InvalidPercentileError(p) # Flatten the array x = x.flatten() # remove nans if desired if (np.isnan(np.min(x)))&(rmnans==True): x = x[np.logical_not(np.isnan(x))] elif (np.isnan(np.min(x)))&(rmnans==False): raise Exception('You must not have nans in percentile calculation') if (p==1): return max(x) # Get number of samples n = len(x) # Sort x = np.sort(x) # Switch case functions for interpolation type def one(x, p, n): m = 0.0 j = int(np.floor(p*n + m)) g = n*p + m - j if g==0: gamma = 0. else: gamma = 1. return (1. - gamma)*x[j-1] + gamma*x[j] def two(x, p, n): m = 0.0 j = int(np.floor(p*n + m)) g = n*p + m - j if g==0: gamma = 0.5 else: gamma = 1. return (1. - gamma)*x[j-1] + gamma*x[j] def three(x, p, n): m = -0.5 j = int(np.floor(p*n + m)) g = n*p + m - j if (g==0)&(j%2==0): gamma = 0. else: gamma = 1. return (1. - gamma)*x[j-1] + gamma*x[j] def four(x, p, n): m = 0.0 j = int(np.floor(p*n + m)) gamma = n*p + m - j return (1. - gamma)*x[j-1] + gamma*x[j] def five(x, p, n): m = 0.5 j = int(np.floor(p*n + m)) gamma = n*p + m - j return (1. - gamma)*x[j-1] + gamma*x[j] def six(x, p, n): m = p j = int(np.floor(p*n + m)) gamma = n*p + m - j return (1. - gamma)*x[j-1] + gamma*x[j] def seven(x, p, n): m = 1.0 - p j = int(np.floor(p*n + m)) gamma = n*p + m - j return (1. - gamma)*x[j-1] + gamma*x[j] def eight(x, p, n): m = (p + 1)/3. j = int(np.floor(p*n + m)) gamma = n*p + m - j return (1. - gamma)*x[j-1] + gamma*x[j] def nine(x, p, n): m = p/4. + 3./8. j = int(np.floor(p*n + m)) gamma = n*p + m - j return (1. - gamma)*x[j-1] + gamma*x[j] switcher = { 1: one, 2: two, 3: three, 4: four, 5: five, 6: six, 7: seven, 8: eight, 9: nine, } return switcher[itype](x, p, n)
def quantile_climpact(y: numpy.ndarray, p: float, fraction: bool = False) ‑> float
-
Quantile function used by Climpact.
Copy of the c_quantile function used in climpact. I have no idea where the interpolation is from.
Expand source code
def quantile_climpact(y:np.ndarray, p:float, fraction:bool=False)->float: """Quantile function used by Climpact. Copy of the c_quantile function used in climpact. I have no idea where the interpolation is from. """ if not fraction: p = p/100. if (p>1) or (p<0): raise InvalidPercentileError(p) y = np.array(y) if y.ndim==1: return qclimpact(y, p) else: spacedim = y.shape[1] nodims = y.ndim if nodims>2: spacedim = np.array(y.shape)[1:].prod() oldshape = y.shape[1:] y = y.reshape(y.shape[0], spacedim) Qp = np.ones(spacedim)*np.nan for lat in range(spacedim): if np.isnan(y[:,lat]).all(): continue Qp[lat] = qclimpact(y[:,lat],p) if nodims>2: Qp = Qp.reshape(oldshape) return Qp
def quantile_zhang(y: numpy.ndarray, p: float, fraction: bool = False, rmnans: bool = False) ‑> float
-
Caclulate the
p
th percentile value of an arrayy
using the Zhang method.The linear interpolation method used is outlined in Zhang et al., 2005, Avoiding Inhomogeneity in Percentile-Based Indices of Temperature Extremes, Journal of Climate, vol. 18.
The interpolation is given as:
Q_p = (1 - f)*y_j + f*y_j + 1
p<1/(n + 1) are set to the smallest value in
y
.p>n/(n + 1) are set to the largest value in
y
.Arguments:
- y : Input array.
- p : pth percentile.
- fraction : boolean indicates if percentile is a fraction 0<p<1.
- rmnans : boolean indicates whether or not to remove nans
Returns:
- Qp : qualtile of pth percentile
Expand source code
def quantile_zhang(y:np.ndarray, p:float, fraction:bool=False, rmnans:bool=False)->float: """Caclulate the `p`th percentile value of an array `y` using the Zhang method. The linear interpolation method used is outlined in Zhang et al., 2005, Avoiding Inhomogeneity in Percentile-Based Indices of Temperature Extremes, Journal of Climate, vol. 18. The interpolation is given as: $$ Q_p = (1 - f)*y_j + f*y_j + 1 $$ $$ p<1/(n + 1) $$ are set to the smallest value in `y`. $$ p>n/(n + 1) $$ are set to the largest value in `y`. ## Arguments: - y : Input array. - p : pth percentile. - fraction : boolean indicates if percentile is a fraction 0<p<1. - rmnans : boolean indicates whether or not to remove nans ## Returns: - Qp : qualtile of pth percentile """ # Convert the percentile to a fraction if not fraction: p = p/100.0 if (p>1) or (p<0): raise InvalidPercentileError(p) # Flatten array #y = y.flatten() if (np.isnan(np.min(y)))&(rmnans==True): y = y[np.logical_not(np.isnan(y))] elif (np.isnan(np.min(y)))&(rmnans==False): raise Exception('You must not have nans in percentile calculation') n = y.shape[0] # j is the largest integer no greater than (p*(n+1)) j = math.floor(p*(n + 1)) # f is the interpolation factor f = p*(n + 1) - j def intrpl(y, j): if j>=n: return y[-1] elif j<1: return y[0] else: yj = y[j-1] yjp = y[j] return (1 - f)*yj + f*yjp if y.ndim==1: y = np.sort(y) Qp = intrpl(y, j) else: spacedim = np.array(y.shape)[1:].prod() oldshape = y.shape[1:] y = y.reshape(y.shape[0], spacedim) Qp = np.ones(spacedim)*np.nan for lat in range(spacedim): array = np.sort(y[:,lat]) Qp[lat] = intrpl(array, j) Qp = Qp.reshape(oldshape) return Qp
def quantile_zhang_fast(x: numpy.ndarray, q: float, fraction: bool = False, rmnans: bool = False) ‑> float
-
A faster version of quantile_zhang. Thanks goes to Mathias Hauser for this.
Arguments:
- y : Input array.
- p : pth percentile.
- fraction : boolean indicates if percentile is a fraction 0<p<1.
- rmnans : boolean indicates whether or not to remove nans
Returns:
- Qp : qualtile of pth percentile
Expand source code
def quantile_zhang_fast(x:np.ndarray, q:float, fraction:bool=False, rmnans:bool=False)->float: """A faster version of quantile_zhang. Thanks goes to Mathias Hauser for this. ## Arguments: - y : Input array. - p : pth percentile. - fraction : boolean indicates if percentile is a fraction 0<p<1. - rmnans : boolean indicates whether or not to remove nans ## Returns: - Qp : qualtile of pth percentile """ n = x.shape[0] # Convert the percentile to a fraction if fraction: q = q*100.0 if (q<0) or (q>100): raise InvalidPercentileError(q) if q==100.0: return max(x) elif q==0.0: return min(x) # adjust p such that np.percentile returns the same as quantile_zhang # numpy uses (n -1); quantile_zhang uses (n + 1) q_adj = q*(n + 1.0)/(n - 1.0) # numpy uses indices j and j+1; quantile_zhang uses j-1 and j q_adj -= 1.0*100.0/(n - 1) # ensure q_adj is in 0..100. q_adj = np.clip(q_adj, 0.0, 100.0) return np.percentile(x, q_adj, axis=0)
Classes
class InvalidPercentileError (p)
-
Exception to be raised if the provided percentile is invalid.
Expand source code
class InvalidPercentileError(Exception): """Exception to be raised if the provided percentile is invalid.""" def __init__(self, p): print(p, " is not a valid fraction value.")
Ancestors
- builtins.Exception
- builtins.BaseException