Skip to content

Add move_quantile function #418

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 34 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
524afbf
Add moving quantile
andrii-riazanov Aug 26, 2021
4f98aa4
initial changes from median to quantile
andrii-riazanov Sep 14, 2022
e470fa2
initial changes from median to quantile
andrii-riazanov Sep 14, 2022
1c97aac
Merge branch 'quantile' of https://github.com/andrii-riazanov/bottlen…
andrii-riazanov Sep 15, 2022
10b8824
Change all move_median to move_quantile
andrii-riazanov Sep 15, 2022
c718a18
Add move_median as move_quantile without q argument at C level
andrii-riazanov Sep 15, 2022
009c835
Fix bug with addressing quantile before assignment
andrii-riazanov Sep 16, 2022
832035a
Initial tests and some fixes
andrii-riazanov Sep 17, 2022
7af7168
Finish extensive testing of move_quantile
andrii-riazanov Sep 17, 2022
42eddad
Ignore warnings from numpy about infs and NaNs
andrii-riazanov Sep 17, 2022
c2a2ae3
move_quantile(q=0) vs move_min benching
andrii-riazanov Sep 21, 2022
9f4c5dc
Revert "move_quantile(q=0) vs move_min benching"
andrii-riazanov Sep 21, 2022
9447697
Some changes (to amend later)
andrii-riazanov Sep 21, 2022
de181da
Bench move_quantile(q=0.5) with slow.move_median
andrii-riazanov Sep 21, 2022
02a0ce1
Bring old move_median, add move_quantile separately
andrii-riazanov Sep 21, 2022
cd49b4f
Finish bringing move_median back
andrii-riazanov Sep 21, 2022
fcaefde
Move move_quantile to C level fully
andrii-riazanov Sep 21, 2022
1bddedd
Refactor parse_args function in move_template
andrii-riazanov Sep 22, 2022
7a413cb
Add docs and comments
andrii-riazanov Sep 22, 2022
6851ed2
Update move_test.py
andrii-riazanov Sep 22, 2022
97ecd15
Actually add docs and comments
andrii-riazanov Sep 23, 2022
a7d5c22
Add comments, modify tests, change back gitignore
andrii-riazanov Sep 23, 2022
5a2bcac
Refactor parse_args again to actually work
andrii-riazanov Sep 23, 2022
c390863
Dial tests back a little to run reasonable time
andrii-riazanov Sep 23, 2022
6f1e5d4
Modify benches, restore old files
andrii-riazanov Sep 23, 2022
7f1c3af
Change `packaging` module to `pkg_resources`
andrii-riazanov Sep 26, 2022
4dadfe4
Update move_quantile benches in asv with q=0.25
andrii-riazanov Sep 27, 2022
9012e24
Make mm_handle and mq_handle the same
andrii-riazanov Sep 28, 2022
72677f8
Median and quantile with function pointers
andrii-riazanov Sep 28, 2022
2c892db
Support of iterable q argument for move_quantile
andrii-riazanov Oct 2, 2022
654ab14
Make tests work with positional q in move_quantile
andrii-riazanov Oct 2, 2022
04ce117
Merge branch 'master' into quantile
andrii-riazanov Jan 31, 2023
00cd119
Merge branch 'master' into quantile
andrii-riazanov Mar 19, 2023
4ec8945
Merge branch 'master' into quantile
andrii-riazanov Apr 11, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Modify benches, restore old files
  • Loading branch information
andrii-riazanov committed Sep 23, 2022
commit 6f1e5d4b66bccf3f6d0af9db8ae943e756b7d7d0
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ MANIFEST
.*.swp
*~
\#*#
bottleneck/src/bn_config.h
bottleneck/src/bn_config.h
2 changes: 2 additions & 0 deletions bottleneck/benchmark/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,8 @@ def getsetups(setup, shapes, nans, axes, dtype, order):
run = {}
run["name"] = func
run["statements"] = ["bn_func(a, w, 1, axis)", "sw_func(a, w, 1, axis)"]
if func == "move_quantile":
run["statements"] = ["bn_func(a, w, 1, axis, q=0.25)", "sw_func(a, w, 1, axis, q=0.25)"]
setup = """
from bottleneck.slow.move import %s as sw_func
from bottleneck import %s as bn_func
Expand Down
41 changes: 23 additions & 18 deletions bottleneck/benchmark/bench_detailed.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ def benchsuite(function, fraction_nan):
# avoid all-nan slice warnings from np.median and np.nanmedian
if "%s" == "median": from bottleneck.slow import median as sl_fn
if "%s" == "nanmedian": from bottleneck.slow import nanmedian as sl_fn
if "%s" == "move_quantile": from bottleneck.slow import move_median as sl_fn

from numpy import array, nan
from numpy.random import RandomState
Expand All @@ -95,12 +94,14 @@ def benchsuite(function, fraction_nan):
index = 0
elif function in ["rankdata", "nanrankdata"]:
index = 0
elif function in bn.get_functions("move", as_string=True):
elif function in bn.get_functions("move", as_string=True) and function != "move_quantile":
index = 1
elif function in ["partition", "argpartition", "push"]:
index = 2
elif function == "replace":
index = 3
elif function == "move_quantile":
index = 4
else:
raise ValueError("`function` (%s) not recognized" % function)

Expand All @@ -117,7 +118,7 @@ def benchsuite(function, fraction_nan):
run = {}
run["name"] = [f + signature, array]
run["statements"] = ["bn_fn" + signature, "sl_fn" + signature]
run["setup"] = setup % (f, f, f, f, f, f, array, fraction_nan, fraction_nan)
run["setup"] = setup % (f, f, f, f, f, array, fraction_nan, fraction_nan)
run["repeat"] = repeat
suite.append(run)

Expand All @@ -134,30 +135,32 @@ def get_instructions():
"(a, 1)", # move
"(a, 0)", # (arg)partition
"(a, np.nan, 0)", # replace
"(a, 1, q=0.25)", # move_quantile
10,
),
("rand(10)", "(a)", "(a, 2)", "(a, 2)", "(a, np.nan, 0)", 10),
("rand(100)", "(a)", "(a, 20)", "(a, 20)", "(a, np.nan, 0)", 6),
("rand(1000)", "(a)", "(a, 200)", "(a, 200)", "(a, np.nan, 0)", 3),
("rand(1000000)", "(a)", "(a, 200)", "(a, 200)", "(a, np.nan, 0)", 2),
("rand(10)", "(a)", "(a, 2)", "(a, 2)", "(a, np.nan, 0)", "(a, 2, q=0.25)", 10),
("rand(100)", "(a)", "(a, 20)", "(a, 20)", "(a, np.nan, 0)", "(a, 20, q=0.25)", 6),
("rand(1000)", "(a)", "(a, 200)", "(a, 200)", "(a, np.nan, 0)", "(a, 200, q=0.25)", 3),
("rand(1000000)", "(a)", "(a, 200)", "(a, 200)", "(a, np.nan, 0)", None, 2),
# 2d input array
("rand(10, 10)", "(a)", "(a, 2)", "(a, 2)", "(a, np.nan, 0)", 6),
("rand(100, 100)", "(a)", "(a, 20)", "(a, 20)", "(a, np.nan, 0)", 3),
("rand(1000, 1000)", "(a)", "(a, 200)", "(a, 200)", "(a, np.nan, 0)", 2),
("rand(10, 10)", "(a, 1)", None, None, None, 6),
("rand(100, 100)", "(a, 1)", None, None, None, 3),
("rand(1000, 1000)", "(a, 1)", None, None, None, 2),
("rand(100000, 2)", "(a, 1)", "(a, 1)", "(a, 1)", None, 2),
("rand(10, 10)", "(a, 0)", None, None, None, 6),
("rand(100, 100)", "(a, 0)", "(a, 20, axis=0)", None, None, 3),
("rand(1000, 1000)", "(a, 0)", "(a, 200, axis=0)", None, None, 2),
("rand(10, 10)", "(a)", "(a, 2)", "(a, 2)", "(a, np.nan, 0)", "(a, 2, q=0.25)", 6),
("rand(100, 100)", "(a)", "(a, 20)", "(a, 20)", "(a, np.nan, 0)", "(a, 20, q=0.25)", 3),
("rand(1000, 1000)", "(a)", "(a, 200)", "(a, 200)", "(a, np.nan, 0)", None ,2),
("rand(10, 10)", "(a, 1)", None, None, None, None, 6),
("rand(100, 100)", "(a, 1)", None, None, None, None, 3),
("rand(1000, 1000)", "(a, 1)", None, None, None, None, 2),
("rand(100000, 2)", "(a, 1)", "(a, 1)", "(a, 1)", None, None, 2),
("rand(10, 10)", "(a, 0)", None, None, None, None, 6),
("rand(100, 100)", "(a, 0)", "(a, 20, axis=0)", None, None, None, 3),
("rand(1000, 1000)", "(a, 0)", "(a, 200, axis=0)", None, None, None, 2),
# 3d input array
(
"rand(100, 100, 100)",
"(a, 0)",
"(a, 20, axis=0)",
"(a, 20, axis=0)",
None,
"(a, 20, axis=0, q=0.25)",
2,
),
(
Expand All @@ -166,6 +169,7 @@ def get_instructions():
"(a, 20, axis=1)",
"(a, 20, axis=1)",
None,
"(a, 20, axis=1, q=0.25)",
2,
),
(
Expand All @@ -174,10 +178,11 @@ def get_instructions():
"(a, 20, axis=2)",
"(a, 20, axis=2)",
"(a, np.nan, 0)",
"(a, 20, axis=2, q=0.25)",
2,
),
# 0d input array
("array(1.0)", "(a)", None, None, "(a, 0, 2)", 10),
("array(1.0)", "(a)", None, None, "(a, 0, 2)", None, 10),
]

return instructions
36 changes: 19 additions & 17 deletions bottleneck/slow/move.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,36 +107,38 @@ def move_median(a, window, min_count=None, axis=-1, **kwargs):
"Slow move_median for unaccelerated dtype"
return move_func(np.nanmedian, a, window, min_count, axis=axis)

def move_quantile(a, window, min_count=None, axis=-1, q=0.5):
"Slow move_quantile for unaccelerated dtype"
return move_func(np_nanquantile_infs, a, window, min_count, axis=axis, q=q)

def move_rank(a, window, min_count=None, axis=-1):
"Slow move_rank for unaccelerated dtype"
return move_func(lastrank, a, window, min_count, axis=axis)

# function for handling infs in np.nanquantile
# keyword argument for interpolation method in np.nanquantile was changed in 1.22.0
from packaging import version
if version.parse(np.__version__) > version.parse("1.22.0"):
METHOD_KEYWORD = "method"
else:
METHOD_KEYWORD = "interpolation"

def np_nanquantile_infs(a, **kwargs):
def move_quantile(a, window, min_count=None, axis=-1, q=0.5, **kwargs):
"Slow move_quantile for unaccelerated dtype"
with warnings.catch_warnings():
warnings.simplefilter("ignore")
if not np.isinf(a).any():
kwargs[METHOD_KEYWORD] = 'midpoint'
return np.nanquantile(a, **kwargs)
return move_func(np.nanquantile, a, window, min_count, axis=axis, q=q, **kwargs)
else:
kwargs[METHOD_KEYWORD] = 'lower'
lower_nanquantile = np.nanquantile(a, **kwargs)
kwargs[METHOD_KEYWORD] = 'higher'
higher_nanquantile = np.nanquantile(a, **kwargs)

midpoint_nanquantile = (lower_nanquantile + higher_nanquantile) / 2
return midpoint_nanquantile
return move_func(np_nanquantile_infs, a, window, min_count, axis=axis, q=q, **kwargs)

def move_rank(a, window, min_count=None, axis=-1):
"Slow move_rank for unaccelerated dtype"
return move_func(lastrank, a, window, min_count, axis=axis)


# function for handling infs in np.nanquantile
def np_nanquantile_infs(a, **kwargs):
kwargs[METHOD_KEYWORD] = 'lower'
lower_nanquantile = np.nanquantile(a, **kwargs)
kwargs[METHOD_KEYWORD] = 'higher'
higher_nanquantile = np.nanquantile(a, **kwargs)

midpoint_nanquantile = (lower_nanquantile + higher_nanquantile) / 2
return midpoint_nanquantile

# magic utility functions ---------------------------------------------------

Expand Down
7 changes: 3 additions & 4 deletions bottleneck/src/move_median/move_median_debug.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include "move_median.h"

ai_t *mm_move_median(ai_t *a, idx_t length, idx_t window, idx_t min_count, double quantile);
ai_t *mm_move_median(ai_t *a, idx_t length, idx_t window, idx_t min_count);
int mm_assert_equal(ai_t *actual, ai_t *desired, ai_t *input, idx_t length,
char *err_msg);
int mm_unit_test(void);
Expand All @@ -19,7 +19,7 @@ int main(void) {


/* moving window median of 1d arrays returns output array */
ai_t *mm_move_median(ai_t *a, idx_t length, idx_t window, idx_t min_count, double quantile) {
ai_t *mm_move_median(ai_t *a, idx_t length, idx_t window, idx_t min_count) {
mm_handle *mm;
ai_t *out;
idx_t i;
Expand Down Expand Up @@ -84,14 +84,13 @@ int mm_unit_test(void) {
int length;
char *err_msg;
int failed;
double quantile = 0.5;

length = sizeof(arr_input) / sizeof(*arr_input);
err_msg = malloc(1024 * sizeof *err_msg);
sprintf(err_msg, "move_median failed with window=%d, min_count=%d",
window, min_count);

actual = mm_move_median(arr_input, length, window, min_count, quantile);
actual = mm_move_median(arr_input, length, window, min_count);
failed = mm_assert_equal(actual, desired, arr_input, length, err_msg);

free(actual);
Expand Down