Spaces:
Runtime error
Runtime error
""" | |
This file is very long and growing, but it was decided to not split it yet, as | |
it's still manageable (2020-03-17, ~1.1k LoC). See gh-31989 | |
Instead of splitting it was decided to define sections here: | |
- Configuration / Settings | |
- Autouse fixtures | |
- Common arguments | |
- Missing values & co. | |
- Classes | |
- Indices | |
- Series' | |
- DataFrames
- Scalars
- Operators & Operations | |
- Data sets/files | |
- Time zones | |
- Dtypes | |
- Misc | |
""" | |
from collections import abc | |
from datetime import ( | |
date, | |
datetime, | |
time, | |
timedelta, | |
timezone, | |
) | |
from decimal import Decimal | |
import operator | |
import os | |
from typing import Callable | |
from dateutil.tz import ( | |
tzlocal, | |
tzutc, | |
) | |
import hypothesis | |
from hypothesis import strategies as st | |
import numpy as np | |
import pytest | |
from pytz import ( | |
FixedOffset, | |
utc, | |
) | |
import pandas.util._test_decorators as td | |
from pandas.core.dtypes.dtypes import ( | |
DatetimeTZDtype, | |
IntervalDtype, | |
) | |
import pandas as pd | |
from pandas import ( | |
DataFrame, | |
Interval, | |
Period, | |
Series, | |
Timedelta, | |
Timestamp, | |
) | |
import pandas._testing as tm | |
from pandas.core import ops | |
from pandas.core.indexes.api import ( | |
Index, | |
MultiIndex, | |
) | |
# Probe for the optional pyarrow dependency. Only the boolean outcome is kept;
# the module binding is dropped again so it does not linger in this namespace.
has_pyarrow = True
try:
    import pyarrow as pa
except ImportError:
    has_pyarrow = False
else:
    del pa
# ``zoneinfo`` only exists in the standard library from Python 3.9 onwards.
# Probe for it by importing it instead of consulting a version flag, so this
# block does not depend on pandas' private compat constants.
try:
    # Import "zoneinfo" could not be resolved (reportMissingImports)
    import zoneinfo
except ImportError:
    zoneinfo = None  # type: ignore[assignment]
else:
    # Although zoneinfo can be imported in Py39, it is effectively
    # "not available" without tzdata/IANA tz data.
    # We will set zoneinfo to not found in this case
    try:
        zoneinfo.ZoneInfo("UTC")
    except zoneinfo.ZoneInfoNotFoundError:
        zoneinfo = None  # type: ignore[assignment]
# Until https://github.com/numpy/numpy/issues/19078 is sorted out, just suppress
# the "Promotion of numbers and bools" FutureWarning. This is a reusable
# ``filterwarnings`` mark; it only has an effect on items it is added to.
suppress_npdev_promotion_warning = pytest.mark.filterwarnings(
    "ignore:Promotion of numbers and bools:FutureWarning"
)
# ---------------------------------------------------------------- | |
# Configuration / Settings | |
# ---------------------------------------------------------------- | |
# pytest | |
def pytest_addoption(parser) -> None:
    """
    Register the pandas-specific command line switches.

    Every switch is a plain boolean flag (``action="store_true"``).

    Parameters
    ----------
    parser
        Object exposing ``addoption``; each switch is registered on it in the
        order listed below.
    """
    switches = (
        ("--skip-slow", "skip slow tests"),
        ("--skip-network", "skip network tests"),
        ("--skip-db", "skip db tests"),
        ("--run-high-memory", "run high memory tests"),
        ("--only-slow", "run only slow tests"),
        ("--strict-data-files", "Fail if a test is skipped for missing data file."),
    )
    for flag, description in switches:
        parser.addoption(flag, action="store_true", help=description)
def ignore_doctest_warning(item: pytest.Item, path: str, message: str) -> None:
    """
    Silence one expected warning for a matching doctest item.

    Parameters
    ----------
    item : pytest.Item
        pytest test item.
    path : str
        Module path to Python object, e.g. "pandas.core.frame.DataFrame.append".
        The filter is only added when ``item.name`` ends with this string, so
        a suffix such as "DataFrame.append" is sufficient.
    message : str
        Message to be filtered.
    """
    if not item.name.endswith(path):
        return
    warning_filter = pytest.mark.filterwarnings(f"ignore:{message}")
    item.add_marker(warning_filter)
def pytest_collection_modifyitems(items, config):
    """
    Adjust the collected test items according to the pandas command line flags.

    For every item this

    - requests the ``add_doctest_imports`` fixture and filters the known
      doctest warnings, when ``--doctest-modules`` or ``--doctest-cython`` is
      active,
    - adds the ``arraymanager`` mark if the node id contains ``/frame/``,
    - adds the numpy promotion warning filter,
    - for the ``slow``/``network``/``db`` keywords, either skips the item (when
      the matching ``--skip-*`` flag is set) or adds the corresponding mark,
    - skips the item if ``--only-slow`` is set and it is not a slow test.

    Parameters
    ----------
    items
        Collected test items; each is used through ``add_marker``, ``nodeid``,
        ``keywords`` and ``name``.
    config
        Object exposing ``getoption`` for the command line flags.
    """
    skip_slow = config.getoption("--skip-slow")
    only_slow = config.getoption("--only-slow")
    skip_network = config.getoption("--skip-network")
    skip_db = config.getoption("--skip-db")

    # (mark to add, keyword to look for, skip instead?, flag named in the
    # skip reason). The flag names must match those in pytest_addoption.
    marks = [
        (pytest.mark.slow, "slow", skip_slow, "--skip-slow"),
        (pytest.mark.network, "network", skip_network, "--skip-network"),
        (pytest.mark.db, "db", skip_db, "--skip-db"),
    ]

    # Warnings from doctests that can be ignored; place reason in comment above.
    # Each entry specifies (path, message) - see the ignore_doctest_warning function
    ignored_doctest_warnings = [
        # Deprecations where the docstring will emit a warning
        ("DataFrame.append", "The frame.append method is deprecated"),
        ("Series.append", "The series.append method is deprecated"),
        ("dtypes.common.is_categorical", "is_categorical is deprecated"),
        ("Categorical.replace", "Categorical.replace is deprecated"),
        ("dtypes.common.is_extension_type", "'is_extension_type' is deprecated"),
        ("Index.is_mixed", "Index.is_mixed is deprecated"),
        ("MultiIndex._is_lexsorted", "MultiIndex.is_lexsorted is deprecated"),
        # Docstring divides by zero to show behavior difference
        ("missing.mask_zero_div_zero", "divide by zero encountered"),
        # Docstring demonstrates the call raises a warning
        ("_validators.validate_axis_style_args", "Use named arguments"),
    ]

    # The doctest options do not change between items, so look them up once.
    collecting_doctests = config.getoption("--doctest-modules") or config.getoption(
        "--doctest-cython", default=False
    )

    for item in items:
        if collecting_doctests:
            # autouse=True for the add_doctest_imports can lead to expensive teardowns
            # since doctest_namespace is a session fixture
            item.add_marker(pytest.mark.usefixtures("add_doctest_imports"))

            for path, message in ignored_doctest_warnings:
                ignore_doctest_warning(item, path, message)

        # mark all tests in the pandas/tests/frame directory with "arraymanager"
        if "/frame/" in item.nodeid:
            item.add_marker(pytest.mark.arraymanager)

        item.add_marker(suppress_npdev_promotion_warning)

        for mark, kwd, skip_if_found, arg_name in marks:
            if kwd in item.keywords:
                # If we're skipping, no need to actually add the marker or look for
                # other markers
                if skip_if_found:
                    item.add_marker(pytest.mark.skip(f"skipping due to {arg_name}"))
                    break

                item.add_marker(mark)

        if only_slow and "slow" not in item.keywords:
            item.add_marker(pytest.mark.skip("skipping due to --only-slow"))
# Hypothesis
# The shared "ci" profile disables the per-example deadline and silences the
# ``too_slow`` health check.
# 2022-02-09: Changed deadline from 500 -> None. Deadline leads to
# non-actionable, flaky CI failures (# GH 24641, 44969, 45118, 44969).
# If a specific test does need a time limit, add `@settings(deadline=...)`
# with a working value on that test.
hypothesis.settings.register_profile(
    "ci",
    deadline=None,
    suppress_health_check=(hypothesis.HealthCheck.too_slow,),
)
hypothesis.settings.load_profile("ci")

# Registering these strategies makes them globally available via st.from_type,
# which is used for offsets in tests/tseries/offsets/test_offsets_properties.py

# Month-based offsets: only ``n`` and ``normalize`` are drawn.
for name in ("MonthBegin", "MonthEnd", "BMonthBegin", "BMonthEnd"):
    cls = getattr(pd.tseries.offsets, name)
    st.register_type_strategy(
        cls, st.builds(cls, n=st.integers(-99, 99), normalize=st.booleans())
    )

# Year-based offsets additionally draw the anchoring ``month``.
for name in ("YearBegin", "YearEnd", "BYearBegin", "BYearEnd"):
    cls = getattr(pd.tseries.offsets, name)
    st.register_type_strategy(
        cls,
        st.builds(
            cls,
            n=st.integers(-5, 5),
            normalize=st.booleans(),
            month=st.integers(min_value=1, max_value=12),
        ),
    )

# Quarter-based offsets additionally draw ``startingMonth``.
for name in ("QuarterBegin", "QuarterEnd", "BQuarterBegin", "BQuarterEnd"):
    cls = getattr(pd.tseries.offsets, name)
    st.register_type_strategy(
        cls,
        st.builds(
            cls,
            n=st.integers(-24, 24),
            normalize=st.booleans(),
            startingMonth=st.integers(min_value=1, max_value=12),
        ),
    )
def add_doctest_imports(doctest_namespace) -> None:
    """
    Expose numpy and pandas to doctests under their usual short names.

    Stores the modules under the keys ``"np"`` and ``"pd"`` in
    ``doctest_namespace``.
    """
    doctest_namespace.update(np=np, pd=pd)
# ---------------------------------------------------------------- | |
# Autouse fixtures | |
# ---------------------------------------------------------------- | |
def configure_tests() -> None:
    """
    Apply the pandas option settings shared by all tests and test modules.

    Currently this makes chained assignment raise.
    """
    option, value = "chained_assignment", "raise"
    pd.set_option(option, value)
# ---------------------------------------------------------------- | |
# Common arguments | |
# ---------------------------------------------------------------- | |
def axis(request):
    """
    Fixture handing back one DataFrame axis number per parametrization.
    """
    return request.param


# Second name for the same fixture function.
axis_frame = axis
def axis_1(request):
    """
    Fixture handing back one alias of axis 1 of a DataFrame per parametrization.
    """
    return request.param
def observed(request):
    """
    Value for the ``observed`` keyword of groupby, one of [True, False].

    The keyword controls whether categorical groupers report values that do
    not appear in the grouper [False / None] or only those that do [True].
    [None] is supported for future compatibility, in case the default changes
    (which would require a warning when the parameter is not passed).
    """
    return request.param
def ordered(request):
    """
    Boolean value for the 'ordered' parameter of Categorical.
    """
    return request.param
def keep(request):
    """
    One valid value for the 'keep' parameter of
    .duplicated or .drop_duplicates
    """
    return request.param
def inclusive_endpoints_fixture(request):
    """
    Fixture cycling through all interval 'inclusive' parameters.
    """
    return request.param
def closed(request):
    """
    Fixture cycling through all interval 'closed' parameters.
    """
    return request.param
def other_closed(request):
    """
    Second 'closed' fixture, so tests can be parametrized over all pairs of
    closed values.
    """
    return request.param
def compression(request):
    """
    Fixture cycling through common compression types in compression tests.
    """
    return request.param
def compression_only(request):
    """
    Fixture cycling through common compression types in compression tests,
    leaving out the uncompressed case.
    """
    return request.param
def writable(request):
    """
    Fixture stating whether an array is writable.
    """
    return request.param
def join_type(request):
    """
    Fixture cycling through all types of join operations.
    """
    return request.param
def nselect_method(request):
    """
    Fixture cycling through all nselect methods.
    """
    return request.param
# ---------------------------------------------------------------- | |
# Missing values & co. | |
# ---------------------------------------------------------------- | |
def nulls_fixture(request):
    """
    Fixture providing each null type in pandas.
    """
    return request.param


nulls_fixture2 = nulls_fixture  # Generate cartesian product of nulls_fixture
def unique_nulls_fixture(request):
    """
    Fixture providing each null type in pandas, every null type exactly once.
    """
    return request.param


# Generate cartesian product of unique_nulls_fixture:
unique_nulls_fixture2 = unique_nulls_fixture
def np_nat_fixture(request):
    """
    Fixture providing each NaT type in numpy.
    """
    return request.param


# Generate cartesian product of np_nat_fixture:
np_nat_fixture2 = np_nat_fixture
# ---------------------------------------------------------------- | |
# Classes | |
# ---------------------------------------------------------------- | |
def frame_or_series(request):
    """
    Fixture for parametrizing a test over DataFrame and Series.
    """
    return request.param
# error: List item 0 has incompatible type "Type[Index]"; expected "Type[IndexOpsMixin]"
def index_or_series(request):
    """
    Fixture for parametrizing a test over Index and Series.

    It exists because of a mypy bug that reports the error
    List item 0 has incompatible type "Type[Series]"; expected "Type[PandasObject]"

    See GH#29725
    """
    return request.param


# Generate cartesian product of index_or_series fixture:
index_or_series2 = index_or_series
def index_or_series_or_array(request):
    """
    Fixture for parametrizing a test over Index, Series, and ExtensionArray
    """
    return request.param
def box_with_array(request):
    """
    Fixture for checking behavior with the Index, Series, DataFrame, and
    pandas Array classes
    """
    return request.param


# Second name for the same fixture function.
box_with_array2 = box_with_array
def dict_subclass():
    """
    Fixture returning a class (not an instance) that subclasses ``dict``.
    """

    class TestSubDict(dict):
        # Behaves exactly like ``dict``; construction is simply forwarded.
        def __init__(self, *args, **kwargs) -> None:
            super().__init__(*args, **kwargs)

    return TestSubDict
def non_dict_mapping_subclass():
    """
    Fixture returning a class that implements ``Mapping`` without being a dict.

    The class wraps the object passed to its constructor and forwards item
    access, iteration and length to it.
    """

    class TestNonDictMapping(abc.Mapping):
        def __init__(self, underlying_dict) -> None:
            self._data = underlying_dict

        def __getitem__(self, key):
            return self._data[key]

        def __iter__(self):
            return iter(self._data)

        def __len__(self):
            return len(self._data)

    return TestNonDictMapping
# ---------------------------------------------------------------- | |
# Indices | |
# ---------------------------------------------------------------- | |
def multiindex_year_month_day_dataframe_random_data():
    """
    DataFrame with 3 level MultiIndex (year, month, day) covering
    first 100 business days from 2000-01-01 with random data
    """
    tdf = tm.makeTimeDataFrame(100)
    # Group on the calendar parts of each index entry and sum within groups.
    by_calendar_part = [lambda x: x.year, lambda x: x.month, lambda x: x.day]
    ymd = tdf.groupby(by_calendar_part).sum()
    # use Int64Index, to make sure things work
    int_levels = [lev.astype("i8") for lev in ymd.index.levels]
    ymd.index = ymd.index.set_levels(int_levels)
    ymd.index.set_names(["year", "month", "day"], inplace=True)
    return ymd
def lexsorted_two_level_string_multiindex() -> MultiIndex:
    """
    2-level MultiIndex, lexsorted, with string names.

    The levels are named "first" and "second"; the index has 10 entries.
    """
    levels = [["foo", "bar", "baz", "qux"], ["one", "two", "three"]]
    codes = [[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]]
    return MultiIndex(levels=levels, codes=codes, names=["first", "second"])
def multiindex_dataframe_random_data(
    lexsorted_two_level_string_multiindex,
) -> DataFrame:
    """
    DataFrame with 2 level MultiIndex with random data

    The frame has 10 rows of standard-normal draws in columns "A", "B", "C";
    the columns Index is named "exp".
    """
    values = np.random.randn(10, 3)
    columns = Index(["A", "B", "C"], name="exp")
    return DataFrame(
        values, index=lexsorted_two_level_string_multiindex, columns=columns
    )
def _create_multiindex():
    """
    MultiIndex used to test the general functionality of this object
    """
    # See Also: tests.multi.conftest.idx
    levels = [Index(["foo", "bar", "baz", "qux"]), Index(["one", "two"])]
    codes = [np.array([0, 0, 1, 2, 3, 3]), np.array([0, 1, 0, 1, 0, 1])]
    return MultiIndex(
        levels=levels,
        codes=codes,
        names=["first", "second"],
        verify_integrity=False,
    )
def _create_mi_with_dt64tz_level():
    """
    MultiIndex with a level that is a tzaware DatetimeIndex.
    """
    # GH#8367 round trip with pickle
    tz_aware_dates = pd.date_range("20130101", periods=3, tz="US/Eastern")
    level_values = [[1, 2], ["a", "b"], tz_aware_dates]
    return MultiIndex.from_product(level_values, names=["one", "two", "three"])
# Registry of sample Index objects, keyed by a short descriptive id. The
# fixtures below look entries up by key and hand out copies. ``_series`` and
# ``_index_or_series_objs`` are derived from it.
# Entries are built once, at import time.
indices_dict = {
    "string": tm.makeStringIndex(100),
    "datetime": tm.makeDateIndex(100),
    "datetime-tz": tm.makeDateIndex(100, tz="US/Pacific"),
    "period": tm.makePeriodIndex(100),
    "timedelta": tm.makeTimedeltaIndex(100),
    "int": tm.makeIntIndex(100),
    "uint": tm.makeUIntIndex(100),
    "range": tm.makeRangeIndex(100),
    "float": tm.makeFloatIndex(100),
    "complex64": tm.makeFloatIndex(100).astype("complex64"),
    "complex128": tm.makeFloatIndex(100).astype("complex128"),
    "num_int64": tm.makeNumericIndex(100, dtype="int64"),
    "num_int32": tm.makeNumericIndex(100, dtype="int32"),
    "num_int16": tm.makeNumericIndex(100, dtype="int16"),
    "num_int8": tm.makeNumericIndex(100, dtype="int8"),
    "num_uint64": tm.makeNumericIndex(100, dtype="uint64"),
    "num_uint32": tm.makeNumericIndex(100, dtype="uint32"),
    "num_uint16": tm.makeNumericIndex(100, dtype="uint16"),
    "num_uint8": tm.makeNumericIndex(100, dtype="uint8"),
    "num_float64": tm.makeNumericIndex(100, dtype="float64"),
    "num_float32": tm.makeNumericIndex(100, dtype="float32"),
    "bool-object": tm.makeBoolIndex(10).astype(object),
    "bool-dtype": Index(np.random.randn(10) < 0),
    "categorical": tm.makeCategoricalIndex(100),
    "interval": tm.makeIntervalIndex(100),
    "empty": Index([]),
    "tuples": MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])),
    "mi-with-dt64tz-level": _create_mi_with_dt64tz_level(),
    "multi": _create_multiindex(),
    "repeats": Index([0, 0, 1, 1, 2, 2]),
    "nullable_int": Index(np.arange(100), dtype="Int64"),
    "nullable_uint": Index(np.arange(100), dtype="UInt16"),
    "nullable_float": Index(np.arange(100), dtype="Float32"),
    "nullable_bool": Index(np.arange(100).astype(bool), dtype="boolean"),
    "string-python": Index(pd.array(tm.makeStringIndex(100), dtype="string[python]")),
}
# The pyarrow-backed string index is only added when pyarrow could be imported.
if has_pyarrow:
    idx = Index(pd.array(tm.makeStringIndex(100), dtype="string[pyarrow]"))
    indices_dict["string-pyarrow"] = idx
def index(request):
    """
    Fixture for many "simple" kinds of indices.

    The entry of ``indices_dict`` named by ``request.param`` is returned as a
    copy. These indices are unlikely to cover corner cases, e.g.
    - no names
    - no NaTs/NaNs
    - no values near implementation bounds
    - ...
    """
    template = indices_dict[request.param]
    # copy to avoid mutation, e.g. setting .name
    return template.copy()


# Needed to generate cartesian product of indices
index_fixture2 = index
def index_flat(request):
    """
    Same as the ``index`` fixture, but excluding MultiIndex cases.

    Returns a copy of the ``indices_dict`` entry named by ``request.param``.
    """
    return indices_dict[request.param].copy()


# Alias so we can test with cartesian product of index_flat
index_flat2 = index_flat
def index_with_missing(request):
    """
    Fixture for indices with missing values.

    The first and the last entry of the selected index are replaced by a
    missing value. For the MultiIndex entries ("tuples",
    "mi-with-dt64tz-level", "multi") only the top-level element of those two
    tuples is replaced and a new MultiIndex is built from the tuples.

    Integer-dtype and empty cases are excluded because they cannot hold missing
    values.
    """
    # GH 35538. Use deep copy to avoid elusive bug on np-dev
    # GHA pipeline that writes into indices_dict despite copy
    ind = indices_dict[request.param].copy(deep=True)

    if request.param in ["tuples", "mi-with-dt64tz-level", "multi"]:
        # For setting missing values in the top level of MultiIndex
        vals = ind.tolist()
        vals[0] = (None,) + vals[0][1:]
        vals[-1] = (None,) + vals[-1][1:]
        return MultiIndex.from_tuples(vals)

    # Flat index: overwrite both ends and rebuild an index of the same class.
    vals = ind.values
    vals[0] = None
    vals[-1] = None
    return type(ind)(vals)
# ---------------------------------------------------------------- | |
# Series' | |
# ---------------------------------------------------------------- | |
def string_series() -> Series:
    """
    Series of floats with Index of unique strings, named "series".
    """
    series = tm.makeStringSeries()
    series.name = "series"
    return series
def object_series() -> Series:
    """
    Series of dtype object with Index of unique strings, named "objects".
    """
    series = tm.makeObjectSeries()
    series.name = "objects"
    return series
def datetime_series() -> Series:
    """
    Series of floats with DatetimeIndex, named "ts".
    """
    series = tm.makeTimeSeries()
    series.name = "ts"
    return series
def _create_series(index):
    """
    Helper for the _series dict.

    Returns a Series named "a" holding one standard-normal draw per entry of
    ``index``.
    """
    values = np.random.randn(len(index))
    return Series(values, index=index, name="a")
# One random Series per entry of ``indices_dict``, keyed
# "series-with-<index id>-index".
_series = {
    f"series-with-{key}-index": _create_series(sample_index)
    for key, sample_index in indices_dict.items()
}
def series_with_simple_index(index) -> Series:
    """
    Fixture for tests on series with changing types of indices.

    Builds a random Series on top of ``index`` via ``_create_series``.
    """
    series = _create_series(index)
    return series
def series_with_multilevel_index() -> Series:
    """
    Fixture with a Series with a 2-level MultiIndex.

    The 8 values are standard-normal draws, except for the value at
    position 3, which is NaN.
    """
    arrays = [
        ["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"],
        ["one", "two", "one", "two", "one", "two", "one", "two"],
    ]
    index = MultiIndex.from_tuples(zip(*arrays))
    ser = Series(np.random.randn(8), index=index)
    # Write by position explicitly instead of relying on ``ser[3]`` falling
    # back to positional access, and spell NaN as ``np.nan`` because the
    # ``np.NaN`` alias does not exist in NumPy 2.0.
    ser.iloc[3] = np.nan
    return ser
# One random Series per narrow numpy dtype, keyed "<dtype name>-series".
_narrow_series = {
    f"{narrow_dtype.__name__}-series": tm.make_rand_series(
        name="a", dtype=narrow_dtype
    )
    for narrow_dtype in tm.NARROW_NP_DTYPES
}

# Everything the ``index_or_series_obj`` fixture can hand out. On a key clash
# the later mapping wins.
_index_or_series_objs = {
    **indices_dict,
    **_series,
    **_narrow_series,
}
def index_or_series_obj(request):
    """
    Fixture for tests on indexes, series and series with a narrow dtype.

    A deep copy is returned to avoid mutation, e.g. setting .name
    """
    template = _index_or_series_objs[request.param]
    return template.copy(deep=True)
# ---------------------------------------------------------------- | |
# DataFrames | |
# ---------------------------------------------------------------- | |
def int_frame() -> DataFrame:
    """
    Fixture for DataFrame of ints with index of unique strings

    Columns are ['A', 'B', 'C', 'D'].

    The frame is built from ``tm.getSeriesData()`` and cast to int64; an
    earlier sample rendering showed 30 rows x 4 columns of small integers.
    """
    float_data = DataFrame(tm.getSeriesData())
    return float_data.astype("int64")
def datetime_frame() -> DataFrame:
    """
    Fixture for DataFrame of floats with DatetimeIndex

    Columns are ['A', 'B', 'C', 'D'].

    The frame is built from ``tm.getTimeSeriesData()``; an earlier sample
    rendering showed 30 rows x 4 columns indexed from 2000-01-03 to
    2000-02-11.
    """
    data = tm.getTimeSeriesData()
    return DataFrame(data)
def float_frame() -> DataFrame:
    """
    Fixture for DataFrame of floats with index of unique strings

    Columns are ['A', 'B', 'C', 'D'].

    The frame is built from ``tm.getSeriesData()``; an earlier sample
    rendering showed 30 rows x 4 columns.
    """
    data = tm.getSeriesData()
    return DataFrame(data)
def mixed_type_frame() -> DataFrame:
    """
    Fixture for DataFrame of float/int/string columns with RangeIndex

    Columns are ['a', 'b', 'c', 'float32', 'int32'].

    The scalar columns are broadcast over the 10 rows; the last two columns
    are arrays of ones with the dtype their name says.
    """
    n_rows = 10
    data = {
        "a": 1.0,
        "b": 2,
        "c": "foo",
        "float32": np.ones(n_rows, dtype="float32"),
        "int32": np.ones(n_rows, dtype="int32"),
    }
    return DataFrame(data, index=np.arange(n_rows))
def rand_series_with_duplicate_datetimeindex() -> Series:
    """
    Fixture for Series with a DatetimeIndex that has duplicates.

    January 2nd, 3rd and 4th of 2000 each appear three times, followed by a
    single January 5th; the values are standard-normal draws.
    """
    dates = [datetime(2000, 1, day) for day in (2, 3, 4) for _ in range(3)]
    dates.append(datetime(2000, 1, 5))
    values = np.random.randn(len(dates))
    return Series(values, index=dates)
# ---------------------------------------------------------------- | |
# Scalars | |
# ---------------------------------------------------------------- | |
def ea_scalar_and_dtype(request):
    """
    Fixture handing back the value of the current parametrization unchanged.
    """
    # NOTE(review): the name suggests (scalar, dtype) pairs — confirm against
    # the fixture's parametrization, which is not visible here.
    return request.param
# ---------------------------------------------------------------- | |
# Operators & Operations | |
# ---------------------------------------------------------------- | |
# Dunder names of the arithmetic operations; each forward method is
# immediately followed by its reflected ("r") counterpart.
_all_arithmetic_operators = [
    f"__{reflected}{op}__"
    for op in ("add", "sub", "mul", "floordiv", "truediv", "pow", "mod")
    for reflected in ("", "r")
]
def all_arithmetic_operators(request):
    """
    Fixture providing the dunder names of common arithmetic operations.
    """
    return request.param
def all_binary_operators(request):
    """
    Fixture providing operator and roperator arithmetic, comparison, and
    logical ops.
    """
    return request.param
def all_arithmetic_functions(request):
    """
    Fixture providing operator and roperator arithmetic functions.

    Notes
    -----
    Unlike all_arithmetic_operators, this includes divmod and rdivmod.
    """
    return request.param
# Names of the numeric reductions.
_all_numeric_reductions = "sum max min mean prod std var median kurt skew".split()
def all_numeric_reductions(request):
    """
    Fixture providing the names of numeric reductions.
    """
    return request.param
# Names of the boolean reductions.
_all_boolean_reductions = "all any".split()


def all_boolean_reductions(request):
    """
    Fixture providing the names of boolean reductions.
    """
    return request.param
# Numeric reduction names first, boolean ones after.
_all_reductions = [*_all_numeric_reductions, *_all_boolean_reductions]


def all_reductions(request):
    """
    Fixture providing the names of all (boolean + numeric) reductions.
    """
    return request.param
def comparison_op(request):
    """
    Fixture providing operator module comparison functions.
    """
    return request.param
def compare_operators_no_eq_ne(request):
    """
    Fixture providing the dunder names of compare operations other than
    == and !=

    * >=
    * >
    * <
    * <=
    """
    return request.param
def all_logical_operators(request):
    """
    Fixture providing the dunder names of common logical operations

    * |
    * &
    * ^
    """
    return request.param
# ---------------------------------------------------------------- | |
# Data sets/files | |
# ---------------------------------------------------------------- | |
def strict_data_files(pytestconfig):
    """
    Look up the value of the `--strict-data-files` test setting.
    """
    option_name = "--strict-data-files"
    return pytestconfig.getoption(option_name)
def datapath(strict_data_files: bool) -> Callable[..., str]:
    """
    Get a resolver for the path to a data file.

    Parameters
    ----------
    strict_data_files : bool
        Value of the ``--strict-data-files`` option. It decides what happens
        when a requested file does not exist.

    Returns
    -------
    callable
        ``deco(*args)`` joins the given path components onto the ``tests``
        directory located next to this file and returns the resulting path,
        i.e. the path including ``pandas/tests``.

    Raises
    ------
    ValueError
        Raised by the returned callable if the path doesn't exist and the
        --strict-data-files option is set. Without that option the missing
        file leads to ``pytest.skip`` instead.
    """
    BASE_PATH = os.path.join(os.path.dirname(__file__), "tests")

    def deco(*args):
        path = os.path.join(BASE_PATH, *args)
        if not os.path.exists(path):
            if strict_data_files:
                raise ValueError(
                    f"Could not find file {path} and --strict-data-files is set."
                )
            pytest.skip(f"Could not find {path}.")
        return path

    return deco
def iris(datapath) -> DataFrame:
    """
    The iris dataset as a DataFrame.

    Read from ``io/data/csv/iris.csv`` as resolved by ``datapath``.
    """
    csv_path = datapath("io", "data", "csv", "iris.csv")
    return pd.read_csv(csv_path)
# ---------------------------------------------------------------- | |
# Time zones | |
# ---------------------------------------------------------------- | |
# Timezone specifications used by the timezone fixtures: ``None``, names,
# UTC offsets given as strings, and tzinfo objects from dateutil, pytz and
# the standard library.
TIMEZONES = [
    None,
    "UTC",
    "US/Eastern",
    "Asia/Tokyo",
    "dateutil/US/Pacific",
    "dateutil/Asia/Singapore",
    "+01:15",
    "-02:15",
    "UTC+01:15",
    "UTC-02:15",
    tzutc(),
    tzlocal(),
    FixedOffset(300),
    FixedOffset(0),
    FixedOffset(-300),
    timezone.utc,
    timezone(timedelta(hours=1)),
    timezone(timedelta(hours=-1), name="foo"),
]
# zoneinfo-based entries are only added when the module is usable.
if zoneinfo is not None:
    TIMEZONES += [zoneinfo.ZoneInfo("US/Pacific"), zoneinfo.ZoneInfo("UTC")]
# ``repr`` of every entry, in the same order as TIMEZONES.
TIMEZONE_IDS = list(map(repr, TIMEZONES))
def tz_naive_fixture(request):
    """
    Fixture for trying timezones including default (None): {0}
    """
    # NOTE(review): ``{0}`` in the docstring looks like a ``str.format``
    # placeholder that is filled in elsewhere, so the docstring is left as is.
    return request.param
def tz_aware_fixture(request):
    """
    Fixture for trying explicit timezones: {0}
    """
    # NOTE(review): ``{0}`` in the docstring looks like a ``str.format``
    # placeholder that is filled in elsewhere, so the docstring is left as is.
    return request.param


# Generate cartesian product of tz_aware_fixture:
tz_aware_fixture2 = tz_aware_fixture
# Different spellings of UTC: two strings plus tzinfo objects from pytz,
# dateutil and the standard library.
_UTCS = ["utc", "dateutil/UTC", utc, tzutc(), timezone.utc]
# The zoneinfo variant is only added when the module is usable.
if zoneinfo is not None:
    _UTCS += [zoneinfo.ZoneInfo("UTC")]
def utc_fixture(request):
    """
    Fixture providing variants of UTC timezone strings and tzinfo objects.
    """
    return request.param


# Second name for the same fixture function.
utc_fixture2 = utc_fixture
# ---------------------------------------------------------------- | |
# Dtypes | |
# ---------------------------------------------------------------- | |
def string_dtype(request):
    """
    Fixture parametrized over string dtypes.

    * str
    * 'str'
    * 'U'
    """
    return request.param
def nullable_string_dtype(request):
    """
    Fixture parametrized over the nullable string dtypes.

    * 'string[python]'
    * 'string[pyarrow]'
    """
    return request.param
def string_storage(request):
    """
    Fixture parametrized over values for pd.options.mode.string_storage.

    * 'python'
    * 'pyarrow'
    """
    return request.param


# Alias so we can test with cartesian product of string_storage
string_storage2 = string_storage
def bytes_dtype(request):
    """
    Fixture parametrized over bytes dtypes.

    * bytes
    * 'bytes'
    """
    return request.param
def object_dtype(request):
    """
    Fixture parametrized over object dtypes.

    * object
    * 'object'
    """
    return request.param
def any_string_dtype(request):
    """
    Fixture parametrized over the dtypes that can hold strings.

    * 'object'
    * 'string[python]'
    * 'string[pyarrow]'
    """
    return request.param
def datetime64_dtype(request):
    """
    Fixture parametrized over datetime64 dtypes.

    * 'datetime64[ns]'
    * 'M8[ns]'
    """
    return request.param
def timedelta64_dtype(request):
    """
    Fixture parametrized over timedelta64 dtypes.

    * 'timedelta64[ns]'
    * 'm8[ns]'
    """
    return request.param
def fixed_now_ts() -> Timestamp:
    """
    Fixture emitting a fixed stand-in for Timestamp.now():
    2021-01-01 12:04:13.000022.
    """
    components = {
        "year": 2021,
        "month": 1,
        "day": 1,
        "hour": 12,
        "minute": 4,
        "second": 13,
        "microsecond": 22,
    }
    return Timestamp(**components)
def float_numpy_dtype(request):
    """
    Fixture parametrized over the numpy float dtypes.

    * float
    * 'float32'
    * 'float64'
    """
    return request.param
def float_ea_dtype(request):
    """
    Fixture parametrized over the extension-array float dtypes.

    * 'Float32'
    * 'Float64'
    """
    return request.param
def any_float_dtype(request):
    """
    Fixture parametrized over every float dtype, numpy and extension-array.

    * float
    * 'float32'
    * 'float64'
    * 'Float32'
    * 'Float64'
    """
    return request.param
def complex_dtype(request):
    """
    Fixture parametrized over complex dtypes.

    * complex
    * 'complex64'
    * 'complex128'
    """
    return request.param
def any_signed_int_numpy_dtype(request):
    """
    Parameterized fixture for signed numpy integer dtypes.

    Returns ``request.param`` unchanged; the documented values are:

    * int
    * 'int8'
    * 'int16'
    * 'int32'
    * 'int64'
    """
    dtype = request.param
    return dtype
def any_unsigned_int_numpy_dtype(request):
    """
    Parameterized fixture for unsigned numpy integer dtypes.

    Returns ``request.param`` unchanged; the documented values are:

    * 'uint8'
    * 'uint16'
    * 'uint32'
    * 'uint64'
    """
    dtype = request.param
    return dtype
def any_int_numpy_dtype(request):
    """
    Parameterized fixture for any numpy integer dtype, signed or unsigned.

    Returns ``request.param`` unchanged; the documented values are:

    * int
    * 'int8'
    * 'uint8'
    * 'int16'
    * 'uint16'
    * 'int32'
    * 'uint32'
    * 'int64'
    * 'uint64'
    """
    dtype = request.param
    return dtype
def any_int_ea_dtype(request):
    """
    Parameterized fixture for any nullable integer dtype.

    Returns ``request.param`` unchanged; the documented values are:

    * 'UInt8'
    * 'Int8'
    * 'UInt16'
    * 'Int16'
    * 'UInt32'
    * 'Int32'
    * 'UInt64'
    * 'Int64'
    """
    dtype = request.param
    return dtype
def any_int_dtype(request):
    """
    Parameterized fixture for any integer dtype, numpy or nullable.

    Returns ``request.param`` unchanged; the documented values are:

    * int
    * 'int8'
    * 'uint8'
    * 'int16'
    * 'uint16'
    * 'int32'
    * 'uint32'
    * 'int64'
    * 'uint64'
    * 'UInt8'
    * 'Int8'
    * 'UInt16'
    * 'Int16'
    * 'UInt32'
    * 'Int32'
    * 'UInt64'
    * 'Int64'
    """
    dtype = request.param
    return dtype
def any_numeric_ea_dtype(request):
    """
    Parameterized fixture for any nullable integer dtype and
    any nullable float dtype.

    Returns ``request.param`` unchanged; the documented values are:

    * 'UInt8'
    * 'Int8'
    * 'UInt16'
    * 'Int16'
    * 'UInt32'
    * 'Int32'
    * 'UInt64'
    * 'Int64'
    * 'Float32'
    * 'Float64'
    """
    dtype = request.param
    return dtype
def any_signed_int_ea_dtype(request):
    """
    Parameterized fixture for any signed nullable integer dtype.

    Returns ``request.param`` unchanged; the documented values are:

    * 'Int8'
    * 'Int16'
    * 'Int32'
    * 'Int64'
    """
    dtype = request.param
    return dtype
def any_real_numpy_dtype(request):
    """
    Parameterized fixture for any (purely) real numeric numpy dtype.

    Returns ``request.param`` unchanged; the documented values are:

    * int
    * 'int8'
    * 'uint8'
    * 'int16'
    * 'uint16'
    * 'int32'
    * 'uint32'
    * 'int64'
    * 'uint64'
    * float
    * 'float32'
    * 'float64'
    """
    dtype = request.param
    return dtype
def any_numpy_dtype(request):
    """
    Parameterized fixture for all numpy dtypes.

    Returns ``request.param`` unchanged; the documented values are:

    * bool
    * 'bool'
    * int
    * 'int8'
    * 'uint8'
    * 'int16'
    * 'uint16'
    * 'int32'
    * 'uint32'
    * 'int64'
    * 'uint64'
    * float
    * 'float32'
    * 'float64'
    * complex
    * 'complex64'
    * 'complex128'
    * str
    * 'str'
    * 'U'
    * bytes
    * 'bytes'
    * 'datetime64[ns]'
    * 'M8[ns]'
    * 'timedelta64[ns]'
    * 'm8[ns]'
    * object
    * 'object'
    """
    dtype = request.param
    return dtype
def any_numeric_dtype(request):
    """
    Parameterized fixture for all numeric dtypes, numpy and nullable.

    Returns ``request.param`` unchanged; the documented values are:

    * int
    * 'int8'
    * 'uint8'
    * 'int16'
    * 'uint16'
    * 'int32'
    * 'uint32'
    * 'int64'
    * 'uint64'
    * float
    * 'float32'
    * 'float64'
    * complex
    * 'complex64'
    * 'complex128'
    * 'UInt8'
    * 'Int8'
    * 'UInt16'
    * 'Int16'
    * 'UInt32'
    * 'Int32'
    * 'UInt64'
    * 'Int64'
    * 'Float32'
    * 'Float64'
    """
    dtype = request.param
    return dtype
# Table of (inferred type name, sample values) pairs; each sample contains a
# missing value. Categoricals are handled separately.
_any_skipna_inferred_dtype = [
    ("string", ["a", np.nan, "c"]),
    ("string", ["a", pd.NA, "c"]),
    # pd.NaT not considered valid by is_string_array
    ("mixed", ["a", pd.NaT, "c"]),
    ("bytes", [b"a", np.nan, b"c"]),
    ("empty", [np.nan, np.nan, np.nan]),
    ("empty", []),
    ("mixed-integer", ["a", np.nan, 2]),
    ("mixed", ["a", np.nan, 2.0]),
    ("floating", [1.0, np.nan, 2.0]),
    ("integer", [1, np.nan, 2]),
    ("mixed-integer-float", [1, np.nan, 2.0]),
    ("decimal", [Decimal(1), np.nan, Decimal(2)]),
    ("boolean", [True, np.nan, False]),
    ("boolean", [True, pd.NA, False]),
    (
        "datetime64",
        [np.datetime64("2013-01-01"), np.nan, np.datetime64("2018-01-01")],
    ),
    ("datetime", [Timestamp("20130101"), np.nan, Timestamp("20180101")]),
    ("date", [date(2013, 1, 1), np.nan, date(2018, 1, 1)]),
    # The following two dtypes are commented out due to GH 23554
    # ('complex', [1 + 1j, np.nan, 2 + 2j]),
    # ('timedelta64', [np.timedelta64(1, 'D'),
    #                  np.nan, np.timedelta64(2, 'D')]),
    ("timedelta", [timedelta(1), np.nan, timedelta(2)]),
    ("time", [time(1), np.nan, time(2)]),
    ("period", [Period(2013), pd.NaT, Period(2018)]),
    ("interval", [Interval(0, 1), np.nan, Interval(0, 2)]),
]
# use inferred type as fixture-id
ids = tuple(inferred for inferred, _sample in _any_skipna_inferred_dtype)
def any_skipna_inferred_dtype(request):
    """
    Fixture for all inferred dtypes from _libs.lib.infer_dtype

    The covered (inferred) types are:

    * 'string'
    * 'empty'
    * 'bytes'
    * 'mixed'
    * 'mixed-integer'
    * 'mixed-integer-float'
    * 'floating'
    * 'integer'
    * 'decimal'
    * 'boolean'
    * 'datetime64'
    * 'datetime'
    * 'date'
    * 'timedelta'
    * 'time'
    * 'period'
    * 'interval'

    Returns
    -------
    inferred_dtype : str
        The string for the inferred dtype from _libs.lib.infer_dtype
    values : np.ndarray
        An array of object dtype that will be inferred to have
        `inferred_dtype`

    Examples
    --------
    >>> import pandas._libs.lib as lib
    >>>
    >>> def test_something(any_skipna_inferred_dtype):
    ...     inferred_dtype, values = any_skipna_inferred_dtype
    ...     # will pass
    ...     assert lib.infer_dtype(values, skipna=True) == inferred_dtype
    """
    expected_name, raw_values = request.param
    # Build with dtype=object so numpy does not cast the samples.
    # Correctness of inference is tested in tests/dtypes/test_inference.py.
    return expected_name, np.array(raw_values, dtype=object)
# ---------------------------------------------------------------- | |
# Misc | |
# ---------------------------------------------------------------- | |
def ip():
    """
    Get an instance of IPython.InteractiveShell.

    Will raise a skip if IPython (at least version 6.0.0) cannot be imported.
    """
    pytest.importorskip("IPython", minversion="6.0.0")
    from IPython.core.interactiveshell import InteractiveShell

    # GH#35711 make sure sqlite history file handle is not leaked
    from traitlets.config import Config  # isort:skip

    shell_config = Config()
    # Point the history manager at ":memory:" rather than a file on disk.
    shell_config.HistoryManager.hist_file = ":memory:"
    shell = InteractiveShell(config=shell_config)
    return shell
def spmatrix(request):
    """
    Yields scipy sparse matrix classes.

    The class is looked up on ``scipy.sparse`` under the name
    ``request.param + "_matrix"``.
    """
    from scipy import sparse

    class_name = request.param + "_matrix"
    return getattr(sparse, class_name)
def tick_classes(request):
    """
    Fixture for Tick based datetime offsets available for a time series.

    Returns ``request.param`` unchanged.
    """
    tick_cls = request.param
    return tick_cls
def sort_by_key(request):
    """
    Simple fixture for testing keys in sorting methods.

    Tests None (no key) and the identity key. Returns ``request.param``
    unchanged.
    """
    key = request.param
    return key
def fsspectest():
    """
    Register a test-only fsspec filesystem under the "testmem" protocol.

    Skips when fsspec cannot be imported. Yields a ``TestMemoryFS`` instance;
    once resumed, the protocol is removed from the fsspec registry and the
    class-level state is reset.
    """
    pytest.importorskip("fsspec")
    from fsspec import register_implementation
    from fsspec.implementations.memory import MemoryFileSystem
    from fsspec.registry import _registry as registry

    class TestMemoryFS(MemoryFileSystem):
        protocol = "testmem"
        # Class-level one-slot list: every instance writes into the same slot.
        test = [None]

        def __init__(self, **kwargs) -> None:
            # Record (and strip) the "test" keyword before delegating to the
            # parent constructor.
            captured = kwargs.pop("test", None)
            self.test[0] = captured
            super().__init__(**kwargs)

    register_implementation("testmem", TestMemoryFS, clobber=True)
    filesystem = TestMemoryFS()
    yield filesystem

    # Teardown: unregister the protocol and reset shared state.
    registry.pop("testmem", None)
    TestMemoryFS.test[0] = None
    # NOTE(review): ``store`` is presumably inherited from MemoryFileSystem.
    TestMemoryFS.store.clear()
def names(request):
    """
    A 3-tuple of names, the first two for operands, the last for a result.

    Returns ``request.param`` unchanged.
    """
    name_triple = request.param
    return name_triple
def indexer_sli(request):
    """
    Parametrize over __setitem__, loc.__setitem__, iloc.__setitem__

    Returns ``request.param`` unchanged.
    """
    indexer = request.param
    return indexer
def indexer_li(request):
    """
    Parametrize over loc.__getitem__, iloc.__getitem__

    Returns ``request.param`` unchanged.
    """
    indexer = request.param
    return indexer
def indexer_si(request):
    """
    Parametrize over __setitem__, iloc.__setitem__

    Returns ``request.param`` unchanged.
    """
    indexer = request.param
    return indexer
def indexer_sl(request):
    """
    Parametrize over __setitem__, loc.__setitem__

    Returns ``request.param`` unchanged.
    """
    indexer = request.param
    return indexer
def indexer_al(request):
    """
    Parametrize over at.__setitem__, loc.__setitem__

    Returns ``request.param`` unchanged.
    """
    indexer = request.param
    return indexer
def indexer_ial(request):
    """
    Parametrize over iat.__setitem__, iloc.__setitem__

    Returns ``request.param`` unchanged.
    """
    indexer = request.param
    return indexer
def using_array_manager():
    """
    Fixture to check if the array manager is being used.

    True exactly when ``pd.options.mode.data_manager`` equals "array".
    """
    active_manager = pd.options.mode.data_manager
    return active_manager == "array"
def using_copy_on_write() -> bool:
    """
    Fixture to check if Copy-on-Write is enabled.

    Requires both the ``mode.copy_on_write`` option to be truthy and
    ``mode.data_manager`` to equal "block".
    """
    mode_options = pd.options.mode
    # ``and`` short-circuits: data_manager is only read when copy_on_write
    # is truthy.
    return mode_options.copy_on_write and mode_options.data_manager == "block"