zmc
2023-12-22 9fdbf60165db0400c2e8e6be2dc6e88138ac719a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
"""Common utility functions for rolling operations"""
from __future__ import annotations
 
from collections import defaultdict
from typing import cast
 
import numpy as np
 
from pandas.core.dtypes.generic import (
    ABCDataFrame,
    ABCSeries,
)
 
from pandas.core.indexes.api import MultiIndex
 
 
def flex_binary_moment(arg1, arg2, f, pairwise: bool = False):
    if isinstance(arg1, ABCSeries) and isinstance(arg2, ABCSeries):
        X, Y = prep_binary(arg1, arg2)
        return f(X, Y)
 
    elif isinstance(arg1, ABCDataFrame):
        from pandas import DataFrame
 
        def dataframe_from_int_dict(data, frame_template) -> DataFrame:
            result = DataFrame(data, index=frame_template.index)
            if len(result.columns) > 0:
                result.columns = frame_template.columns[result.columns]
            else:
                result.columns = frame_template.columns.copy()
            return result
 
        results = {}
        if isinstance(arg2, ABCDataFrame):
            if pairwise is False:
                if arg1 is arg2:
                    # special case in order to handle duplicate column names
                    for i in range(len(arg1.columns)):
                        results[i] = f(arg1.iloc[:, i], arg2.iloc[:, i])
                    return dataframe_from_int_dict(results, arg1)
                else:
                    if not arg1.columns.is_unique:
                        raise ValueError("'arg1' columns are not unique")
                    if not arg2.columns.is_unique:
                        raise ValueError("'arg2' columns are not unique")
                    X, Y = arg1.align(arg2, join="outer")
                    X, Y = prep_binary(X, Y)
                    res_columns = arg1.columns.union(arg2.columns)
                    for col in res_columns:
                        if col in X and col in Y:
                            results[col] = f(X[col], Y[col])
                    return DataFrame(results, index=X.index, columns=res_columns)
            elif pairwise is True:
                results = defaultdict(dict)
                for i in range(len(arg1.columns)):
                    for j in range(len(arg2.columns)):
                        if j < i and arg2 is arg1:
                            # Symmetric case
                            results[i][j] = results[j][i]
                        else:
                            results[i][j] = f(
                                *prep_binary(arg1.iloc[:, i], arg2.iloc[:, j])
                            )
 
                from pandas import concat
 
                result_index = arg1.index.union(arg2.index)
                if len(result_index):
                    # construct result frame
                    result = concat(
                        [
                            concat(
                                [results[i][j] for j in range(len(arg2.columns))],
                                ignore_index=True,
                            )
                            for i in range(len(arg1.columns))
                        ],
                        ignore_index=True,
                        axis=1,
                    )
                    result.columns = arg1.columns
 
                    # set the index and reorder
                    if arg2.columns.nlevels > 1:
                        # mypy needs to know columns is a MultiIndex, Index doesn't
                        # have levels attribute
                        arg2.columns = cast(MultiIndex, arg2.columns)
                        # GH 21157: Equivalent to MultiIndex.from_product(
                        #  [result_index], <unique combinations of arg2.columns.levels>,
                        # )
                        # A normal MultiIndex.from_product will produce too many
                        # combinations.
                        result_level = np.tile(
                            result_index, len(result) // len(result_index)
                        )
                        arg2_levels = (
                            np.repeat(
                                arg2.columns.get_level_values(i),
                                len(result) // len(arg2.columns),
                            )
                            for i in range(arg2.columns.nlevels)
                        )
                        result_names = list(arg2.columns.names) + [result_index.name]
                        result.index = MultiIndex.from_arrays(
                            [*arg2_levels, result_level], names=result_names
                        )
                        # GH 34440
                        num_levels = len(result.index.levels)
                        new_order = [num_levels - 1] + list(range(num_levels - 1))
                        result = result.reorder_levels(new_order).sort_index()
                    else:
                        result.index = MultiIndex.from_product(
                            [range(len(arg2.columns)), range(len(result_index))]
                        )
                        result = result.swaplevel(1, 0).sort_index()
                        result.index = MultiIndex.from_product(
                            [result_index] + [arg2.columns]
                        )
                else:
                    # empty result
                    result = DataFrame(
                        index=MultiIndex(
                            levels=[arg1.index, arg2.columns], codes=[[], []]
                        ),
                        columns=arg2.columns,
                        dtype="float64",
                    )
 
                # reset our index names to arg1 names
                # reset our column names to arg2 names
                # careful not to mutate the original names
                result.columns = result.columns.set_names(arg1.columns.names)
                result.index = result.index.set_names(
                    result_index.names + arg2.columns.names
                )
 
                return result
        else:
            results = {
                i: f(*prep_binary(arg1.iloc[:, i], arg2))
                for i in range(len(arg1.columns))
            }
            return dataframe_from_int_dict(results, arg1)
 
    else:
        return flex_binary_moment(arg2, arg1, f)
 
 
def zsqrt(x):
    with np.errstate(all="ignore"):
        result = np.sqrt(x)
        mask = x < 0
 
    if isinstance(x, ABCDataFrame):
        if mask._values.any():
            result[mask] = 0
    else:
        if mask.any():
            result[mask] = 0
 
    return result
 
 
def prep_binary(arg1, arg2):
    # mask out values, this also makes a common index...
    X = arg1 + 0 * arg2
    Y = arg2 + 0 * arg1
    return X, Y