- 2.17.0 (latest)
- 2.16.0
- 2.15.0
- 2.14.0
- 2.13.0
- 2.12.0
- 2.11.0
- 2.10.0
- 2.9.0
- 2.8.0
- 2.7.0
- 2.6.0
- 2.5.0
- 2.4.0
- 2.3.0
- 2.2.0
- 2.0.0-dev0
- 1.36.0
- 1.35.0
- 1.34.0
- 1.33.0
- 1.32.0
- 1.31.0
- 1.30.0
- 1.29.0
- 1.28.0
- 1.27.0
- 1.26.0
- 1.25.0
- 1.24.0
- 1.22.0
- 1.21.0
- 1.20.0
- 1.19.0
- 1.18.0
- 1.17.0
- 1.16.0
- 1.15.0
- 1.14.0
- 1.13.0
- 1.12.0
- 1.11.1
- 1.10.0
- 1.9.0
- 1.8.0
- 1.7.0
- 1.6.0
- 1.5.0
- 1.4.0
- 1.3.0
- 1.2.0
- 1.1.0
- 1.0.0
- 0.26.0
- 0.25.0
- 0.24.0
- 0.23.0
- 0.22.0
- 0.21.0
- 0.20.1
- 0.19.2
- 0.18.0
- 0.17.0
- 0.16.0
- 0.15.0
- 0.14.1
- 0.13.0
- 0.12.0
- 0.11.0
- 0.10.0
- 0.9.0
- 0.8.0
- 0.7.0
- 0.6.0
- 0.5.0
- 0.4.0
- 0.3.0
- 0.2.0
SeriesGroupBy
(
block
:
bigframes
.
core
.
blocks
.
Block
,
value_column
:
str
,
by_col_ids
:
typing
.
Sequence
[
str
],
value_name
:
typing
.
Hashable
=
None
,
dropna
=
True
,
)
Class for grouping and aggregating relational data.
Methods
agg
agg
(
func
=
None
,
)
-
> typing
.
Union
[
bigframes
.
dataframe
.
DataFrame
,
bigframes
.
series
.
Series
]
Aggregate using one or more operations.
Examples:
>>> import bigframes.pandas as bpd
>>> import numpy as np
>>> bpd.options.display.progress_bar = None
>>> s = bpd.Series([1, 2, 3, 4], index=[1, 1, 2, 2])
>>> s.groupby(level=0).agg(['min', 'max'])
min max
1 1 2
2 3 4
<BLANKLINE>
[2 rows x 2 columns]
aggregate
aggregate
(
func
=
None
,
)
-
> typing
.
Union
[
bigframes
.
dataframe
.
DataFrame
,
bigframes
.
series
.
Series
]
Aggregate using one or more operations.
Examples:
>>> import bigframes.pandas as bpd
>>> import numpy as np
>>> bpd.options.display.progress_bar = None
>>> s = bpd.Series([1, 2, 3, 4], index=[1, 1, 2, 2])
>>> s.groupby(level=0).aggregate(['min', 'max'])
min max
1 1 2
2 3 4
<BLANKLINE>
[2 rows x 2 columns]
all
all
()
-
> bigframes
.
series
.
Series
Return True if all values in the group are true, else False.
Examples:
For SeriesGroupBy:
>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None
>>> lst = ['a', 'a', 'b']
>>> ser = bpd.Series([1, 2, 0], index=lst)
>>> ser.groupby(level=0).all()
a True
b False
dtype: boolean
For DataFrameGroupBy:
>>> data = [[1, 0, 3], [1, 5, 6], [7, 8, 9]]
>>> df = bpd.DataFrame(data, columns=["a", "b", "c"],
... index=["ostrich", "penguin", "parrot"])
>>> df.groupby(by=["a"]).all()
b c
a
1 False True
7 True True
<BLANKLINE>
[2 rows x 2 columns]
any
any
()
-
> bigframes
.
series
.
Series
Return True if any value in the group is true, else False.
Examples:
For SeriesGroupBy:
>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None
>>> lst = ['a', 'a', 'b']
>>> ser = bpd.Series([1, 2, 0], index=lst)
>>> ser.groupby(level=0).any()
a True
b False
dtype: boolean
For DataFrameGroupBy:
>>> data = [[1, 0, 3], [1, 0, 6], [7, 1, 9]]
>>> df = bpd.DataFrame(data, columns=["a", "b", "c"],
... index=["ostrich", "penguin", "parrot"])
>>> df.groupby(by=["a"]).any()
b c
a
1 False True
7 True True
<BLANKLINE>
[2 rows x 2 columns]
count
count
()
-
> bigframes
.
series
.
Series
Compute count of group, excluding missing values.
Examples:
For SeriesGroupBy:
>>> import bigframes.pandas as bpd
>>> import numpy as np
>>> bpd.options.display.progress_bar = None
>>> lst = ['a', 'a', 'b']
>>> ser = bpd.Series([1, 2, np.nan], index=lst)
>>> ser.groupby(level=0).count()
a 2
b 0
dtype: Int64
For DataFrameGroupBy:
>>> data = [[1, np.nan, 3], [1, np.nan, 6], [7, 8, 9]]
>>> df = bpd.DataFrame(data, columns=["a", "b", "c"],
... index=["cow", "horse", "bull"])
>>> df.groupby(by=["a"]).count()
b c
a
1 0 2
7 1 1
<BLANKLINE>
[2 rows x 2 columns]
cumcount
cumcount
(
*
args
,
**
kwargs
)
-
> bigframes
.
series
.
Series
Number each item in each group from 0 to the length of that group - 1.
Examples:
For SeriesGroupBy:
>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None
>>> lst = ['a', 'a', 'b', 'b', 'c']
>>> ser = bpd.Series([5, 1, 2, 3, 4], index=lst)
>>> ser.groupby(level=0).cumcount()
a 0
a 1
b 0
b 1
c 0
dtype: Int64
>>> ser.groupby(level=0).cumcount(ascending=False)
a 0
a 1
b 0
b 1
c 0
dtype: Int64
ascending
bool, default True
If False, number in reverse, from length of group - 1 to 0.
cummax
cummax
(
*
args
,
**
kwargs
)
-
> bigframes
.
series
.
Series
Cumulative max for each group.
Examples:
For SeriesGroupBy:
>>> import bigframes.pandas as bpd
>>> import numpy as np
>>> bpd.options.display.progress_bar = None
>>> lst = ['a', 'a', 'b']
>>> ser = bpd.Series([6, 2, 0], index=lst)
>>> ser.groupby(level=0).cummax()
a 6
a 6
b 0
dtype: Int64
For DataFrameGroupBy:
>>> data = [[1, 8, 2], [1, 2, 5], [2, 6, 9]]
>>> df = bpd.DataFrame(data, columns=["a", "b", "c"],
... index=["fox", "gorilla", "lion"])
>>> df.groupby("a").cummax()
b c
fox 8 2
gorilla 8 5
lion 6 9
<BLANKLINE>
[3 rows x 2 columns]
cummin
cummin
(
*
args
,
**
kwargs
)
-
> bigframes
.
series
.
Series
Cumulative min for each group.
Examples:
For SeriesGroupBy:
>>> import bigframes.pandas as bpd
>>> import numpy as np
>>> bpd.options.display.progress_bar = None
>>> lst = ['a', 'a', 'b']
>>> ser = bpd.Series([6, 2, 0], index=lst)
>>> ser.groupby(level=0).cummin()
a 6
a 2
b 0
dtype: Int64
For DataFrameGroupBy:
>>> data = [[1, 8, 2], [1, 2, 5], [2, 6, 9]]
>>> df = bpd.DataFrame(data, columns=["a", "b", "c"],
... index=["fox", "gorilla", "lion"])
>>> df.groupby("a").cummin()
b c
fox 8 2
gorilla 2 2
lion 6 9
<BLANKLINE>
[3 rows x 2 columns]
cumprod
cumprod
(
*
args
,
**
kwargs
)
-
> bigframes
.
series
.
Series
Cumulative product for each group.
Examples:
For SeriesGroupBy:
>>> import bigframes.pandas as bpd
>>> import numpy as np
>>> bpd.options.display.progress_bar = None
>>> lst = ['a', 'a', 'b']
>>> ser = bpd.Series([6, 2, 0], index=lst)
>>> ser.groupby(level=0).cumprod()
a 6.0
a 12.0
b 0.0
dtype: Float64
For DataFrameGroupBy:
>>> data = [[1, 8, 2], [1, 2, 5], [2, 6, 9]]
>>> df = bpd.DataFrame(data, columns=["a", "b", "c"],
... index=["cow", "horse", "bull"])
>>> df.groupby("a").cumprod()
b c
cow 8.0 2.0
horse 16.0 10.0
bull 6.0 9.0
<BLANKLINE>
[3 rows x 2 columns]
cumsum
cumsum
(
*
args
,
**
kwargs
)
-
> bigframes
.
series
.
Series
Cumulative sum for each group.
Examples:
For SeriesGroupBy:
>>> import bigframes.pandas as bpd
>>> import numpy as np
>>> bpd.options.display.progress_bar = None
>>> lst = ['a', 'a', 'b']
>>> ser = bpd.Series([6, 2, 0], index=lst)
>>> ser.groupby(level=0).cumsum()
a 6
a 8
b 0
dtype: Int64
For DataFrameGroupBy:
>>> data = [[1, 8, 2], [1, 2, 5], [2, 6, 9]]
>>> df = bpd.DataFrame(data, columns=["a", "b", "c"],
... index=["fox", "gorilla", "lion"])
>>> df.groupby("a").cumsum()
b c
fox 8 2
gorilla 10 7
lion 6 9
<BLANKLINE>
[3 rows x 2 columns]
diff
diff
(
periods
=
1
)
-
> bigframes
.
series
.
Series
First discrete difference of element. Calculates the difference of each element compared with another element in the group (default is element in previous row).
Examples:
For SeriesGroupBy:
>>> import bigframes.pandas as bpd
>>> import numpy as np
>>> bpd.options.display.progress_bar = None
>>> lst = ['a', 'a', 'a', 'b', 'b', 'b']
>>> ser = bpd.Series([7, 2, 8, 4, 3, 3], index=lst)
>>> ser.groupby(level=0).diff()
a <NA>
a -5
a 6
b <NA>
b -1
b 0
dtype: Int64
For DataFrameGroupBy:
>>> data = {'a': [1, 3, 5, 7, 7, 8, 3], 'b': [1, 4, 8, 4, 4, 2, 1]}
>>> df = bpd.DataFrame(data, index=['dog', 'dog', 'dog',
... 'mouse', 'mouse', 'mouse', 'mouse'])
>>> df.groupby(level=0).diff()
a b
dog <NA> <NA>
dog 2 3
dog 2 4
mouse <NA> <NA>
mouse 0 0
mouse 1 -2
mouse -5 -1
<BLANKLINE>
[7 rows x 2 columns]
expanding
expanding
(
min_periods
:
int
=
1
)
-
> bigframes
.
core
.
window
.
rolling
.
Window
Provides expanding functionality.
Examples:
>>> import bigframes.pandas as bpd
>>> import numpy as np
>>> bpd.options.display.progress_bar = None
>>> lst = ['a', 'a', 'c', 'c', 'e']
>>> ser = bpd.Series([1, 0, -2, -1, 2], index=lst)
>>> ser.groupby(level=0).expanding().min()
index index
a a 1
a 0
c c -2
c -2
e e 2
dtype: Int64
head
head
(
n
:
int
=
5
)
-
> bigframes
.
series
.
Series
Return the first n rows of each group.
Examples:
>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None
>>> df = bpd.DataFrame([[1, 2], [1, 4], [5, 6]],
... columns=['A', 'B'])
>>> df.groupby('A').head(1)
A B
0 1 2
2 5 6
[2 rows x 2 columns]
n
int
If positive: number of entries to include from start of each group. If negative: number of entries to exclude from end of each group.
kurt
kurt
(
*
args
,
**
kwargs
)
-
> bigframes
.
series
.
Series
Return unbiased kurtosis over requested axis.
Kurtosis obtained using Fisher's definition of kurtosis (kurtosis of normal == 0.0). Normalized by N-1.
Examples:
>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None
>>> lst = ['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'b']
>>> ser = bpd.Series([0, 1, 1, 0, 0, 1, 2, 4, 5], index=lst)
>>> ser.groupby(level=0).kurt()
a -6.0
b -1.963223
dtype: Float64
numeric_only
bool, default False
Include only float, int or boolean data.
kurtosis
kurtosis
(
*
args
,
**
kwargs
)
-
> bigframes
.
series
.
Series
Return unbiased kurtosis over requested axis.
Kurtosis obtained using Fisher's definition of kurtosis (kurtosis of normal == 0.0). Normalized by N-1.
Examples:
>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None
>>> lst = ['a', 'a', 'a', 'a', 'b', 'b', 'b', 'b', 'b']
>>> ser = bpd.Series([0, 1, 1, 0, 0, 1, 2, 4, 5], index=lst)
>>> ser.groupby(level=0).kurtosis()
a -6.0
b -1.963223
dtype: Float64
numeric_only
bool, default False
Include only float, int or boolean data.
max
max
(
*
args
)
-
> bigframes
.
series
.
Series
Compute max of group values.
Examples:
For SeriesGroupBy:
>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None
>>> lst = ['a', 'a', 'b', 'b']
>>> ser = bpd.Series([1, 2, 3, 4], index=lst)
>>> ser.groupby(level=0).max()
a 2
b 4
dtype: Int64
For DataFrameGroupBy:
>>> data = [[1, 8, 2], [1, 2, 5], [2, 5, 8], [2, 6, 9]]
>>> df = bpd.DataFrame(data, columns=["a", "b", "c"],
... index=["tiger", "leopard", "cheetah", "lion"])
>>> df.groupby(by=["a"]).max()
b c
a
1 8 5
2 6 9
<BLANKLINE>
[2 rows x 2 columns]
numeric_only
bool, default False
Include only float, int, boolean columns.
min_count
int, default 0
The required number of valid values to perform the operation. If fewer than min_count non-NA values are present, the result will be NA.
mean
mean
(
*
args
)
-
> bigframes
.
series
.
Series
Compute mean of groups, excluding missing values.
Examples:
>>> import bigframes.pandas as bpd
>>> import numpy as np
>>> bpd.options.display.progress_bar = None
>>> df = bpd.DataFrame({'A': [1, 1, 2, 1, 2],
... 'B': [np.nan, 2, 3, 4, 5],
... 'C': [1, 2, 1, 1, 2]}, columns=['A', 'B', 'C'])
Groupby one column and return the mean of the remaining columns in each group.
>>> df.groupby('A').mean()
B C
A
1 3.0 1.333333
2 4.0 1.5
<BLANKLINE>
[2 rows x 2 columns]
Groupby two columns and return the mean of the remaining column.
>>> df.groupby(['A', 'B']).mean()
C
A B
1 2.0 2.0
4.0 1.0
2 3.0 1.0
5.0 2.0
<BLANKLINE>
[4 rows x 1 columns]
Groupby one column and return the mean of only particular column in the group.
>>> df.groupby('A')['B'].mean()
A
1 3.0
2 4.0
Name: B, dtype: Float64
numeric_only
bool, default False
Include only float, int, boolean columns.
median
median
(
*
args
,
exact
:
bool
=
True
,
**
kwargs
)
-
> bigframes
.
series
.
Series
Compute median of groups, excluding missing values.
Examples:
For SeriesGroupBy:
>>> import bigframes.pandas as bpd
>>> import numpy as np
>>> bpd.options.display.progress_bar = None
>>> lst = ['a', 'a', 'a', 'b', 'b', 'b']
>>> ser = bpd.Series([7, 2, 8, 4, 3, 3], index=lst)
>>> ser.groupby(level=0).median()
a 7.0
b 3.0
dtype: Float64
For DataFrameGroupBy:
>>> data = {'a': [1, 3, 5, 7, 7, 8, 3], 'b': [1, 4, 8, 4, 4, 2, 1]}
>>> df = bpd.DataFrame(data, index=['dog', 'dog', 'dog',
... 'mouse', 'mouse', 'mouse', 'mouse'])
>>> df.groupby(level=0).median()
a b
dog 3.0 4.0
mouse 7.0 3.0
<BLANKLINE>
[2 rows x 2 columns]
numeric_only
bool, default False
Include only float, int, boolean columns.
exact
bool, default True
Calculate the exact median instead of an approximation.
min
min
(
*
args
)
-
> bigframes
.
series
.
Series
Compute min of group values.
Examples:
For SeriesGroupBy:
>>> import bigframes.pandas as bpd
>>> import numpy as np
>>> bpd.options.display.progress_bar = None
>>> lst = ['a', 'a', 'b', 'b']
>>> ser = bpd.Series([1, 2, 3, 4], index=lst)
>>> ser.groupby(level=0).min()
a 1
b 3
dtype: Int64
For DataFrameGroupBy:
>>> data = [[1, 8, 2], [1, 2, 5], [2, 5, 8], [2, 6, 9]]
>>> df = bpd.DataFrame(data, columns=["a", "b", "c"],
... index=["tiger", "leopard", "cheetah", "lion"])
>>> df.groupby(by=["a"]).min()
b c
a
1 2 2
2 5 8
<BLANKLINE>
[2 rows x 2 columns]
numeric_only
bool, default False
Include only float, int, boolean columns.
min_count
int, default 0
The required number of valid values to perform the operation. If fewer than min_count non-NA values are present, the result will be NA.
nunique
nunique
()
-
> bigframes
.
series
.
Series
Return number of unique elements in the group.
Examples:
>>> import bigframes.pandas as bpd
>>> import numpy as np
>>> bpd.options.display.progress_bar = None
>>> lst = ['a', 'a', 'b', 'b']
>>> ser = bpd.Series([1, 2, 3, 3], index=lst)
>>> ser.groupby(level=0).nunique()
a 2
b 1
dtype: Int64
prod
prod
(
*
args
)
-
> bigframes
.
series
.
Series
Compute prod of group values. (DataFrameGroupBy functionality is not yet available.)
Examples:
For SeriesGroupBy:
>>> import bigframes.pandas as bpd
>>> import numpy as np
>>> bpd.options.display.progress_bar = None
>>> lst = ['a', 'a', 'b', 'b']
>>> ser = bpd.Series([1, 2, 3, 4], index=lst)
>>> ser.groupby(level=0).prod()
a 2.0
b 12.0
dtype: Float64
numeric_only
bool, default False
Include only float, int, boolean columns.
min_count
int, default 0
The required number of valid values to perform the operation. If fewer than min_count non-NA values are present, the result will be NA.
quantile
quantile
(
q
:
typing
.
Union
[
float
,
typing
.
Sequence
[
float
]]
=
0.5
,
*
,
numeric_only
:
bool
=
False
)
-
> bigframes
.
series
.
Series
Return group values at the given quantile, a la numpy.percentile.
Examples:
>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None
>>> df = bpd.DataFrame([
... ['a', 1], ['a', 2], ['a', 3],
... ['b', 1], ['b', 3], ['b', 5]
... ], columns=['key', 'val'])
>>> df.groupby('key').quantile()
val
key
a 2.0
b 3.0
<BLANKLINE>
[2 rows x 1 columns]
q
float or array-like, default 0.5 (50% quantile)
Value(s) between 0 and 1 providing the quantile(s) to compute.
numeric_only
bool, default False
Include only float, int or boolean data.
rank
rank
(
method
=
"average"
,
ascending
:
bool
=
True
,
na_option
:
str
=
"keep"
)
-
> bigframes
.
series
.
Series
Provide the rank of values within each group.
Examples:
>>> import bigframes.pandas as bpd
>>> import numpy as np
>>> bpd.options.display.progress_bar = None
>>> df = bpd.DataFrame(
... {
... "group": ["a", "a", "a", "a", "a", "b", "b", "b", "b", "b"],
... "value": [2, 4, 2, 3, 5, 1, 2, 4, 1, 5],
... }
... )
>>> df
group value
0 a 2
1 a 4
2 a 2
3 a 3
4 a 5
5 b 1
6 b 2
7 b 4
8 b 1
9 b 5
<BLANKLINE>
[10 rows x 2 columns]
>>> for method in ['average', 'min', 'max', 'dense', 'first']:
... df[f'{method}_rank'] = df.groupby('group')['value'].rank(method)
>>> df
group value average_rank min_rank max_rank dense_rank first_rank
0 a 2 1.5 1.0 2.0 1.0 1.0
1 a 4 4.0 4.0 4.0 3.0 4.0
2 a 2 1.5 1.0 2.0 1.0 2.0
3 a 3 3.0 3.0 3.0 2.0 3.0
4 a 5 5.0 5.0 5.0 4.0 5.0
5 b 1 1.5 1.0 2.0 1.0 1.0
6 b 2 3.0 3.0 3.0 2.0 3.0
7 b 4 4.0 4.0 4.0 3.0 4.0
8 b 1 1.5 1.0 2.0 1.0 2.0
9 b 5 5.0 5.0 5.0 4.0 5.0
<BLANKLINE>
[10 rows x 7 columns]
method
{'average', 'min', 'max', 'first', 'dense'}, default 'average'
* average: average rank of group. * min: lowest rank in group. * max: highest rank in group. * first: ranks assigned in order they appear in the array. * dense: like 'min', but rank always increases by 1 between groups.
ascending
bool, default True
False for ranks by high (1) to low (N).
na_option
{'keep', 'top', 'bottom'}, default 'keep'
* keep: leave NA values where they are. * top: smallest rank if ascending. * bottom: smallest rank if descending.
rolling
rolling
(
window
:
(
int
|
pandas
.
_libs
.
tslibs
.
timedeltas
.
Timedelta
|
numpy
.
timedelta64
|
datetime
.
timedelta
|
str
),
min_periods
=
None
,
closed
:
typing
.
Literal
[
"right"
,
"left"
,
"both"
,
"neither"
]
=
"right"
,
)
-
> bigframes
.
core
.
window
.
rolling
.
Window
Returns a rolling grouper, providing rolling functionality per group.
Examples:
>>> import bigframes.pandas as bpd
>>> import numpy as np
>>> bpd.options.display.progress_bar = None
>>> lst = ['a', 'a', 'a', 'a', 'e']
>>> ser = bpd.Series([1, 0, -2, -1, 2], index=lst)
>>> ser.groupby(level=0).rolling(2).min()
index index
a a <NA>
a 0
a -2
a -2
e e <NA>
dtype: Int64
window
int, pandas.Timedelta, numpy.timedelta64, datetime.timedelta, str
Size of the moving window. If an integer, the fixed number of observations used for each window. If a string, the string representation of a timedelta, which must be parsable by pandas.Timedelta(). Otherwise, the time range for each window.
min_periods
int, default None
Minimum number of observations in window required to have a value; otherwise, result is np.nan. For a window that is specified by an integer, min_periods will default to the size of the window. For a window that is not specified by an integer, min_periods will default to 1.
on
str, optional
For a DataFrame, a column label on which to calculate the rolling window, rather than the DataFrame’s index.
closed
str, default 'right'
If 'right', the first point in the window is excluded from calculations. If 'left', the last point in the window is excluded from calculations. If 'both', no points in the window are excluded from calculations. If 'neither', the first and last points in the window are excluded from calculations.
shift
shift
(
periods
=
1
)
-
> bigframes
.
series
.
Series
Shift index by desired number of periods.
size
size
()
-
> bigframes
.
series
.
Series
Compute group sizes.
Examples:
For SeriesGroupBy:
>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None
>>> lst = ['a', 'a', 'b']
>>> ser = bpd.Series([1, 2, 3], index=lst)
>>> ser
a 1
a 2
b 3
dtype: Int64
>>> ser.groupby(level=0).size()
a 2
b 1
dtype: Int64
For DataFrameGroupBy:
>>> data = [[1, 2, 3], [1, 5, 6], [7, 8, 9]]
>>> df = bpd.DataFrame(data, columns=["a", "b", "c"],
... index=["owl", "toucan", "eagle"])
>>> df
a b c
owl 1 2 3
toucan 1 5 6
eagle 7 8 9
[3 rows x 3 columns]
>>> df.groupby("a").size()
a
1 2
7 1
dtype: Int64
skew
skew
(
*
args
,
**
kwargs
)
-
> bigframes
.
series
.
Series
Return unbiased skew within groups.
Normalized by N-1.
Examples:
For SeriesGroupBy:
>>> import bigframes.pandas as bpd
>>> import numpy as np
>>> bpd.options.display.progress_bar = None
>>> ser = bpd.Series([390., 350., 357., np.nan, 22., 20., 30.],
... index=['Falcon', 'Falcon', 'Falcon', 'Falcon',
... 'Parrot', 'Parrot', 'Parrot'],
... name="Max Speed")
>>> ser.groupby(level=0).skew()
Falcon 1.525174
Parrot 1.457863
Name: Max Speed, dtype: Float64
numeric_only
bool, default False
Include only float, int or boolean data.
std
std
(
*
args
,
**
kwargs
)
-
> bigframes
.
series
.
Series
Compute standard deviation of groups, excluding missing values.
For multiple groupings, the result index will be a MultiIndex.
Examples:
For SeriesGroupBy:
>>> import bigframes.pandas as bpd
>>> import numpy as np
>>> bpd.options.display.progress_bar = None
>>> lst = ['a', 'a', 'a', 'b', 'b', 'b']
>>> ser = bpd.Series([7, 2, 8, 4, 3, 3], index=lst)
>>> ser.groupby(level=0).std()
a 3.21455
b 0.57735
dtype: Float64
For DataFrameGroupBy:
>>> data = {'a': [1, 3, 5, 7, 7, 8, 3], 'b': [1, 4, 8, 4, 4, 2, 1]}
>>> df = bpd.DataFrame(data, index=['dog', 'dog', 'dog',
... 'mouse', 'mouse', 'mouse', 'mouse'])
>>> df.groupby(level=0).std()
a b
dog 2.0 3.511885
mouse 2.217356 1.5
<BLANKLINE>
[2 rows x 2 columns]
numeric_only
bool, default False
Include only float, int or boolean data.
sum
sum
(
*
args
)
-
> bigframes
.
series
.
Series
Compute sum of group values.
Examples:
For SeriesGroupBy:
>>> import bigframes.pandas as bpd
>>> bpd.options.display.progress_bar = None
>>> lst = ['a', 'a', 'b', 'b']
>>> ser = bpd.Series([1, 2, 3, 4], index=lst)
>>> ser.groupby(level=0).sum()
a 3
b 7
dtype: Int64
For DataFrameGroupBy:
>>> data = [[1, 8, 2], [1, 2, 5], [2, 5, 8], [2, 6, 9]]
>>> df = bpd.DataFrame(data, columns=["a", "b", "c"],
... index=["tiger", "leopard", "cheetah", "lion"])
>>> df.groupby("a").sum()
b c
a
1 10 7
2 11 17
<BLANKLINE>
[2 rows x 2 columns]
numeric_only
bool, default False
Include only float, int, boolean columns.
min_count
int, default 0
The required number of valid values to perform the operation. If fewer than min_count non-NA values are present, the result will be NA.
var
var
(
*
args
,
**
kwargs
)
-
> bigframes
.
series
.
Series
Compute variance of groups, excluding missing values.
For multiple groupings, the result index will be a MultiIndex.
Examples:
For SeriesGroupBy:
>>> import bigframes.pandas as bpd
>>> import numpy as np
>>> bpd.options.display.progress_bar = None
>>> lst = ['a', 'a', 'a', 'b', 'b', 'b']
>>> ser = bpd.Series([7, 2, 8, 4, 3, 3], index=lst)
>>> ser.groupby(level=0).var()
a 10.333333
b 0.333333
dtype: Float64
For DataFrameGroupBy:
>>> data = {'a': [1, 3, 5, 7, 7, 8, 3], 'b': [1, 4, 8, 4, 4, 2, 1]}
>>> df = bpd.DataFrame(data, index=['dog', 'dog', 'dog',
... 'mouse', 'mouse', 'mouse', 'mouse'])
>>> df.groupby(level=0).var()
a b
dog 4.0 12.333333
mouse 4.916667 2.25
<BLANKLINE>
[2 rows x 2 columns]
numeric_only
bool, default False
Include only float, int or boolean data.