"""Binary encoding"""
from functools import partialmethod
from category_encoders import utils
from category_encoders.basen import BaseNEncoder
# Module authorship metadata; nothing else in the visible code reads this name.
__author__ = 'willmcginnis'
class BinaryEncoder(BaseNEncoder):
    """Binary encoding for categorical variables.

    Similar to onehot, but stores categories as binary bitstrings. This class
    is ``BaseNEncoder`` with its ``base`` argument fixed to 2.

    Parameters
    ----------

    verbose: int
        integer indicating verbosity of the output. 0 for none.
    cols: list
        a list of columns to encode, if None, all string columns will be encoded.
    drop_invariant: bool
        boolean for whether or not to drop columns with 0 variance.
    return_df: bool
        boolean for whether to return a pandas DataFrame from transform
        (otherwise it will be a numpy array).
    handle_unknown: str
        options are 'error', 'return_nan', 'value', and 'indicator'. The default is 'value'.
        Warning: if indicator is used, an extra column will be added if the transform
        matrix has unknown categories. This can cause unexpected changes in dimension
        in some cases.
    handle_missing: str
        options are 'error', 'return_nan', 'value', and 'indicator'. The default is 'value'.
        Warning: if indicator is used, an extra column will be added if the transform
        matrix has nan values. This can cause unexpected changes in dimension in some cases.

    Example
    -------
    The example below relies on ``sklearn.datasets.load_boston``, which was
    deprecated in scikit-learn 1.0 and removed in 1.2; it only runs against
    older scikit-learn releases and is kept for illustration.

    >>> from category_encoders import *
    >>> import pandas as pd
    >>> from sklearn.datasets import load_boston
    >>> bunch = load_boston()
    >>> y = bunch.target
    >>> X = pd.DataFrame(bunch.data, columns=bunch.feature_names)
    >>> enc = BinaryEncoder(cols=['CHAS', 'RAD']).fit(X, y)
    >>> numeric_dataset = enc.transform(X)
    >>> print(numeric_dataset.info())
    <class 'pandas.core.frame.DataFrame'>
    RangeIndex: 506 entries, 0 to 505
    Data columns (total 18 columns):
    CRIM       506 non-null float64
    ZN         506 non-null float64
    INDUS      506 non-null float64
    CHAS_0     506 non-null int64
    CHAS_1     506 non-null int64
    NOX        506 non-null float64
    RM         506 non-null float64
    AGE        506 non-null float64
    DIS        506 non-null float64
    RAD_0      506 non-null int64
    RAD_1      506 non-null int64
    RAD_2      506 non-null int64
    RAD_3      506 non-null int64
    RAD_4      506 non-null int64
    TAX        506 non-null float64
    PTRATIO    506 non-null float64
    B          506 non-null float64
    LSTAT      506 non-null float64
    dtypes: float64(11), int64(7)
    memory usage: 71.3 KB
    None

    """

    # Declares how input columns relate to output columns; the example above
    # shows one encoded column (e.g. CHAS) becoming several (CHAS_0, CHAS_1).
    encoding_relation = utils.EncodingRelation.ONE_TO_M

    # Reuse the parent constructor unchanged except that ``base`` is pre-bound
    # to 2. NOTE(review): partialmethod (rather than a hand-written __init__)
    # presumably keeps the remaining parameters introspectable -- confirm
    # before replacing it with a ``def``.
    __init__ = partialmethod(BaseNEncoder.__init__, base=2)