Source code for imputena.deletion.delete_pairwise

import pandas as pd


def delete_pairwise(data=None, columns=None, threshold=None, inplace=False):
    """Performs pairwise deletion on the data: Drops any rows that contain NA
    values in any of the specified columns. If a threshold is given, the
    function instead keeps only those rows that have at least that many
    non-NA values in the specified columns.

    :param data: The data on which to perform the pairwise deletion of
        missing values.
    :type data: pandas.DataFrame
    :param columns: Rows will be dropped if any of their values in any of
        those columns is NA. A single column name given as a string is
        treated as a one-element list. If None, all columns of the data are
        considered.
    :type columns: array-like, optional
    :param threshold: Require that many non-NA values in the specified
        columns. If None, a single NA value is enough to drop a row.
    :type threshold: int, optional
    :param inplace: If True, do operation inplace and return None.
    :type inplace: bool, default False
    :return: The dataframe with all rows containing NA in one or more of the
        specified columns eliminated or None if inplace=True.
    :rtype: pandas.DataFrame or None
    :raises: TypeError if data is not a DataFrame, ValueError if one of the
        given columns is not a column of the data.
    """
    # Check that data is a DataFrame:
    if not isinstance(data, pd.DataFrame):
        raise TypeError('The data has to be a DataFrame.')

    # Check that each of the given columns is actually a column of data:
    if columns is not None:
        if isinstance(columns, str):
            # A bare string would otherwise be iterated character by
            # character.
            columns = [columns]
        else:
            # Materialize once so that one-shot iterables (e.g. generators)
            # survive the validation loop and can still be used as subset.
            columns = list(columns)
        for column in columns:
            if column not in data.columns:
                # str.format instead of '+' so that non-string labels
                # (e.g. integers) produce the intended ValueError.
                raise ValueError(
                    '\'{}\' is not a column of the data.'.format(column))

    # Only forward thresh when a threshold was requested: recent pandas
    # versions distinguish "thresh not given" from an explicit thresh=None,
    # so passing None through would not select the default "drop on any NA"
    # behaviour there.
    dropna_kwargs = {'subset': columns}
    if threshold is not None:
        dropna_kwargs['thresh'] = threshold

    # Apply the pairwise deletion and return the result, or None if inplace:
    if inplace:
        data.dropna(inplace=True, **dropna_kwargs)
        return None
    return data.dropna(**dropna_kwargs)