# biome.text.data.helpers Module

# configure_dask_cluster Function


def configure_dask_cluster (
  address: str = 'local',
  n_workers: int = 1,
  worker_memory: Union[str, int] = '1GB',
)  -> Union[distributed.client.Client, NoneType]

Creates a dask client (with a LocalCluster if needed)

Parameters

address
The cluster address. If "local", tries to connect to a local cluster listening on port 8786. If no cluster is listening, creates a new LocalCluster.
n_workers
The number of cluster workers (only used when a new "local" cluster is created)
worker_memory
The memory reserved for local workers

Returns

A new dask Client
 

# close_dask_client Function


def close_dask_client()

# extension_from_path Function


def extension_from_path(path: Union[str, List[str]]) -> str

Helper method to get file extension

Parameters

path
A string or a list of strings. If it is a list, the first entry is taken.

Returns

extension
File extension

# make_paths_relative Function


def make_paths_relative (
  yaml_dirname: str,
  cfg_dict: Dict,
  path_keys: List[str] = None,
) 

Helper method to convert file system paths relative to the yaml config file, to paths relative to the current path.

It will recursively cycle through cfg_dict if it is nested.

Parameters

yaml_dirname
Dirname of the yaml config file (as obtained by os.path.dirname).
cfg_dict
The config dictionary extracted from the yaml file.
path_keys
If not None, it will only try to modify the cfg_dict values corresponding to the path_keys.

# is_relative_file_system_path Function


def is_relative_file_system_path(string: str) -> bool

Helper method to check if a string is a relative file system path.

Parameters

string
The string to be checked.

Returns

bool
Whether the string is a relative file system path or not. If string is not of type str, returns False.

# flatten_dask_dataframe Function


def flatten_dask_dataframe(data_frame: dask.dataframe.core.DataFrame) -> dask.dataframe.core.DataFrame

Flatten a dataframe, adding nested values as new columns and dropping the old ones.

Parameters

data_frame
The original dask DataFrame

Returns

A new DataFrame with flattened content
 

# flatten_dataframe Function


def flatten_dataframe(data_frame: pandas.core.frame.DataFrame) -> pandas.core.frame.DataFrame

Maintained by