# biome.text.data.readers Module

# from_csv Function


def from_csv (
  path: Union[str, List[str]],
  **params,
)  -> dask.dataframe.core.DataFrame

Creates a dd.DataFrame from one or several csv files.

Includes a "path column".

Parameters

path
Path to files
**params
Extra arguments passed on to dask.dataframe.read_csv

Returns

dataframe
A dd.DataFrame

# from_json Function


def from_json (
  path: Union[str, List[str]],
  flatten: bool = False,
  **params,
)  -> dask.dataframe.core.DataFrame

Creates a dd.DataFrame from one or several json files.

Includes a "path column".

Parameters

path
Path to files
flatten
If true, flatten nested data (default false).
**params
Extra arguments passed on to pandas.read_json

Returns

dataframe
A dd.DataFrame

# from_parquet Function


def from_parquet (
  path: Union[str, List[str]],
  **params,
)  -> dask.dataframe.core.DataFrame

Creates a dd.DataFrame from one or several parquet files.

Includes a "path column".

Parameters

path
Path to files
**params
Extra arguments passed on to pandas.read_parquet

Returns

df
A dd.DataFrame

# from_excel Function


def from_excel (
  path: Union[str, List[str]],
  **params,
)  -> dask.dataframe.core.DataFrame

Creates a dask.dataframe.DataFrame from one or several excel files. Includes a "path column".

Parameters

path
Path to files
**params
Extra arguments passed on to pandas.read_excel

Returns

df
A dask.dataframe.DataFrame

# DataFrameReader Class


class DataFrameReader ()

A base class for reading a :class:dask.dataframe.DataFrame

# Subclasses

# read Static method


def read (
  source: Union[str, List[str]],
  **kwargs,
)  -> dask.dataframe.core.DataFrame

Base class method for reading the data sources as a :class:dask.dataframe.DataFrame

Parameters

source
The source information.
**kwargs
Extra arguments passed to the read method. Each reader should declare the arguments it needs.

Returns

A :class:dask.dataframe.DataFrame read from source
 

# ElasticsearchDataFrameReader Class


class ElasticsearchDataFrameReader ()

Reads a :class:dask.dataframe.DataFrame from an Elasticsearch index

# Ancestors

# read Static method


def read (
  source: Union[str, List[str]],
  index: str,
  doc_type: str = '_doc',
  query: Union[dict, NoneType] = None,
  es_host: str = 'http://localhost:9200',
  flatten_content: bool = False,
  **kwargs,
)  -> dask.dataframe.core.DataFrame

Creates a :class:dask.dataframe.DataFrame from an Elasticsearch index

Parameters

source
The source param must match :class:ElasticsearchDataFrameReader.SOURCE_TYPE
es_host
The Elasticsearch host URL (defaults to "http://localhost:9200")
index
The elasticsearch index
doc_type
The Elasticsearch document type (defaults to "_doc")
query
The query applied to the index to extract the data
flatten_content
If True, flattens all nested data. Flattening may take time, so it is deactivated by default.
**kwargs
Extra arguments passed to the base search method

Returns

A :class:dask.dataframe.DataFrame with index query results
 
Maintained by