Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 47 additions & 33 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,34 +1,48 @@
repos:
- repo: https://github.com/timothycrosley/isort
rev: 5.9.3
hooks:
- id: isort
additional_dependencies: [toml]
exclude: examples/*
- repo: https://github.com/python/black
rev: 22.3.0
hooks:
- id: black
- repo: https://gitlab.com/pycqa/flake8
rev: 3.9.2
hooks:
- id: flake8
- repo: https://github.com/pycqa/pylint
rev: pylint-2.7.4
hooks:
- id: pylint
- repo: https://github.com/econchick/interrogate
rev: 1.5.0
hooks:
- id: interrogate
exclude: ^(build|docs|merlin/io|tests|setup.py|versioneer.py)
args: [--config=pyproject.toml]
- repo: https://github.com/codespell-project/codespell
rev: v2.1.0
hooks:
- id: codespell
- repo: https://github.com/PyCQA/bandit
rev: 1.7.0
hooks:
- id: bandit
args: [--verbose, -ll, -x, tests,examples,bench]
# imports
- repo: https://github.com/MarcoGorelli/absolufy-imports
rev: v0.3.1
hooks:
- id: absolufy-imports
- repo: https://github.com/timothycrosley/isort
rev: 5.9.3
hooks:
- id: isort
additional_dependencies: [toml]
exclude: examples/*
# code style
- repo: https://github.com/python/black
rev: 22.3.0
hooks:
- id: black
- repo: https://github.com/pycqa/pylint
rev: pylint-2.7.4
hooks:
- id: pylint
- repo: https://gitlab.com/pycqa/flake8
rev: 3.9.2
hooks:
- id: flake8
# notebooks
- repo: https://github.com/s-weigand/flake8-nb
rev: v0.3.0
hooks:
- id: flake8-nb
files: \.ipynb$
# documentation
- repo: https://github.com/econchick/interrogate
rev: 1.5.0
hooks:
- id: interrogate
exclude: ^(build|docs|merlin/io|tests|setup.py|versioneer.py)
args: [--config=pyproject.toml]
- repo: https://github.com/codespell-project/codespell
rev: v2.1.0
hooks:
- id: codespell
# security
- repo: https://github.com/PyCQA/bandit
rev: 1.7.0
hooks:
- id: bandit
args: [--verbose, -ll, -x, tests,examples,bench]
2 changes: 1 addition & 1 deletion merlin/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# limitations under the License.
#

from ._version import get_versions
from merlin.core._version import get_versions

__version__ = get_versions()["version"]
del get_versions
8 changes: 4 additions & 4 deletions merlin/dag/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#

# flake8: noqa
from .base_operator import BaseOperator, Supports
from .graph import Graph
from .node import Node, iter_nodes, postorder_iter_nodes, preorder_iter_nodes
from .selector import ColumnSelector
from merlin.dag.base_operator import BaseOperator, Supports
from merlin.dag.graph import Graph
from merlin.dag.node import Node, iter_nodes, postorder_iter_nodes, preorder_iter_nodes
from merlin.dag.selector import ColumnSelector
8 changes: 4 additions & 4 deletions merlin/dag/ops/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#
# Alias the submodule contents here so that moving them into submodules does not break existing imports
# flake8: noqa
from .concat_columns import ConcatColumns
from .selection import SelectionOp
from .subset_columns import SubsetColumns
from .subtraction import SubtractionOp
from merlin.dag.ops.concat_columns import ConcatColumns
from merlin.dag.ops.selection import SelectionOp
from merlin.dag.ops.subset_columns import SubsetColumns
from merlin.dag.ops.subtraction import SubtractionOp
8 changes: 4 additions & 4 deletions merlin/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#

# flake8: noqa
from . import dataframe_iter, dataset, shuffle
from .dataframe_iter import DataFrameIter
from .dataset import Dataset
from .shuffle import Shuffle, shuffle_df
from merlin.io import dataframe_iter, dataset, shuffle
from merlin.io.dataframe_iter import DataFrameIter
from merlin.io.dataset import Dataset
from merlin.io.shuffle import Shuffle, shuffle_df
3 changes: 2 additions & 1 deletion merlin/io/avro.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from dask.base import tokenize
from dask.dataframe.core import new_dd_object

from .dataset_engine import DatasetEngine
from merlin.io.dataset_engine import DatasetEngine


class AvroDatasetEngine(DatasetEngine):
Expand All @@ -31,6 +31,7 @@ class AvroDatasetEngine(DatasetEngine):
"""

def __init__(self, paths, part_size, storage_options=None, cpu=False, **kwargs):
# pylint: disable=access-member-before-definition
super().__init__(paths, part_size, storage_options=storage_options, cpu=cpu)
if kwargs != {}:
raise ValueError("Unexpected AvroDatasetEngine argument(s).")
Expand Down
3 changes: 2 additions & 1 deletion merlin/io/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from fsspec.core import get_fs_token_paths
from fsspec.utils import infer_compression

from .dataset_engine import DatasetEngine
from merlin.io.dataset_engine import DatasetEngine


class CSVDatasetEngine(DatasetEngine):
Expand All @@ -37,6 +37,7 @@ class CSVDatasetEngine(DatasetEngine):
"""

def __init__(self, paths, part_size, storage_options=None, cpu=False, **kwargs):
# pylint: disable=access-member-before-definition
super().__init__(paths, part_size, cpu=cpu, storage_options=storage_options)
self._meta = {}
self.csv_kwargs = kwargs
Expand Down
5 changes: 2 additions & 3 deletions merlin/io/dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,9 @@

from merlin.core.dispatch import annotate
from merlin.core.utils import ensure_optimize_dataframe_graph, global_dask_client
from merlin.io.shuffle import Shuffle
from merlin.io.worker import clean_worker_cache, get_worker_cache

from .shuffle import Shuffle
from .writer_factory import _writer_cls_factory, writer_factory
from merlin.io.writer_factory import _writer_cls_factory, writer_factory


class DaskSubgraph:
Expand Down
2 changes: 1 addition & 1 deletion merlin/io/dataframe_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from dask.dataframe.core import new_dd_object
from dask.highlevelgraph import HighLevelGraph

from .dataset_engine import DatasetEngine
from merlin.io.dataset_engine import DatasetEngine


class DataFrameDatasetEngine(DatasetEngine):
Expand Down
11 changes: 5 additions & 6 deletions merlin/io/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,15 @@
import merlin.core.dispatch as dispatch
from merlin.core.dispatch import convert_data, hex_to_int, is_dataframe_object
from merlin.core.utils import device_mem_size, global_dask_client, set_client_deprecated
from merlin.io.csv import CSVDatasetEngine
from merlin.io.dask import _ddf_to_dataset, _simple_shuffle
from merlin.io.dataframe_engine import DataFrameDatasetEngine
from merlin.io.dataframe_iter import DataFrameIter
from merlin.io.parquet import ParquetDatasetEngine
from merlin.io.shuffle import _check_shuffle_arg
from merlin.schema import ColumnSchema, Schema
from merlin.schema.io.tensorflow_metadata import TensorflowMetadata

from .csv import CSVDatasetEngine
from .dask import _ddf_to_dataset, _simple_shuffle
from .dataframe_engine import DataFrameDatasetEngine
from .parquet import ParquetDatasetEngine

try:
import cudf
except ImportError:
Expand Down Expand Up @@ -310,7 +309,7 @@ def __init__(
)
elif engine == "avro":
try:
from .avro import AvroDatasetEngine
from merlin.io.avro import AvroDatasetEngine
except ImportError as e:
raise RuntimeError(
"Failed to import AvroDatasetEngine. Make sure uavro is installed."
Expand Down
2 changes: 1 addition & 1 deletion merlin/io/hugectr.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

import numpy as np

from .writer import ThreadedWriter
from merlin.io.writer import ThreadedWriter


class HugeCTRWriter(ThreadedWriter):
Expand Down
9 changes: 4 additions & 5 deletions merlin/io/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,10 @@
aggregate_row_groups = None

from merlin.core.utils import run_on_worker

from .dataset_engine import DatasetEngine
from .fsspec_utils import _optimized_read_partition_remote, _optimized_read_remote
from .shuffle import Shuffle, shuffle_df
from .writer import ThreadedWriter
from merlin.io.dataset_engine import DatasetEngine
from merlin.io.fsspec_utils import _optimized_read_partition_remote, _optimized_read_remote
from merlin.io.shuffle import Shuffle, shuffle_df
from merlin.io.writer import ThreadedWriter

LOG = logging.getLogger("merlin")

Expand Down
3 changes: 1 addition & 2 deletions merlin/io/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@
from fsspec.core import get_fs_token_paths

from merlin.core.dispatch import annotate

from .shuffle import shuffle_df
from merlin.io.shuffle import shuffle_df


class Writer:
Expand Down
4 changes: 2 additions & 2 deletions merlin/io/writer_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
#
from fsspec.core import get_fs_token_paths

from .hugectr import HugeCTRWriter
from .parquet import CPUParquetWriter, GPUParquetWriter
from merlin.io.hugectr import HugeCTRWriter
from merlin.io.parquet import CPUParquetWriter, GPUParquetWriter


def writer_factory(
Expand Down
4 changes: 2 additions & 2 deletions merlin/schema/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@
#

# flake8: noqa
from .schema import ColumnSchema, Schema
from .tags import Tags, TagSet, TagsType
from merlin.schema.schema import ColumnSchema, Schema
from merlin.schema.tags import Tags, TagSet, TagsType
14 changes: 7 additions & 7 deletions merlin/schema/io/tensorflow_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@
import fsspec
import numpy

from ..schema import ColumnSchema
from ..schema import Schema as MerlinSchema
from ..tags import Tags
from . import proto_utils, schema_bp
from .schema_bp import Feature, FeatureType, FixedShape, FloatDomain, IntDomain
from .schema_bp import Schema as ProtoSchema
from .schema_bp import ValueCount
from merlin.schema.io import proto_utils, schema_bp
from merlin.schema.io.schema_bp import Feature, FeatureType, FixedShape, FloatDomain, IntDomain
from merlin.schema.io.schema_bp import Schema as ProtoSchema
from merlin.schema.io.schema_bp import ValueCount
from merlin.schema.schema import ColumnSchema
from merlin.schema.schema import Schema as MerlinSchema
from merlin.schema.tags import Tags

DOMAIN_ATTRS = {FeatureType.INT: "int_domain", FeatureType.FLOAT: "float_domain"}
FEATURE_TYPES = {
Expand Down
2 changes: 1 addition & 1 deletion merlin/schema/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import numpy as np
import pandas as pd

from .tags import Tags, TagSet
from merlin.schema.tags import Tags, TagSet


@dataclass(frozen=True)
Expand Down