Skip to content
This repository was archived by the owner on Aug 17, 2023. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
.metaparticle
*.bak
.vscode

.idea
.env
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
2 changes: 1 addition & 1 deletion kubeflow/fairing/constants/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
KFSERVING_GROUP = "serving.kubeflow.org"
KFSERVING_KIND = "InferenceService"
KFSERVING_PLURAL = "inferenceservices"
KFSERVING_VERSION = 'v1alpha2'
KFSERVING_VERSION = 'v1beta1'
KFSERVING_DEFAULT_NAME = 'fairing-kfserving-'
KFSERVING_DEPLOYER_TYPE = 'kfservice'
KFSERVING_CONTAINER_NAME = 'user-container'
Expand Down
107 changes: 35 additions & 72 deletions kubeflow/fairing/deployers/kfserving/kfserving.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,22 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import uuid
import logging
import uuid

from kubernetes import client as k8s_client

from kfserving import V1alpha2EndpointSpec
from kfserving import V1alpha2PredictorSpec
from kfserving import V1alpha2TensorflowSpec
from kfserving import V1alpha2ONNXSpec
from kfserving import V1alpha2PyTorchSpec
from kfserving import V1alpha2SKLearnSpec
from kfserving import V1alpha2TritonSpec
from kfserving import V1alpha2XGBoostSpec
from kfserving import V1alpha2CustomSpec
from kfserving import V1alpha2InferenceServiceSpec
from kfserving import V1alpha2InferenceService

from kfserving import V1beta1InferenceService
from kfserving import V1beta1InferenceServiceSpec
from kfserving import V1beta1PredictorSpec
from kfserving import V1beta1SKLearnSpec
from kfserving import V1beta1TFServingSpec
from kfserving import V1beta1TorchServeSpec
from kfserving import V1beta1TritonSpec
from kfserving import V1beta1XGBoostSpec
from kubeflow.fairing import utils
from kubeflow.fairing.constants import constants
from kubeflow.fairing.deployers.deployer import DeployerInterface
from kubeflow.fairing.kubernetes.manager import KubeManager
from kubeflow.fairing import utils
from kubernetes import client as k8s_client

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
Expand All @@ -41,7 +36,7 @@
class KFServing(DeployerInterface):
"""Serves a prediction endpoint using Kubeflow KFServing."""

def __init__(self, framework, default_storage_uri=None, canary_storage_uri=None,
def __init__(self, framework, default_storage_uri=None,
canary_traffic_percent=0, namespace=None, labels=None, annotations=None,
custom_default_container=None, custom_canary_container=None,
isvc_name=None, stream_log=False, cleanup=False, config_file=None,
Expand All @@ -50,7 +45,6 @@ def __init__(self, framework, default_storage_uri=None, canary_storage_uri=None,
:param framework: The framework for the InferenceService, such as Tensorflow,
XGBoost and ScikitLearn etc.
:param default_storage_uri: URI pointing to Saved Model assets for default service.
:param canary_storage_uri: URI pointing to Saved Model assets for canary service.
:param canary_traffic_percent: The amount of traffic to sent to the canary, defaults to 0.
:param namespace: The k8s namespace where the InferenceService will be deployed.
:param labels: Labels for the InferenceService, separate with commas if have more than one.
Expand All @@ -73,7 +67,6 @@ def __init__(self, framework, default_storage_uri=None, canary_storage_uri=None,
self.framework = framework
self.isvc_name = isvc_name
self.default_storage_uri = default_storage_uri
self.canary_storage_uri = canary_storage_uri
self.canary_traffic_percent = canary_traffic_percent
self.annotations = annotations
self.set_labels(labels)
Expand Down Expand Up @@ -135,68 +128,38 @@ def deploy(self, isvc): # pylint:disable=arguments-differ,unused-argument

def generate_isvc(self):
""" generate InferenceService """

api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
default_predictor, canary_predictor = None, None

if self.framework == 'custom':
default_predictor = self.generate_predictor_spec(
self.framework, container=self.custom_default_container)
else:
default_predictor = self.generate_predictor_spec(
self.framework, storage_uri=self.default_storage_uri)

if self.framework != 'custom' and self.canary_storage_uri is not None:
canary_predictor = self.generate_predictor_spec(
self.framework, storage_uri=self.canary_storage_uri)
if self.framework == 'custom' and self.custom_canary_container is not None:
canary_predictor = self.generate_predictor_spec(
self.framework, container=self.custom_canary_container)

if canary_predictor:
isvc_spec = V1alpha2InferenceServiceSpec(
default=V1alpha2EndpointSpec(predictor=default_predictor),
canary=V1alpha2EndpointSpec(predictor=canary_predictor),
canary_traffic_percent=self.canary_traffic_percent)
else:
isvc_spec = V1alpha2InferenceServiceSpec(
default=V1alpha2EndpointSpec(predictor=default_predictor),
canary_traffic_percent=self.canary_traffic_percent)

return V1alpha2InferenceService(api_version=api_version,
kind=constants.KFSERVING_KIND,
metadata=k8s_client.V1ObjectMeta(
name=self.isvc_name,
generate_name=constants.KFSERVING_DEFAULT_NAME,
namespace=self.namespace),
spec=isvc_spec)


def generate_predictor_spec(self, framework, storage_uri=None, container=None):
default_predictor = self.generate_predictor_spec(
self.framework, storage_uri=self.default_storage_uri)
return V1beta1InferenceService(api_version=api_version,
kind=constants.KFSERVING_KIND,
metadata=k8s_client.V1ObjectMeta(
name=self.isvc_name,
generate_name=constants.KFSERVING_DEFAULT_NAME,
namespace=self.namespace),
spec=V1beta1InferenceServiceSpec(
predictor=default_predictor
))

def generate_predictor_spec(self, framework, storage_uri=None):
'''Generate predictor spec according to framework and
default_storage_uri or custom container.
'''
if self.framework == 'tensorflow':
predictor = V1alpha2PredictorSpec(
tensorflow=V1alpha2TensorflowSpec(storage_uri=storage_uri))
elif self.framework == 'onnx':
predictor = V1alpha2PredictorSpec(
onnx=V1alpha2ONNXSpec(storage_uri=storage_uri))
predictor = V1beta1PredictorSpec(
tensorflow=V1beta1TFServingSpec(storage_uri=storage_uri))
elif self.framework == 'pytorch':
predictor = V1alpha2PredictorSpec(
pytorch=V1alpha2PyTorchSpec(storage_uri=storage_uri))
predictor = V1beta1PredictorSpec(
pytorch=V1beta1TorchServeSpec(storage_uri=storage_uri))
elif self.framework == 'sklearn':
predictor = V1alpha2PredictorSpec(
sklearn=V1alpha2SKLearnSpec(storage_uri=storage_uri))
predictor = V1beta1PredictorSpec(
sklearn=V1beta1SKLearnSpec(storage_uri=storage_uri))
elif self.framework == 'triton':
predictor = V1alpha2PredictorSpec(
triton=V1alpha2TritonSpec(storage_uri=storage_uri))
predictor = V1beta1PredictorSpec(
triton=V1beta1TritonSpec(storage_uri=storage_uri))
elif self.framework == 'xgboost':
predictor = V1alpha2PredictorSpec(
xgboost=V1alpha2XGBoostSpec(storage_uri=storage_uri))
elif self.framework == 'custom':
predictor = V1alpha2PredictorSpec(
custom=V1alpha2CustomSpec(container=container))
predictor = V1beta1PredictorSpec(
xgboost=V1beta1XGBoostSpec(storage_uri=storage_uri))
else:
raise RuntimeError("Unsupported framework {}".format(framework))
return predictor
Expand Down
14 changes: 7 additions & 7 deletions kubeflow/fairing/deployers/pytorchjob/pytorchjob.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
import logging
from kubernetes import client as k8s_client

from kubeflow.pytorchjob import V1ReplicaSpec
from kubeflow.pytorchjob import V1PyTorchJob
from kubeflow.pytorchjob import V1PyTorchJobSpec

from kubeflow.fairing.constants import constants
from kubeflow.fairing.deployers.job.job import Job
from kubeflow.training import V1PyTorchJob
from kubeflow.training import V1PyTorchJobSpec
from kubeflow.training import V1ReplicaSpec, V1RunPolicy
from kubernetes import client as k8s_client

logger = logging.getLogger(__name__)


class PyTorchJob(Job):
""" Handle all the k8s' template building to create pytorch
training job using Kubeflow PyTorch Operator"""

def __init__(self, namespace=None, master_count=1, worker_count=1,
runs=1, job_name=None, stream_log=True, labels=None,
pod_spec_mutators=None, cleanup=False, annotations=None,
Expand Down Expand Up @@ -81,12 +81,12 @@ def generate_deployment_spec(self, pod_template_spec):

pytorchjob = V1PyTorchJob(
api_version=constants.PYTORCH_JOB_GROUP + "/" + \
constants.PYTORCH_JOB_VERSION,
constants.PYTORCH_JOB_VERSION,
kind=constants.PYTORCH_JOB_KIND,
metadata=k8s_client.V1ObjectMeta(name=self.job_name,
generate_name=constants.PYTORCH_JOB_DEFAULT_NAME,
labels=self.labels),
spec=V1PyTorchJobSpec(pytorch_replica_specs=pytorch_replica_specs)
spec=V1PyTorchJobSpec(pytorch_replica_specs=pytorch_replica_specs, run_policy=V1RunPolicy(clean_pod_policy="None"))
)

return pytorchjob
Expand Down
14 changes: 8 additions & 6 deletions kubeflow/fairing/deployers/tfjob/tfjob.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,21 @@
import logging
import copy
from kubernetes import client as k8s_client
import logging

from kubeflow.tfjob import V1ReplicaSpec
from kubeflow.tfjob import V1TFJob
from kubeflow.tfjob import V1TFJobSpec
from kubernetes import client as k8s_client

from kubeflow.fairing.constants import constants
from kubeflow.fairing.deployers.job.job import Job
from kubeflow.training import V1ReplicaSpec, V1RunPolicy
from kubeflow.training import V1TFJob
from kubeflow.training import V1TFJobSpec

logger = logging.getLogger(__name__)


class TfJob(Job):
""" Handle all the k8s' template building to create tensorflow
training job using Kubeflow TFOperator"""

def __init__(self, namespace=None, worker_count=1, ps_count=0,
chief_count=0, runs=1, job_name=None, stream_log=True,
labels=None, pod_spec_mutators=None, cleanup=False, annotations=None,
Expand Down Expand Up @@ -90,7 +91,8 @@ def generate_deployment_spec(self, pod_template_spec):
metadata=k8s_client.V1ObjectMeta(name=self.job_name,
generate_name=constants.TF_JOB_DEFAULT_NAME,
labels=self.labels),
spec=V1TFJobSpec(tf_replica_specs=tf_replica_specs)
spec=V1TFJobSpec(tf_replica_specs=tf_replica_specs, run_policy=V1RunPolicy(clean_pod_policy="None"))

)

return tfjob
Expand Down
4 changes: 2 additions & 2 deletions kubeflow/fairing/kubernetes/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
from kubernetes import client, config, watch
from kfserving import KFServingClient

from kubeflow.tfjob import TFJobClient
from kubeflow.pytorchjob import PyTorchJobClient
from kubeflow.training import TFJobClient
from kubeflow.training import PyTorchJobClient

from kubeflow.fairing.utils import is_running_in_k8s, camel_to_snake
from kubeflow.fairing.constants import constants
Expand Down
14 changes: 6 additions & 8 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,28 +1,26 @@
python-dateutil>=2.1,<=2.8.0
numpy>=1.17.3
kfserving>=0.3.0.2
ray[serve]==1.5.0
kfserving>=0.6.1
docker>=3.4.1
notebook>=5.6.0
kubernetes==10.0.1
kubernetes>=12.0.0
future>=0.17.1
six>=1.11.0
google-cloud-storage>=1.13.2
google-cloud-logging>=1.13.0
requests>=2.21.0,<2.23
requests>=2.21.0
setuptools>=34.0.0
google-auth>=1.6.2
httplib2>=0.12.0
oauth2client>=4.0.0
tornado>=6.0.1
google-api-python-client>=1.7.8
cloudpickle>=0.8,<=1.4.1
urllib3==1.24.2
boto3>=1.9.0
azure-storage-file>=2.1.0
azure-mgmt-storage>=9.0.0
retrying>=1.3.3
kubeflow-tfjob>=0.1.1
kubeflow-pytorchjob>=0.1.1
kubeflow-training>=1.3.0
ibm-cos-sdk>=2.6.0
grpcio>=1.27.2
nbconvert>=5.6.1
msrestazure>=0.6.4
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.9',
"License :: OSI Approved :: Apache Software License",
"Operating System :: OS Independent",
'Topic :: Scientific/Engineering',
Expand Down