NVIDIA
diff --git a/‎.readthedocs.yaml‎
Lines changed: 2 additions & 2 deletions b/‎.readthedocs.yaml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎build_doc.sh‎
Lines changed: 1 addition & 1 deletion b/‎build_doc.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/advanced/vertical_xgboost/README.md‎
Lines changed: 1 addition & 1 deletion b/‎examples/advanced/vertical_xgboost/README.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/advanced/vertical_xgboost/code/vertical_xgb/vertical_data_loader.py‎
Lines changed: 1 addition & 1 deletion b/‎examples/advanced/vertical_xgboost/code/vertical_xgb/vertical_data_loader.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/advanced/vertical_xgboost/requirements.txt‎
Lines changed: 7 additions & 3 deletions b/‎examples/advanced/vertical_xgboost/requirements.txt‎
Lines changed: 7 additions & 3 deletions
diff --git a/‎examples/advanced/xgboost/histogram-based/jobs/base/app/config/config_fed_server.json‎
Lines changed: 0 additions & 3 deletions b/‎examples/advanced/xgboost/histogram-based/jobs/base/app/config/config_fed_server.json‎
Lines changed: 0 additions & 3 deletions
diff --git a/‎examples/advanced/xgboost/histogram-based/jobs/base_v2/app/config/config_fed_client.json‎
Lines changed: 2 additions & 12 deletions b/‎examples/advanced/xgboost/histogram-based/jobs/base_v2/app/config/config_fed_client.json‎
Lines changed: 2 additions & 12 deletions
diff --git a/‎examples/advanced/xgboost/histogram-based/jobs/base_v2/app/config/config_fed_server.json‎
Lines changed: 14 additions & 2 deletions b/‎examples/advanced/xgboost/histogram-based/jobs/base_v2/app/config/config_fed_server.json‎
Lines changed: 14 additions & 2 deletions
diff --git a/‎examples/advanced/xgboost/histogram-based/jobs/base_v2/app/custom/higgs_data_loader.py‎
Lines changed: 77 additions & 0 deletions b/‎examples/advanced/xgboost/histogram-based/jobs/base_v2/app/custom/higgs_data_loader.py‎
Lines changed: 77 additions & 0 deletions
diff --git a/‎examples/advanced/xgboost/histogram-based/jobs/base_v2/meta.json‎
Lines changed: 10 additions & 0 deletions b/‎examples/advanced/xgboost/histogram-based/jobs/base_v2/meta.json‎
Lines changed: 10 additions & 0 deletions
@@ -9,7 +9,7 @@ version: 2
 build:
   os: ubuntu-22.04
   tools:
-    python: "3.8"
+    python: "3.10"
 
 # Build documentation in the docs/ directory with Sphinx
 sphinx:
@@ -26,6 +26,6 @@ sphinx:
 python:
   install:
     - method: pip
-      path: .[doc]
+      path: .[dev]
 #  system_packages: true
 
@@ -49,7 +49,7 @@ function clean_docs() {
 }
 
 function build_html_docs() {
-    pip install -e .[doc]
+    pip install -e .[dev]
     sphinx-apidoc --module-first -f -o docs/apidocs/ nvflare "*poc" "*private"
     sphinx-build -b html docs docs/_build
 }
 
@@ -89,7 +89,7 @@ The model will be saved to `test.model.json`.
 ## Results
 Model accuracy can be visualized in tensorboard:
 ```
-tensorboard --logdir /tmp/nvflare/vertical_xgb/simulate_job/tb_events
+tensorboard --logdir /tmp/nvflare/vertical_xgb/server/simulate_job/tb_events
 ```
 
 An example training (pink) and validation (orange) AUC graph from running vertical XGBoost on HIGGS:
 
@@ -62,7 +62,7 @@ def __init__(self, data_split_path, psi_path, id_col, label_owner, train_proport
         self.label_owner = label_owner
         self.train_proportion = train_proportion
 
-    def load_data(self, client_id: str):
+    def load_data(self, client_id: str, training_mode: str = ""):
         client_data_split_path = self.data_split_path.replace("site-x", client_id)
         client_psi_path = self.psi_path.replace("site-x", client_id)
 
 
@@ -1,6 +1,10 @@
-nvflare~=2.4.0rc
+nvflare~=2.5.0rc
 openmined.psi==1.1.1
 pandas
-tensorboard
 torch
-xgboost>=2.0.0
+tensorboard
+# Requires xgboost 2.2; for now, a binary build must be installed
+# "xgboost>=2.2"
+
+--extra-index-url https://s3-us-west-2.amazonaws.com/xgboost-nightly-builds/list.html?prefix=federated-secure/
+xgboost
@@ -1,8 +1,5 @@
 {
   "format_version": 2,
-  "server": {
-    "heart_beat_timeout": 600
-  },
   "task_data_filters": [],
   "task_result_filters": [],
   "components": [
 
@@ -1,26 +1,16 @@
 {
   "format_version": 2,
-  "num_rounds": 100,
   "executors": [
     {
       "tasks": [
         "config", "start"
       ],
       "executor": {
         "id": "Executor",
-        "path": "nvflare.app_opt.xgboost.histogram_based_v2.executor.FedXGBHistogramExecutor",
+        "path": "nvflare.app_opt.xgboost.histogram_based_v2.fed_executor.FedXGBHistogramExecutor",
         "args": {
           "data_loader_id": "dataloader",
-          "metrics_writer_id": "metrics_writer",
-          "early_stopping_rounds": 2,
-          "xgb_params": {
-            "max_depth": 8,
-            "eta": 0.1,
-            "objective": "binary:logistic",
-            "eval_metric": "auc",
-            "tree_method": "hist",
-            "nthread": 16
-          }
+          "metrics_writer_id": "metrics_writer"
         }
       }
     }
 
@@ -15,9 +15,21 @@
   "workflows": [
     {
       "id": "xgb_controller",
-      "path": "nvflare.app_opt.xgboost.histogram_based_v2.controller.XGBFedController",
+      "path": "nvflare.app_opt.xgboost.histogram_based_v2.fed_controller.XGBFedController",
       "args": {
-        "num_rounds": "{num_rounds}"
+        "num_rounds": "{num_rounds}",
+        "training_mode": "horizontal",
+        "xgb_params": {
+          "max_depth": 8,
+          "eta": 0.1,
+          "objective": "binary:logistic",
+          "eval_metric": "auc",
+          "tree_method": "hist",
+          "nthread": 16
+        },
+        "xgb_options": {
+          "early_stopping_rounds": 2
+        }
       }
     }
   ]
 
@@ -0,0 +1,77 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+
+import pandas as pd
+import xgboost as xgb
+
+from nvflare.app_opt.xgboost.data_loader import XGBDataLoader
+
+
+def _read_higgs_with_pandas(data_path, start: int, end: int):  # returns (features, labels, row count)
+    data_size = end - start  # number of rows requested, i.e. the half-open range [start, end)
+    data = pd.read_csv(data_path, header=None, skiprows=start, nrows=data_size)  # headerless CSV; skip the first `start` rows
+    data_num = data.shape[0]  # number of rows actually read
+
+    # split into features (columns 1 onward) and label (column 0)
+    x = data.iloc[:, 1:].copy()
+    y = data.iloc[:, 0].copy()
+
+    return x, y, data_num
+
+
+class HIGGSDataLoader(XGBDataLoader):  # builds train/validation DMatrix objects from a JSON-described split of one CSV
+    def __init__(self, data_split_filename):
+        """Reads HIGGS dataset and return XGB data matrix.
+
+        Args:
+            data_split_filename: file name to data splits
+        """
+        self.data_split_filename = data_split_filename
+
+    def load_data(self, client_id: str, training_mode: str = ""):  # training_mode is accepted but not used in this method
+        with open(self.data_split_filename, "r") as file:
+            data_split = json.load(file)
+
+        data_path = data_split["data_path"]  # path of the CSV handed to the pandas reader
+        data_index = data_split["data_index"]  # maps a client id or "valid" to a {"start", "end"} row range
+
+        # check that both client_id and "valid" are present in the mapping dict
+        if client_id not in data_index.keys():
+            raise ValueError(
+                f"Data does not contain Client {client_id} split",
+            )
+
+        if "valid" not in data_index.keys():
+            raise ValueError(
+                "Data does not contain Validation split",
+            )
+
+        site_index = data_index[client_id]
+        valid_index = data_index["valid"]
+
+        # training split: the row range listed for this client
+        x_train, y_train, total_train_data_num = _read_higgs_with_pandas(  # the returned row count is not used below
+            data_path=data_path, start=site_index["start"], end=site_index["end"]
+        )
+        dmat_train = xgb.DMatrix(x_train, label=y_train)
+
+        # validation split: the row range listed under the "valid" key
+        x_valid, y_valid, total_valid_data_num = _read_higgs_with_pandas(  # the returned row count is not used below
+            data_path=data_path, start=valid_index["start"], end=valid_index["end"]
+        )
+        dmat_valid = xgb.DMatrix(x_valid, label=y_valid)
+
+        return dmat_train, dmat_valid  # (training DMatrix, validation DMatrix)
@@ -0,0 +1,10 @@
+{
+  "name": "xgboost_histogram_based_v2",
+  "resource_spec": {},
+  "deploy_map": {
+    "app": [
+      "@ALL"
+    ]
+  },
+  "min_clients": 2
+}
Original file line number	Diff line number	Diff line change
`@@ -49,7 +49,7 @@ function clean_docs() {`
`49`	`49`	`}`
`50`	`50`
`51`	`51`	`function build_html_docs() {`
`52`		`- pip install -e .[doc]`
	`52`	`+ pip install -e .[dev]`
`53`	`53`	`sphinx-apidoc --module-first -f -o docs/apidocs/ nvflare "*poc" "*private"`
`54`	`54`	`sphinx-build -b html docs docs/_build`
`55`	`55`	`}`
Original file line number	Diff line number	Diff line change
`@@ -1,8 +1,5 @@`
`1`	`1`	`{`
`2`	`2`	`"format_version": 2,`
`3`		`- "server": {`
`4`		`- "heart_beat_timeout": 600`
`5`		`- },`
`6`	`3`	`"task_data_filters": [],`
`7`	`4`	`"task_result_filters": [],`
`8`	`5`	`"components": [`
Original file line number	Diff line number	Diff line change
`@@ -1,26 +1,16 @@`
`1`	`1`	`{`
`2`	`2`	`"format_version": 2,`
`3`		`- "num_rounds": 100,`
`4`	`3`	`"executors": [`
`5`	`4`	`{`
`6`	`5`	`"tasks": [`
`7`	`6`	`"config", "start"`
`8`	`7`	`],`
`9`	`8`	`"executor": {`
`10`	`9`	`"id": "Executor",`
`11`		`- "path": "nvflare.app_opt.xgboost.histogram_based_v2.executor.FedXGBHistogramExecutor",`
	`10`	`+ "path": "nvflare.app_opt.xgboost.histogram_based_v2.fed_executor.FedXGBHistogramExecutor",`
`12`	`11`	`"args": {`
`13`	`12`	`"data_loader_id": "dataloader",`
`14`		`- "metrics_writer_id": "metrics_writer",`
`15`		`- "early_stopping_rounds": 2,`
`16`		`- "xgb_params": {`
`17`		`- "max_depth": 8,`
`18`		`- "eta": 0.1,`
`19`		`- "objective": "binary:logistic",`
`20`		`- "eval_metric": "auc",`
`21`		`- "tree_method": "hist",`
`22`		`- "nthread": 16`
`23`		`- }`
	`13`	`+ "metrics_writer_id": "metrics_writer"`
`24`	`14`	`}`
`25`	`15`	`}`
`26`	`16`	`}`
Original file line number	Diff line number	Diff line change
`@@ -15,9 +15,21 @@`
`15`	`15`	`"workflows": [`
`16`	`16`	`{`
`17`	`17`	`"id": "xgb_controller",`
`18`		`- "path": "nvflare.app_opt.xgboost.histogram_based_v2.controller.XGBFedController",`
	`18`	`+ "path": "nvflare.app_opt.xgboost.histogram_based_v2.fed_controller.XGBFedController",`
`19`	`19`	`"args": {`
`20`		`- "num_rounds": "{num_rounds}"`
	`20`	`+ "num_rounds": "{num_rounds}",`
	`21`	`+ "training_mode": "horizontal",`
	`22`	`+ "xgb_params": {`
	`23`	`+ "max_depth": 8,`
	`24`	`+ "eta": 0.1,`
	`25`	`+ "objective": "binary:logistic",`
	`26`	`+ "eval_metric": "auc",`
	`27`	`+ "tree_method": "hist",`
	`28`	`+ "nthread": 16`
	`29`	`+ },`
	`30`	`+ "xgb_options": {`
	`31`	`+ "early_stopping_rounds": 2`
	`32`	`+ }`
`21`	`33`	`}`
`22`	`34`	`}`
`23`	`35`	`]`