Skip to content

Commit 1659bab

Browse files
Profiler Team and copybara-github
authored and committed
Integrate Smart Suggestion into Xprof
PiperOrigin-RevId: 783787859
1 parent 4b96c4a commit 1659bab

11 files changed

+277
-23
lines changed

xprof/convert/BUILD

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -862,7 +862,6 @@ cc_library(
862862
":hlo_to_tools_data",
863863
":multi_xplanes_to_op_stats",
864864
":multi_xspace_to_inference_stats",
865-
":op_stats_processor",
866865
":op_stats_to_hlo_stats",
867866
":op_stats_to_input_pipeline_analysis",
868867
":op_stats_to_op_profile",
@@ -899,6 +898,11 @@ cc_library(
899898
"@org_xprof//plugin/xprof/protobuf:roofline_model_proto_cc",
900899
"@org_xprof//plugin/xprof/protobuf:tf_data_stats_proto_cc",
901900
"@org_xprof//plugin/xprof/protobuf:tf_stats_proto_cc",
901+
"@org_xprof//xprof/convert/smart_suggestion:all_rules",
902+
"@org_xprof//xprof/convert/smart_suggestion:signal_provider",
903+
"@org_xprof//xprof/convert/smart_suggestion:smart_suggestion_engine",
904+
"@org_xprof//xprof/convert/smart_suggestion:smart_suggestion_rule_factory",
905+
"@org_xprof//xprof/convert/smart_suggestion:tool_data_provider_impl",
902906
"@org_xprof//xprof/convert/trace_viewer:trace_events",
903907
"@org_xprof//xprof/convert/trace_viewer:trace_events_to_json",
904908
"@org_xprof//xprof/convert/trace_viewer:trace_options",

xprof/convert/smart_suggestion/BUILD

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
# load("//third_party/bazel_rules/rules_cc/cc:cc_library.bzl", "cc_library")
21
# load("//third_party/bazel_rules/rules_cc/cc:cc_test.bzl", "cc_test")
32

43
package(
@@ -11,11 +10,11 @@ cc_library(
1110
hdrs = ["signal_provider.h"],
1211
deps = [
1312
":tool_data_provider",
14-
"//util/task:status",
1513
"@com_google_absl//absl/status:statusor",
1614
"@org_xprof//plugin/xprof/protobuf:input_pipeline_proto_cc",
1715
"@org_xprof//plugin/xprof/protobuf:overview_page_proto_cc",
1816
"@org_xprof//plugin/xprof/protobuf:tpu_input_pipeline_proto_cc",
17+
"@xla//xla/tsl/platform:statusor",
1918
],
2019
)
2120

@@ -44,7 +43,6 @@ cc_library(
4443
hdrs = ["tool_data_provider_impl.h"],
4544
deps = [
4645
":tool_data_provider",
47-
"//util/task:status",
4846
"@com_google_absl//absl/status:statusor",
4947
"@org_xprof//plugin/xprof/protobuf:input_pipeline_proto_cc",
5048
"@org_xprof//plugin/xprof/protobuf:op_stats_proto_cc",
@@ -53,6 +51,7 @@ cc_library(
5351
"@org_xprof//xprof/convert:op_stats_to_input_pipeline_analysis",
5452
"@org_xprof//xprof/convert:op_stats_to_overview_page",
5553
"@org_xprof//xprof/convert:repository",
54+
"@xla//xla/tsl/platform:errors",
5655
],
5756
)
5857

@@ -74,11 +73,11 @@ cc_library(
7473
":input_bound_rule",
7574
":signal_provider",
7675
":smart_suggestion_rule",
77-
"//util/task:status",
7876
"@com_google_absl//absl/status:statusor",
7977
"@com_google_absl//absl/strings",
8078
"@com_google_absl//absl/strings:str_format",
8179
"@org_xprof//plugin/xprof/protobuf:smart_suggestion_proto_cc",
80+
"@xla//xla/tsl/platform:statusor",
8281
],
8382
)
8483

@@ -89,11 +88,44 @@ cc_library(
8988
":input_bound_rule",
9089
":signal_provider",
9190
":smart_suggestion_rule",
92-
"//util/task:status",
9391
"@com_google_absl//absl/status:statusor",
9492
"@com_google_absl//absl/strings",
9593
"@com_google_absl//absl/strings:str_format",
9694
"@org_xprof//plugin/xprof/protobuf:smart_suggestion_proto_cc",
95+
"@xla//xla/tsl/platform:statusor",
96+
],
97+
)
98+
99+
cc_library(
100+
name = "smart_suggestion_rule_factory",
101+
hdrs = ["smart_suggestion_rule_factory.h"],
102+
deps = [
103+
":smart_suggestion_rule",
104+
],
105+
)
106+
107+
cc_library(
108+
name = "all_rules",
109+
hdrs = ["all_rules.h"],
110+
deps = [
111+
":data_transfer_bound_rule",
112+
":host_processing_bound_rule",
113+
":input_bound_rule",
114+
":smart_suggestion_rule_factory",
115+
],
116+
)
117+
118+
cc_library(
119+
name = "smart_suggestion_engine",
120+
srcs = ["smart_suggestion_engine.cc"],
121+
hdrs = ["smart_suggestion_engine.h"],
122+
deps = [
123+
":signal_provider",
124+
":smart_suggestion_rule",
125+
":smart_suggestion_rule_factory",
126+
"@com_google_absl//absl/status:statusor",
127+
"@org_xprof//plugin/xprof/protobuf:smart_suggestion_proto_cc",
128+
"@xla//xla/tsl/platform:statusor",
97129
],
98130
)
99131

xprof/convert/smart_suggestion/all_rules.h

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/* Copyright 2025 The TensorFlow Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License.
14+
==============================================================================*/
15+
16+
#ifndef THIRD_PARTY_XPROF_CONVERT_SMART_SUGGESTION_ALL_RULES_H_
17+
#define THIRD_PARTY_XPROF_CONVERT_SMART_SUGGESTION_ALL_RULES_H_
18+
19+
#include "xprof/convert/smart_suggestion/data_transfer_bound_rule.h"
20+
#include "xprof/convert/smart_suggestion/host_processing_bound_rule.h"
21+
#include "xprof/convert/smart_suggestion/input_bound_rule.h"
22+
#include "xprof/convert/smart_suggestion/smart_suggestion_rule_factory.h"
23+
24+
namespace tensorflow {
25+
namespace profiler {
26+
27+
// Registers all smart suggestion rules.
//
// Adds DataTransferBoundRule, HostProcessingBoundRule and InputBoundRule to
// the factory `f` by calling `f->Register<Rule>()` once per rule type.
// `f` is dereferenced unconditionally, so callers must pass a non-null
// factory. Declared `inline` because the definition lives in this header.
28+
inline void RegisterAllRules(SmartSuggestionRuleFactory* f) {
29+
// go/keep-sorted start
30+
f->Register<DataTransferBoundRule>();
31+
f->Register<HostProcessingBoundRule>();
32+
f->Register<InputBoundRule>();
33+
// go/keep-sorted end
34+
}
35+
36+
} // namespace profiler
37+
} // namespace tensorflow
38+
39+
#endif // THIRD_PARTY_XPROF_CONVERT_SMART_SUGGESTION_ALL_RULES_H_

xprof/convert/smart_suggestion/data_transfer_bound_rule.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,11 @@ limitations under the License.
2222
#include "absl/status/statusor.h"
2323
#include "absl/strings/str_cat.h"
2424
#include "absl/strings/str_format.h"
25+
#include "xla/tsl/platform/statusor.h"
2526
#include "xprof/convert/smart_suggestion/input_bound_rule.h"
2627
#include "xprof/convert/smart_suggestion/signal_provider.h"
2728
#include "xprof/convert/smart_suggestion/smart_suggestion_rule.h"
2829
#include "plugin/xprof/protobuf/smart_suggestion.pb.h"
29-
#include "util/task/status_macros.h"
3030

3131
namespace tensorflow {
3232
namespace profiler {
@@ -59,7 +59,7 @@ class DataTransferBoundRule : public SmartSuggestionRule {
5959
SmartSuggestion suggestion;
6060
suggestion.set_rule_name("DataTransferBoundRule");
6161

62-
ASSIGN_OR_RETURN(double enqueue_percent_of_input,
62+
TF_ASSIGN_OR_RETURN(double enqueue_percent_of_input,
6363
signal_provider.GetEnqueuePercentOfInput());
6464

6565
std::string suggestion_text = absl::StrCat(

xprof/convert/smart_suggestion/host_processing_bound_rule.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,11 @@ limitations under the License.
2222
#include "absl/status/statusor.h"
2323
#include "absl/strings/str_cat.h"
2424
#include "absl/strings/str_format.h"
25+
#include "xla/tsl/platform/statusor.h"
2526
#include "xprof/convert/smart_suggestion/input_bound_rule.h"
2627
#include "xprof/convert/smart_suggestion/signal_provider.h"
2728
#include "xprof/convert/smart_suggestion/smart_suggestion_rule.h"
2829
#include "plugin/xprof/protobuf/smart_suggestion.pb.h"
29-
#include "util/task/status_macros.h"
3030

3131
namespace tensorflow {
3232
namespace profiler {
@@ -61,7 +61,7 @@ class HostProcessingBoundRule : public SmartSuggestionRule {
6161
SmartSuggestion suggestion;
6262
suggestion.set_rule_name("HostProcessingBoundRule");
6363

64-
ASSIGN_OR_RETURN(double non_enqueue_percent_of_input,
64+
TF_ASSIGN_OR_RETURN(double non_enqueue_percent_of_input,
6565
signal_provider.GetNonEnqueuePercentOfInput());
6666

6767
std::string suggestion_text = absl::StrCat(

xprof/convert/smart_suggestion/signal_provider.h

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,11 @@ limitations under the License.
2020
#include <utility>
2121

2222
#include "absl/status/statusor.h"
23+
#include "xla/tsl/platform/statusor.h"
2324
#include "xprof/convert/smart_suggestion/tool_data_provider.h"
2425
#include "plugin/xprof/protobuf/input_pipeline.pb.h"
2526
#include "plugin/xprof/protobuf/overview_page.pb.h"
2627
#include "plugin/xprof/protobuf/tpu_input_pipeline.pb.h"
27-
#include "util/task/status_macros.h"
2828
namespace tensorflow {
2929
namespace profiler {
3030

@@ -36,23 +36,23 @@ class SignalProvider {
3636

3737
// Average HBM utilization from overview page.
3838
absl::StatusOr<double> GetHbmUtilization() const {
39-
ASSIGN_OR_RETURN(const auto* overview_page,
39+
TF_ASSIGN_OR_RETURN(const auto* overview_page,
4040
tool_data_provider_->GetOverviewPage());
4141
return overview_page->analysis()
4242
.memory_bw_utilization_relative_to_hw_limit_percent();
4343
}
4444

4545
// Average MXU utilization from overview page.
4646
absl::StatusOr<double> GetMxuUtilization() const {
47-
ASSIGN_OR_RETURN(const auto* overview_page,
47+
TF_ASSIGN_OR_RETURN(const auto* overview_page,
4848
tool_data_provider_->GetOverviewPage());
4949
return overview_page->analysis().mxu_utilization_percent();
5050
}
5151

5252
// Returns the input percentage of step time from input pipeline analysis.
5353
// Used for the input bound rule.
5454
absl::StatusOr<double> GetInputPercentOfStepTime() const {
55-
ASSIGN_OR_RETURN(const auto* input_pipeline_analysis,
55+
TF_ASSIGN_OR_RETURN(const auto* input_pipeline_analysis,
5656
tool_data_provider_->GetInputPipelineAnalysisResult());
5757
return input_pipeline_analysis->input_percent();
5858
}
@@ -61,7 +61,7 @@ class SignalProvider {
6161
// microseconds from host-side input analysis.
6262
// Used for the input bound rule.
6363
absl::StatusOr<double> GetEnqueueUs() const {
64-
ASSIGN_OR_RETURN(const auto* input_pipeline_analysis,
64+
TF_ASSIGN_OR_RETURN(const auto* input_pipeline_analysis,
6565
tool_data_provider_->GetInputPipelineAnalysisResult());
6666
return input_pipeline_analysis->input_time_breakdown().enqueue_us();
6767
}
@@ -70,7 +70,7 @@ class SignalProvider {
7070
// from host-side input analysis.
7171
// Used for the input bound rule.
7272
absl::StatusOr<double> GetNonEnqueueUs() const {
73-
ASSIGN_OR_RETURN(const auto* input_pipeline_analysis,
73+
TF_ASSIGN_OR_RETURN(const auto* input_pipeline_analysis,
7474
tool_data_provider_->GetInputPipelineAnalysisResult());
7575
const auto& breakdown = input_pipeline_analysis->input_time_breakdown();
7676
return breakdown.demanded_file_read_us() +
@@ -80,8 +80,8 @@ class SignalProvider {
8080

8181
// Returns the percentage of input time that is due to enqueuing data.
8282
absl::StatusOr<double> GetEnqueuePercentOfInput() const {
83-
ASSIGN_OR_RETURN(double enqueue_us, GetEnqueueUs());
84-
ASSIGN_OR_RETURN(double non_enqueue_us, GetNonEnqueueUs());
83+
TF_ASSIGN_OR_RETURN(double enqueue_us, GetEnqueueUs());
84+
TF_ASSIGN_OR_RETURN(double non_enqueue_us, GetNonEnqueueUs());
8585
double total_input_time_us = enqueue_us + non_enqueue_us;
8686
if (total_input_time_us == 0) {
8787
return 0.0;
@@ -92,8 +92,8 @@ class SignalProvider {
9292
// Returns the percentage of input time that is due to non-enqueuing
9393
// activities.
9494
absl::StatusOr<double> GetNonEnqueuePercentOfInput() const {
95-
ASSIGN_OR_RETURN(double non_enqueue_us, GetNonEnqueueUs());
96-
ASSIGN_OR_RETURN(double enqueue_us, GetEnqueueUs());
95+
TF_ASSIGN_OR_RETURN(double non_enqueue_us, GetNonEnqueueUs());
96+
TF_ASSIGN_OR_RETURN(double enqueue_us, GetEnqueueUs());
9797
double total_input_time_us = non_enqueue_us + enqueue_us;
9898
if (total_input_time_us == 0) {
9999
return 0.0;
xprof/convert/smart_suggestion/smart_suggestion_engine.cc

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/* Copyright 2025 The TensorFlow Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License.
14+
==============================================================================*/
15+
#include "xprof/convert/smart_suggestion/smart_suggestion_engine.h"
16+
17+
#include <optional>
18+
19+
#include "absl/status/statusor.h"
20+
#include "xla/tsl/platform/statusor.h"
21+
#include "xprof/convert/smart_suggestion/signal_provider.h"
22+
#include "xprof/convert/smart_suggestion/smart_suggestion_rule.h"
23+
#include "xprof/convert/smart_suggestion/smart_suggestion_rule_factory.h"
24+
#include "plugin/xprof/protobuf/smart_suggestion.pb.h"
25+
26+
namespace tensorflow {
27+
namespace profiler {
28+
29+
// Builds a SmartSuggestionReport by applying every rule produced by
// `rule_factory` to `signal_provider`.
//
// Each rule's Apply() yields a status-or-optional suggestion; suggestions that
// are present are appended to the report in the order the factory returned the
// rules. Returns the populated report, or the first non-OK status produced by
// a rule.
absl::StatusOr<SmartSuggestionReport> SmartSuggestionEngine::Run(
30+
const SignalProvider& signal_provider,
31+
const SmartSuggestionRuleFactory& rule_factory) const {
32+
SmartSuggestionReport report;
33+
34+
// Binding the returned collection to a const reference keeps it alive for
// the whole loop below.
const auto& rules = rule_factory.CreateAllRules();
35+
for (const auto& rule : rules) {
36+
// NOTE(review): a non-OK status from any single rule returns immediately
// and discards suggestions already collected from earlier rules -- confirm
// this all-or-nothing behavior is intended.
TF_ASSIGN_OR_RETURN(std::optional<SmartSuggestion> suggestion,
37+
rule->Apply(signal_provider));
38+
// An empty optional means the rule produced no suggestion; skip it.
if (suggestion.has_value()) {
39+
// NOTE(review): this copies the suggestion into the report; `suggestion`
// is not used afterwards, so it could be moved instead.
*report.add_suggestions() = *suggestion;
40+
}
41+
}
42+
return report;
43+
}
44+
45+
} // namespace profiler
46+
} // namespace tensorflow
xprof/convert/smart_suggestion/smart_suggestion_engine.h

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
/* Copyright 2025 The TensorFlow Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License.
14+
==============================================================================*/
15+
16+
#ifndef THIRD_PARTY_XPROF_CONVERT_SMART_SUGGESTION_SMART_SUGGESTION_ENGINE_H_
17+
#define THIRD_PARTY_XPROF_CONVERT_SMART_SUGGESTION_SMART_SUGGESTION_ENGINE_H_
18+
19+
#include "absl/status/statusor.h"
20+
#include "xprof/convert/smart_suggestion/signal_provider.h"
21+
#include "xprof/convert/smart_suggestion/smart_suggestion_rule_factory.h"
22+
#include "plugin/xprof/protobuf/smart_suggestion.pb.h"
23+
24+
namespace tensorflow {
25+
namespace profiler {
26+
27+
// Engine to generate smart suggestions.
//
// The class declares no data members; its only operation, Run(), is const and
// receives everything it needs through its arguments.
28+
class SmartSuggestionEngine {
29+
public:
30+
// Default-constructible; there is no state to initialize.
explicit SmartSuggestionEngine() = default;
31+
32+
// Generates smart suggestions based on the provided signal provider and rule
33+
// factory.
//
// Args:
//   signal_provider: source of the profile signals handed to each rule.
//   rule_factory: supplies the set of rules to evaluate.
// Returns:
//   A SmartSuggestionReport holding the generated suggestions, or a non-OK
//   status on failure.
34+
absl::StatusOr<SmartSuggestionReport> Run(
35+
const SignalProvider& signal_provider,
36+
const SmartSuggestionRuleFactory& rule_factory) const;
37+
};
38+
39+
} // namespace profiler
40+
} // namespace tensorflow
41+
42+
#endif // THIRD_PARTY_XPROF_CONVERT_SMART_SUGGESTION_SMART_SUGGESTION_ENGINE_H_

0 commit comments

Comments
 (0)