1111# See the License for the specific language governing permissions and
1212# limitations under the License.
1313
14+ import json
1415from abc import ABCMeta
1516from datetime import datetime
1617from typing import List
2021
2122
2223class DiagnosisData (metaclass = ABCMeta ):
24+ """
25+ Basic definition of diagnosis data.
26+
27+ Args:
28+ timestamp (int): Timestamp of diagnosis data, in whole seconds. Defaults to 0, which is replaced with the current time.
29+ data_type (str): Type of metric. Defaults to "GENERIC".
30+ data_content (str): Content of the metric. Defaults to "".
31+ node_id (int): Node ID. Defaults to -1.
32+ node_type (str): Node type. Defaults to "".
33+ node_rank (int): Node rank. Defaults to -1.
34+ """
35+
def __init__(
    self,
    timestamp: int = 0,
    data_type: str = DiagnosisDataType.GENERIC,
    data_content: str = "",
    node_id: int = -1,
    node_type: str = "",
    node_rank: int = -1,
):
    """
    Store the diagnosis payload and the node identifiers on the instance.

    A ``timestamp`` of 0 is treated as "not provided" and is replaced by
    the current time, rounded to whole seconds.
    """
    # The clock is only read when no explicit timestamp was supplied.
    self._timestamp = (
        int(round(datetime.now().timestamp()))
        if timestamp == 0
        else timestamp
    )
    # Assignment order is kept stable because to_json() iterates
    # self.__dict__ in insertion order.
    self._data_type, self._data_content = data_type, data_content
    self._node_id, self._node_type, self._node_rank = (
        node_id,
        node_type,
        node_rank,
    )
3554
3655 @property
3756 def data_type (self ) -> str :
@@ -45,33 +64,6 @@ def timestamp(self) -> int:
def data_content(self) -> str:
    """Return the content string stored on this diagnosis data."""
    content = self._data_content
    return content
4766
48-
49- class WorkerDiagnosisData (DiagnosisData ):
50- def __init__ (
51- self ,
52- timestamp : int = 0 ,
53- data_type : str = DiagnosisDataType .GENERIC ,
54- data_content : str = "" ,
55- node_id : int = - 1 ,
56- node_type : str = "" ,
57- node_rank : int = - 1 ,
58- ):
59- """
60- General metric
61-
62- Args:
63- data_type (str): Type of metric. Defaults to "GENERIC".
64- data_content (str): Content of the metric. Defaults to "".
65- node_id (int): Node ID. Defaults to -1.
66- node_type (str): Node type. Defaults to "".
67- node_rank (int): Node rank. Defaults to -1.
68- """
69-
70- super ().__init__ (timestamp , data_type , data_content )
71- self ._node_id = node_id
72- self ._node_type = node_type
73- self ._node_rank = node_rank
74-
@property
def node_id(self):
    """Node ID stored on this diagnosis data."""
    stored_id = self._node_id
    return stored_id
@@ -84,35 +76,47 @@ def node_type(self):
def node_rank(self):
    """Return the node rank stored on this diagnosis data."""
    stored_rank = self._node_rank
    return stored_rank
8678
def to_json(self):
    """
    Serialize the instance attributes to a JSON string.

    Every entry of ``self.__dict__`` is emitted; leading underscores are
    stripped from each attribute name to form the JSON key.

    Returns:
        str: JSON object holding the instance attributes.
    """
    payload = {}
    for attr_name, attr_value in self.__dict__.items():
        payload[attr_name.lstrip("_")] = attr_value
    return json.dumps(payload)
82+
@classmethod
def from_json(cls, json_data):
    """
    Build an instance from a JSON string.

    The decoded JSON object is passed to the class constructor as
    keyword arguments, so its keys must match the constructor's
    parameter names.

    Args:
        json_data (str): JSON object text.

    Returns:
        An instance of ``cls``.
    """
    init_kwargs = json.loads(json_data)
    return cls(**init_kwargs)
86+
def is_from_worker(self):
    """
    Tell whether this data carries a node ID other than -1.

    Returns:
        bool: True when the stored node ID differs from -1.
    """
    unset_node_id = -1
    return self._node_id != unset_node_id
89+
90+
91+ class WorkerTrainingMetric (DiagnosisData ):
92+ """
93+ Diagnosis data for worker training metric.
94+
95+ Args:
96+ timestamp (int): Timestamp of diagnosis data. Defaults to 0.
97+ data_type (str): Type of metric. Defaults to "GENERIC".
98+ data_content (str): Content of the metric. Defaults to "".
99+ node_id (int): Node ID. Defaults to -1.
100+ node_type (str): Node type. Defaults to "".
101+ node_rank (int): Node rank. Defaults to -1.
102+ is_final_result (bool, optional): Whether the metric is final result.
103+ Defaults to False.
104+ need_report (bool, optional): Whether the metric needs report.
105+ Defaults to False.
106+ """
87107
88- class WorkerTrainingMetric (WorkerDiagnosisData ):
89108 def __init__ (
90109 self ,
91110 timestamp : int = 0 ,
92111 data_type : str = DiagnosisDataType .GENERIC ,
93112 data_content : str = "" ,
94- node_id : int = - 1 ,
95- node_type : str = "" ,
96- node_rank : int = - 1 ,
113+ node_id = env_utils . get_node_id () ,
114+ node_type = env_utils . get_node_type () ,
115+ node_rank = env_utils . get_node_rank () ,
97116 is_final_result = False ,
98117 need_report = False ,
99118 ):
100- """
101- General metric
102-
103- Args:
104- data_type (str): Type of metric. Defaults to "GENERIC".
105- data_content (str): Content of the metric. Defaults to "".
106- is_final_result (bool, optional): Whether the metric is final
107- result or not. Defaults to False.
108- need_report (bool, optional): Whether the metric needs
109- report(to Brain). Defaults to False.
110- node_id (int): Node ID. Defaults to -1.
111- node_type (str): Node type. Defaults to "".
112- node_rank (int): Node rank. Defaults to -1.
113- """
114-
115- super ().__init__ (
119+ super (WorkerTrainingMetric , self ).__init__ (
116120 timestamp , data_type , data_content , node_id , node_type , node_rank
117121 )
118122 self ._is_final_result = is_final_result
@@ -133,8 +137,26 @@ def is_resolvable(self):
133137 return False
134138
135139
136- class TrainingLog (WorkerDiagnosisData ):
137- def __init__ (self , timestamp : int = 0 , logs : List [str ] = None ):
140+ class TrainingLog (DiagnosisData ):
141+ """
142+ Worker's training log.
143+
144+ Args:
145+ timestamp (int): Timestamp of diagnosis data. Defaults to 0.
146+ logs (list): Log content in list format.
147+ node_id (int): Node ID. Defaults to -1.
148+ node_type (str): Node type. Defaults to "".
149+ node_rank (int): Node rank. Defaults to -1.
150+ """
151+
152+ def __init__ (
153+ self ,
154+ timestamp : int = 0 ,
155+ logs : List [str ] = None ,
156+ node_id = env_utils .get_node_id (),
157+ node_type = env_utils .get_node_type (),
158+ node_rank = env_utils .get_node_rank (),
159+ ):
138160 if logs is None :
139161 data_content = ""
140162 else :
@@ -144,9 +166,9 @@ def __init__(self, timestamp: int = 0, logs: List[str] = None):
144166 timestamp ,
145167 DiagnosisDataType .TRAINING_LOG ,
146168 data_content ,
147- env_utils . get_node_id () ,
148- env_utils . get_node_type () ,
149- env_utils . get_node_rank () ,
169+ node_id ,
170+ node_type ,
171+ node_rank ,
150172 )
151173
152174 @property
0 commit comments