1
- from treeherder .model .models import Group , GroupStatus , Job
1
+ from treeherder .model .models import Group , GroupStatus , Job , Push
2
+
3
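+ # TODO: add tests for the scenarios below (pN = push N, tN = task N, gN = test group N; "t1-cf" is the "-cf" variant of task t1)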
4
+ # - p1:t1: fail on g1, p2:t1: pass on g1 - result: p1:t1: intermittent
5
+ # - p1:t1: fail on leak (all groups pass), p2:t1: pass - result p1:t1: still default
6
+ # - p1:t1: fail on g1, p1:t1.2: pass on g1 - result p1:t1: intermittent
7
+ # - p1:t1: fail on g1, p1:t1-cf: pass on g1 - result p1:t1: intermittent
8
+ # - p1:t1: fail on g1, p1:t1-cf: fail on g1 - result p1:t1: still default
2
9
3
10
4
11
# Repository id of "try"; on try only the job's own push is examined.
_TRY_REPOSITORY_ID = 4
# Maximum number of pushes (the current one included) examined on other repositories.
_HISTORY_PUSH_COUNT = 20
# Suffix carried by the "-cf" variant of a task (presumably "confirm failure" - verify).
_CF_SUFFIX = "-cf"
# Failure classification ids: not classified, intermittent, new failure.
_FC_NOT_CLASSIFIED = 1
_FC_INTERMITTENT = 4
_FC_NEW_FAILURE = 6


def _base_job_type_name(name):
    """Return ``name`` without a trailing "-cf" suffix and trailing numeric chunk.

    Only the literal suffix is removed; ``str.strip("-cf")`` would instead eat
    any run of the characters "-", "c" and "f" from both ends of the name
    (turning e.g. "...-perf" into "...-per").
    """
    if name.endswith(_CF_SUFFIX):
        name = name[: -len(_CF_SUFFIX)]

    head, _, tail = name.rpartition("-")
    try:
        int(tail)
    except ValueError:
        # last "-" separated piece is not a chunk number, keep the name whole
        return name
    return head


def check_and_mark_intermittent(job_id):
    """Flag earlier failed runs of a task as intermittent using this job's group results.

    Group results (OK / ERROR) are collected for every job that shares the
    current job's base task name (the "-cf" suffix and chunk number are
    ignored) on the current push and, unless the repository is try, on up to
    ``_HISTORY_PUSH_COUNT`` pushes ending at the current one.  A group counts
    as passing when at least half of its collected results are OK.  Every
    other job whose failing groups are all passing groups, whose result is not
    "success" and which is not already classified as intermittent gets
    ``failure_classification_id`` set to intermittent.

    Args:
        job_id: primary key of the job whose results just became available.

    Returns:
        None.  The only effect is the update of ``Job`` rows.

    Raises:
        Job.DoesNotExist: if no job with ``job_id`` exists.
    """
    current_job = Job.objects.get(id=job_id)
    current_push = current_job.push
    repository_id = current_job.repository.id
    jtname = _base_job_type_name(current_job.job_type.name)

    push_ids = [current_push.id]
    # if we are not on try, look at recent history
    if repository_id != _TRY_REPOSITORY_ID:
        # Order explicitly and bound by the current push: an unordered slice
        # returns arbitrary pushes that need not be recent or include ours.
        # Materialised as a list so len() and the IN lookup share one query.
        push_ids = list(
            Push.objects.filter(repository__id=repository_id, time__lte=current_push.time)
            .order_by("-time")
            .values_list("id", flat=True)[:_HISTORY_PUSH_COUNT]
        )
        if current_push.id not in push_ids:
            push_ids.append(current_push.id)

    all_groups = (
        Group.objects.filter(
            job_logs__job__push__id__in=push_ids,
            job_logs__job__push__repository__id=repository_id,
            job_logs__job__job_type__name__startswith=jtname,
            # TODO: consider 7 == autoclassified
            job_logs__job__failure_classification__id__in=[
                _FC_NOT_CLASSIFIED,
                _FC_INTERMITTENT,
                _FC_NEW_FAILURE,
            ],
            # primarily ignore retry/usercancel
            job_logs__job__result__in=["success", "testfailed"],
            group_result__status__in=[GroupStatus.OK, GroupStatus.ERROR],
        )
        .values(
            "name",
            "job_logs__job__id",
            "group_result__status",
            "job_logs__job__job_type__name",
            "job_logs__job__push__id",
        )
        .order_by("-job_logs__job__push__time")
    )

    # push id -> {"groups": {group: {job: status}}, "jobs": {job: {group: status}}}
    mappings = {}
    for item in all_groups:
        if _base_job_type_name(item["job_logs__job__job_type__name"]) != jtname:
            # the prefix match also returns variants of the task; skip them
            continue

        push_data = mappings.setdefault(
            item["job_logs__job__push__id"], {"groups": {}, "jobs": {}}
        )
        group_name = item["name"]
        item_job_id = item["job_logs__job__id"]
        status = item["group_result__status"]
        # setdefault keeps the first status seen for a (job, group) pair
        push_data["groups"].setdefault(group_name, {}).setdefault(item_job_id, status)
        push_data["jobs"].setdefault(item_job_id, {}).setdefault(group_name, status)

    current_push_data = mappings.get(current_push.id)
    if current_push_data is None:
        # no group results for this task on the current push: nothing to compare
        return

    # Groups reported on the current push whose pass rate over all examined
    # pushes is at least 50%.
    # TODO: handle new regressions - historical rate might be broken, then we
    # need to wait for more future data
    passing_groups = set()
    for group in current_push_data["groups"]:
        statuses = [
            status
            for push_data in mappings.values()
            for status in push_data["groups"].get(group, {}).values()
        ]
        # statuses is never empty: the group has at least one result on the current push
        passed = sum(1 for status in statuses if status == GroupStatus.OK)
        if passed / len(statuses) >= 0.5:
            passing_groups.add(group)

    has_history = len(push_ids) > 1
    for push_id, push_data in mappings.items():
        if has_history and push_id == current_push.id:
            # with history available only the earlier pushes are re-evaluated
            continue

        for job, job_groups in push_data["jobs"].items():
            if job == current_job.id:
                # current job will need future data to turn green
                continue

            # a job qualifies only if every group that failed in it is a passing group
            all_green = all(
                status != GroupStatus.ERROR or group in passing_groups
                for group, status in job_groups.items()
            )
            if not all_green:
                continue

            # NOTE(review): a failed job without any failing group (e.g. all
            # groups pass and a shutdown leak causes 'testfailed') is flagged
            # here as well; the TODO list at the top of the module expects such
            # a job to keep its default classification - confirm intent.
            Job.objects.filter(id=job).exclude(result="success").exclude(
                failure_classification_id=_FC_INTERMITTENT
            ).update(failure_classification_id=_FC_INTERMITTENT)
0 commit comments