Skip to content

Commit 6f4e766

Browse files
committed
add support for non try branches in a time series
1 parent fa0b7f0 commit 6f4e766

File tree

1 file changed

+110
-37
lines changed

1 file changed

+110
-37
lines changed
Lines changed: 110 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,74 @@
1-
from treeherder.model.models import Group, GroupStatus, Job
1+
from treeherder.model.models import Group, GroupStatus, Job, Push
2+
3+
# TODO: test
4+
# - p1:t1: fail on g1, p2:t1: pass on g1 - result: p1:t1: intermittent
5+
# - p1:t1: fail on leak (all groups pass), p2:t1: pass - result p1:t1: still default
6+
# - p1:t1: fail on g1, p1:t1.2: pass on g1 - result p1:t1: intermittent
7+
# - p1:t1: fail on g1, p1:t1-cf: pass on g1 - result p1:t1: intermittent
8+
# - p1:t1: fail on g1, p1:t1-cf: fail on g1 - result p1:t1: still default
29

310

411
def check_and_mark_intermittent(job_id):
512
current_job = Job.objects.get(id=job_id)
613

7-
if current_job.job_type.name.endswith("-cf"):
8-
jtname = [current_job.job_type.name, current_job.job_type.name.strip("-cf")]
9-
else:
10-
jtname = [current_job.job_type.name, f"{current_job.job_type.name}-cf"]
11-
12-
all_groups = Group.objects.filter(
13-
job_logs__job__push__id=current_job.push.id,
14-
job_logs__job__job_type__name__in=jtname,
15-
group_result__status__in=[GroupStatus.OK, GroupStatus.ERROR],
16-
).values(
17-
"name",
18-
"job_logs__job__id",
19-
"group_result__status",
14+
jtname = current_job.job_type.name.strip("-cf")
15+
ids = [current_job.push.id]
16+
17+
try:
18+
_ = int(jtname.split("-")[-1])
19+
jtname = "-".join(jtname.split("-")[:-1])
20+
except ValueError:
21+
pass
22+
23+
# if we are not on try, look at recent history
24+
if current_job.repository.id != 4:
25+
# get list of pushes
26+
ids = Push.objects.filter(repository__id=current_job.repository.id).values("id")[:20]
27+
28+
all_groups = (
29+
Group.objects.filter(
30+
job_logs__job__push__id__in=ids,
31+
job_logs__job__push__repository__id=current_job.repository.id,
32+
job_logs__job__job_type__name__startswith=jtname,
33+
job_logs__job__failure_classification__id__in=[
34+
1,
35+
4,
36+
6,
37+
], # not classified, intermittent, new_failure; TODO: consider 7 == autoclassified
38+
job_logs__job__result__in=[
39+
"success",
40+
"testfailed",
41+
], # primarily ignore retry/usercancel
42+
group_result__status__in=[GroupStatus.OK, GroupStatus.ERROR],
43+
)
44+
.values(
45+
"name",
46+
"job_logs__job__id",
47+
"group_result__status",
48+
"job_logs__job__job_type__name",
49+
"job_logs__job__push__id",
50+
)
51+
.order_by("-job_logs__job__push__time")
2052
)
2153

22-
groups = {}
23-
jobs = {}
54+
mappings = {}
2455
for item in all_groups:
56+
jobname = item["job_logs__job__job_type__name"].strip("-cf")
57+
try:
58+
int(jobname.split("-")[-1])
59+
jobname = "-".join(jobname.split("-")[:-1])
60+
except ValueError:
61+
pass
62+
63+
if jobname != jtname:
64+
# we have a variant
65+
continue
66+
67+
if item["job_logs__job__push__id"] not in mappings:
68+
mappings[item["job_logs__job__push__id"]] = {"groups": {}, "jobs": {}}
69+
groups = mappings[item["job_logs__job__push__id"]]["groups"]
70+
jobs = mappings[item["job_logs__job__push__id"]]["jobs"]
71+
2572
if item["name"] not in groups:
2673
groups[item["name"]] = {}
2774
if item["job_logs__job__id"] not in groups[item["name"]]:
@@ -32,24 +79,50 @@ def check_and_mark_intermittent(job_id):
3279
if item["name"] not in jobs[item["job_logs__job__id"]]:
3380
jobs[item["job_logs__job__id"]][item["name"]] = item["group_result__status"]
3481

35-
if len(jobs.keys()) <= 1:
36-
# zero jobs == no groups reported (i.e. marionette)
37-
# 1 job == no additional data
38-
return
39-
40-
for job in jobs.keys():
41-
# for each similar task.label, ensure all groups have >=50% pass rate, if so flag failing
42-
# job as intermittent. for non test failures, ensure all groups are green
43-
all_green = True
44-
failed_groups = [g for g in jobs[job] if int(jobs[job][g]) == GroupStatus.ERROR]
45-
for group in failed_groups:
46-
all_status = [groups[group][j] for j in groups[group]]
47-
pass_rate = len([s for s in all_status if s == GroupStatus.OK]) / len(all_status)
48-
if pass_rate < 0.5:
49-
all_green = False
50-
break
51-
52-
target_job = Job.objects.filter(id=job)
53-
54-
if all_green and target_job[0].result != "success":
55-
target_job.update(failure_classification_id=4)
82+
# multi push support - want to look back in history now that we have "future" data
83+
# a previous job can only change if ALL failing groups have future passing data
84+
#
85+
# current job has new data; let's find all groups that changed status as a result of the new data
86+
# TODO: handle new regressions - historical rate might be broken, then we need to wait for more future data
87+
changed_groups = {}
88+
for group in mappings[current_job.push.id]["groups"]:
89+
all_data = []
90+
for id in mappings.keys():
91+
all_data.extend(
92+
[mappings[id]["groups"][group][j] for j in mappings[id]["groups"].get(group, {})]
93+
)
94+
95+
# if new data changes results, update
96+
pass_rate = len([s for s in all_data if s == GroupStatus.OK]) / len(all_data)
97+
if pass_rate >= 0.5:
98+
changed_groups[group] = True
99+
100+
# all changed_groups need to be evaluated on previous 'failed' jobs to ensure all groups in that task are 'passing'
101+
for id in mappings.keys():
102+
if id == current_job.push.id and len(ids) > 1:
103+
continue
104+
105+
for job in mappings[id]["jobs"]:
106+
if job == job_id:
107+
# current job will need future data to turn green
108+
continue
109+
110+
all_green = True
111+
for group in mappings[id]["jobs"][job]:
112+
# if the group failed in this job and is not among the groups whose pass rate reached >= 50%
113+
if (
114+
mappings[id]["groups"][group][job] == GroupStatus.ERROR
115+
and group not in changed_groups
116+
):
117+
all_green = False
118+
119+
if all_green:
120+
target_job = Job.objects.filter(id=job)
121+
122+
# edge case is all groups originally pass and then shutdown leaks cause 'testfailed'.
123+
# also we ignore infra/leaks that don't report group failures in errorsummary files
124+
if (
125+
target_job[0].result != "success"
126+
and target_job[0].failure_classification_id != 4
127+
):
128+
target_job.update(failure_classification_id=4)

0 commit comments

Comments
 (0)