From 69716c56ae6bdc693135c2730e37aa6ee62958d2 Mon Sep 17 00:00:00 2001 From: Joel Maher Date: Tue, 13 May 2025 11:48:04 -0700 Subject: [PATCH 1/6] Add post log parser to look for repeated test runs and annotate as intermittent. --- tests/log_parser/test_tasks.py | 20 ++++++++++ treeherder/log_parser/intermittents.py | 55 ++++++++++++++++++++++++++ treeherder/log_parser/tasks.py | 4 +- 3 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 treeherder/log_parser/intermittents.py diff --git a/tests/log_parser/test_tasks.py b/tests/log_parser/test_tasks.py index 3622be324a7..43c863c7931 100644 --- a/tests/log_parser/test_tasks.py +++ b/tests/log_parser/test_tasks.py @@ -228,3 +228,23 @@ def test_bug_suggestion_line_no_stb( ), } ] + + +@pytest.mark.django_db +def test_confirm_failure_intermittent( + failure_classifications, jobs_with_local_log, sample_push, test_repository +): + """ + TODO: write tests for testing intermittents.py handling in the parser. + * test retrigger with 1 similar failure, but both jobs have different failures - both orange + * test 5 jobs, 2 fail, 2 fail for other reasons, 1 pass - all green + * test infra/tooling error + 1x green - both green + * test failure w/3x tests + 3x confirm-failure tasks green - all green + * test failure w/3x tests + 2x confirm-failure tasks green - original task still orange + """ + store_push_data(test_repository, sample_push) + for job in jobs_with_local_log: + job["job"]["result"] = "testfailed" + job["revision"] = sample_push[0]["revision"] + store_job_data(test_repository, jobs_with_local_log) + assert 1 == 0 diff --git a/treeherder/log_parser/intermittents.py b/treeherder/log_parser/intermittents.py new file mode 100644 index 00000000000..bb67f9034ac --- /dev/null +++ b/treeherder/log_parser/intermittents.py @@ -0,0 +1,55 @@ +from treeherder.model.models import Group, GroupStatus, Job + + +def check_and_mark_intermittent(job_id): + current_job = Job.objects.get(id=job_id) + + if current_job.job_type.name.endswith("-cf"): + jtname = [current_job.job_type.name, current_job.job_type.name.strip("-cf")] + else: + jtname = [current_job.job_type.name, f"{current_job.job_type.name}-cf"] + + all_groups = Group.objects.filter( + job_logs__job__push__id=current_job.push.id, + job_logs__job__job_type__name__in=jtname, + group_result__status__in=[GroupStatus.OK, GroupStatus.ERROR], + ).values( + "name", + "job_logs__job__id", + "group_result__status", + ) + + groups = {} + jobs = {} + for item in all_groups: + if item["name"] not in groups: + groups[item["name"]] = {} + if item["job_logs__job__id"] not in groups[item["name"]]: + groups[item["name"]][item["job_logs__job__id"]] = item["group_result__status"] + + if item["job_logs__job__id"] not in jobs: + jobs[item["job_logs__job__id"]] = {} + if item["name"] not in jobs[item["job_logs__job__id"]]: + jobs[item["job_logs__job__id"]][item["name"]] = item["group_result__status"] + + if len(jobs.keys()) <= 1: + # zero jobs == no groups reported (i.e. marionette) + # 1 job == no additional data + return + + for job in jobs.keys(): + # for each similar task.label, ensure all groups have >=50% pass rate, if so flag failing + # job as intermittent. for non test failures, ensure all groups are green + all_green = True + failed_groups = [g for g in jobs[job] if int(jobs[job][g]) == GroupStatus.ERROR] + for group in failed_groups: + all_status = [groups[group][j] for j in groups[group]] + pass_rate = len([s for s in all_status if s == GroupStatus.OK]) / len(all_status) + if pass_rate < 0.5: + all_green = False + break + + target_job = Job.objects.filter(id=job) + + if all_green and target_job[0].result != "success": + target_job.update(failure_classification_id=4) diff --git a/treeherder/log_parser/tasks.py b/treeherder/log_parser/tasks.py index 00ff76245ca..38d2519b5b6 100644 --- a/treeherder/log_parser/tasks.py +++ b/treeherder/log_parser/tasks.py @@ -13,7 +13,7 @@ from treeherder.model.models import Job, JobLog from treeherder.workers.task import retryable_task -from . import failureline +from . import failureline, intermittents logger = logging.getLogger(__name__) @@ -81,6 +81,8 @@ def store_failure_lines(job_log): errorsummary file.""" logger.info("Running store_failure_lines for job %s", job_log.job.id) failureline.store_failure_lines(job_log) + logger.info("Running check_and_mark_intermittent for job %s", job_log.job.id) + intermittents.check_and_mark_intermittent(job_log.job.id) def post_log_artifacts(job_log): From e6bd8c7c2ae1a52493dc5d007eff90e2fd3beb90 Mon Sep 17 00:00:00 2001 From: Joel Maher Date: Wed, 21 May 2025 15:42:17 -0700 Subject: [PATCH 2/6] add support for non try branches in a time series --- tests/conftest.py | 31 ++- tests/log_parser/test_store_failure_lines.py | 228 +++++++++++++++++- tests/log_parser/test_tasks.py | 20 -- ...ochitest-browser-chrome_2_errorsummary.log | 35 +++ ...hitest-browser-chrome_cf1_errorsummary.log | 2 + ...t-browser-chrome_cf1_pass_errorsummary.log | 2 + ...hitest-browser-chrome_cf2_errorsummary.log | 2 + ...t-browser-chrome_cf2_pass_errorsummary.log | 2 + ...hitest-browser-chrome_cf3_errorsummary.log | 2 + ...t-browser-chrome_cf3_pass_errorsummary.log | 2 + .../mochitest-browser-chrome_errorsummary.log | 4 +- ...test-browser-chrome_infra_errorsummary.log | 2 + ...itest-browser-chrome_pass_errorsummary.log | 30 +++ treeherder/log_parser/intermittents.py | 153 +++++++++--- treeherder/log_parser/tasks.py | 7 +- 15 files changed, 450 insertions(+), 72 deletions(-) create mode 100644 tests/sample_data/logs/mochitest-browser-chrome_2_errorsummary.log create mode 100644 tests/sample_data/logs/mochitest-browser-chrome_cf1_errorsummary.log create mode 100644 tests/sample_data/logs/mochitest-browser-chrome_cf1_pass_errorsummary.log create mode 100644 tests/sample_data/logs/mochitest-browser-chrome_cf2_errorsummary.log create mode 100644 tests/sample_data/logs/mochitest-browser-chrome_cf2_pass_errorsummary.log create mode 100644 tests/sample_data/logs/mochitest-browser-chrome_cf3_errorsummary.log create mode 100644 tests/sample_data/logs/mochitest-browser-chrome_cf3_pass_errorsummary.log create mode 100644 tests/sample_data/logs/mochitest-browser-chrome_infra_errorsummary.log create mode 100644 tests/sample_data/logs/mochitest-browser-chrome_pass_errorsummary.log diff --git a/tests/conftest.py b/tests/conftest.py index 3b2df4a959f..a0fdf470c85 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -315,7 +315,13 @@ def fixture_create_jobs(test_repository, failure_classifications): def create(jobs): store_job_data(test_repository, jobs) - return [th_models.Job.objects.get(id=i) for i in range(1, len(jobs) + 1)] + retval = [] + for i in range(1, len(jobs) + 1): + try: + retval.append(th_models.Job.objects.get(id=i)) + except Exception: + pass + return retval return create @@ -379,6 +385,29 @@ def task_mock(*args, **kwargs): monkeypatch.setattr(tasks, "parse_logs", task_mock) +@pytest.fixture +def mock_parser(monkeypatch): + from celery import shared_task + + from treeherder.log_parser import failureline + + @shared_task + def fetch_mock(*args, **kwargs): + file_name = args[0].url.split("/")[-1] + try: + data_path = os.path.join(SAMPLE_DATA_PATH, "logs", file_name) + with open(data_path) as f: + fetch_data = f.read() + except Exception: + return + + if not fetch_data: + return + return (json.loads(item.strip("\n")) for item in fetch_data.splitlines()) + + monkeypatch.setattr(failureline, "fetch_log", fetch_mock) + + @pytest.fixture def taskcluster_notify_mock(monkeypatch): mock = MagicMock() diff --git a/tests/log_parser/test_store_failure_lines.py b/tests/log_parser/test_store_failure_lines.py index 329e63bdd88..04e5d68f33d 100644 --- a/tests/log_parser/test_store_failure_lines.py +++ b/tests/log_parser/test_store_failure_lines.py @@ -5,12 +5,9 @@ from django.conf import settings from requests.exceptions import HTTPError -from treeherder.log_parser.failureline import ( - get_group_results, - store_failure_lines, - write_failure_lines, -) -from treeherder.model.models import FailureLine, Group, GroupStatus, JobLog +from treeherder.log_parser.failureline import get_group_results, write_failure_lines +from treeherder.log_parser.tasks import store_failure_lines +from treeherder.model.models import FailureLine, Group, GroupStatus, Job, JobLog from ..sampledata import SampleData @@ -140,7 +137,7 @@ def test_store_error_summary_500(activate_responses, test_repository, test_job): assert log_obj.status == JobLog.FAILED -def test_store_error_summary_duplicate(activate_responses, test_repository, test_job): +def test_store_error_summary_duplicate(activate_responses, test_repository, test_job, mock_parser): log_url = "http://my-log.mozilla.org" log_obj = JobLog.objects.create(job=test_job, name="errorsummary_json", url=log_url) @@ -173,13 +170,13 @@ def test_store_error_summary_group_status(activate_responses, test_repository, t ok_groups = Group.objects.filter(group_result__status=GroupStatus.OK) error_groups = Group.objects.filter(group_result__status=GroupStatus.ERROR) - assert ok_groups.count() == 28 - assert error_groups.count() == 1 + assert ok_groups.count() == 26 + assert error_groups.count() == 3 assert log_obj.groups.count() == 29 assert log_obj.groups.all().first().name == "dom/base/test/browser.ini" assert ok_groups.first().name == "dom/base/test/browser.ini" - assert error_groups.first().name == "toolkit/components/pictureinpicture/tests/browser.ini" + assert error_groups.first().name == "dom/workers/test/browser.ini" def test_group_status_duration(activate_responses, test_repository, test_job): @@ -238,3 +235,214 @@ def test_get_group_results_with_colon(activate_responses, test_repository, test_ assert task_groups[ "toolkit/components/extensions/test/xpcshell/xpcshell-e10s.ini:toolkit/components/extensions/test/xpcshell/xpcshell-common-e10s.ini" ] + + +def mock_full_log_parser(job_logs, mock_parser): + from treeherder.log_parser.tasks import store_failure_lines + + try: + # note: I was using parse_logs, but that is less deterministic + for jl in job_logs: + store_failure_lines(jl) + except: + raise + + +def create_errorsummary_job(base_job, create_jobs, log_filenames): + import copy + import random + + job_defs = [] + urls = [] + for log_filename in log_filenames: + log_path = SampleData().get_log_path(log_filename) + log_url = f"http://my-log.mozilla.org/{log_path}" + + with open(log_path) as log_handler: + responses.add(responses.GET, log_url, body=log_handler.read(), status=200) + + job_def = copy.deepcopy(base_job) + + task_ending = "" + if "_cf" in log_filename: + task_ending = "-cf" + + job_def["job"].update( + { + "status": "completed", + "result": "success" if "_pass" in log_filename else "testfailed", + "name": f"{job_def['job']['name']}{task_ending}", + "reference_data_name": job_def["job"]["reference_data_name"].replace( + "a", str(random.randint(0, 9)) + ), + "job_guid": job_def["job"]["job_guid"] + .replace("e", str(random.randint(0, 9))) + .replace("d", str(random.randint(0, 9))), + "start_timestamp": job_def["job"]["start_timestamp"] + + 100 + + random.randint(0, 100) + + random.randint(0, 100), + "taskcluster_task_id": job_def["job"]["taskcluster_task_id"].replace( + "T", str(random.randint(0, 9)) + ), + "taskcluster_retry_id": "0", + } + ) + job_defs.append(job_def) + urls.append(log_url) + + jobs = create_jobs(job_defs) + + index = 0 + for job in jobs: + log_obj = JobLog.objects.create(job=job, name="errorsummary_json", url=urls[index]) + store_failure_lines(log_obj) + index += 1 + + return jobs + + +def verify_classification_id(jobs, job1_fcid, job2_fcid): + j1 = Job.objects.filter(id=jobs[0].id) + j2 = Job.objects.filter(id=jobs[1].id) + assert j1[0].failure_classification.id == job1_fcid + assert j2[0].failure_classification.id == job2_fcid + + +""" +TODO: write tests for testing intermittents.py handling in the parser. + * not supported yet: test infra/tooling error + 1x green - both green + * test multiple push ids +""" + + +def test_infra_no_intermittent(activate_responses, hundred_job_blobs, mock_parser, create_jobs): + # test fails, retrigger fails on infra, both unchanged + log_filenames = [ + "mochitest-browser-chrome_errorsummary.log", + "mochitest-browser-chrome_infra_errorsummary.log", + ] + jobs = create_errorsummary_job(hundred_job_blobs[0], create_jobs, log_filenames) + job_logs = JobLog.objects.filter(job_id__in=(j.id for j in jobs)) + assert len(jobs) == len(log_filenames) + + # this will parse and check for intermittents + mock_full_log_parser(job_logs, mock_parser) + verify_classification_id(jobs, 1, 1) + + +def test_infra_intermittent(activate_responses, hundred_job_blobs, mock_parser, create_jobs): + # test passes, retrigger is infra, infra -> unchanged (new feature needed to make intermittent) + log_filenames = [ + "mochitest-browser-chrome_infra_errorsummary.log", + "mochitest-browser-chrome_pass_errorsummary.log", + ] + jobs = create_errorsummary_job(hundred_job_blobs[0], create_jobs, log_filenames) + job_logs = JobLog.objects.filter(job_id__in=(j.id for j in jobs)) + assert len(jobs) == len(log_filenames) + + # this will parse and check for intermittents + mock_full_log_parser(job_logs, mock_parser) + verify_classification_id(jobs, 4, 1) + + +def test_multiple_jobs_intermittent( + activate_responses, hundred_job_blobs, mock_parser, create_jobs +): + # two sets of tests fail, both failures should be intermittent + log_filenames = [ + "mochitest-browser-chrome_errorsummary.log", + "mochitest-browser-chrome_2_errorsummary.log", + "mochitest-browser-chrome_pass_errorsummary.log", + ] + jobs = create_errorsummary_job(hundred_job_blobs[0], create_jobs, log_filenames) + job_logs = JobLog.objects.filter(job_id__in=(j.id for j in jobs)) + assert len(jobs) == len(log_filenames) + + # this will parse and check for intermittents + mock_full_log_parser(job_logs, mock_parser) + verify_classification_id(jobs, 4, 4) + + +def test_confirm_failure_no_intermittent( + activate_responses, hundred_job_blobs, mock_parser, create_jobs +): + # test fails, -cf fails on same group, both unchanged + log_filenames = [ + "mochitest-browser-chrome_errorsummary.log", + "mochitest-browser-chrome_cf1_errorsummary.log", + "mochitest-browser-chrome_cf2_errorsummary.log", + ] + jobs = create_errorsummary_job(hundred_job_blobs[0], create_jobs, log_filenames) + job_logs = JobLog.objects.filter(job_id__in=(j.id for j in jobs)) + assert len(jobs) == len(log_filenames) + + # this will parse and check for intermittents + mock_full_log_parser(job_logs, mock_parser) + verify_classification_id(jobs, 1, 1) + + +def test_confirm_failure_partial_intermittent( + activate_responses, hundred_job_blobs, mock_parser, create_jobs +): + # test fails, -cf fails on same group, both unchanged + log_filenames = [ + "mochitest-browser-chrome_errorsummary.log", + "mochitest-browser-chrome_cf1_errorsummary.log", + ] + jobs = create_errorsummary_job(hundred_job_blobs[0], create_jobs, log_filenames) + job_logs = JobLog.objects.filter(job_id__in=(j.id for j in jobs)) + assert len(jobs) == len(log_filenames) + + # this will parse and check for intermittents + mock_full_log_parser(job_logs, mock_parser) + verify_classification_id(jobs, 1, 1) + + +def test_confirm_failure_pass_intermittent( + activate_responses, hundred_job_blobs, mock_parser, create_jobs +): + # test fails, -cf passes, original -> intermittent + log_filenames = [ + "mochitest-browser-chrome_errorsummary.log", + "mochitest-browser-chrome_cf1_pass_errorsummary.log", + "mochitest-browser-chrome_cf2_pass_errorsummary.log", + "mochitest-browser-chrome_cf3_pass_errorsummary.log", + ] + jobs = create_errorsummary_job(hundred_job_blobs[0], create_jobs, log_filenames) + job_logs = JobLog.objects.filter(job_id__in=(j.id for j in jobs)) + assert len(jobs) == len(log_filenames) + + # this will parse and check for intermittents + mock_full_log_parser(job_logs, mock_parser) + verify_classification_id(jobs, 4, 1) + + +def test_retrigger_no_intermittent(activate_responses, hundred_job_blobs, mock_parser, create_jobs): + # test fails, retrigger fails on same group, both unchanged + log_filenames = [ + "mochitest-browser-chrome_errorsummary.log", + "mochitest-browser-chrome_errorsummary.log", + ] + jobs = create_errorsummary_job(hundred_job_blobs[0], create_jobs, log_filenames) + job_logs = JobLog.objects.filter(job_id__in=(j.id for j in jobs)) + assert len(jobs) == len(log_filenames) + + # this will parse and check for intermittents + mock_full_log_parser(job_logs, mock_parser) + verify_classification_id(jobs, 1, 1) + + +def test_retrigger_intermittent(activate_responses, hundred_job_blobs, mock_parser, create_jobs): + # test fails, retrigger has different failures on same group, both -> intermittent + log_filenames = [ + "mochitest-browser-chrome_errorsummary.log", + "mochitest-browser-chrome_2_errorsummary.log", + ] + jobs = create_errorsummary_job(hundred_job_blobs[0], create_jobs, log_filenames) + job_logs = JobLog.objects.filter(job_id__in=(j.id for j in jobs)) + assert len(jobs) == len(log_filenames) + + # this will parse and check for intermittents + mock_full_log_parser(job_logs, mock_parser) + verify_classification_id(jobs, 4, 4) diff --git a/tests/log_parser/test_tasks.py b/tests/log_parser/test_tasks.py index 43c863c7931..3622be324a7 100644 --- a/tests/log_parser/test_tasks.py +++ b/tests/log_parser/test_tasks.py @@ -228,23 +228,3 @@ def test_bug_suggestion_line_no_stb( ), } ] - - -@pytest.mark.django_db -def test_confirm_failure_intermittent( - failure_classifications, jobs_with_local_log, sample_push, test_repository -): - """ - TODO: write tests for testing intermittents.py handling in the parser. - * test retrigger with 1 similar failure, but both jobs have different failures - both orange - * test 5 jobs, 2 fail, 2 fail for other reasons, 1 pass - all green - * test infra/tooling error + 1x green - both green - * test failure w/3x tests + 3x confirm-failure tasks green - all green - * test failure w/3x tests + 2x confirm-failure tasks green - original task still orange - """ - store_push_data(test_repository, sample_push) - for job in jobs_with_local_log: - job["job"]["result"] = "testfailed" - job["revision"] = sample_push[0]["revision"] - store_job_data(test_repository, jobs_with_local_log) - assert 1 == 0 diff --git a/tests/sample_data/logs/mochitest-browser-chrome_2_errorsummary.log b/tests/sample_data/logs/mochitest-browser-chrome_2_errorsummary.log new file mode 100644 index 00000000000..35d4f15361a --- /dev/null +++ b/tests/sample_data/logs/mochitest-browser-chrome_2_errorsummary.log @@ -0,0 +1,35 @@ +{"action": "test_groups", "line": 2, "groups": ["dom/base/test/browser.ini", "browser/components/contextualidentity/test/browser/browser.ini", "browser/base/content/test/tabMediaIndicator/browser.ini", "gfx/tests/browser/browser.ini", "browser/components/downloads/test/browser/browser.ini", "toolkit/components/satchel/test/browser/browser.ini", "toolkit/components/aboutperformance/tests/browser/browser.ini", "toolkit/components/aboutprocesses/tests/browser/browser.ini", "security/manager/ssl/tests/mochitest/browser/browser.ini", "toolkit/components/mozprotocol/tests/browser.ini", "netwerk/test/browser/browser.ini", "browser/base/content/test/siteIdentity/browser.ini", "dom/workers/test/browser.ini", "browser/components/preferences/tests/browser.ini", "browser/base/content/test/about/browser.ini", "browser/base/content/test/popups/browser.ini", "accessible/tests/browser/tree/browser.ini", "toolkit/components/remotepagemanager/tests/browser/browser.ini", "dom/xhr/tests/browser.ini", "dom/security/test/mixedcontentblocker/browser.ini", "browser/components/tests/browser/whats_new_page/browser.ini", "toolkit/content/tests/browser/browser.ini", "dom/broadcastchannel/tests/browser.ini", "caps/tests/mochitest/browser.ini", "browser/components/aboutconfig/test/browser/browser.ini", "browser/components/pocket/test/browser.ini", "accessible/tests/browser/browser.ini", "toolkit/components/pictureinpicture/tests/browser.ini", "dom/ipc/tests/browser.ini", "dom/ipc/tests/JSWindowActor/browser.ini"]} +{"status": "FAIL", "subtest": "Uncaught exception", "group": "browser/base/content/test/popups/browser.ini", "duration": 3141, "action": "test_result", "known_intermittent": [], "test": "browser/base/content/test/popups/browser_fullscreen.js", "message": "undefined - timed out after 50 tries.", "line": 4167, "stack": null, "expected": "PASS"} +{"status": "FAIL", "subtest": "Found an unexpected tab at the end of test run: http://example.com/browser/base/content/test/popups/test-page.html", "group": "browser/base/content/test/popups/browser.ini", "duration": 3141, "action": "test_result", "known_intermittent": [], "test": "browser/base/content/test/popups/browser_fullscreen.js", "message": "", "line": 4172, "stack": null, "expected": "PASS"} +{"status": "FAIL", "subtest": "Test timed out", "group": "browser/base/content/test/popups/browser.ini", "duration": 3141, "action": "test_result", "known_intermittent": [], "test": "browser/base/content/test/popups/browser_videoSelection.js", "message": "", "line": 4269, "stack": null, "expected": "PASS"} +{"status": "FAIL", "subtest": "A promise chain failed to handle a rejection: Video is being cloned visually. - timed out after 50 tries. - stack: (No stack available.)\nRejection date: Thu Oct 08 2020 20:47:46 GMT+0000 (Coordinated Universal Time) - false == true", "group": "browser/base/content/test/popups/browser.ini", "duration": 3141, "action": "test_result", "known_intermittent": [], "test": "browser/base/content/test/popups/browser_videoSelection.js", "message": "JS frame :: resource://testing-common/PromiseTestUtils.jsm :: assertNoUncaughtRejections :: line 265\nStack trace:\nresource://testing-common/PromiseTestUtils.jsm:assertNoUncaughtRejections:265\nchrome://mochikit/content/browser-test.js:nextTest:615\nchrome://mochikit/content/browser-test.js:timeoutFn:1195", "line": 4271, "stack": null, "expected": "PASS"} +{"status": "FAIL", "subtest": "Found a tab after previous test timed out: http://example.com/browser/base/content/test/popups/test-video-selection.html", "group": "browser/base/content/test/popups/browser.ini", "duration": 3141, "action": "test_result", "known_intermittent": [], "test": "browser/base/content/test/popups/browser_videoSelection.js", "message": "", "line": 4275, "stack": null, "expected": "PASS"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "dom/base/test/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "browser/components/contextualidentity/test/browser/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "browser/base/content/test/tabMediaIndicator/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "gfx/tests/browser/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "browser/components/downloads/test/browser/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "toolkit/components/satchel/test/browser/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "toolkit/components/aboutperformance/tests/browser/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "toolkit/components/aboutprocesses/tests/browser/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "security/manager/ssl/tests/mochitest/browser/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "toolkit/components/mozprotocol/tests/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "netwerk/test/browser/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "browser/base/content/test/siteIdentity/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "dom/workers/test/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "browser/components/preferences/tests/browser.ini"} +{"status": "OK", "duration": 0, "action": "group_result", "line": 4865, "group": "toolkit/components/pictureinpicture/tests/browser.ini"} +{"status": "ERROR", "duration": 3141, "action": "group_result", "line": 4865, "group": "browser/base/content/test/popups/browser.ini"} +{"status": "ERROR", "duration": 3141, "action": "group_result", "line": 4865, "group": "accessible/tests/browser/tree/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "toolkit/components/remotepagemanager/tests/browser/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "dom/xhr/tests/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "dom/security/test/mixedcontentblocker/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "toolkit/content/tests/browser/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "dom/broadcastchannel/tests/browser.ini"} +{"status": "OK", "duration": null, "action": "group_result", "line": 4865, "group": "caps/tests/mochitest/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "browser/components/aboutconfig/test/browser/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "browser/components/pocket/test/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "accessible/tests/browser/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "browser/base/content/test/about/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "dom/ipc/tests/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "dom/ipc/tests/JSWindowActor/browser.ini"} diff --git a/tests/sample_data/logs/mochitest-browser-chrome_cf1_errorsummary.log b/tests/sample_data/logs/mochitest-browser-chrome_cf1_errorsummary.log new file mode 100644 index 00000000000..bd61155f137 --- /dev/null +++ b/tests/sample_data/logs/mochitest-browser-chrome_cf1_errorsummary.log @@ -0,0 +1,2 @@ +{"action": "test_groups", "line": 2, "groups": ["browser/components/preferences/tests/browser.ini"]} +{"status": "ERROR", "duration": 3141, "action": "group_result", "line": 4865, "group": "browser/components/preferences/tests/browser.ini"} diff --git a/tests/sample_data/logs/mochitest-browser-chrome_cf1_pass_errorsummary.log b/tests/sample_data/logs/mochitest-browser-chrome_cf1_pass_errorsummary.log new file mode 100644 index 00000000000..2e9bd663adc --- /dev/null +++ b/tests/sample_data/logs/mochitest-browser-chrome_cf1_pass_errorsummary.log @@ -0,0 +1,2 @@ +{"action": "test_groups", "line": 2, "groups": ["browser/components/preferences/tests/browser.ini"]} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "browser/components/preferences/tests/browser.ini"} diff --git a/tests/sample_data/logs/mochitest-browser-chrome_cf2_errorsummary.log b/tests/sample_data/logs/mochitest-browser-chrome_cf2_errorsummary.log new file mode 100644 index 00000000000..2c45688bba6 --- /dev/null +++ b/tests/sample_data/logs/mochitest-browser-chrome_cf2_errorsummary.log @@ -0,0 +1,2 @@ +{"action": "test_groups", "line": 2, "groups": ["toolkit/components/pictureinpicture/tests/browser.ini"]} +{"status": "ERROR", "duration": 3141, "action": "group_result", "line": 4865, "group": "toolkit/components/pictureinpicture/tests/browser.ini"} diff --git a/tests/sample_data/logs/mochitest-browser-chrome_cf2_pass_errorsummary.log b/tests/sample_data/logs/mochitest-browser-chrome_cf2_pass_errorsummary.log new file mode 100644 index 00000000000..f222f48eded --- /dev/null +++ b/tests/sample_data/logs/mochitest-browser-chrome_cf2_pass_errorsummary.log @@ -0,0 +1,2 @@ +{"action": "test_groups", "line": 2, "groups": ["toolkit/components/pictureinpicture/tests/browser.ini"]} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "toolkit/components/pictureinpicture/tests/browser.ini"} diff --git a/tests/sample_data/logs/mochitest-browser-chrome_cf3_errorsummary.log b/tests/sample_data/logs/mochitest-browser-chrome_cf3_errorsummary.log new file mode 100644 index 00000000000..9d584db4cc7 --- /dev/null +++ b/tests/sample_data/logs/mochitest-browser-chrome_cf3_errorsummary.log @@ -0,0 +1,2 @@ +{"action": "test_groups", "line": 2, "groups": ["dom/workers/test/browser.ini"]} +{"status": "ERROR", "duration": 3141, "action": "group_result", "line": 4865, "group": "dom/workers/test/browser.ini"} diff --git a/tests/sample_data/logs/mochitest-browser-chrome_cf3_pass_errorsummary.log b/tests/sample_data/logs/mochitest-browser-chrome_cf3_pass_errorsummary.log new file mode 100644 index 00000000000..7fd86c05fca --- /dev/null +++ b/tests/sample_data/logs/mochitest-browser-chrome_cf3_pass_errorsummary.log @@ -0,0 +1,2 @@ +{"action": "test_groups", "line": 2, "groups": ["dom/workers/test/browser.ini"]} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "dom/workers/test/browser.ini"} diff --git a/tests/sample_data/logs/mochitest-browser-chrome_errorsummary.log b/tests/sample_data/logs/mochitest-browser-chrome_errorsummary.log index 8c90f795ec3..37fd80e6810 100644 --- a/tests/sample_data/logs/mochitest-browser-chrome_errorsummary.log +++ b/tests/sample_data/logs/mochitest-browser-chrome_errorsummary.log @@ -16,8 +16,8 @@ {"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "toolkit/components/mozprotocol/tests/browser.ini"} {"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "netwerk/test/browser/browser.ini"} {"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "browser/base/content/test/siteIdentity/browser.ini"} -{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "dom/workers/test/browser.ini"} -{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "browser/components/preferences/tests/browser.ini"} +{"status": "ERROR", "duration": 3141, "action": "group_result", "line": 4865, "group": "dom/workers/test/browser.ini"} +{"status": "ERROR", "duration": 3141, "action": "group_result", "line": 4865, "group": "browser/components/preferences/tests/browser.ini"} {"status": "ERROR", "duration": 0, "action": "group_result", "line": 4865, "group": "toolkit/components/pictureinpicture/tests/browser.ini"} {"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "browser/base/content/test/popups/browser.ini"} {"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "accessible/tests/browser/tree/browser.ini"} diff --git a/tests/sample_data/logs/mochitest-browser-chrome_infra_errorsummary.log b/tests/sample_data/logs/mochitest-browser-chrome_infra_errorsummary.log new file mode 100644 index 00000000000..9b4d54fe7a9 --- /dev/null +++ b/tests/sample_data/logs/mochitest-browser-chrome_infra_errorsummary.log @@ -0,0 +1,2 @@ +{"action": "test_groups", "line": 2, "groups": ["dom/base/test/browser.ini", "browser/components/contextualidentity/test/browser/browser.ini", "browser/base/content/test/tabMediaIndicator/browser.ini", "gfx/tests/browser/browser.ini", "browser/components/downloads/test/browser/browser.ini", "toolkit/components/satchel/test/browser/browser.ini", "toolkit/components/aboutperformance/tests/browser/browser.ini", "toolkit/components/aboutprocesses/tests/browser/browser.ini", "security/manager/ssl/tests/mochitest/browser/browser.ini", "toolkit/components/mozprotocol/tests/browser.ini", "netwerk/test/browser/browser.ini", "browser/base/content/test/siteIdentity/browser.ini", "dom/workers/test/browser.ini", "browser/components/preferences/tests/browser.ini", "browser/base/content/test/about/browser.ini", "browser/base/content/test/popups/browser.ini", "accessible/tests/browser/tree/browser.ini", "toolkit/components/remotepagemanager/tests/browser/browser.ini", "dom/xhr/tests/browser.ini", "dom/security/test/mixedcontentblocker/browser.ini", "browser/components/tests/browser/whats_new_page/browser.ini", "toolkit/content/tests/browser/browser.ini", "dom/broadcastchannel/tests/browser.ini", "caps/tests/mochitest/browser.ini", "browser/components/aboutconfig/test/browser/browser.ini", "browser/components/pocket/test/browser.ini", "accessible/tests/browser/browser.ini", "toolkit/components/pictureinpicture/tests/browser.ini", "dom/ipc/tests/browser.ini", "dom/ipc/tests/JSWindowActor/browser.ini"]} +{"test": "automation.py", "subtest": null, "group": "", "status": "TIMEOUT", "expected": "PASS", "message": "application timed out after 370 seconds with no output", "stack": null, "modifiers": "", "known_intermittent": [], "action": "test_result", "line": 207} diff --git a/tests/sample_data/logs/mochitest-browser-chrome_pass_errorsummary.log b/tests/sample_data/logs/mochitest-browser-chrome_pass_errorsummary.log new file mode 100644 index 00000000000..c58aab88ed1 --- /dev/null +++ b/tests/sample_data/logs/mochitest-browser-chrome_pass_errorsummary.log @@ -0,0 +1,30 @@ +{"action": "test_groups", "line": 2, "groups": ["dom/base/test/browser.ini", "browser/components/contextualidentity/test/browser/browser.ini", "browser/base/content/test/tabMediaIndicator/browser.ini", "gfx/tests/browser/browser.ini", "browser/components/downloads/test/browser/browser.ini", "toolkit/components/satchel/test/browser/browser.ini", "toolkit/components/aboutperformance/tests/browser/browser.ini", "toolkit/components/aboutprocesses/tests/browser/browser.ini", "security/manager/ssl/tests/mochitest/browser/browser.ini", "toolkit/components/mozprotocol/tests/browser.ini", "netwerk/test/browser/browser.ini", "browser/base/content/test/siteIdentity/browser.ini", "dom/workers/test/browser.ini", "browser/components/preferences/tests/browser.ini", "browser/base/content/test/about/browser.ini", "browser/base/content/test/popups/browser.ini", "accessible/tests/browser/tree/browser.ini", "toolkit/components/remotepagemanager/tests/browser/browser.ini", "dom/xhr/tests/browser.ini", "dom/security/test/mixedcontentblocker/browser.ini", "browser/components/tests/browser/whats_new_page/browser.ini", "toolkit/content/tests/browser/browser.ini", "dom/broadcastchannel/tests/browser.ini", "caps/tests/mochitest/browser.ini", "browser/components/aboutconfig/test/browser/browser.ini", "browser/components/pocket/test/browser.ini", "accessible/tests/browser/browser.ini", "toolkit/components/pictureinpicture/tests/browser.ini", "dom/ipc/tests/browser.ini", "dom/ipc/tests/JSWindowActor/browser.ini"]} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "dom/base/test/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "browser/components/contextualidentity/test/browser/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "browser/base/content/test/tabMediaIndicator/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "gfx/tests/browser/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "browser/components/downloads/test/browser/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "toolkit/components/satchel/test/browser/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "toolkit/components/aboutperformance/tests/browser/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "toolkit/components/aboutprocesses/tests/browser/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "security/manager/ssl/tests/mochitest/browser/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "toolkit/components/mozprotocol/tests/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "netwerk/test/browser/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "browser/base/content/test/siteIdentity/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "dom/workers/test/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "browser/components/preferences/tests/browser.ini"} +{"status": "OK", "duration": 0, "action": "group_result", "line": 4865, "group": "toolkit/components/pictureinpicture/tests/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "browser/base/content/test/popups/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "accessible/tests/browser/tree/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "toolkit/components/remotepagemanager/tests/browser/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "dom/xhr/tests/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "dom/security/test/mixedcontentblocker/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "toolkit/content/tests/browser/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "dom/broadcastchannel/tests/browser.ini"} +{"status": "OK", "duration": null, "action": "group_result", "line": 4865, "group": "caps/tests/mochitest/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "browser/components/aboutconfig/test/browser/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "browser/components/pocket/test/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "accessible/tests/browser/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "browser/base/content/test/about/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "dom/ipc/tests/browser.ini"} +{"status": "OK", "duration": 3141, "action": "group_result", "line": 4865, "group": "dom/ipc/tests/JSWindowActor/browser.ini"} diff --git a/treeherder/log_parser/intermittents.py b/treeherder/log_parser/intermittents.py index bb67f9034ac..2654ec5e49e 100644 --- a/treeherder/log_parser/intermittents.py +++ b/treeherder/log_parser/intermittents.py @@ -1,27 +1,66 @@ -from treeherder.model.models import Group, GroupStatus, Job +from treeherder.model.models import Group, GroupStatus, Job, Push def check_and_mark_intermittent(job_id): current_job = Job.objects.get(id=job_id) + jtname = current_job.job_type.name.strip("-cf") + ids = [current_job.push.id] - if current_job.job_type.name.endswith("-cf"): - jtname = [current_job.job_type.name, current_job.job_type.name.strip("-cf")] - else: - jtname = [current_job.job_type.name, f"{current_job.job_type.name}-cf"] - - all_groups = Group.objects.filter( - job_logs__job__push__id=current_job.push.id, - job_logs__job__job_type__name__in=jtname, - group_result__status__in=[GroupStatus.OK, GroupStatus.ERROR], - ).values( - "name", - "job_logs__job__id", - "group_result__status", + try: + _ = int(jtname.split("-")[-1]) + jtname = "-".join(jtname.split("-")[:-1]) + except ValueError: + pass + + # if we are not on try, look at recent history + if current_job.repository.id != 4: + # get list of pushes + ids = Push.objects.filter(repository__id=current_job.repository.id).values("id")[:20] + + all_groups = ( + Group.objects.filter( + job_logs__job__push__id__in=ids, + job_logs__job__push__repository__id=current_job.repository.id, + job_logs__job__job_type__name__startswith=jtname, + job_logs__job__failure_classification__id__in=[ + 1, + 4, + 6, + ], # not classified, intermittent, new_failure; TODO: consider 7 == autoclassified + job_logs__job__result__in=[ + "success", + "testfailed", + ], # primarily ignore retry/usercancel + group_result__status__in=[GroupStatus.OK, GroupStatus.ERROR], + ) + .values( + "name", + "job_logs__job__id", + "group_result__status", + "job_logs__job__job_type__name", + "job_logs__job__push__id", + ) + .order_by("-job_logs__job__push__time") ) - groups = {} - jobs = {} + mappings = {} for item in all_groups: + jobname = item["job_logs__job__job_type__name"].strip("-cf") + try: + int(jobname.split("-")[-1]) + jobname = "-".join(jobname.split("-")[:-1]) + except ValueError: + pass + + if jobname != jtname: + # we have a variant + continue + + if item["job_logs__job__push__id"] not in mappings: + mappings[item["job_logs__job__push__id"]] = {"groups": {}, "jobs": {}} + groups = mappings[item["job_logs__job__push__id"]]["groups"] + jobs = mappings[item["job_logs__job__push__id"]]["jobs"] + if item["name"] not in groups: groups[item["name"]] = {} if item["job_logs__job__id"] not in groups[item["name"]]: @@ -32,24 +71,64 @@ def check_and_mark_intermittent(job_id): if item["name"] not in jobs[item["job_logs__job__id"]]: jobs[item["job_logs__job__id"]][item["name"]] = item["group_result__status"] - if len(jobs.keys()) <= 1: - # zero jobs == no groups reported (i.e. marionette) - # 1 job == no additional data - return - - for job in jobs.keys(): - # for each similar task.label, ensure all groups have >=50% pass rate, if so flag failing - # job as intermittent. for non test failures, ensure all groups are green - all_green = True - failed_groups = [g for g in jobs[job] if int(jobs[job][g]) == GroupStatus.ERROR] - for group in failed_groups: - all_status = [groups[group][j] for j in groups[group]] - pass_rate = len([s for s in all_status if s == GroupStatus.OK]) / len(all_status) - if pass_rate < 0.5: - all_green = False - break - - target_job = Job.objects.filter(id=job) - - if all_green and target_job[0].result != "success": - target_job.update(failure_classification_id=4) + # multi push support - want to look back in history now that we have "future" data + # a previous job can only change if ALL failing groups have future passing data + # + # current job has new data, lets find all groups that changed status as a result of new data + # if no groups, possibly an "infra" error + changed_groups = {} + current_changed_groups = {} + for group in mappings.get(current_job.push.id, {}).get("groups", []): + all_data = [] + current_data = [] + for id in mappings.keys(): + all_data.extend( + [mappings[id]["groups"][group][j] for j in mappings[id]["groups"].get(group, {})] + ) + if id == current_job.push.id: + current_data.extend( + [ + mappings[id]["groups"][group][j] + for j in mappings[id]["groups"].get(group, {}) + ] + ) + + # if new data changes results, update + pass_rate = len([s for s in all_data if s == GroupStatus.OK]) / len(all_data) + if pass_rate >= 0.5: + changed_groups[group] = True + pass_rate = len([s for s in current_data if s == GroupStatus.OK]) / len(current_data) + if pass_rate >= 0.5: + current_changed_groups[group] = True + + # all changed_groups need to be evaluated on previous 'failed' jobs to ensure all groups in that task are 'passing' + for id in mappings.keys(): + for job in mappings[id]["jobs"]: + all_green = True + current_all_green = True + for group in mappings[id]["jobs"][job]: + # if group changed to failing and group originally failed + if ( + mappings[id]["groups"][group][job] == GroupStatus.ERROR + and group not in changed_groups + ): + all_green = False + if ( + mappings[id]["groups"][group][job] == GroupStatus.ERROR + and group not in current_changed_groups + ): + current_all_green = False + + if (id == current_job.push.id and current_all_green) or ( + id != current_job.push.id and len(ids) > 1 and all_green + ): + target_job = Job.objects.filter(id=job) + + # TODO: infra would be nice to detect, but in the case of no groups, our data set == [] + # edge case is all groups originally pass and then shutdown leaks cause 'testfailed'. + # also we ignore infra/leaks that don't report group failures in errorsummary files + if ( + target_job[0].result != "success" + and target_job[0].failure_classification_id != 4 + ): + target_job.update(failure_classification_id=4) diff --git a/treeherder/log_parser/tasks.py b/treeherder/log_parser/tasks.py index 38d2519b5b6..aaa8d3f70a6 100644 --- a/treeherder/log_parser/tasks.py +++ b/treeherder/log_parser/tasks.py @@ -3,6 +3,7 @@ import newrelic.agent import simplejson as json from celery.exceptions import SoftTimeLimitExceeded +from django.conf import settings from requests.exceptions import HTTPError from treeherder.etl.artifact import serialize_artifact_json_blobs, store_job_artifacts @@ -81,8 +82,10 @@ def store_failure_lines(job_log): errorsummary file.""" logger.info("Running store_failure_lines for job %s", job_log.job.id) failureline.store_failure_lines(job_log) - logger.info("Running check_and_mark_intermittent for job %s", job_log.job.id) - intermittents.check_and_mark_intermittent(job_log.job.id) + # NOTE: do not run on production + if settings.SITE_HOSTNAME != "treeherder.mozilla.org": + logger.info("Running check_and_mark_intermittent for job %s", job_log.job.id) + intermittents.check_and_mark_intermittent(job_log.job.id) def post_log_artifacts(job_log): From 2007ef570f275d3313b9239744f35f62df40801d Mon Sep 17 00:00:00 2001 From: Joel Maher Date: Sat, 14 Jun 2025 05:41:28 -0700 Subject: [PATCH 3/6] add support for failure_classification_id=8 --- tests/conftest.py | 3 ++- tests/etl/test_classification_loader.py | 2 +- tests/log_parser/test_store_failure_lines.py | 8 ++++---- treeherder/log_parser/intermittents.py | 12 ++++++------ .../model/fixtures/failure_classification.json | 7 +++++++ treeherder/model/models.py | 2 +- treeherder/webapp/api/refdata.py | 2 +- ui/helpers/job.js | 9 +++++++-- ui/job-view/details/tabs/SimilarJobsTab.jsx | 6 +++--- ui/job-view/pushes/JobButton.jsx | 5 ++++- ui/models/classificationType.js | 1 + 11 files changed, 37 insertions(+), 20 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index a0fdf470c85..a31efe23d3d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -626,8 +626,9 @@ def failure_classifications(transactional_db): "expected fail", "intermittent", "infra", - "intermittent needs filing", + "new failure not classified", "autoclassified intermittent", + "intermittent needs bugid", ]: th_models.FailureClassification(name=name).save() diff --git a/tests/etl/test_classification_loader.py b/tests/etl/test_classification_loader.py index 5c211572720..932e81ac456 100644 --- a/tests/etl/test_classification_loader.py +++ b/tests/etl/test_classification_loader.py @@ -539,7 +539,7 @@ def test_new_classification(autoland_push, sample_data, test_two_jobs_tc_metadat # first is NEW second_job = Job.objects.get(id=1) first_job = Job.objects.get(id=2) - assert first_job.failure_classification.name == "intermittent needs filing" + assert first_job.failure_classification.name == "new failure not classified" # second instance is normal assert second_job.failure_classification.name == "not classified" diff --git a/tests/log_parser/test_store_failure_lines.py b/tests/log_parser/test_store_failure_lines.py index 04e5d68f33d..e00de19a488 100644 --- a/tests/log_parser/test_store_failure_lines.py +++ b/tests/log_parser/test_store_failure_lines.py @@ -343,7 +343,7 @@ def test_infra_intermittent(activate_responses, hundred_job_blobs, mock_parser, # this will parse and check for intermittents mock_full_log_parser(job_logs, mock_parser) - verify_classification_id(jobs, 4, 1) + verify_classification_id(jobs, 8, 1) def test_multiple_jobs_intermittent( @@ -361,7 +361,7 @@ def test_multiple_jobs_intermittent( # this will parse and check for intermittents mock_full_log_parser(job_logs, mock_parser) - verify_classification_id(jobs, 4, 4) + verify_classification_id(jobs, 8, 8) def test_confirm_failure_no_intermittent( @@ -415,7 +415,7 @@ def test_confirm_failure_pass_intermittent( # this will parse and check for intermittents mock_full_log_parser(job_logs, mock_parser) - verify_classification_id(jobs, 4, 1) + verify_classification_id(jobs, 8, 1) def test_retrigger_no_intermittent(activate_responses, hundred_job_blobs, mock_parser, create_jobs): @@ -445,4 +445,4 @@ def test_retrigger_intermittent(activate_responses, hundred_job_blobs, mock_pars # this will parse and check for intermittents mock_full_log_parser(job_logs, mock_parser) - verify_classification_id(jobs, 4, 4) + verify_classification_id(jobs, 8, 8) diff --git a/treeherder/log_parser/intermittents.py b/treeherder/log_parser/intermittents.py index 2654ec5e49e..afbb1a8d6de 100644 --- a/treeherder/log_parser/intermittents.py +++ b/treeherder/log_parser/intermittents.py @@ -26,7 +26,8 @@ def check_and_mark_intermittent(job_id): 1, 4, 6, - ], # not classified, intermittent, new_failure; TODO: consider 7 == autoclassified + 8, + ], # not classified, intermittent, new_failure, intermittent needs bug; TODO: consider 7 == autoclassified job_logs__job__result__in=[ "success", "testfailed", @@ -127,8 +128,7 @@ def check_and_mark_intermittent(job_id): # TODO: infra would be nice to detect, but in the case of no groups, our data set == [] # edge case is all groups originally pass and then shutdown leaks cause 'testfailed'. # also we ignore infra/leaks that don't report group failures in errorsummary files - if ( - target_job[0].result != "success" - and target_job[0].failure_classification_id != 4 - ): - target_job.update(failure_classification_id=4) + if target_job[0].result != "success" and target_job[ + 0 + ].failure_classification_id not in [4, 8]: + target_job.update(failure_classification_id=8) diff --git a/treeherder/model/fixtures/failure_classification.json b/treeherder/model/fixtures/failure_classification.json index 5517c6a47f6..22688ec4fd1 100644 --- a/treeherder/model/fixtures/failure_classification.json +++ b/treeherder/model/fixtures/failure_classification.json @@ -47,5 +47,12 @@ "fields": { "name": "autoclassified intermittent" } + }, + { + "pk": 8, + "model": "model.failureclassification", + "fields": { + "name": "intermittent needs bugid" + } } ] diff --git a/treeherder/model/models.py b/treeherder/model/models.py index a7785b4e835..cf692413fb5 100644 --- a/treeherder/model/models.py +++ b/treeherder/model/models.py @@ -878,7 +878,7 @@ def _ensure_classification(self): return # if the failure type isn't intermittent, ignore - if self.failure_classification.name not in ["intermittent"]: + if self.failure_classification.name not in ["intermittent", "intermittent needs bugid"]: return # if the linked Job has more than one TextLogError, ignore diff --git a/treeherder/webapp/api/refdata.py b/treeherder/webapp/api/refdata.py index 7abab6cbca6..1f56439c72d 100644 --- a/treeherder/webapp/api/refdata.py +++ b/treeherder/webapp/api/refdata.py @@ -39,7 +39,7 @@ def list(self, request): class FailureClassificationViewSet(viewsets.ReadOnlyModelViewSet): """ViewSet for the refdata FailureClassification model""" - queryset = models.FailureClassification.objects + queryset = models.FailureClassification.objects.exclude(name="intermittent needs filing") serializer_class = th_serializers.FailureClassificationSerializer diff --git a/ui/helpers/job.js b/ui/helpers/job.js index ecaf62d8318..d51977b106b 100644 --- a/ui/helpers/job.js +++ b/ui/helpers/job.js @@ -21,7 +21,8 @@ const btnClasses = { }; // failure classification ids that should be shown in "unclassified" mode -export const thUnclassifiedIds = [1, 6, 7]; +// TODO: consider dropping 8 from this list, only here for full compatibility +export const thUnclassifiedIds = [1, 6, 7, 8]; // Get the CSS class for job buttons as well as jobs that show in the pinboard. // These also apply to result "groupings" like ``failures`` and ``in progress`` @@ -34,7 +35,11 @@ export const getBtnClass = function getBtnClass( // handle if a job is classified > 1 // and not "NEW failure", classification == 6 - if (failureClassificationId > 1 && failureClassificationId !== 6) { + // TODO: consider dropping 8 from this list, only here for full compatibility + if ( + failureClassificationId > 1 && + ![6, 8].includes(failureClassificationId) + ) { btnClass += '-classified'; } return btnClass; diff --git a/ui/job-view/details/tabs/SimilarJobsTab.jsx b/ui/job-view/details/tabs/SimilarJobsTab.jsx index b3cca3527a6..fd2a5983990 100644 --- a/ui/job-view/details/tabs/SimilarJobsTab.jsx +++ b/ui/job-view/details/tabs/SimilarJobsTab.jsx @@ -190,9 +190,9 @@ class SimilarJobsTab extends React.Component { > {similarJob.job_type_symbol} {similarJob.failure_classification_id > 1 && - similarJob.failure_classification_id !== 6 && ( - * - )} + ![6, 8].includes( + similarJob.failure_classification_id, + ) && *} diff --git a/ui/job-view/pushes/JobButton.jsx b/ui/job-view/pushes/JobButton.jsx index 261448a352f..d579e338f58 100644 --- a/ui/job-view/pushes/JobButton.jsx +++ b/ui/job-view/pushes/JobButton.jsx @@ -105,7 +105,10 @@ export default class JobButtonComponent extends React.Component { const btnClass = getBtnClass(resultStatus, failureClassificationId); let classifiedIcon = null; - if (failureClassificationId > 1 && failureClassificationId !== 6) { + if ( + failureClassificationId > 1 && + ![6, 8].includes(failureClassificationId) + ) { classifiedIcon = failureClassificationId === 7 ? faStarRegular : faStarSolid; } diff --git a/ui/models/classificationType.js b/ui/models/classificationType.js index 68ec558ad34..799959a44c2 100644 --- a/ui/models/classificationType.js +++ b/ui/models/classificationType.js @@ -7,6 +7,7 @@ const classificationColors = { 4: 'label-warning', // intermittent", 5: 'label-default', // infra", 6: '', // new failure not classified", + 8: 'label-danger', // intermittent needs bugid", }; const uri = getApiUrl('/failureclassification/'); From 9e569850fcf7b1b447c4f573e51dd81f8f5d5651 Mon Sep 17 00:00:00 2001 From: Joel Maher Date: Thu, 26 Jun 2025 18:27:48 -0700 Subject: [PATCH 4/6] address feedback --- treeherder/log_parser/intermittents.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/treeherder/log_parser/intermittents.py b/treeherder/log_parser/intermittents.py index afbb1a8d6de..cfc91cebfcf 100644 --- a/treeherder/log_parser/intermittents.py +++ b/treeherder/log_parser/intermittents.py @@ -14,12 +14,25 @@ def check_and_mark_intermittent(job_id): # if we are not on try, look at recent history if current_job.repository.id != 4: - # get list of pushes - ids = Push.objects.filter(repository__id=current_job.repository.id).values("id")[:20] + # get list of pushes, find the current push and recent pushes + idlist = Push.objects.filter(repository__id=current_job.repository.id).values("id") + counter = -1 + for id in idlist: + if id == current_job.push.id: + counter = 0 + continue + if counter < 0: + continue + if current_job.repository.id == 77 and counter >= 20: + break + elif counter >= 3: + break + ids.append(id) + counter += 1 all_groups = ( Group.objects.filter( - job_logs__job__push__id__in=ids, + job_logs__job__push__id__range=(ids[-1],ids[0]), job_logs__job__push__repository__id=current_job.repository.id, job_logs__job__job_type__name__startswith=jtname, job_logs__job__failure_classification__id__in=[ @@ -41,7 +54,7 @@ def check_and_mark_intermittent(job_id): "job_logs__job__job_type__name", "job_logs__job__push__id", ) - .order_by("-job_logs__job__push__time") + .order_by("-job_logs__job__push__id") ) mappings = {} From 69e086b9d56089e3655f2c8083685de11aef250d Mon Sep 17 00:00:00 2001 From: Joel Maher Date: Tue, 13 May 2025 11:48:04 -0700 Subject: [PATCH 5/6] Add post log parser to look for repeated test runs and annotate as intermittent. --- tests/log_parser/test_tasks.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/log_parser/test_tasks.py b/tests/log_parser/test_tasks.py index 3622be324a7..43c863c7931 100644 --- a/tests/log_parser/test_tasks.py +++ b/tests/log_parser/test_tasks.py @@ -228,3 +228,23 @@ def test_bug_suggestion_line_no_stb( ), } ] + + +@pytest.mark.django_db +def test_confirm_failure_intermittent( + failure_classifications, jobs_with_local_log, sample_push, test_repository +): + """ + TODO: write tests for testing intermittents.py handling in the parser. + * test retrigger with 1 similar failure, but both jobs have different failures - both orange + * test 5 jobs, 2 fail, 2 fail for other reasons, 1 pass - all green + * test infra/tooling error + 1x green - both green + * test failure w/3x tests + 3x confirm-failure tasks green - all green + * test failure w/3x tests + 2x confirm-failure tasks green - original task still orange + """ + store_push_data(test_repository, sample_push) + for job in jobs_with_local_log: + job["job"]["result"] = "testfailed" + job["revision"] = sample_push[0]["revision"] + store_job_data(test_repository, jobs_with_local_log) + assert 1 == 0 From 32b9438d7d523e64fe4a54cf341cf978c074031a Mon Sep 17 00:00:00 2001 From: Joel Maher Date: Wed, 21 May 2025 15:42:17 -0700 Subject: [PATCH 6/6] add support for non try branches in a time series --- tests/log_parser/test_tasks.py | 20 -------------------- treeherder/log_parser/intermittents.py | 14 +++++++++++--- 2 files changed, 11 insertions(+), 23 deletions(-) diff --git a/tests/log_parser/test_tasks.py b/tests/log_parser/test_tasks.py index 43c863c7931..3622be324a7 100644 --- a/tests/log_parser/test_tasks.py +++ b/tests/log_parser/test_tasks.py @@ -228,23 +228,3 @@ def test_bug_suggestion_line_no_stb( ), } ] - - -@pytest.mark.django_db -def test_confirm_failure_intermittent( - failure_classifications, jobs_with_local_log, sample_push, test_repository -): - """ - TODO: write tests for testing intermittents.py handling in the parser. - * test retrigger with 1 similar failure, but both jobs have different failures - both orange - * test 5 jobs, 2 fail, 2 fail for other reasons, 1 pass - all green - * test infra/tooling error + 1x green - both green - * test failure w/3x tests + 3x confirm-failure tasks green - all green - * test failure w/3x tests + 2x confirm-failure tasks green - original task still orange - """ - store_push_data(test_repository, sample_push) - for job in jobs_with_local_log: - job["job"]["result"] = "testfailed" - job["revision"] = sample_push[0]["revision"] - store_job_data(test_repository, jobs_with_local_log) - assert 1 == 0 diff --git a/treeherder/log_parser/intermittents.py b/treeherder/log_parser/intermittents.py index cfc91cebfcf..590ef3dfa0a 100644 --- a/treeherder/log_parser/intermittents.py +++ b/treeherder/log_parser/intermittents.py @@ -1,3 +1,5 @@ +import datetime + from treeherder.model.models import Group, GroupStatus, Job, Push @@ -14,8 +16,14 @@ def check_and_mark_intermittent(job_id): # if we are not on try, look at recent history if current_job.repository.id != 4: + start_date = current_job.push.time - datetime.timedelta(hours=36) + # get list of pushes, find the current push and recent pushes - idlist = Push.objects.filter(repository__id=current_job.repository.id).values("id") + idlist = ( + Push.objects.filter(repository__id=current_job.repository.id, time__gte=start_date) + .values("id") + .order_by("-id") + ) counter = -1 for id in idlist: if id == current_job.push.id: @@ -25,14 +33,14 @@ def check_and_mark_intermittent(job_id): continue if current_job.repository.id == 77 and counter >= 20: break - elif counter >= 3: + elif current_job.repository.id != 77 and counter >= 3: break ids.append(id) counter += 1 all_groups = ( Group.objects.filter( - job_logs__job__push__id__range=(ids[-1],ids[0]), + job_logs__job__push__id__range=(ids[-1], ids[0]), job_logs__job__push__repository__id=current_job.repository.id, job_logs__job__job_type__name__startswith=jtname, job_logs__job__failure_classification__id__in=[