diff --git a/merino/curated_recommendations/rankers.py b/merino/curated_recommendations/rankers.py index 9c6f76820..aed3da933 100644 --- a/merino/curated_recommendations/rankers.py +++ b/merino/curated_recommendations/rankers.py @@ -1,5 +1,8 @@ """Algorithms for ranking curated recommendations.""" +import sentry_sdk +import logging +import math from copy import copy from datetime import datetime, timedelta, timezone @@ -20,6 +23,8 @@ from scipy.stats import beta import numpy as np +logger = logging.getLogger(__name__) + def renumber_recommendations(recommendations: list[CuratedRecommendation]) -> None: """Renumber the receivedRank of each recommendation to be sequential. @@ -57,6 +62,99 @@ def renumber_sections(ordered_sections: list[tuple[str, Section]]) -> dict[str, TOP_STORIES_SECTION_KEY = "top_stories_section" +# For taking down reported content +DEFAULT_REPORT_RECS_RATIO_THRESHOLD = 0.001 # using a low number for now (0.1%) +DEFAULT_SAFEGUARD_CAP_TAKEDOWN_FRACTION = 0.50 # allow at most 50% of recs to be auto-removed + + +def takedown_reported_recommendations( + recs: list[CuratedRecommendation], + engagement_backend: EngagementBackend, + region: str | None = None, + report_ratio_threshold: float = DEFAULT_REPORT_RECS_RATIO_THRESHOLD, + safeguard_cap_takedown_fraction: float = DEFAULT_SAFEGUARD_CAP_TAKEDOWN_FRACTION, +) -> list[CuratedRecommendation]: + """Takedown highly-reported content & return filtered list of recommendations. + + - Exclude any recommendation which breaches (report_count / impression_count) > threshold. + - Apply a safety cap: allow a certain fraction (50%) of all available recommendations to be taken down automatically. + - Log an error for the excluded rec, send the error to Sentry. + + :param recs: All recommendations to filter. + :param engagement_backend: Provides report_count & impression_count data by corpusItemId. + :param region: Optionally, the client's region, e.g. 'US'. + :param report_ratio_threshold: Threshold indicating which recommendation should be excluded. + :param safeguard_cap_takedown_fraction: Max fraction of recommendations that can be auto-removed. + + :return: Filtered list of recommendations. + """ + # Save engagement metrics for logging: {corpusItemId: (report_ratio, reports, impressions)} + rec_eng_metrics: dict[str, tuple[float, int, int]] = {} + + def _report_ratio_for(rec: CuratedRecommendation) -> float: + if engagement := engagement_backend.get(rec.corpusItemId, region): + _impressions = int(engagement.impression_count or 0) + _reports = int(engagement.report_count or 0) + if _impressions > 0: + report_ratio = _reports / _impressions + rec_eng_metrics[rec.corpusItemId] = (report_ratio, _reports, _impressions) + return report_ratio + return -1.0 # treat as safe (won’t breach threshold) + + # Filter first; common case is empty + over = [rec for rec in recs if _report_ratio_for(rec) > report_ratio_threshold] + if not over: + return recs + + # Compute the safeguard cap, # of recs safe to remove from total list of reported_recs + # rounded up to avoid 0 removals + max_recs_to_remove = math.ceil(len(recs) * safeguard_cap_takedown_fraction) + + # Sort only the small over-threshold set by ratio; no tie-breakers. + over.sort(key=_report_ratio_for, reverse=True) + + # Select top N to remove + recs_to_remove = over[:max_recs_to_remove] + removed_rec_ids = {r.corpusItemId for r in recs_to_remove} + + for rec in recs_to_remove: + ratio, reports, impressions = rec_eng_metrics.get(rec.corpusItemId, (-1.0, 0, 0)) + # Log a warning for our backend logs + logger.warning( + f"Excluding reported recommendation: '{rec.title}' ({rec.url}) was excluded due to high reports", + extra={ + "corpus_item_id": rec.corpusItemId, + "report_ratio": ratio, + "reports": reports, + "impressions": impressions, + "threshold": report_ratio_threshold, + "region": region, + }, + ) + + # Send a structured event to Sentry as a warning 🟑 + sentry_sdk.capture_message( + f"Excluding reported recommendation: '{rec.title}' ({rec.url}) excluded due to high reports", + level="warning", + scope=lambda scope: scope.set_context( + "excluding reported recommendation", + { + "corpus_item_id": rec.corpusItemId, + "title": rec.title, + "url": str(rec.url), + "report_ratio": ratio, + "reports": reports, + "impressions": impressions, + "threshold": report_ratio_threshold, + "region": region, + }, + ), + ) + + # Return remaining recs + remaining_recs = [rec for rec in recs if rec.corpusItemId not in removed_rec_ids] + return remaining_recs + def thompson_sampling( recs: list[CuratedRecommendation], diff --git a/merino/curated_recommendations/sections.py b/merino/curated_recommendations/sections.py index f60324c3a..2c1765695 100644 --- a/merino/curated_recommendations/sections.py +++ b/merino/curated_recommendations/sections.py @@ -43,6 +43,7 @@ section_thompson_sampling, put_top_stories_first, greedy_personalized_section_rank, + takedown_reported_recommendations, ) from merino.curated_recommendations.utils import is_enrolled_in_experiment @@ -534,16 +535,30 @@ async def get_sections( rec for section in corpus_sections.values() for rec in section.recommendations ] - # 5. Rank all corpus recommendations globally by engagement to build top_stories_section - all_ranked_corpus_recommendations = thompson_sampling( + # 5. Remove reported recommendations + all_remaining_corpus_recommendations = takedown_reported_recommendations( all_corpus_recommendations, engagement_backend=engagement_backend, + region=region, + ) + + # 6. Update corpus_sections to make sure reported takedown recs are not present + remaining_ids = {rec.corpusItemId for rec in all_remaining_corpus_recommendations} + for cs in corpus_sections.values(): + cs.recommendations = [ + rec for rec in cs.recommendations if rec.corpusItemId in remaining_ids + ] + + # 7. Rank all corpus recommendations globally by engagement to build top_stories_section + all_ranked_corpus_recommendations = thompson_sampling( + all_remaining_corpus_recommendations, + engagement_backend=engagement_backend, prior_backend=prior_backend, region=region, rescaler=rescaler, ) - # 6. Split top stories + # 8. Split top stories # Use 2-row layout as default for Popular Today top_stories_count = DOUBLE_ROW_TOP_STORIES_COUNT popular_today_layout = layout_7_tiles_2_ads @@ -554,11 +569,11 @@ async def get_sections( top_stories_rec_ids = {rec.corpusItemId for rec in top_stories} - # 7. Remove top story recs from original corpus sections + # 9. Remove top story recs from original corpus sections for cs in corpus_sections.values(): cs.recommendations = remove_top_story_recs(cs.recommendations, top_stories_rec_ids) - # 8. Rank remaining recs in sections by engagement + # 10. Rank remaining recs in sections by engagement for cs in corpus_sections.values(): cs.recommendations = thompson_sampling( cs.recommendations, @@ -568,7 +583,7 @@ async def get_sections( rescaler=rescaler, ) - # 9. Initialize sections with top stories + # 11. Initialize sections with top stories sections: dict[str, Section] = { "top_stories_section": Section( receivedFeedRank=0, @@ -578,13 +593,13 @@ async def get_sections( ) } - # 10. Add remaining corpus sections + # 12. Add remaining corpus sections sections.update(corpus_sections) - # 11. Prune undersized sections + # 13. Prune undersized sections sections = get_sections_with_enough_items(sections) - # 12. Rank the sections according to follows and engagement. 'Top Stories' goes at the top. + # 14. Rank the sections according to follows and engagement. 'Top Stories' goes at the top. sections = rank_sections( sections, request.sections, @@ -593,10 +608,10 @@ async def get_sections( experiment_rescaler=rescaler, ) - # 13. Apply final layout cycling to ranked sections except top_stories + # 15. Apply final layout cycling to ranked sections except top_stories cycle_layouts_for_ranked_sections(sections, LAYOUT_CYCLE) - # 14. Apply ad adjustments + # 16. Apply ad adjustments adjust_ads_in_sections(sections) return sections diff --git a/tests/integration/api/v1/curated_recommendations/test_curated_recommendations.py b/tests/integration/api/v1/curated_recommendations/test_curated_recommendations.py index dfc87c2b8..76c3843fb 100644 --- a/tests/integration/api/v1/curated_recommendations/test_curated_recommendations.py +++ b/tests/integration/api/v1/curated_recommendations/test_curated_recommendations.py @@ -62,8 +62,22 @@ class MockEngagementBackend(EngagementBackend): """Mock class implementing the protocol for EngagementBackend.""" + def __init__(self): + # {corpusItemId: (reports, impressions)} + self.metrics: dict[str, tuple[int, int]] = {} + def get(self, corpus_item_id: str, region: str | None = None) -> Engagement | None: """Return random click and impression counts based on the scheduled corpus id and region.""" + if corpus_item_id in self.metrics: + reports, impressions = self.metrics[corpus_item_id] + return Engagement( + corpus_item_id=corpus_item_id, + region=region, + click_count=0, + impression_count=impressions, + report_count=reports, + ) + HIGH_CTR_ITEMS = { "b2c10703-5377-4fe8-89d3-32fbd7288187": ( 1_000_000 * ML_EXPERIMENT_SCALE, @@ -2146,6 +2160,58 @@ def passed_region(call): passed_region(call) == derived_region for call in spy.call_args_list ), f"No engagement.get(..., region={repr(derived_region)}) in {spy.call_args_list}" + @pytest.mark.asyncio + @pytest.mark.parametrize( + "reports,impressions,should_remove", + [ + # Above threshold: 5 / 50 = 0.1 (10% > 0.1%) β†’ should be removed + (5, 50, True), + # Below threshold: 1 / 200,000 = 0.000005 (0.0005% < 0.1%) β†’ should stay + (1, 200_000, False), + # Exactly at threshold: 1 / 1,000 = 0.001 (0.1% == 0.1%) β†’ should stay + (1, 1_000, False), + # No reports: 0 / 100 = 0 < 0.001 β†’ should stay + (0, 100, False), + # No engagement data β†’ treated as safe β†’ should stay + (None, None, False), + ], + ) + async def test_takedown_reported_recommendations_parametrized( + self, engagement_backend, caplog, reports, impressions, should_remove + ): + """Verify takedown_reported_recommendations behaves correctly.""" + corpus_rec_id = "080de671-e4de-4a3d-863f-8c6dd6980069" + + if reports is not None and impressions is not None: + engagement_backend.metrics.update({corpus_rec_id: (reports, impressions)}) + + async with AsyncClient(app=app, base_url="http://test") as ac: + response = await ac.post( + "/api/v1/curated-recommendations", + json={ + "locale": "en-US", + "feeds": ["sections"], + "experimentName": ExperimentName.ML_SECTIONS_EXPERIMENT.value, + "experimentBranch": "treatment", + }, + ) + + assert response.status_code == 200 + feeds = response.json()["feeds"] + + response_ids = { + rec["corpusItemId"] + for sid, section in feeds.items() + if section + for rec in section.get("recommendations", []) + } + + if should_remove: + assert corpus_rec_id not in response_ids + assert any("Excluding reported recommendation" in r.message for r in caplog.records) + else: + assert corpus_rec_id in response_ids + @pytest.mark.asyncio async def test_curated_recommendations_enriched_with_icons( diff --git a/tests/unit/curated_recommendations/test_rankers.py b/tests/unit/curated_recommendations/test_rankers.py index d39efeceb..f03711a40 100644 --- a/tests/unit/curated_recommendations/test_rankers.py +++ b/tests/unit/curated_recommendations/test_rankers.py @@ -1,5 +1,6 @@ """Unit test for ranker algorithms used to rank curated recommendations.""" +import logging import uuid import pytest @@ -9,7 +10,10 @@ from freezegun import freeze_time from pydantic import HttpUrl + +from merino.curated_recommendations import EngagementBackend from merino.curated_recommendations.corpus_backends.protocol import Topic +from merino.curated_recommendations.engagement_backends.protocol import Engagement from merino.curated_recommendations.layouts import layout_4_medium, layout_4_large, layout_6_tiles from merino.curated_recommendations.protocol import ( CuratedRecommendation, @@ -26,6 +30,7 @@ renumber_recommendations, put_top_stories_first, greedy_personalized_section_rank, + takedown_reported_recommendations, ) from tests.unit.curated_recommendations.fixtures import ( generate_recommendations, @@ -33,6 +38,106 @@ ) +class MockEngagementBackend(EngagementBackend): + """Mock class implementing the protocol for EngagementBackend.""" + + def __init__(self, metrics: dict[str, tuple[int, int]]): + # {corpusItemId: (reports, impressions)} + self.metrics = metrics + + def get(self, corpus_item_id: str, region: str | None = None) -> Engagement | None: + """Return a mock Engagement object for a given corpusItemId.""" + if corpus_item_id not in self.metrics: + return None + reports, impressions = self.metrics[corpus_item_id] + return Engagement( + corpus_item_id=corpus_item_id, + region=region, + click_count=0, + impression_count=impressions, + report_count=reports, + ) + + +class TestTakedownReportedRecommendations: + """Tests for the takedown_reported_recommendations function.""" + + def test_empty_list(self): + """Test that takedown_reported_recommendations works with an empty list.""" + backend = MockEngagementBackend({}) + assert takedown_reported_recommendations([], backend) == [] + + def test_no_engagement_data(self): + """Test that takedown_reported_recommendations keep all recommendations if no engagement data.""" + recs = generate_recommendations(item_ids=["1", "2", "3"]) + backend = MockEngagementBackend({}) + remaining_recs = takedown_reported_recommendations(recs, backend) + assert remaining_recs == recs + + def test_keep_recs_below_threshold(self): + """Test that takedown_reported_recommendations keeps reported recommendations with report_ratio <= threshold.""" + recs = generate_recommendations(item_ids=["a"]) + # 1 report / 200 impression = 0.005 < 0.01 threshold + backend = MockEngagementBackend({"a": (1, 200)}) + remaining_recs = takedown_reported_recommendations( + recs, backend, report_ratio_threshold=0.01 + ) + assert remaining_recs == recs + + def test_remove_recs_above_threshold(self, caplog): + """Test that takedown_reported_recommendations removes recommendations with report_ratio > threshold + and logs a warning for the excluded recommendation. + """ + recs = generate_recommendations(item_ids=["reported_rec", "good_rec"]) + # report_ratio_threshold = 1% + # "bad" = 5 reports / 50 impressions = 0.10 report_ratio > 0.01 threshold + # "good" = 0 reports / 50 impressions = 0.0 report_ratio + backend = MockEngagementBackend({"reported_rec": (5, 50), "good_rec": (0, 50)}) + + caplog.set_level(logging.WARNING) + + remaining_recs = takedown_reported_recommendations( + recs, backend, report_ratio_threshold=0.01 + ) + # Assert only "good_rec" is returned in remaining_recs + assert [rec.corpusItemId for rec in remaining_recs] == ["good_rec"] + + # Assert a warning was logged about excluding "reported_rec" + warnings = [r.message for r in caplog.records if r.levelno == logging.WARNING] + assert any("Excluding reported recommendation" in msg for msg in warnings) + # Check extra fields logged for the excluded rec + rec = next(r for r in caplog.records if r.levelno == logging.WARNING) + assert rec.corpus_item_id == "reported_rec" + assert rec.reports == 5 + assert rec.impressions == 50 + + def test_safeguard_fraction_applied(self): + """Test that takedown_reported_recommendations should only remove up to safeguard fraction, + even if more recommendations breach threshold. + """ + recs = generate_recommendations(item_ids=["1", "2", "3", "4"]) + # All 4 recs breach: 10 reports / 50 impressions = 0.2 report_ratio > 0.01 threshold + metrics = {corpus_id: (10, 50) for corpus_id in ["1", "2", "3", "4"]} + backend = MockEngagementBackend(metrics) + # safeguard_cap_takedown_fraction == 50% => ceil(4 recs * 0.5) = max 2 removals + remaining_recs = takedown_reported_recommendations( + recs, backend, report_ratio_threshold=0.01, safeguard_cap_takedown_fraction=0.5 + ) + # Check that 2 recs were removed, 2 recs should be in the final result + assert len(remaining_recs) == 2 + + def test_zero_impressions_skipped(self): + """Test that takedown_reported_recommendations does not remove recommendations with 0 impressions.""" + recs = generate_recommendations(item_ids=["1"]) + # 5 reports / 0 impressions + backend = MockEngagementBackend({"1": (5, 0)}) + remaining_recs = takedown_reported_recommendations( + recs, backend, report_ratio_threshold=0.01 + ) + # Rec should remain in final result because report_ratio cannot be computed + assert [rec.corpusItemId for rec in remaining_recs] == ["1"] + + class TestRenumberRecommendations: """Tests for the renumber_recommendations function."""