Skip to content

Commit baf3b39

Browse files
authored
Pairwise inference handler implementation (#243)
- Added a pairwise inference handler and wired it into the RankLLM and DuoT5 classes. - Fixed an existing bug in truncating doc1 and doc2 based on the remaining context size.
1 parent 813fe2b commit baf3b39

File tree

6 files changed

+220
-54
lines changed

6 files changed

+220
-54
lines changed

src/rank_llm/rerank/pairwise/duot5.py

Lines changed: 10 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import logging
22
import math
3+
import re
34
from typing import List, Optional, Tuple
45

56
from transformers import T5ForConditionalGeneration, T5Tokenizer
@@ -16,7 +17,7 @@ def __init__(
1617
self,
1718
model: str,
1819
prompt_mode: str = "duot5",
19-
prompt_template_path: Optional[str] = None,
20+
prompt_template_path: str = "src/rank_llm/rerank/prompt_templates/duot5_template.yaml",
2021
context_size: int = 512,
2122
num_few_shot_examples: int = 0,
2223
few_shot_file: Optional[str] = None,
@@ -101,7 +102,7 @@ def run_llm(self, prompt: str) -> Tuple[str, int, float]:
101102
def create_prompt(
102103
self, result: Result, index1: int, index2: int
103104
) -> Tuple[str, int]:
104-
query = self._replace_number(result.query.text)
105+
query = re.sub(r"\[(\d+)\]", r"(\1)", result.query.text)
105106

106107
reserved_for_output = (
107108
64 # might need to change depending on what the actual output look like
@@ -117,29 +118,14 @@ def create_prompt(
117118
self._context_size - reserved_for_output - query_tokens - few_shot_tokens
118119
)
119120

120-
doc1_raw = self.convert_doc_to_prompt_content(
121-
result.candidates[index1].doc, max_length=max_token
121+
# TODO (issue #237): need to modify the class to be able to add fewshot examples later
122+
prompt = self._inference_handler.generate_prompt(
123+
result=result,
124+
index1=index1,
125+
index2=index2,
126+
max_token=max_token,
127+
tokenizer=self._tokenizer,
122128
)
123-
doc2_raw = self.convert_doc_to_prompt_content(
124-
result.candidates[index2].doc, max_length=max_token
125-
)
126-
127-
doc1_tokens = self._tokenizer.encode(
128-
doc1_raw, truncation=True, max_length=max_token
129-
)
130-
doc2_tokens = self._tokenizer.encode(
131-
doc2_raw, truncation=True, max_length=max_token
132-
)
133-
134-
doc1 = self._tokenizer.decode(doc1_tokens, skip_special_tokens=True)
135-
doc2 = self._tokenizer.decode(doc2_tokens, skip_special_tokens=True)
136-
137-
prompt = (
138-
few_shot_prompt
139-
+ f"Query: {query} Document0: {doc1} Document1: {doc2} Relevant: "
140-
)
141-
prompt = prompt.replace("<unk>", "")
142-
143129
return prompt, self.get_num_tokens(prompt)
144130

145131
def get_num_tokens(self, prompt: str) -> int:
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
from typing import Any, Dict

from transformers import T5Tokenizer

from rank_llm.data import Result
from rank_llm.rerank.inference_handler import BaseInferenceHandler


class PairwiseInferenceHandler(BaseInferenceHandler):
    """Inference handler that renders pairwise (duo-style) prompts.

    A pairwise prompt compares exactly two candidate documents of a
    :class:`Result` against the query, using the ``body`` section of the
    template with the ``{query}``, ``{doc1}`` and ``{doc2}`` placeholders.
    """

    def __init__(self, template: Dict[str, str]):
        super().__init__(template)

    def _validate_template(self, template: Dict[str, str], strict: bool = False):
        """Validate that *template* describes a pairwise prompt.

        Args:
            template: Parsed template mapping (must contain ``method`` and ``body``).
            strict: Forwarded to ``_general_validation``; presumably rejects
                unknown keys/placeholders when True — confirm against the base class.

        Raises:
            ValueError: If ``template["method"]`` is not ``"pairwise"`` or the
                required sections/placeholders are missing.
        """
        TEMPLATE_SECTIONS = {
            # Format:
            # "template_key": {
            #     "required": True/False,          # Whether the section itself is mandatory
            #     "required_placeholders": set(),  # Placeholders that must exist in this section
            #     "allowed_placeholders": set(),   # Optional placeholders beyond the required ones
            # }
            "body": {
                "required": True,
                "required_placeholders": {"query", "doc1", "doc2"},
                "allowed_placeholders": set(),
            },
        }

        # Validate the method value before anything else; a mismatched method
        # means the caller loaded the wrong template file.
        if template["method"] != "pairwise":
            raise ValueError(
                f'Incorrect method type, expected "pairwise", got {template["method"]}'
            )

        self._general_validation(
            template=template, template_section=TEMPLATE_SECTIONS, strict=strict
        )

    # TODO (issue #273): May need to add prefix/suffix generation function later

    def _generate_body(
        self,
        result: Result,
        index1: int,
        index2: int,
        single_doc_max_token: int,
        tokenizer: T5Tokenizer,
    ) -> str:
        """Render the template body for candidates *index1* and *index2*.

        Each document is truncated independently to *single_doc_max_token*
        tokens (encode with truncation, then decode back to text) so that one
        long document cannot consume the other document's share of the
        context window.
        """
        doc1_raw = self._convert_doc_to_prompt_content(
            result.candidates[index1].doc, max_length=single_doc_max_token
        )
        doc2_raw = self._convert_doc_to_prompt_content(
            result.candidates[index2].doc, max_length=single_doc_max_token
        )

        doc1_tokens = tokenizer.encode(
            doc1_raw, truncation=True, max_length=single_doc_max_token
        )
        doc2_tokens = tokenizer.encode(
            doc2_raw, truncation=True, max_length=single_doc_max_token
        )

        # _replace_number rewrites "[n]" citations so they are not confused
        # with ranking identifiers downstream.
        query = self._replace_number(result.query.text)
        doc1 = tokenizer.decode(doc1_tokens, skip_special_tokens=True)
        doc2 = tokenizer.decode(doc2_tokens, skip_special_tokens=True)

        fmt_values = {"query": query, "doc1": doc1, "doc2": doc2}
        body_text = self._format_template(template_key="body", fmt_values=fmt_values)

        return body_text

    def generate_prompt(self, result: Result, **kwargs: Any) -> str:
        """Build the full pairwise prompt for one document pair.

        Required keyword arguments:
            index1, index2: Candidate indices to compare.
            max_token: Token budget shared by BOTH documents; each document
                gets half of it.
            tokenizer: Tokenizer used for truncation.

        Raises:
            ValueError: If any required keyword argument is missing.
        """
        try:
            index1 = kwargs["index1"]
            index2 = kwargs["index2"]
            max_token = kwargs["max_token"]
            tokenizer = kwargs["tokenizer"]
        except KeyError as e:
            # Chain the KeyError so the missing-parameter cause is preserved.
            raise ValueError(f"Missing required parameter: {e}") from e

        # Split the remaining context budget evenly between the two documents.
        single_doc_max_token = max_token // 2

        prompt = self._generate_body(
            result=result,
            index1=index1,
            index2=index2,
            single_doc_max_token=single_doc_max_token,
            tokenizer=tokenizer,
        )
        # T5 tokenizers can emit "<unk>" for unmappable characters; strip it
        # so the literal token never appears in the prompt text.
        return prompt.replace("<unk>", "")

src/rank_llm/rerank/pairwise/pairwise_rankllm.py

Lines changed: 1 addition & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,8 @@
55
from abc import ABC
66
from datetime import datetime
77
from functools import cmp_to_key
8-
from typing import Any, Dict, List, Optional, Tuple
8+
from typing import Any, List, Optional, Tuple
99

10-
from ftfy import fix_text
1110
from tqdm import tqdm
1211

1312
from rank_llm.data import Candidate, Request, Result
@@ -172,32 +171,6 @@ def candidate_comparator(self, x: Candidate, y: Candidate) -> int:
172171
else:
173172
return 0
174173

175-
def _replace_number(self, s: str) -> str:
176-
return re.sub(r"\[(\d+)\]", r"(\1)", s)
177-
178-
def convert_doc_to_prompt_content(
179-
self, doc: Dict[str, Any], max_length: int
180-
) -> str:
181-
if "text" in doc:
182-
content = doc["text"]
183-
elif "segment" in doc:
184-
content = doc["segment"]
185-
elif "contents" in doc:
186-
content = doc["contents"]
187-
elif "content" in doc:
188-
content = doc["content"]
189-
elif "body" in doc:
190-
content = doc["body"]
191-
else:
192-
content = doc["passage"]
193-
if "title" in doc and doc["title"]:
194-
content = "Title: " + doc["title"] + " " + "Content: " + content
195-
content = content.strip()
196-
content = fix_text(content)
197-
# For Japanese should cut by character: content = content[:int(max_length)]
198-
content = " ".join(content.split()[: int(max_length)])
199-
return self._replace_number(content)
200-
201174
def _build_pairwise_few_shot_examples(self) -> str:
202175
if self._num_few_shot_examples > 0 and hasattr(self, "_examples"):
203176
examples = []
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
# Pairwise (duo-style) prompt template: one query compared against two documents.
method: "pairwise"
body: "Query: {query} Document0: {doc1} Document1: {doc2} Relevant: "

src/rank_llm/rerank/rankllm.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ def __init__(
5151

5252
if bool(data):
5353
self._inference_handler = self._create_handler(data)
54+
print(f"Successfully created {data['method']} inference handler!")
5455

5556
if self._num_few_shot_examples > 0:
5657
if not few_shot_file:
@@ -211,6 +212,9 @@ def _create_handler(self, template: Dict[str, str]) -> BaseInferenceHandler:
211212
from rank_llm.rerank.listwise.singleturn_listwise_inference_handler import (
212213
SingleTurnListwiseInferenceHandler,
213214
)
215+
from rank_llm.rerank.pairwise.pairwise_inference_handler import (
216+
PairwiseInferenceHandler,
217+
)
214218
from rank_llm.rerank.pointwise.pointwise_inference_handler import (
215219
PointwiseInferenceHandler,
216220
)
@@ -222,8 +226,10 @@ def _create_handler(self, template: Dict[str, str]) -> BaseInferenceHandler:
222226
return MultiTurnListwiseInferenceHandler(template)
223227
elif template["method"] == "pointwise":
224228
return PointwiseInferenceHandler(template)
225-
else: # TODO(issue #236 and #237): Need to remove this after all the handlers are implemented
226-
return SingleTurnListwiseInferenceHandler(template)
229+
elif template["method"] == "pairwise":
230+
return PairwiseInferenceHandler(template)
231+
else:
232+
raise ValueError("Invalid template method")
227233
except:
228234
raise ValueError("Please provide a method section in the template")
229235

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
import unittest

from dacite import from_dict
from transformers import T5Tokenizer

from rank_llm.data import Result
from rank_llm.rerank.pairwise.pairwise_inference_handler import PairwiseInferenceHandler


def _candidate(contents: str, docid: str, score: float) -> dict:
    """Build one raw candidate entry for the Result fixture."""
    return {"doc": {"contents": contents}, "docid": docid, "score": score}


# Shared Result fixture with four candidates of descending score.
r = from_dict(
    data_class=Result,
    data={
        "query": {"text": "Sample Query", "qid": "q1"},
        "candidates": [
            _candidate("Title1: Sample Title1 Content1: Sample Text1", "d1", 0.5),
            _candidate("Title2: Sample Title2 Content2: Sample Text2", "d2", 0.4),
            _candidate("Title3: Sample Title3 Content3: Sample Text3", "d3", 0.4),
            _candidate("Title4: Sample Title4 Content4: Sample Text4", "d4", 0.3),
        ],
    },
)


VALID_PAIRWISE_TEMPLATE = {
    "method": "pairwise",
    "body": "Query: {query} Document0: {doc1} Document1: {doc2}",
}
INVALID_PAIRWISE_TEMPLATES = [
    {
        "method": "singleturn_listwise",
        "body": "{query} {doc1} {doc2}",
    },  # Wrong method type
    {
        "method": "pairwise",
        "body": "{query} {doc1}",
    },  # Missing required placeholder: {doc2}
    {
        "method": "pairwise",
        "body": "{query} {doc1} {doc2}",
        "unknown_key": "value",
    },  # Unknown key
]
tokenizer = T5Tokenizer.from_pretrained("castorini/duot5-3b-msmarco-10k")


class TestPairwiseInferenceHandler(unittest.TestCase):
    def test_pairwise_valid_template_initialization(self):
        handler = PairwiseInferenceHandler(VALID_PAIRWISE_TEMPLATE)
        self.assertEqual(handler.template, VALID_PAIRWISE_TEMPLATE)

    def test_invalid_templates(self):
        # Every malformed template must be rejected at construction time.
        for template in INVALID_PAIRWISE_TEMPLATES:
            with self.subTest(template=template):
                with self.assertRaises(ValueError):
                    PairwiseInferenceHandler(template)

    def test_body_generation(self):
        handler = PairwiseInferenceHandler(VALID_PAIRWISE_TEMPLATE)
        cases = [
            (
                1,
                "Query: Sample Query Document0: Title1: Sample Title1 Content1: Sample Text1 Document1: Title2: Sample Title2 Content2: Sample Text2",
            ),
            (
                2,
                "Query: Sample Query Document0: Title1: Sample Title1 Content1: Sample Text1 Document1: Title3: Sample Title3 Content3: Sample Text3",
            ),
        ]
        for index2, expected in cases:
            with self.subTest(index2=index2):
                body_text = handler._generate_body(
                    result=r,
                    index1=0,
                    index2=index2,
                    single_doc_max_token=6000,
                    tokenizer=tokenizer,
                )
                self.assertEqual(body_text, expected)

    def test_prompt_generation(self):
        handler = PairwiseInferenceHandler(VALID_PAIRWISE_TEMPLATE)
        cases = [
            (
                1,
                "Query: Sample Query Document0: Title1: Sample Title1 Content1: Sample Text1 Document1: Title2: Sample Title2 Content2: Sample Text2",
            ),
            (
                2,
                "Query: Sample Query Document0: Title1: Sample Title1 Content1: Sample Text1 Document1: Title3: Sample Title3 Content3: Sample Text3",
            ),
        ]
        for index2, expected in cases:
            with self.subTest(index2=index2):
                prompt_text = handler.generate_prompt(
                    result=r, index1=0, index2=index2, max_token=6000, tokenizer=tokenizer
                )
                self.assertEqual(prompt_text, expected)


if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)