Skip to content

Commit 9c5759c

Browse files
committed
Rank-K-32B Integration
1 parent 33f4983 commit 9c5759c

File tree

4 files changed

+146
-1
lines changed

4 files changed

+146
-1
lines changed

src/rank_llm/rerank/listwise/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from .rank_gemini import SafeGenai
22
from .rank_gpt import SafeOpenai
33
from .rank_listwise_os_llm import RankListwiseOSLLM
4+
from .rankk_reranker import RankKReranker
45
from .vicuna_reranker import VicunaReranker
56
from .zephyr_reranker import ZephyrReranker
67

@@ -10,4 +11,5 @@
1011
"ZephyrReranker",
1112
"SafeOpenai",
1213
"SafeGenai",
14+
"RankKReranker",
1315
]
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
from typing import Optional
2+
3+
from rank_llm.data import Result
4+
from rank_llm.rerank import PromptMode
5+
from rank_llm.rerank.listwise import RankListwiseOSLLM
6+
7+
8+
class RankKReranker(RankListwiseOSLLM):
    """Listwise reranker wrapper for the hltcoe/Rank-K-32B reasoning model.

    Thin subclass of ``RankListwiseOSLLM`` that (a) wires in Rank-K-specific
    defaults (thinking mode enabled with a 10k reasoning-token budget and the
    Rank-K prompt template) and (b) strips the model's chain-of-thought from
    its output before the permutation is parsed.
    """

    def __init__(
        self,
        model: str = "hltcoe/Rank-K-32B",
        context_size: int = 4096,
        prompt_mode: PromptMode = PromptMode.RANK_GPT,
        prompt_template_path: Optional[
            str
        ] = "src/rank_llm/rerank/prompt_templates/rank_k_template.yaml",
        num_few_shot_examples: int = 0,
        device: str = "cuda",
        num_gpus: int = 1,
        variable_passages: bool = True,
        window_size: int = 20,
        use_alpha: bool = False,
    ) -> None:
        # Delegate everything to the generic open-source listwise reranker;
        # is_thinking / reasoning_token_budget are fixed because Rank-K-32B
        # always emits reasoning before the final ordering.
        super().__init__(
            model=model,
            context_size=context_size,
            prompt_mode=prompt_mode,
            prompt_template_path=prompt_template_path,
            num_few_shot_examples=num_few_shot_examples,
            device=device,
            num_gpus=num_gpus,
            variable_passages=variable_passages,
            is_thinking=True,
            reasoning_token_budget=10000,
            window_size=window_size,
            use_alpha=use_alpha,
        )

    def receive_permutation(
        self,
        result: Result,
        permutation: str,
        rank_start: int,
        rank_end: int,
        logging: bool = False,
    ) -> Result:
        """Apply a model-produced permutation to a slice of the ranking.

        Rank-K-32B prefixes its answer with free-form reasoning, so only the
        final line of ``permutation`` is expected to contain the ordering
        (integers such as ``[3] > [1] > [2]``). That line is isolated here and
        forwarded to the superclass parser.

        Args:
            result: Result object holding the current candidate ranking.
            permutation: Raw model output — reasoning text followed, on its
                last line, by the ordering string.
            rank_start: Start index (inclusive) of the window being reordered.
            rank_end: End index of the window being reordered.
            logging: Passed through to the superclass implementation.

        Returns:
            The ``result`` object with the window reordered per the model.
        """
        # Keep only the text after the final newline — i.e. drop the
        # reasoning and retain the ordering line. rsplit("\n", 1)[-1] is
        # equivalent to split("\n")[-1] and also handles single-line output.
        ranking_line = permutation.strip().rsplit("\n", 1)[-1]
        return super().receive_permutation(
            result, ranking_line, rank_start, rank_end, logging
        )
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
method: "singleturn_listwise"
2+
prefix: |-
3+
Determine a ranking of the passages based on how relevant they are to the query.
4+
If the query is a question, how relevant a passage is depends on how well it answers the question.
5+
If not, try to analyze the intent of the query and assess how well each passage satisfies the intent.
6+
The query may have typos and passages may contain contradicting information.
7+
However, we do not get into fact-checking. We just rank the passages based on their relevancy to the query.
8+
9+
Sort them from the most relevant to the least.
10+
Answer with the passage number using a format of `[3] > [2] > [4] = [1] > [5]`.
11+
Ties are acceptable if they are equally relevant.
12+
I need you to be accurate but overthinking it is unnecessary.
13+
Output only the ordering without any other text.
14+
15+
Query: {query}
16+
body: "\n\n[{rank}] {candidate}"
17+
suffix: ""

src/rank_llm/rerank/reranker.py

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,12 @@
99
get_genai_api_key,
1010
get_openai_api_key,
1111
)
12-
from rank_llm.rerank.listwise import RankListwiseOSLLM, SafeGenai, SafeOpenai
12+
from rank_llm.rerank.listwise import (
13+
RankKReranker,
14+
RankListwiseOSLLM,
15+
SafeGenai,
16+
SafeOpenai,
17+
)
1318
from rank_llm.rerank.listwise.rank_fid import RankFiDDistill, RankFiDScore
1419
from rank_llm.rerank.pairwise.duot5 import DuoT5
1520
from rank_llm.rerank.pointwise.monot5 import MonoT5
@@ -487,6 +492,52 @@ def create_model_coordinator(
487492
elif model_path in ["unspecified", "rank_random", "rank_identity"]:
488493
# NULL reranker
489494
agent = None
495+
elif "hltcoe/Rank-K-32B" in model_path:
496+
print(f"Loading {model_path} ...")
497+
keys_and_defaults = [
498+
("context_size", 4096),
499+
("prompt_mode", PromptMode.RANK_GPT),
500+
(
501+
"prompt_template_path",
502+
"src/rank_llm/rerank/prompt_templates/rank_k_template.yaml",
503+
),
504+
("num_few_shot_examples", 0),
505+
("device", "cuda"),
506+
("num_gpus", 1),
507+
("variable_passages", False),
508+
("window_size", 20),
509+
("system_message", None),
510+
("use_logits", False),
511+
("use_alpha", False),
512+
]
513+
[
514+
context_size,
515+
prompt_mode,
516+
prompt_template_path,
517+
num_few_shot_examples,
518+
device,
519+
num_gpus,
520+
variable_passages,
521+
window_size,
522+
system_message,
523+
use_logits,
524+
use_alpha,
525+
] = extract_kwargs(keys_and_defaults, **kwargs)
526+
527+
model_coordinator = RankKReranker(
528+
model=(model_path),
529+
context_size=context_size,
530+
prompt_mode=prompt_mode,
531+
prompt_template_path=prompt_template_path,
532+
num_few_shot_examples=num_few_shot_examples,
533+
device=device,
534+
num_gpus=num_gpus,
535+
variable_passages=variable_passages,
536+
window_size=window_size,
537+
use_alpha=use_alpha,
538+
)
539+
540+
print(f"Completed loading {model_path}")
490541
else:
491542
# supports loading models from huggingface
492543
print(f"Loading {model_path} ...")

0 commit comments

Comments
 (0)