Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 20 additions & 24 deletions xllm_service/common/macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,30 +17,26 @@ limitations under the License.
#pragma once

namespace xllm_service {
// a central place to define common macros for the project
// clang-format off
// DEFINE_ARG(T, name): declares a `T name_` member together with a fluent
// builder-style setter `name(const T&)` that returns `*this` for chaining,
// plus const and mutable getters. The expansion ends on the member
// declaration so the usage site supplies the terminating `;`.
// (Comments must stay outside the macro body: a `//` comment would swallow
// the backslash line-continuation that follows it.)
#define DEFINE_ARG(T, name) \
 public: \
  inline auto name(const T& name) ->decltype(*this) { \
    this->name##_ = name; \
    return *this; \
  } \
  inline const T& name() const noexcept { return this->name##_; } \
  inline T& name() noexcept { return this->name##_; } \
  \
  T name##_

// DEFINE_PTR_ARG(T, name): pointer flavor of DEFINE_ARG — declares a
// `T* name_` member, a chainable setter taking a raw pointer, and a single
// const getter returning the (non-owning, possibly null) pointer.
// NOTE(review): the pointer is stored as-is; ownership/lifetime is the
// caller's responsibility — nothing here frees it.
#define DEFINE_PTR_ARG(T, name) \
 public: \
  inline auto name(T* name) ->decltype(*this) { \
    this->name##_ = name; \
    return *this; \
  } \
  inline T* name() const noexcept { return this->name##_; } \
  \
  T* name##_

// clang-format on
// PROPERTY(T, property): declares a `T property_` member with ref-qualified
// accessors:
//   - `const&` / `&` getters for lvalues, and a `&&` getter that moves the
//     member out of a temporary;
//   - chainable copy and move setters restricted to lvalues (`&`) — the
//     rvalue setter overloads are deleted so `Foo{}.property(x)` cannot
//     silently mutate a temporary.
// NOTE(review): the move overloads rely on std::move; presumably <utility>
// is available transitively — confirm this header includes it (directly or
// via an include outside this view).
#define PROPERTY(T, property) \
 public: \
  [[nodiscard]] const T& property() const& noexcept { return property##_; } \
  [[nodiscard]] T& property() & noexcept { return property##_; } \
  [[nodiscard]] T&& property() && noexcept { return std::move(property##_); } \
  \
  auto property(const T& value) & -> decltype(*this) { \
    property##_ = value; \
    return *this; \
  } \
  \
  auto property(T&& value) & -> decltype(*this) { \
    property##_ = std::move(value); \
    return *this; \
  } \
  \
  void property(const T& value) && = delete; \
  void property(T&& value) && = delete; \
  \
  T property##_

#ifndef UNUSED_PARAMETER
#define UNUSED_PARAMETER(x) ((void)(x))
Expand Down
19 changes: 10 additions & 9 deletions xllm_service/tokenizer/CMakeLists.txt
Original file line number Diff line number Diff line change
# Tokenizer library: HF-json (fast), sentencepiece and tiktoken backends plus
# the tokenizer-args loader and factory.
# NOTE(review): the diff view had duplicate list entries (tiktoken/
# sentencepiece headers and sources, and both `sentencepiece` and
# `:sentencepiece` deps); this is the deduplicated final form with the
# deleted hf_tokenizer.* files removed — confirm against the merged tree.
cc_library(
  NAME
    tokenizer
  HDRS
    fast_tokenizer.h
    sentencepiece_tokenizer.h
    tiktoken_tokenizer.h
    tokenizer.h
    tokenizer_args.h
    tokenizer_args_loader.h
    tokenizer_factory.h
  SRCS
    fast_tokenizer.cpp
    sentencepiece_tokenizer.cpp
    tiktoken_tokenizer.cpp
    tokenizer_args.cpp
    tokenizer_args_loader.cpp
    tokenizer_factory.cpp
  DEPS
    :common
    :sentencepiece
    absl::flat_hash_map
    absl::strings
    glog::glog
    nlohmann_json::nlohmann_json
    re2::re2
    rust_tokenizers
)

67 changes: 67 additions & 0 deletions xllm_service/tokenizer/fast_tokenizer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#include "fast_tokenizer.h"

#include <glog/logging.h>

namespace xllm_service {

// Loads a tokenizer from the given tokenizer.json path via the C bindings.
// The path is kept so clone() can re-load an identical instance.
// Aborts (CHECK) when the file cannot be loaded/parsed — construction never
// yields a null handle.
FastTokenizer::FastTokenizer(const std::string& tokenizer_json_path)
    : tokenizer_json_path_(tokenizer_json_path) {
  handle_ = tokenizers_new_from_path(tokenizer_json_path.c_str());
  CHECK(handle_ != nullptr)
      << "Failed to load tokenizer from file: " << tokenizer_json_path;
}

// Produces an independent FastTokenizer by re-loading from the stored
// tokenizer.json path; each clone owns its own native handle.
std::unique_ptr<Tokenizer> FastTokenizer::clone() const {
  auto copy = std::make_unique<FastTokenizer>(tokenizer_json_path_);
  return copy;
}

FastTokenizer::~FastTokenizer() { tokenizers_free(handle_); }

// Tokenizes `text` into vocabulary ids (special tokens added), overwriting
// `*ids`. Always returns true — the C binding exposes no failure signal here.
bool FastTokenizer::encode(const std::string_view& text,
                           std::vector<int32_t>* ids) const {
  TokenizerEncodeResult result;
  tokenizers_encode(
      handle_, text.data(), text.size(), /*add_special_tokens=*/1, &result);

  // Copy straight into the output buffer; the previous intermediate vector
  // (construct + move) was an unnecessary extra allocation step.
  ids->assign(result.token_ids, result.token_ids + result.len);

  // NOTE(review): result.token_ids is allocated by the C library and never
  // released here — presumably it needs a matching free call (e.g.
  // tokenizers_free_encode_results); verify against the binding's API or
  // every encode() leaks.
  return true;
}

// Converts a sequence of token ids back to text.
// `skip_special_tokens` is forwarded to the native decoder.
std::string FastTokenizer::decode(const Slice<int32_t>& ids,
                                  bool skip_special_tokens) const {
  const char* data = nullptr;
  size_t len = 0;
  tokenizers_decode(handle_,
                    reinterpret_cast<const uint32_t*>(ids.data()),
                    ids.size(),
                    skip_special_tokens,
                    &data,
                    &len);
  // Guard: if the C call failed and left `data` null, constructing
  // std::string from a null pointer is undefined behavior.
  if (data == nullptr) {
    return {};
  }
  return {data, len};
}

// Looks up the vocabulary id for `token`.
// The C binding signals a miss by leaving the sentinel -1 untouched, which
// is mapped to std::nullopt here.
std::optional<int32_t> FastTokenizer::token_to_id(
    const std::string_view& token) const {
  int32_t id = -1;
  tokenizers_token_to_id(handle_, token.data(), token.size(), &id);
  if (id == -1) {
    return std::nullopt;
  }
  return id;
}

// Returns the token text for a single vocabulary id, or an empty string if
// the binding produced nothing.
std::string FastTokenizer::id_to_token(int32_t id) const {
  const char* data = nullptr;
  size_t len = 0;
  tokenizers_id_to_token(handle_, id, &data, &len);
  // Guard: mirror decode() — a null `data` (e.g. unknown id) must not be fed
  // to the std::string(const char*, size_t) constructor, which is UB on null.
  if (data == nullptr) {
    return {};
  }
  return {data, len};
}

// Returns the vocabulary size reported by the native tokenizer.
// Aborts (CHECK) if the reported size is zero.
size_t FastTokenizer::vocab_size() const {
  // Zero-initialize: if the C call fails to write the out-param, reading an
  // indeterminate `size` would be UB; with 0 the CHECK below fires instead.
  size_t size = 0;
  tokenizers_get_vocab_size(handle_, &size);
  CHECK(size > 0) << "vocab_size must be greater than 0.";
  return size;
}

} // namespace xllm_service
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,11 @@ limitations under the License.

namespace xllm_service {

// a tokenizer that uses hf/tokenizers
// not thread-safe, can't be used in multiple threads.
class HFTokenizer : public Tokenizer {
class FastTokenizer : public Tokenizer {
public:
HFTokenizer(const std::string& tokenizer_file_path, TokenizerHandle handle);
FastTokenizer(const std::string& tokenizer_json_path);

~HFTokenizer() override;
~FastTokenizer() override;

bool encode(const std::string_view& text,
std::vector<int32_t>* ids) const override;
Expand All @@ -44,10 +42,8 @@ class HFTokenizer : public Tokenizer {

std::unique_ptr<Tokenizer> clone() const override;

static std::unique_ptr<HFTokenizer> from_file(const std::string& path);

private:
std::string tokenizer_file_path_;
std::string tokenizer_json_path_;

TokenizerHandle handle_ = nullptr;
};
Expand Down
91 changes: 0 additions & 91 deletions xllm_service/tokenizer/hf_tokenizer.cpp

This file was deleted.

1 change: 1 addition & 0 deletions xllm_service/tokenizer/tiktoken_tokenizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ limitations under the License.
==============================================================================*/

#pragma once

#include <absl/container/flat_hash_map.h>
#include <re2/re2.h>

Expand Down
18 changes: 2 additions & 16 deletions xllm_service/tokenizer/tokenizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,30 +15,16 @@ limitations under the License.
==============================================================================*/

#pragma once

#include <common/slice.h>

#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <vector>

#include "common/slice.h"

namespace xllm_service {

// Fundamentally, Large Language Models (LLM) are designed to generate text
// based on given prompts. To process text effectively, LLM models typically
// work with sequences of integers as inputs and produce sequences of integers
// as outputs. The conversion between text and integer sequences is handled by a
// tokenizer during preprocessing. The tokenizer serves two primary functions:
// 1. Breaking down text into tokens and then mapping those tokens to
// corresponding integers using a predefined vocabulary.
// 2. Reversing this process by converting a sequence of integers back into
// human-readable text using the same vocabulary.
//
// For example:
// ids = tokenizer.Encode("Hello, world!") # [1, 2, 3]
// text = tokenizer.Decode(ids) # "Hello, world!"
class Tokenizer {
public:
virtual ~Tokenizer() = default;
Expand Down
75 changes: 75 additions & 0 deletions xllm_service/tokenizer/tokenizer_args.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#include "tokenizer_args.h"

#include <fstream>

#include "common/json_reader.h"

namespace xllm_service {
namespace {
// Tries to read a chat template from `dir`, in priority order:
//   1. chat_template.json — the "chat_template" key;
//   2. chat_template.jinja — the raw file contents.
// Returns std::nullopt when neither source yields a template.
std::optional<std::string> load_chat_template_file(const std::string& dir) {
  // chat_template.json
  JsonReader reader;
  // The original used `if (reader.parse(path); auto v = ...)`, which silently
  // discarded the parse() result and queried the reader even after a failed
  // parse. Nest the checks so intent (and the failure path) is explicit.
  if (reader.parse(dir + "/chat_template.json")) {
    if (auto v = reader.value<std::string>("chat_template")) {
      return v;
    }
  }
  // chat_template.jinja: take the whole file verbatim. The ifstream closes
  // itself on scope exit (RAII), so no explicit close() is needed.
  std::ifstream file(dir + "/chat_template.jinja");
  if (file.is_open()) {
    std::ostringstream content;
    content << file.rdbuf();
    return content.str();
  }
  return std::nullopt;
}
} // namespace

bool load_tokenizer_args(const std::string& model_weights_path,
TokenizerArgs& tokenizer_args) {
// tokenizer args from tokenizer_config.json
JsonReader tokenizer_reader;
const std::string tokenizer_args_file_path =
model_weights_path + "/tokenizer_config.json";
if (tokenizer_reader.parse(tokenizer_args_file_path)) {
// read chat template if exists
if (auto v = load_chat_template_file(model_weights_path)) {
tokenizer_args.chat_template() = v.value();
} else if (auto v = tokenizer_reader.value<std::string>("chat_template")) {
tokenizer_args.chat_template() = v.value();
}
if (auto v = tokenizer_reader.value<bool>("add_bos_token")) {
tokenizer_args.add_bos_token() = v.value();
}
if (auto v = tokenizer_reader.value<bool>("add_eos_token")) {
tokenizer_args.add_eos_token() = v.value();
}
if (auto v = tokenizer_reader.value<std::string>("tokenizer_class")) {
tokenizer_args.tokenizer_class() = v.value();
}
// read bos_token
if (auto v = tokenizer_reader.value<std::string>("bos_token.content")) {
tokenizer_args.bos_token() = v.value();
} else if (auto v = tokenizer_reader.value<std::string>("bos_token")) {
tokenizer_args.bos_token() = v.value();
}
// read eos_token
if (auto v = tokenizer_reader.value<std::string>("eos_token.content")) {
tokenizer_args.eos_token() = v.value();
} else if (auto v = tokenizer_reader.value<std::string>("eos_token")) {
tokenizer_args.eos_token() = v.value();
}
// read pad_token
if (auto v = tokenizer_reader.value<std::string>("pad_token.content")) {
tokenizer_args.pad_token() = v.value();
} else if (auto v = tokenizer_reader.value<std::string>("pad_token")) {
tokenizer_args.pad_token() = v.value();
}
}

return true;
}

} // namespace xllm_service
Loading