Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 20 additions & 24 deletions xllm_service/common/macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,30 +17,26 @@ limitations under the License.
#pragma once

namespace xllm_service {
// a central place to define common macros for the project
// clang-format off
// DEFINE_ARG(T, name): declares a `T name_` member together with a fluent
// builder-style setter `name(const T&)` that returns `*this` for chaining,
// plus const and mutable getters. The expansion ends on the member
// declaration so the usage site supplies the terminating `;`.
// (Comments must stay outside the macro body: a `//` comment would swallow
// the backslash line-continuation that follows it.)
#define DEFINE_ARG(T, name) \
 public: \
  inline auto name(const T& name) ->decltype(*this) { \
    this->name##_ = name; \
    return *this; \
  } \
  inline const T& name() const noexcept { return this->name##_; } \
  inline T& name() noexcept { return this->name##_; } \
  \
  T name##_

// DEFINE_PTR_ARG(T, name): pointer flavor of DEFINE_ARG — declares a
// `T* name_` member, a chainable setter taking a raw pointer, and a single
// const getter returning the (non-owning, possibly null) pointer.
// NOTE(review): the pointer is stored as-is; ownership/lifetime is the
// caller's responsibility — nothing here frees it.
#define DEFINE_PTR_ARG(T, name) \
 public: \
  inline auto name(T* name) ->decltype(*this) { \
    this->name##_ = name; \
    return *this; \
  } \
  inline T* name() const noexcept { return this->name##_; } \
  \
  T* name##_

// clang-format on
// PROPERTY(T, property): declares a `T property_` member with ref-qualified
// accessors:
//   - `const&` / `&` getters for lvalues, and a `&&` getter that moves the
//     member out of a temporary;
//   - chainable copy and move setters restricted to lvalues (`&`) — the
//     rvalue setter overloads are deleted so `Foo{}.property(x)` cannot
//     silently mutate a temporary.
// NOTE(review): the move overloads rely on std::move; presumably <utility>
// is available transitively — confirm this header includes it (directly or
// via an include outside this view).
#define PROPERTY(T, property) \
 public: \
  [[nodiscard]] const T& property() const& noexcept { return property##_; } \
  [[nodiscard]] T& property() & noexcept { return property##_; } \
  [[nodiscard]] T&& property() && noexcept { return std::move(property##_); } \
  \
  auto property(const T& value) & -> decltype(*this) { \
    property##_ = value; \
    return *this; \
  } \
  \
  auto property(T&& value) & -> decltype(*this) { \
    property##_ = std::move(value); \
    return *this; \
  } \
  \
  void property(const T& value) && = delete; \
  void property(T&& value) && = delete; \
  \
  T property##_

#ifndef UNUSED_PARAMETER
#define UNUSED_PARAMETER(x) ((void)(x))
Expand Down
19 changes: 10 additions & 9 deletions xllm_service/tokenizer/CMakeLists.txt
Original file line number Diff line number Diff line change
# Tokenizer library: HF-json (fast), sentencepiece and tiktoken backends plus
# the tokenizer-args loader and factory.
# NOTE(review): the diff view had duplicate list entries (tiktoken/
# sentencepiece headers and sources, and both `sentencepiece` and
# `:sentencepiece` deps); this is the deduplicated final form with the
# deleted hf_tokenizer.* files removed — confirm against the merged tree.
cc_library(
  NAME
    tokenizer
  HDRS
    fast_tokenizer.h
    sentencepiece_tokenizer.h
    tiktoken_tokenizer.h
    tokenizer.h
    tokenizer_args.h
    tokenizer_args_loader.h
    tokenizer_factory.h
  SRCS
    fast_tokenizer.cpp
    sentencepiece_tokenizer.cpp
    tiktoken_tokenizer.cpp
    tokenizer_args.cpp
    tokenizer_args_loader.cpp
    tokenizer_factory.cpp
  DEPS
    :common
    :sentencepiece
    absl::flat_hash_map
    absl::strings
    glog::glog
    nlohmann_json::nlohmann_json
    re2::re2
    rust_tokenizers
)

67 changes: 67 additions & 0 deletions xllm_service/tokenizer/fast_tokenizer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#include "fast_tokenizer.h"

#include <glog/logging.h>

namespace xllm_service {

// Loads a tokenizer from the given tokenizer.json path via the C bindings.
// The path is kept so clone() can re-load an identical instance.
// Aborts (CHECK) when the file cannot be loaded/parsed — construction never
// yields a null handle.
FastTokenizer::FastTokenizer(const std::string& tokenizer_json_path)
    : tokenizer_json_path_(tokenizer_json_path) {
  handle_ = tokenizers_new_from_path(tokenizer_json_path.c_str());
  CHECK(handle_ != nullptr)
      << "Failed to load tokenizer from file: " << tokenizer_json_path;
}

// Produces an independent FastTokenizer by re-loading from the stored
// tokenizer.json path; each clone owns its own native handle.
std::unique_ptr<Tokenizer> FastTokenizer::clone() const {
  auto copy = std::make_unique<FastTokenizer>(tokenizer_json_path_);
  return copy;
}

FastTokenizer::~FastTokenizer() { tokenizers_free(handle_); }

// Tokenizes `text` into vocabulary ids (special tokens added), overwriting
// `*ids`. Always returns true — the C binding exposes no failure signal here.
bool FastTokenizer::encode(const std::string_view& text,
                           std::vector<int32_t>* ids) const {
  TokenizerEncodeResult result;
  tokenizers_encode(
      handle_, text.data(), text.size(), /*add_special_tokens=*/1, &result);

  // Copy straight into the output buffer; the previous intermediate vector
  // (construct + move) was an unnecessary extra allocation step.
  ids->assign(result.token_ids, result.token_ids + result.len);

  // NOTE(review): result.token_ids is allocated by the C library and never
  // released here — presumably it needs a matching free call (e.g.
  // tokenizers_free_encode_results); verify against the binding's API or
  // every encode() leaks.
  return true;
}

// Converts a sequence of token ids back to text.
// `skip_special_tokens` is forwarded to the native decoder.
std::string FastTokenizer::decode(const Slice<int32_t>& ids,
                                  bool skip_special_tokens) const {
  const char* data = nullptr;
  size_t len = 0;
  tokenizers_decode(handle_,
                    reinterpret_cast<const uint32_t*>(ids.data()),
                    ids.size(),
                    skip_special_tokens,
                    &data,
                    &len);
  // Guard: if the C call failed and left `data` null, constructing
  // std::string from a null pointer is undefined behavior.
  if (data == nullptr) {
    return {};
  }
  return {data, len};
}

// Looks up the vocabulary id for `token`.
// The C binding signals a miss by leaving the sentinel -1 untouched, which
// is mapped to std::nullopt here.
std::optional<int32_t> FastTokenizer::token_to_id(
    const std::string_view& token) const {
  int32_t id = -1;
  tokenizers_token_to_id(handle_, token.data(), token.size(), &id);
  if (id == -1) {
    return std::nullopt;
  }
  return id;
}

// Returns the token text for a single vocabulary id, or an empty string if
// the binding produced nothing.
std::string FastTokenizer::id_to_token(int32_t id) const {
  const char* data = nullptr;
  size_t len = 0;
  tokenizers_id_to_token(handle_, id, &data, &len);
  // Guard: mirror decode() — a null `data` (e.g. unknown id) must not be fed
  // to the std::string(const char*, size_t) constructor, which is UB on null.
  if (data == nullptr) {
    return {};
  }
  return {data, len};
}

// Returns the vocabulary size reported by the native tokenizer.
// Aborts (CHECK) if the reported size is zero.
size_t FastTokenizer::vocab_size() const {
  // Zero-initialize: if the C call fails to write the out-param, reading an
  // indeterminate `size` would be UB; with 0 the CHECK below fires instead.
  size_t size = 0;
  tokenizers_get_vocab_size(handle_, &size);
  CHECK(size > 0) << "vocab_size must be greater than 0.";
  return size;
}

} // namespace xllm_service
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,11 @@ limitations under the License.

namespace xllm_service {

// a tokenizer that uses hf/tokenizers
// not thread-safe, can't be used in multiple threads.
class HFTokenizer : public Tokenizer {
class FastTokenizer : public Tokenizer {
public:
HFTokenizer(const std::string& tokenizer_file_path, TokenizerHandle handle);
FastTokenizer(const std::string& tokenizer_json_path);

~HFTokenizer() override;
~FastTokenizer() override;

bool encode(const std::string_view& text,
std::vector<int32_t>* ids) const override;
Expand All @@ -44,10 +42,8 @@ class HFTokenizer : public Tokenizer {

std::unique_ptr<Tokenizer> clone() const override;

static std::unique_ptr<HFTokenizer> from_file(const std::string& path);

private:
std::string tokenizer_file_path_;
std::string tokenizer_json_path_;

TokenizerHandle handle_ = nullptr;
};
Expand Down
91 changes: 0 additions & 91 deletions xllm_service/tokenizer/hf_tokenizer.cpp

This file was deleted.

1 change: 1 addition & 0 deletions xllm_service/tokenizer/tiktoken_tokenizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ limitations under the License.
==============================================================================*/

#pragma once

#include <absl/container/flat_hash_map.h>
#include <re2/re2.h>

Expand Down
18 changes: 2 additions & 16 deletions xllm_service/tokenizer/tokenizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,30 +15,16 @@ limitations under the License.
==============================================================================*/

#pragma once

#include <common/slice.h>

#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <vector>

#include "common/slice.h"

namespace xllm_service {

// Fundamentally, Large Language Models (LLM) are designed to generate text
// based on given prompts. To process text effectively, LLM models typically
// work with sequences of integers as inputs and produce sequences of integers
// as outputs. The conversion between text and integer sequences is handled by a
// tokenizer during preprocessing. The tokenizer serves two primary functions:
// 1. Breaking down text into tokens and then mapping those tokens to
// corresponding integers using a predefined vocabulary.
// 2. Reversing this process by converting a sequence of integers back into
// human-readable text using the same vocabulary.
//
// For example:
// ids = tokenizer.Encode("Hello, world!") # [1, 2, 3]
// text = tokenizer.Decode(ids) # "Hello, world!"
class Tokenizer {
public:
virtual ~Tokenizer() = default;
Expand Down
75 changes: 75 additions & 0 deletions xllm_service/tokenizer/tokenizer_args.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#include "tokenizer_args.h"

#include <fstream>

#include "common/json_reader.h"

namespace xllm_service {
namespace {
// Tries to read a chat template from `dir`, in priority order:
//   1. chat_template.json — the "chat_template" key;
//   2. chat_template.jinja — the raw file contents.
// Returns std::nullopt when neither source yields a template.
std::optional<std::string> load_chat_template_file(const std::string& dir) {
  // chat_template.json
  JsonReader reader;
  // The original used `if (reader.parse(path); auto v = ...)`, which silently
  // discarded the parse() result and queried the reader even after a failed
  // parse. Nest the checks so intent (and the failure path) is explicit.
  if (reader.parse(dir + "/chat_template.json")) {
    if (auto v = reader.value<std::string>("chat_template")) {
      return v;
    }
  }
  // chat_template.jinja: take the whole file verbatim. The ifstream closes
  // itself on scope exit (RAII), so no explicit close() is needed.
  std::ifstream file(dir + "/chat_template.jinja");
  if (file.is_open()) {
    std::ostringstream content;
    content << file.rdbuf();
    return content.str();
  }
  return std::nullopt;
}
} // namespace

bool load_tokenizer_args(const std::string& model_weights_path,
TokenizerArgs& tokenizer_args) {
// tokenizer args from tokenizer_config.json
JsonReader tokenizer_reader;
const std::string tokenizer_args_file_path =
model_weights_path + "/tokenizer_config.json";
if (tokenizer_reader.parse(tokenizer_args_file_path)) {
// read chat template if exists
if (auto v = load_chat_template_file(model_weights_path)) {
tokenizer_args.chat_template() = v.value();
} else if (auto v = tokenizer_reader.value<std::string>("chat_template")) {
tokenizer_args.chat_template() = v.value();
}
if (auto v = tokenizer_reader.value<bool>("add_bos_token")) {
tokenizer_args.add_bos_token() = v.value();
}
if (auto v = tokenizer_reader.value<bool>("add_eos_token")) {
tokenizer_args.add_eos_token() = v.value();
}
if (auto v = tokenizer_reader.value<std::string>("tokenizer_class")) {
tokenizer_args.tokenizer_class() = v.value();
}
// read bos_token
if (auto v = tokenizer_reader.value<std::string>("bos_token.content")) {
tokenizer_args.bos_token() = v.value();
} else if (auto v = tokenizer_reader.value<std::string>("bos_token")) {
tokenizer_args.bos_token() = v.value();
}
// read eos_token
if (auto v = tokenizer_reader.value<std::string>("eos_token.content")) {
tokenizer_args.eos_token() = v.value();
} else if (auto v = tokenizer_reader.value<std::string>("eos_token")) {
tokenizer_args.eos_token() = v.value();
}
// read pad_token
if (auto v = tokenizer_reader.value<std::string>("pad_token.content")) {
tokenizer_args.pad_token() = v.value();
} else if (auto v = tokenizer_reader.value<std::string>("pad_token")) {
tokenizer_args.pad_token() = v.value();
}
}

return true;
}

} // namespace xllm_service
Loading