feat: 添加简体中文文档并更新版本至0.1.2

hexwarrior6 · hexwarrior6 · commit db33e23da379 · 2025-06-04T08:56:04.000+08:00
- 新增README_zh-hans.md简体中文文档
- 优化代码格式以提高可读性
diff --git a/.gitignore b/.gitignore
@@ -5,3 +5,6 @@
 # Avoid committing pubspec.lock for library packages; see
 # https://dart.dev/guides/libraries/private-files#pubspeclock.
 pubspec.lock
+
+.idea/
+.vscode/
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,5 +3,8 @@
 - Initial version.
 
 ## 0.1.1
-s
-- Update LICENSE.
+
+- Update LICENSE.
+
+## 0.1.2
+- Add README_zh-hans.md and format the Dart code.
diff --git a/README.md b/README.md
@@ -1,5 +1,13 @@
+English | [简体中文](README_zh-hans.md)
 # text_counter
 
+<p align="center">
+    <a href="https://github.com/hexwarrior6/text_counter"><img alt="GitHub repo" src="https://img.shields.io/github/last-commit/hexwarrior6/text_counter?logo=github"></a>
+    <a href="https://gitee.com/HexWarrior6/text_counter"><img alt="Gitee repo" src="https://img.shields.io/badge/Gitee-repo-red?logo=gitee"></a>
+    <a href="https://pub.dev/packages/text_counter"><img alt="pub version" src="https://img.shields.io/pub/v/text_counter?logo=dart"></a>
+    <a href="https://github.com/hexwarrior6/text_counter/blob/master/LICENSE"><img alt="LICENSE" src="https://img.shields.io/github/license/hexwarrior6/text_counter.svg?color=blue"></a>
+</p>
+
 A lightweight Dart utility for accurately counting characters and words in **over 100 languages**, including CJK (Chinese, Japanese, Korean), RTL (Right-to-Left) scripts like Arabic and Hebrew, and mixed-language texts.
 
 `text_counter` uses **Microsoft Word-compatible word counting logic**, ensuring consistent and familiar results across different writing systems. This makes it ideal for applications requiring accurate text metrics — such as content editors, writing tools, and input validation systems.
@@ -23,7 +31,7 @@ Add this to your package's `pubspec.yaml`:
 
 ```yaml
 dependencies:
-  text_counter: ^0.1.0
+  text_counter: ^0.1.2
 ```
 
 Then run:
@@ -54,11 +62,11 @@ void main() {
 
 ## 🗺️ Supported Languages
 
-| Script Type               | Language Codes                                               |
-| ------------------------- | ------------------------------------------------------------ |
-| **CJK (Character-based)** | `zh`, `yue`, `ja`, `ko`, `th`, `hi`, `bn`, `ta`, `te`, `kn`,`ml`, `si`, `km`, `my`, `lo`, `tl`, `jw`, `su`, `bo`, `dz` |
-| **RTL (Word-based)**      | `ml`, `si`, `km`, `my`, `lo`, `tl`, `jw`, `su`, `bo`, `dz`   |
-| **Latin (Word-based)**    | All other ISO 639-1 language codes not listed above, including: `en`,`de`,`es`,`fr`,`it`,`pt`,`nl`,`tr`,`pl`,`ca`,`sv`,`id`,`fi`,`vi`,`hi`,`uk`,`el`,`ms`,`cs`,`ro`,`da`,`hu`,`no`,`th`... |
+| Script Type               | Language Codes                                                                                                                                                                                                     |
+|---------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| **CJK (Character-based)** | `zh`, `yue`, `ja`, `ko`, `th`, `hi`, `bn`, `ta`, `te`, `kn`,`ml`, `si`, `km`, `my`, `lo`, `tl`, `jw`, `su`, `bo`, `dz`                                                                                             |
+| **RTL (Word-based)**      | `ar`, `he`, `fa`, `ur`, `ps`, `ug`, `sd`                                                                                                                                                                           |
+| **Latin (Word-based)**    | All other ISO 639-1 language codes not listed above, including: `en`, `de`, `es`, `fr`, `it`, `pt`, `nl`, `tr`, `pl`, `ca`, `sv`, `id`, `fi`, `vi`, `uk`, `el`, `ms`, `cs`, `ro`, `da`, `hu`, `no` ...             |
 
 > If no `languageCode` is provided, the library automatically detects script types and applies appropriate counting rules.
 
@@ -80,7 +88,7 @@ void main() {
 ## 📚 API Reference
 
 ```dart
-int TextCounter.count(String text, {String? languageCode});
+int TextCounter.count(String text, {String? languageCode})
 ```
 
 - `text`: The input string to be analyzed.
diff --git a/README_zh-hans.md b/README_zh-hans.md
@@ -0,0 +1,103 @@
+[English](README.md) | 简体中文
+# text_counter
+
+<p align="center">
+    <a href="https://github.com/hexwarrior6/text_counter"><img alt="GitHub 仓库" src="https://img.shields.io/github/last-commit/hexwarrior6/text_counter?logo=github"></a>
+    <a href="https://gitee.com/HexWarrior6/text_counter"><img alt="Gitee 仓库" src="https://img.shields.io/badge/Gitee-repo-red?logo=gitee"></a>
+    <a href="https://pub.dev/packages/text_counter"><img alt="pub 版本" src="https://img.shields.io/pub/v/text_counter?logo=dart"></a>
+    <a href="https://github.com/hexwarrior6/text_counter/blob/master/LICENSE"><img alt="许可证" src="https://img.shields.io/github/license/hexwarrior6/text_counter.svg?color=blue"></a>
+</p>
+
+一个轻量级的 Dart 工具库，用于精确统计**100多种语言**的字符数和词数，包括 CJK（中文、日文、韩文）、阿拉伯语和希伯来语等从右向左书写的文字（RTL），以及混合语言的文本。
+
+`text_counter` 采用**与 Microsoft Word 兼容的词数统计逻辑**，确保在不同书写系统中都能获得一致且熟悉的统计结果。这使得它非常适合需要精确文本统计的应用场景，例如内容编辑器、写作工具和输入验证系统。
+
+## ✨ 功能特点
+
+- ✅ 采用 Microsoft Word 的词数统计规则：
+  - 通过空格和常见标点符号分隔单词。
+  - 连字符连接的单词（如 "state-of-the-art"）会被视为一个单词。
+  - 根据上下文正确处理数字和符号。
+
+- ✅ 支持语言感知的统计策略：
+  - **CJK（基于字符）**：每个字符单独计数（适用于中文、日文、韩文等）。
+  - **拉丁文字和 RTL 文字（基于单词）**：使用适当的分隔符和分词规则进行标准的单词计数。
+
+- 🔍 **自动检测混合文本的语言/文字类型**。
+
+- ⚡ **轻量级且无依赖**：无需外部库。
+
+- 🌐 **开箱即用支持 100 多种语言**。
+
+## 📦 安装
+
+在项目的 `pubspec.yaml` 文件中添加：
+
+```yaml
+dependencies:
+  text_counter: ^0.1.2
+```
+
+然后运行：
+
+```bash
+dart pub get
+```
+
+## 🧪 使用方法
+
+### 基础示例
+
+```dart
+import 'package:text_counter/text_counter.dart';
+
+void main() {
+  print('中文: ${TextCounter.count("你好，世界", languageCode: "zh")}'); // 5
+  print('日文: ${TextCounter.count("こんにちは世界", languageCode: "ja")}'); // 7
+  print('韩文: ${TextCounter.count("안녕하세요 세상", languageCode: "ko")}'); // 7
+  print('阿拉伯语: ${TextCounter.count("مرحبا بالعالم", languageCode: "ar")}'); // 2
+  print('希伯来语: ${TextCounter.count("שלום עולם", languageCode: "he")}'); // 2
+  print('英文: ${TextCounter.count("Hello world", languageCode: "en")}'); // 2
+
+  const mixed = "Hello 你好 مرحبا こんにちは";
+  print('混合文本 "$mixed": ${TextCounter.count(mixed)}'); // 9
+}
+```
+
+## 🗺️ 支持的语言
+
+| 文字类型           | 语言代码                                                                                                                                                                          |
+|----------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| **CJK（基于字符）**  | `zh`, `yue`, `ja`, `ko`, `th`, `hi`, `bn`, `ta`, `te`, `kn`,`ml`, `si`, `km`, `my`, `lo`, `tl`, `jw`, `su`, `bo`, `dz`                                                        |
+| **RTL（基于单词）**  | `ar`, `he`, `fa`, `ur`, `ps`, `ug`, `sd`                                                                                                                                      |
+| **拉丁文字（基于单词）** | 所有其他未列出的 ISO 639-1 语言代码，包括：`en`, `de`, `es`, `fr`, `it`, `pt`, `nl`, `tr`, `pl`, `ca`, `sv`, `id`, `fi`, `vi`, `uk`, `el`, `ms`, `cs`, `ro`, `da`, `hu`, `no` ...             |
+
+> 如果不提供 `languageCode`，库会自动检测文字类型并应用适当的统计规则。
+
+## 🛠️ 工作原理
+
+- 对于 **CJK 语言**，每个表意文字或语素文字字符都会被单独计数。
+- 对于 **拉丁文字和 RTL 文字**，使用类似于 Microsoft Word 的空格和标点符号模式来检测单词边界。
+- 在 **混合语言文本**中，计数器会根据所使用的文字类型动态切换统计方法。
+
+## 🧩 适用场景
+
+- 内容管理系统
+- 富文本编辑器
+- 有字数限制的写作应用
+- 语言学习平台
+- 分析仪表盘
+- 表单验证工具
+
+## 📚 API 参考
+
+```dart
+int TextCounter.count(String text, {String? languageCode})
+```
+
+- `text`：需要分析的输入字符串。
+- `languageCode`：可选的 BCP 47 语言代码（例如，`"en"` 表示英语，`"zh"` 表示中文）。如果省略，则使用自动检测。
+
+## 📎 许可证
+
+MIT 许可证 - 详见 [LICENSE](LICENSE)
diff --git a/example/text_counter_example.dart b/example/text_counter_example.dart
@@ -4,9 +4,11 @@ void main() {
   print('Chinese: ${TextCounter.count("你好，世界", languageCode: "zh")}'); // 5
   print('Japanese: ${TextCounter.count("こんにちは世界", languageCode: "ja")}'); // 7
   print('Korean: ${TextCounter.count("안녕하세요 세상", languageCode: "ko")}'); // 7
-  print('Arabic: ${TextCounter.count("مرحبا بالعالم", languageCode: "ar")}'); // 2
+  print(
+      'Arabic: ${TextCounter.count("مرحبا بالعالم", languageCode: "ar")}'); // 2
   print('Hebrew: ${TextCounter.count("שלום עולם", languageCode: "he")}'); // 2
-  print('English: ${TextCounter.count("Hello world", languageCode: "en")}'); // 2
+  print(
+      'English: ${TextCounter.count("Hello world", languageCode: "en")}'); // 2
 
   const mixed = "Hello 你好 مرحبا こんにちは";
   print('Mixed Text "$mixed": ${TextCounter.count(mixed)}'); // 9
diff --git a/lib/text_counter.dart b/lib/text_counter.dart
@@ -2,13 +2,37 @@
 class TextCounter {
   // Languages that are counted by characters (e.g., Chinese, Japanese, Korean, Thai)
   static final Set<String> _characterBasedLanguages = {
-    'zh', 'yue', 'ja', 'ko', 'th', 'hi', 'bn', 'ta', 'te', 'kn',
-    'ml', 'si', 'km', 'my', 'lo', 'tl', 'jw', 'su', 'bo', 'dz'
+    'zh',
+    'yue',
+    'ja',
+    'ko',
+    'th',
+    'hi',
+    'bn',
+    'ta',
+    'te',
+    'kn',
+    'ml',
+    'si',
+    'km',
+    'my',
+    'lo',
+    'tl',
+    'jw',
+    'su',
+    'bo',
+    'dz'
   };
 
   // RTL languages requiring special tokenization (Arabic/Hebrew family)
   static final Set<String> _rtlLanguages = {
-    'ar', 'he', 'fa', 'ur', 'ps', 'ug', 'sd'
+    'ar',
+    'he',
+    'fa',
+    'ur',
+    'ps',
+    'ug',
+    'sd'
   };
 
   /// Main counting method
@@ -40,41 +64,41 @@ class TextCounter {
   static int _countRtlWords(String text) {
     // Remove all punctuation (keep Arabic and Hebrew characters)
     final cleaned = text.replaceAllMapped(
-      RegExp(r'[^\u0600-\u06FF\u0590-\u05FF\s]'),
-      (match) => ''
-    );
-    
+        RegExp(r'[^\u0600-\u06FF\u0590-\u05FF\s]'), (match) => '');
+
     if (cleaned.trim().isEmpty) return 0;
-    
+
     // Split by whitespace, Arabic tatdeel (ـ), and Hebrew maqaf (־)
-    return cleaned.split(RegExp(r'[\s\u0640\u05BE]+'))
-      .where((word) => word.isNotEmpty)
-      .length;
+    return cleaned
+        .split(RegExp(r'[\s\u0640\u05BE]+'))
+        .where((word) => word.isNotEmpty)
+        .length;
   }
 
   /// Count mixed-language text (automatically identifies different parts)
   static int _countMixed(String text) {
     // Match CJK characters (Chinese, Japanese, Korean)
-    final cjkChars = RegExp(
-      r'[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]'
-    ).allMatches(text).length;
+    final cjkChars =
+        RegExp(r'[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]')
+            .allMatches(text)
+            .length;
 
     // Match RTL text (Arabic, Hebrew, etc.)
     final rtlText = text.replaceAllMapped(
-      RegExp(r'[^\u0600-\u06FF\u0590-\u05FF\s]'),
-      (match) => ''
-    );
+        RegExp(r'[^\u0600-\u06FF\u0590-\u05FF\s]'), (match) => '');
     final rtlWords = _countRtlWords(rtlText);
 
     // Process remaining text (mainly Latin-based)
     final remainingText = text
-      .replaceAll(RegExp(r'[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]'), ' ')
-      .replaceAll(RegExp(r'[\u0600-\u06FF\u0590-\u05FF]'), ' ');
-    
+        .replaceAll(
+            RegExp(r'[\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]'),
+            ' ')
+        .replaceAll(RegExp(r'[\u0600-\u06FF\u0590-\u05FF]'), ' ');
+
     final otherWords = remainingText.trim().isEmpty
-      ? 0
-      : remainingText.trim().split(RegExp(r'\s+')).length;
+        ? 0
+        : remainingText.trim().split(RegExp(r'\s+')).length;
 
     return cjkChars + rtlWords + otherWords;
   }
-}
+}
diff --git a/pubspec.yaml b/pubspec.yaml
@@ -1,6 +1,6 @@
 name: text_counter
 description: A lightweight Dart utility for counting characters and words in multiple languages including CJK, RTL, and mixed texts.
-version: 0.1.1
+version: 0.1.2
 repository: https://github.com/hexwarrior6/text_counter
 environment:
   sdk: '>=3.0.0 <4.0.0'
diff --git a/test/text_counter_test.dart b/test/text_counter_test.dart
@@ -26,29 +26,38 @@ void main() {
     // --- 英文统计 ---
     test('English (en)', () {
       expect(TextCounter.count("Hello world", languageCode: "en"), equals(2));
-      expect(TextCounter.count("This is a test.", languageCode: "en"), equals(4));
-      expect(TextCounter.count("One   multiple     spaces", languageCode: "en"), equals(3));
+      expect(
+          TextCounter.count("This is a test.", languageCode: "en"), equals(4));
+      expect(TextCounter.count("One   multiple     spaces", languageCode: "en"),
+          equals(3));
     });
 
     // --- 阿拉伯语统计 ---
     test('Arabic (ar)', () {
       expect(TextCounter.count("مرحبا بالعالم", languageCode: "ar"), equals(2));
-      expect(TextCounter.count("كيف حالك اليوم؟", languageCode: "ar"), equals(3));
-      expect(TextCounter.count("السلام عليكم ورحمة الله", languageCode: "ar"), equals(4));
+      expect(
+          TextCounter.count("كيف حالك اليوم؟", languageCode: "ar"), equals(3));
+      expect(TextCounter.count("السلام عليكم ورحمة الله", languageCode: "ar"),
+          equals(4));
     });
 
     // --- 希伯来语统计 ---
     test('Hebrew (he)', () {
       expect(TextCounter.count("שלום עולם", languageCode: "he"), equals(2));
-      expect(TextCounter.count("מה שלומך היום?", languageCode: "he"), equals(3));
+      expect(
+          TextCounter.count("מה שלומך היום?", languageCode: "he"), equals(3));
       expect(TextCounter.count("תודה רבה לך", languageCode: "he"), equals(3));
     });
 
     // --- 自动识别混合文本 ---
     test('Mixed text detection', () {
-      expect(TextCounter.count("Hello 你好 مرحبا こんにちは"), equals(9)); // 1 + 2 + 1 + 1
-      expect(TextCounter.count("The quick brown fox jumps over the lazy dog. 你好吗"), equals(12));
-      expect(TextCounter.count("مرحبا Hello كيف الحال？こんにちは"), equals(10)); // ar + en + ar + ja
+      expect(TextCounter.count("Hello 你好 مرحبا こんにちは"),
+          equals(9)); // 1 + 2 + 1 + 5
+      expect(
+          TextCounter.count("The quick brown fox jumps over the lazy dog. 你好吗"),
+          equals(12));
+      expect(TextCounter.count("مرحبا Hello كيف الحال？こんにちは"),
+          equals(10)); // ar + en + ar + ja
     });
 
     // --- 边界测试 ---
@@ -59,7 +68,8 @@ void main() {
       expect(TextCounter.count(".,!@#\$% ^&*()"), equals(2)); // 标点加空格
       expect(TextCounter.count("   Hello   world   "), equals(2)); // 前后空格
       expect(TextCounter.count("你好,, 世界!!"), equals(6)); // 中文夹杂标点
-      expect(TextCounter.count("שלום־עולם", languageCode: "he"), equals(2)); // 希伯来连接符
+      expect(TextCounter.count("שלום־עולם", languageCode: "he"),
+          equals(2)); // 希伯来连接符
     });
   });
-}
+}