-
Notifications
You must be signed in to change notification settings - Fork 5.1k
Add Compliance tests for GB18030-2022 #118075
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
5d3733b
2d68cfc
570c9b4
e281e13
b7af2dc
3fd1b0f
bd9aeff
c5bd64c
0b6baf1
126ff40
ba384e5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <TargetFrameworks>$(NetCoreAppCurrent);$(NetFrameworkCurrent)</TargetFrameworks>
    <Nullable>enable</Nullable>
    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
    <IncludeRemoteExecutor>true</IncludeRemoteExecutor>
    <!-- Selects the UCD folder of the UnicodeData package that is embedded below. -->
    <UnicodeUcdVersion>16.0</UnicodeUcdVersion>
  </PropertyGroup>

  <!-- Test sources, plus shared helpers linked from other test projects. -->
  <ItemGroup>
    <Compile Include="GB18030\TestHelper.cs" />
    <Compile Include="GB18030\Tests\CharTests.cs" />
    <Compile Include="GB18030\Tests\CharUnicodeInfoTests.cs" />
    <Compile Include="GB18030\Tests\ConsoleTests.cs" />
    <Compile Include="GB18030\Tests\DirectoryInfoTests.cs" />
    <Compile Include="GB18030\Tests\DirectoryTestBase.cs" />
    <Compile Include="GB18030\Tests\DirectoryTests.cs" />
    <Compile Include="GB18030\Tests\EncodingTests.cs" />
    <Compile Include="GB18030\Tests\FileInfoTests.cs" />
    <Compile Include="GB18030\Tests\FileTestBase.cs" />
    <Compile Include="GB18030\Tests\FileTests.cs" />
    <Compile Include="GB18030\Tests\RegexTests.cs" />
    <Compile Include="GB18030\Tests\StringTests.cs" />
    <Compile Include="$(LibrariesProjectRoot)System.Text.RegularExpressions\tests\FunctionalTests\Regex.Tests.Common.cs" Link="System\Text\RegularExpressions\Tests\Regex.Tests.Common.cs" />
    <Compile Include="$(LibrariesProjectRoot)System.Runtime\tests\System.Globalization.Tests\System\Globalization\CharUnicodeInfoTestData.cs" Link="System\Globalization\Tests\CharUnicodeInfoTestData.cs" />
  </ItemGroup>

  <!-- .NETCoreApp only: Regex generator helper and the generator project referenced as an analyzer. -->
  <ItemGroup Condition="'$(TargetFrameworkIdentifier)' == '.NETCoreApp'">
    <Compile Include="$(LibrariesProjectRoot)System.Text.RegularExpressions\tests\FunctionalTests\RegexGeneratorHelper.netcoreapp.cs" Link="System\Text\RegularExpressions\Tests\RegexGeneratorHelper.netcoreapp.cs" />
    <PackageReference Include="Microsoft.CodeAnalysis.CSharp.Workspaces" Version="$(MicrosoftCodeAnalysisVersion)" PrivateAssets="all" />
    <ProjectReference Include="$(LibrariesProjectRoot)System.Text.RegularExpressions\gen\System.Text.RegularExpressions.Generator.csproj" SetTargetFramework="TargetFramework=netstandard2.0" OutputItemType="Analyzer" ReferenceOutputAssembly="true" />
  </ItemGroup>

  <!-- Every other target framework: netfx Regex helper plus sources and references compiled in for that target. -->
  <ItemGroup Condition="'$(TargetFrameworkIdentifier)' != '.NETCoreApp'">
    <Compile Include="$(CoreLibSharedDir)System\Diagnostics\CodeAnalysis\StringSyntaxAttribute.cs" />
    <Compile Include="$(LibrariesProjectRoot)System.Text.RegularExpressions\tests\FunctionalTests\RegexGeneratorHelper.netfx.cs" />
    <ProjectReference Include="$(LibrariesProjectRoot)System.Text.Encodings.Web\src\System.Text.Encodings.Web.csproj" />
  </ItemGroup>

  <!-- Encoded test data is read from disk at run time, so it is copied next to the test assembly.
       PreserveNewest copies only when the source is newer, keeping incremental builds up to date. -->
  <ItemGroup>
    <None Include="GB18030\Level3+Amendment_Test_Data_for_Mid_to_High_Volume_cases.txt" CopyToOutputDirectory="PreserveNewest" />
  </ItemGroup>

  <!-- UnicodeData.txt from the UnicodeData package, embedded under the logical name UnicodeData.txt. -->
  <ItemGroup>
    <PackageReference Include="System.Private.Runtime.UnicodeData" Version="$(SystemPrivateRuntimeUnicodeDataVersion)" ExcludeAssets="contentFiles" GeneratePathProperty="true" />
    <EmbeddedResource Include="$(PkgSystem_Private_Runtime_UnicodeData)\contentFiles\any\any\$(UnicodeUcdVersion).0\ucd\UnicodeData.txt">
      <Link>CharUnicodeInfo\UnicodeData.$(UnicodeUcdVersion).txt</Link>
      <LogicalName>UnicodeData.txt</LogicalName>
    </EmbeddedResource>
  </ItemGroup>

</Project>
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
Short sample strings for inputs with length limitation: | ||
‚50°¦˜8á7G9ï0A˜9°4D•4Î6¢¨˜8•5C@™2è3˜5ó79‹2ªÚš7ø6š4„2‚5‡9©J™9ø69˜9ž7ÇW‚5–7™48 | ||
××™8ð0Q‚5’2˜9Ü6d9ï6™1Ã4˜6Ë4CæF˜9´0——˜5ó8£¸™9ø9gš7ø99 5˜9ž8Cš4„9©O‚5ˆ0•2Œ2¡x‚5–8 | ||
ûœ™3Ô8c™9ï0dˆÒ™2è3—9«0¦ÁaÜܘ9´1‚55‚3þ9˜9ž99•4ªÙ˜5ó9š4…9Gš7ù9©S‚5ˆ1A‚5–9™9ù9˜7Ø8 | ||
‚39d˜9³6‚5‘9—8þ2ßo™9’0C˜9µ8˜9Ø7¨¯˜6¬5Ñј5ô0˜9Ÿ0ª¯©P‚5ˆ295––‚5—0sš8’0š4—0š09 | ||
˜9‘1ýM‚5‡45™9÷0žž™2£9r˜9²6òò‚52˜5ó2øÈ˜5ô1©I˜9Ÿ19›4£Úš8¨9˜8¼4š0–9‚5ˆ3š4¯0a | ||
A‚59™9Á0©–r˜8ö1G•8À1˜6¤1ëO˜9ª5§™3þ3£Ï‚5ˆ4˜9ž79–1˜5ô2‚0ƒ2š4¶9À¡¯þš8³0š0 0‚5—2 | ||
D˜6À4þŸ•8Û2‚5’1Áׂ0§3N˜9±4D™8¿3™3ð8©K˜9ž8š0£9¢ç9•3þþ‚5ˆ5™3þ2‚5—3˜5ô32š4À0š8À7 | ||
ÅùÑ ‚1ƒ4A™3ú6‚5’0˜9£6D’@™1÷76˜5ô4–1Ž4¨¯©R9’2§ ˜9ž9š4Ï9š8Å9š0ª4‚5—4™7‘0G‚5ˆ6˜7–2 | ||
ÌS‚5‘8εPšþ‚1¾5O—0»8¦¸˜8”4v˜90™3¶0š4Ö6q˜9Ÿ09ž7¡@˜5ô5™1”8š8Ì5š0¯0™4Ñ3©–‚5—5‚5ˆ7 | ||
‚56Ø·ª‚1Á3˜9’50 þ™0™2™3¬9˜9´9D˜0À8©M˜9Ÿ1ø¡£¶›1£7˜5ó8š7ø09˜1š0¸7‚5˜2‚5ˆ8™4‚0 | ||
|
||
Long sample strings for inputs with extended length limitation£º | ||
Group0: | ||
‚5‘9‚5’2°¡÷þ™1Ã4˜9‘1ª@þO9î9‚5‡88e•2‚6˜5ó6@ þ˜9¶2™3þ3©•©–™48™9÷0˜9Ÿ8˜9µ9rP˜5ó7˜5ô5™9ø6š0¸7ªÆªÇªÈU1øÇøÈøÙU2¡x¡y¡zU3š7ø6›1£7¢Ù¨é˜9ž7˜9Ÿ19‹29 5‚5–7‚5˜2‚5‡9‚5ˆ8š4„2š7ø0˜5÷8˜9ž66n | ||
|
||
Group1: | ||
°¡°¦™1Ã4•4Î60u9î99ï6A˜9¡3˜9¡9ªHýˆDEG•2‚6•2ƒ1¢§¨¯˜7Ø8˜9‚7i˜5ó7˜5ó8‚53‚57™48™9ö3™9Ì28‚5–7‚5–8˜9¶2˜9¶5˜9¶7©J©O˜9ž7˜9ž8™9ø6™9ø7š0‰7š0‰5‚5‡9‚5ˆ0š4„2š4Œ4š4˜5š4˜6š7ø6š7ø7š7ý8š7ý99‹29–8ª¡ª¢U1ø¡ø¢U2¡@¡A¡BU3 | ||
|
||
Group2: | ||
°©°¯÷ý˜9‘1˜8â5JŠ‹ˆW9ï7‚1 4‚2§1ª@«ž˜8á7˜88˜8â58A™3þ0™3í7™3â0˜8º9˜9˜6˜8ö1FOCš82š83š8Š7š8Ÿ7•2ƒ4•2ƒ5–7×0¢Ã£ë˜5ó9˜5ô0˜9ž9˜9Ÿ0š05š06š0™1š0™2š0™5©Q‚5–9‚5—09Œ09’9š4™6š4¡9š4¢7š4«2š4«3š4«4ªÆªÇªÈU1øÆøÇøÈU2¡x¡y¡zU3‚5ˆ1‚5ˆ2˜9³6˜9²3˜9ª3™9Ì6™9ž6™8Ä0 | ||
|
||
Group3: | ||
˜8á7™3¦0½¢÷þDŒÅ‡Ö‡åˆŠ‡Û¬@¬Aþ“¬BêŠcz‚1—5‚1—9‚2§0˜9²6˜91˜9µ8ao‚5‘9‚59¨»£Ñ–7×4–7×8š0›9š0£2š0£3š0¨5™3ï0™3Õ8©P©R©•©–˜9Ÿ1˜9ž7˜5ô1˜5ô2™8ñ0™7Ý0š4«6š4«7š4«8š4«9«¡«³U1ùÐùÑùÒU2¢`¢a¢bU3š86š87š8•3š8—3š8¹4E9z˜8¼1˜8¼4˜8Ì2‚5—1‚5—29–09™8‚5ˆ3‚5ˆ4C | ||
|
||
Group4: | ||
‚58‚50½ª½½®½¯™3Ô8˜9ƒ6¬H¬J¬L¬O50R˜8õ6˜8ã2˜8ã5˜0ñ0˜5Ÿ9˜5ó2˜9´0˜9³6˜9²38U5‚1ö3‚3 0‚4ƒ0™2¿8™3Á0™3“9˜5ô3˜5ô4©S©ˆ˜9Ÿ0˜9Ÿ18U5š0©0š0©1š0©4š0©5™7Ü9™7¶3™7ñ3«ð«ñ«òU1úÀúÁúÂU2£@£A£BU3‚5—3‚5—49š09›9‚5ˆ5‚5ˆ6¢ø£´¦Îš4Á3š4Á9š4à3š4ð6š8Á5š8·0š8Ë8š8Ë1ƒò†¿‡ß | ||
|
||
Group5: | ||
À€ÃQÀFô™˜9³6—8ê6‚55‚56‚4¡5‚4¡6‚4¡78bبأؤإ˜6Ç6˜6‰5b˜9³6˜9ª3˜9µ8BDFG™6Î0™6Ÿ0š8Ñ9š8Ö0š9¸3š8Ü4š0ª5š0ª6š0ª7š0ª8‚5—5‚5—6˜5ó3˜2Ð5¡Å¡È˜5ô5˜5ó7‚5ˆ7‚5ˆ89œ090˜9ž7˜9ž9¬¥¬¦¬§¬¸U1ú×úØúÙU2£”£•£–U3™3’5™3¥6©M©•©–š4ñ7š4ñ858b | ||
|
||
Group6: | ||
‚5‡19õ9‚0é0‚5‘5‚51ثجå¢å£˜5ž6˜5ž9£Ç¦¸ÁjÀÁØX˜8±0˜8ö1˜9„0˜9Ž70wL™5ù2™6«8˜91˜9ª3˜9°4š5Ó4š5Ó5š5Ý6š5í8™2â3™2á9˜9Ÿ0˜9ž8š8à5š8à6š8à7š8à8š8à9š0«1š0«2š0©0š0 3A‚5—7‚5—8˜87˜69¬Ð¬á¬ò£U1û°ûÁûÒU2¥P¥a¥rU3‚5‡9‚5ˆ2©T¨“˜5ó8˜5ô0g99ž19ž9HJLMŠ} | ||
|
||
Group7: | ||
‚58‚52å¤å¥å¦å¨å©å«˜8é6˜8±0pW5‚2¦2‚1ˆ0‚0 3‚4¡1šð‹šôšõ‹¼˜6Ï4˜8ü6˜8å5˜8ƒ4@pU‚2§2–7×9–7×1–7×3¨§¦Ã£ô©J©N˜9ª3˜9°4˜9µ8™4°0™4¯9™4‚1ñRø™ýRéMé]˜5ô1˜5ô2š0¯3š0³3š0³4š0³8e˜9Ÿ0˜9ž9š8å3š8å4š8ø4š8ü1‚5–9‚5—7‚5ˆ0‚5ˆ59Ÿ09Ÿ6š6†2š6†3š6†4š60š6˜7š6˜8™2²0™2¡0™2—5®À®ÔU1üÔüÕüÖU2¦q¦r¦sU3 | ||
|
||
Group8: | ||
¸@®f¯ŸÑYãÆàïâ¬à˜4¶1™3¬5‚5‘7‚5‘6˜8º9˜8Ô3yG‚0±3‚0Õ2‚0”8‚0‰7A•2–7•2–8¢ò£¹£Á£Ú˜9²3˜9²6˜9°4˜9µ8y0Rš0´2š0´3š0µ1š0µ2šøšúšûšý ð™2„0™2„1™2ˆ3™2‹1©J©S˜5ó8˜5ô5˜9Ÿ0˜9ž9‚5—9‚5˜0¯Á¯Â¯Ã¯ÄU1ý©ýªý«ý¬ýU2¦‡¦ˆ¦‰¦Š¦‹U3‚5ˆ7‚5ˆ8™4‚7™4‚8™4‚9y0Rš6²2š6º2š6¾1š6¾2š6Ç6š8ü8š8þ9š9”3š9™2š9£39Ÿ59ž3 | ||
|
||
Group9: | ||
ñ ò ÷ ø‚0Å0‚0Å4‚0Å5QaëOþLþMA3c‚5’2‚5‘8÷ò÷ó÷ô÷ö÷÷•3›5•3Ž6•3…8—1ø7˜8ƒ4˜9´0˜9ª3˜9°4˜9µ8V9™4ˆ3™4ˆ6™4ˆ7©R©ˆš0¯8š0µ6š0µ7š0µ8˜9Ÿ0˜9Ÿ1š6ñ3š6õ7š6ù5š74š7‡6£Ø¦Â¨§˜5ô5˜5ô1š9Õ0š9·9š9¾8š9¾9š9Ó4˜7Ø8˜69˜8Ë0˜9‚6m09›59™1‚5˜1‚5˜2™2ƒ9™1¦7¯é¯ê¯ëU1þªþ«þ¬U2¦›¦œ¦U3‚5‡9‚5ˆ0 | ||
|
||
Group10: | ||
‚5’0‚5’1÷ø÷ù÷ú÷û™2Œ4•5þ4þZþwþ’ªNþž‚2§2‚1—2‚17a4™1£9™0ó9™0Ä9˜9²6˜91˜9°4˜9µ8–7×9–7Ø0–7×3–7Ø1£ç¢Ç¨±„1ƒ0˜9ž8˜8â5˜9ƒ6˜8Ì2‚5ˆ5‚5ˆ8‚5–8‚5˜0™4¦1™4¦2™4¦4©J©„˜5ô3˜5ô5š0¸3š0¸4š0¸5š0Ÿ4š0¸7 © ú û üF›1£2›1£3›1£4›1£5›1£6›1£7š7÷6š7÷7š7÷8š7÷9š7ø09“19“2®É®Ê®ËU1þÙþÚþÛþÜU2§u§v§wU3iv |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
|
||
using System; | ||
using System.Collections.Generic; | ||
using System.Globalization; | ||
using System.Globalization.Tests; | ||
using System.IO; | ||
using System.Linq; | ||
using System.Text; | ||
using Xunit; | ||
|
||
namespace GB18030.Tests; | ||
|
||
/// <summary>
/// Shared code-point ranges, cultures, comparison options and encoded/decoded sample data
/// used by the GB18030 compliance tests.
/// </summary>
public static class TestHelper
{
    // New Code Points in existing ranges
    internal static IEnumerable<int> CjkNewCodePoints { get; } = CreateRange(0x9FF0, 0x9FFF);
    internal static IEnumerable<int> CjkExtensionANewCodePoints { get; } = CreateRange(0x4DB6, 0x4DBF);
    internal static IEnumerable<int> CjkExtensionBNewCodePoints { get; } = CreateRange(0x2A6D7, 0x2A6DF);
    internal static IEnumerable<int> CjkExtensionCNewCodePoints { get; } = CreateRange(0x2B735, 0x2B739);

    // New ranges
    internal static IEnumerable<int> CjkExtensionG { get; } = CreateRange(0x30000, 0x3134A);
    internal static IEnumerable<int> CjkExtensionH { get; } = CreateRange(0x31350, 0x323AF);
    internal static IEnumerable<int> CjkExtensionI { get; } = CreateRange(0x2EBF0, 0x2EE5D);

    /// <summary>Returns every integer from <paramref name="first"/> to <paramref name="last"/>, both inclusive.</summary>
    private static IEnumerable<int> CreateRange(int first, int last) => Enumerable.Range(first, last - first + 1);

    private static readonly IEnumerable<CharUnicodeInfoTestCase> s_gb18030CharUnicodeInfo = GetGB18030CharUnicodeInfo();

    /// <summary>
    /// Filters the shared CharUnicodeInfo test cases down to the code points covered by the ranges
    /// declared above and verifies that the expected number of cases was found.
    /// </summary>
    private static IEnumerable<CharUnicodeInfoTestCase> GetGB18030CharUnicodeInfo()
    {
        const int CodePointsTotal = 9793; // Make sure a Unicode version downgrade doesn't make us lose coverage.

        // Materialize once: the result is counted here and enumerated again by every theory that uses it.
        CharUnicodeInfoTestCase[] ret = CharUnicodeInfoTestData.TestCases
            .Where(tc => IsInGB18030Range(tc.CodePoint))
            .ToArray();
        Assert.Equal(CodePointsTotal, ret.Length);
        return ret;

        // Same bounds as the CreateRange calls above.
        static bool IsInGB18030Range(int codePoint) => codePoint is
            (>= 0x9FF0 and <= 0x9FFF) or
            (>= 0x4DB6 and <= 0x4DBF) or
            (>= 0x2A6D7 and <= 0x2A6DF) or
            (>= 0x2B735 and <= 0x2B739) or
            (>= 0x30000 and <= 0x3134A) or
            (>= 0x31350 and <= 0x323AF) or
            (>= 0x2EBF0 and <= 0x2EE5D);
    }

    internal static CultureInfo[] Cultures { get; } = [
        CultureInfo.CurrentCulture,
        CultureInfo.InvariantCulture,
        new CultureInfo("zh-CN")];

    internal static CompareOptions[] CompareOptions { get; } = [
        System.Globalization.CompareOptions.None,
        System.Globalization.CompareOptions.IgnoreCase];

    internal static StringComparison[] NonOrdinalStringComparisons { get; } = [
        StringComparison.CurrentCulture,
        StringComparison.CurrentCultureIgnoreCase,
        StringComparison.InvariantCulture,
        StringComparison.InvariantCultureIgnoreCase];

    internal static string TestDataFilePath { get; } = Path.Combine(AppContext.BaseDirectory, "GB18030", "Level3+Amendment_Test_Data_for_Mid_to_High_Volume_cases.txt");

    private static Encoding? s_gb18030Encoding;

    /// <summary>
    /// The "gb18030" encoding, resolved on first use. Outside .NET Framework builds the code pages
    /// provider is registered before the lookup.
    /// </summary>
    internal static Encoding GB18030Encoding
    {
        get
        {
            if (s_gb18030Encoding is null)
            {
#if !NETFRAMEWORK
                Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
#endif

                s_gb18030Encoding = Encoding.GetEncoding("gb18030");
            }

            return s_gb18030Encoding;
        }
    }

    private static readonly IEnumerable<byte[]> s_encodedTestData = GetTestData();

    internal static IEnumerable<string> DecodedTestData { get; } = s_encodedTestData.Select(data => GB18030Encoding.GetString(data));

    private static readonly IEnumerable<string> s_splitNewLineDecodedTestData = DecodedTestData.SelectMany(
        data => data.Split([Environment.NewLine], StringSplitOptions.RemoveEmptyEntries));

    /// <summary>
    /// The decoded test data, one entry per line, with any line whose encoded size exceeds the path
    /// segment budget broken into several shorter strings on text-element boundaries.
    /// </summary>
    internal static IEnumerable<string> NonExceedingPathNameMaxDecodedTestData { get; } =
        s_splitNewLineDecodedTestData.SelectMany<string, string>(data => SplitIntoPathSegments(data));

    public static IEnumerable<object[]> EncodedMemberData { get; } = s_encodedTestData.Select(data => new object[] { data });
    public static IEnumerable<object[]> DecodedMemberData { get; } = DecodedTestData.Select(data => new object[] { data });
    public static IEnumerable<object[]> NonExceedingPathNameMaxDecodedMemberData { get; } = NonExceedingPathNameMaxDecodedTestData.Select(data => new object[] { data });
    public static IEnumerable<object[]> GB18030CharUnicodeInfoMemberData { get; } = s_gb18030CharUnicodeInfo.Select(data => new object[] { data });

    /// <summary>
    /// Splits <paramref name="data"/> into pieces whose encoded size is at most 128 bytes, measured as
    /// UTF-16 on Windows and UTF-8 elsewhere. Text elements are never split, so a single text element
    /// larger than the budget is returned as its own piece.
    /// </summary>
    private static IEnumerable<string> SplitIntoPathSegments(string data)
    {
        const int MaxPathSegmentName = 128;
        Encoding fileSystemEncoding = PlatformDetection.IsWindows ? Encoding.Unicode : Encoding.UTF8;

        if (fileSystemEncoding.GetByteCount(data) <= MaxPathSegmentName)
        {
            return [data];
        }

        List<string> result = new();
        StringBuilder current = new();
        int currentByteCount = 0;

        foreach (string element in GetTextElements(data))
        {
            int elementByteCount = fileSystemEncoding.GetByteCount(element);

            // Flush *before* appending so a segment never grows past the budget.
            if (current.Length > 0 && currentByteCount + elementByteCount > MaxPathSegmentName)
            {
                result.Add(current.ToString());
                current.Clear();
                currentByteCount = 0;
            }

            current.Append(element);
            currentByteCount += elementByteCount;
        }

        if (current.Length > 0)
        {
            result.Add(current.ToString());
        }

        return result;
    }

    /// <summary>
    /// Reads the test data file and yields the raw bytes of each section. A section starts after a
    /// colon followed by a new line and ends at the next blank line (or at the end of the file).
    /// A few extra hand-written cases are appended at the end.
    /// </summary>
    private static IEnumerable<byte[]> GetTestData()
    {
        byte[] startDelimiter = GB18030Encoding.GetBytes($":{Environment.NewLine}");
        byte[] endDelimiter = GB18030Encoding.GetBytes($"{Environment.NewLine}{Environment.NewLine}");

        // Instead of inlining the data in source, parse the test data from the file to prevent encoding issues.
        ReadOnlyMemory<byte> testFileBytes = File.ReadAllBytes(TestDataFilePath);

        while (testFileBytes.Length > 0)
        {
            int start = testFileBytes.Span.IndexOf(startDelimiter);
            if (start == -1)
            {
                // Only trailing bytes (e.g. blank lines) remain; there is no further section to read.
                break;
            }

            testFileBytes = testFileBytes.Slice(start + startDelimiter.Length);

            int end = testFileBytes.Span.IndexOf(endDelimiter);
            if (end == -1)
            {
                end = testFileBytes.Length;
            }

            yield return testFileBytes.Slice(0, end).ToArray();

            testFileBytes = testFileBytes.Slice(end);
        }

        // Add a few additional test cases to exercise test correctness.
        yield return GB18030Encoding.GetBytes("aaa");
        yield return GB18030Encoding.GetBytes("abc");
        // U+2B4E7 twice, written with escapes so the source file's own encoding cannot alter it.
        yield return GB18030Encoding.GetBytes("\U0002B4E7\U0002B4E7");
    }

    /// <summary>Enumerates the text elements of <paramref name="input"/> in order.</summary>
    internal static IEnumerable<string> GetTextElements(string input)
    {
        TextElementEnumerator enumerator = StringInfo.GetTextElementEnumerator(input);
        while (enumerator.MoveNext())
        {
            yield return enumerator.GetTextElement();
        }
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
|
||
using System.Globalization.Tests; | ||
using Xunit; | ||
|
||
namespace GB18030.Tests; | ||
|
||
/// <summary>
/// Exercises the <see cref="char"/> conversion, parsing and classification APIs against every
/// test case supplied by <see cref="TestHelper.GB18030CharUnicodeInfoMemberData"/>.
/// </summary>
[SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework)]
public class CharTests
{
    /// <summary>Round-trips the code point through the UTF-32 conversion helpers.</summary>
    [Theory]
    [MemberData(nameof(TestHelper.GB18030CharUnicodeInfoMemberData), MemberType = typeof(TestHelper))]
    public void Convert(CharUnicodeInfoTestCase testCase)
    {
        string roundTripped = char.ConvertFromUtf32(testCase.CodePoint);
        Assert.Equal(testCase.CodePoint, char.ConvertToUtf32(roundTripped, 0));

        string text = testCase.Utf32CodeValue;
        if (!char.IsSurrogate(text[0]))
        {
            return;
        }

        // A leading surrogate means the test case must be exactly one surrogate pair.
        Assert.Equal(2, text.Length);
        Assert.Equal(testCase.CodePoint, char.ConvertToUtf32(text[0], text[1]));
    }

    /// <summary>Single UTF-16 units parse to the code point; longer strings are rejected by TryParse.</summary>
    [Theory]
    [MemberData(nameof(TestHelper.GB18030CharUnicodeInfoMemberData), MemberType = typeof(TestHelper))]
    public void Parse(CharUnicodeInfoTestCase testCase)
    {
        string text = testCase.Utf32CodeValue;
        if (text.Length <= 1)
        {
            char parsed = char.Parse(text);
            Assert.Equal(testCase.CodePoint, parsed);

            bool tryParseResult = char.TryParse(text, out parsed);
            Assert.True(tryParseResult);
            Assert.Equal(testCase.CodePoint, parsed);
        }
        else
        {
            Assert.False(char.TryParse(text, out _));
        }
    }

    /// <summary>Checks the surrogate predicates for both one-unit and two-unit test cases.</summary>
    [Theory]
    [MemberData(nameof(TestHelper.GB18030CharUnicodeInfoMemberData), MemberType = typeof(TestHelper))]
    public void IsSurrogate(CharUnicodeInfoTestCase testCase)
    {
        string text = testCase.Utf32CodeValue;
        if (text.Length <= 1)
        {
            char single = text[0];
            Assert.False(char.IsSurrogate(single));
            Assert.False(char.IsLowSurrogate(single));
            Assert.False(char.IsHighSurrogate(single));
            Assert.False(char.IsSurrogatePair(single, single));
            return;
        }

        Assert.Equal(2, text.Length);
        char highSurrogate = text[0];
        char lowSurrogate = text[1];
        Assert.True(char.IsSurrogate(lowSurrogate));
        Assert.True(char.IsSurrogate(highSurrogate));
        Assert.True(char.IsLowSurrogate(lowSurrogate));
        Assert.True(char.IsHighSurrogate(highSurrogate));
        Assert.True(char.IsSurrogatePair(highSurrogate, lowSurrogate));
    }

    /// <summary>Every test case is expected to classify as a letter.</summary>
    [Theory]
    [MemberData(nameof(TestHelper.GB18030CharUnicodeInfoMemberData), MemberType = typeof(TestHelper))]
    public void IsLetter(CharUnicodeInfoTestCase testCase)
    {
        // NOTE(review): per the PR discussion all code points in the test data are letters,
        // which is why no other Unicode category is handled here - confirm if the data set grows.
        string text = testCase.Utf32CodeValue;
        Assert.True(char.IsLetter(text, 0));
        Assert.True(char.IsLetterOrDigit(text, 0));

        if (text.Length >= 2)
        {
            return;
        }

        // The char-based overloads only apply when the value fits in one UTF-16 unit.
        Assert.True(char.IsLetter(text[0]));
        Assert.True(char.IsLetterOrDigit(text[0]));
    }

    /// <summary>No test case may match any of the non-letter classification predicates.</summary>
    [Theory]
    [MemberData(nameof(TestHelper.GB18030CharUnicodeInfoMemberData), MemberType = typeof(TestHelper))]
    public void IsNonLetter_False(CharUnicodeInfoTestCase testCase)
    {
        string text = testCase.Utf32CodeValue;
        Assert.False(char.IsControl(text, 0));
        Assert.False(char.IsDigit(text, 0));
        Assert.False(char.IsLower(text, 0));
        Assert.False(char.IsNumber(text, 0));
        Assert.False(char.IsPunctuation(text, 0));
        Assert.False(char.IsSeparator(text, 0));
        Assert.False(char.IsSymbol(text, 0));
        Assert.False(char.IsUpper(text, 0));
        Assert.False(char.IsWhiteSpace(text, 0));

        if (text.Length >= 2)
        {
            return;
        }

        // Repeat the checks through the single-char overloads.
        char single = text[0];
#if !NETFRAMEWORK
        Assert.False(char.IsAscii(single));
        Assert.False(char.IsAsciiDigit(single));
        Assert.False(char.IsAsciiHexDigit(single));
        Assert.False(char.IsAsciiHexDigitLower(single));
        Assert.False(char.IsAsciiHexDigitUpper(single));
        Assert.False(char.IsAsciiLetter(single));
        Assert.False(char.IsAsciiLetterOrDigit(single));
        Assert.False(char.IsAsciiLetterLower(single));
        Assert.False(char.IsAsciiLetterUpper(single));
#endif
        Assert.False(char.IsControl(single));
        Assert.False(char.IsDigit(single));
        Assert.False(char.IsLower(single));
        Assert.False(char.IsNumber(single));
        Assert.False(char.IsPunctuation(single));
        Assert.False(char.IsSeparator(single));
        Assert.False(char.IsSymbol(single));
        Assert.False(char.IsUpper(single));
        Assert.False(char.IsWhiteSpace(single));
    }
}
Uh oh!
There was an error while loading. Please reload this page.