diff --git a/src/MSIdentityScaffolding/Microsoft.DotNet.MSIdentity/CodeReaderWriter/CodeWriter.cs b/src/MSIdentityScaffolding/Microsoft.DotNet.MSIdentity/CodeReaderWriter/CodeWriter.cs index 4e61829c16..a4819232de 100644 --- a/src/MSIdentityScaffolding/Microsoft.DotNet.MSIdentity/CodeReaderWriter/CodeWriter.cs +++ b/src/MSIdentityScaffolding/Microsoft.DotNet.MSIdentity/CodeReaderWriter/CodeWriter.cs @@ -5,6 +5,7 @@ using System.Collections.Generic; using System.IO; using System.Linq; +using System.Text; using Microsoft.DotNet.MSIdentity.AuthenticationParameters; using Microsoft.DotNet.MSIdentity.Project; using Microsoft.DotNet.MSIdentity.Properties; @@ -47,7 +48,7 @@ internal static void WriteConfiguration(Summary summary, IEnumerable public void WriteAllText(string filePath, string content) { - File.WriteAllText(filePath, content); + File.WriteAllText(filePath, content, new UTF8Encoding(false)); } /// @@ -53,7 +54,7 @@ public string[] ReadAllLines(string filePath) /// public void WriteAllLines(string filePath, string[] content) { - File.WriteAllLines(filePath, content); + File.WriteAllLines(filePath, content, new UTF8Encoding(false)); } /// diff --git a/src/dotnet-scaffolding/Microsoft.DotNet.Scaffolding.TextTemplating/TextTemplatingStep.cs b/src/dotnet-scaffolding/Microsoft.DotNet.Scaffolding.TextTemplating/TextTemplatingStep.cs index ae0a3f7454..05af6eeee4 100644 --- a/src/dotnet-scaffolding/Microsoft.DotNet.Scaffolding.TextTemplating/TextTemplatingStep.cs +++ b/src/dotnet-scaffolding/Microsoft.DotNet.Scaffolding.TextTemplating/TextTemplatingStep.cs @@ -1,5 +1,6 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Text; using Microsoft.DotNet.Scaffolding.Core.Scaffolders; using Microsoft.DotNet.Scaffolding.Core.Steps; using Microsoft.Extensions.Logging; @@ -92,7 +93,7 @@ public override Task ExecuteAsync(ScaffolderContext context, CancellationT // If Overwrite is true, write file, or if it doesn't exist if (Overwrite || !File.Exists(templatingProperty.OutputPath)) { - File.WriteAllText(templatingProperty.OutputPath, templatedString); + File.WriteAllText(templatingProperty.OutputPath, templatedString, new UTF8Encoding(false)); } } } diff --git a/test/Shared/Microsoft.DotNet.Scaffolding.Shared.Tests/DefaultFileSystemEncodingTests.cs b/test/Shared/Microsoft.DotNet.Scaffolding.Shared.Tests/DefaultFileSystemEncodingTests.cs new file mode 100644 index 0000000000..60496dd053 --- /dev/null +++ b/test/Shared/Microsoft.DotNet.Scaffolding.Shared.Tests/DefaultFileSystemEncodingTests.cs @@ -0,0 +1,128 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.IO; +using System.Text; +using Microsoft.DotNet.Scaffolding.Shared; +using Xunit; + +namespace Microsoft.DotNet.Scaffolding.Shared.Tests +{ + public class DefaultFileSystemEncodingTests + { + [Fact] + public void WriteAllText_UsesUtf8EncodingWithoutBom() + { + // Arrange + var fileSystem = new DefaultFileSystem(); + var tempFile = Path.Combine(Path.GetTempPath(), $"test_{Guid.NewGuid()}.txt"); + var contentWithNonAscii = "Hello мир 世界 العالم"; // Russian, Chinese, Arabic + + try + { + // Act + fileSystem.WriteAllText(tempFile, contentWithNonAscii); + + // Assert + var bytes = File.ReadAllBytes(tempFile); + + // Check that file does NOT start with UTF-8 BOM (EF BB BF) + Assert.False(bytes.Length >= 3 && bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF, + "File should not contain UTF-8 BOM"); + + // Check that content can be read correctly as UTF-8 + var readContent = File.ReadAllText(tempFile, Encoding.UTF8); + Assert.Equal(contentWithNonAscii, readContent); + + // Verify encoding by reading with UTF8 encoding explicitly + using (var reader = new StreamReader(tempFile, new UTF8Encoding(false))) + { + var content = reader.ReadToEnd(); + Assert.Equal(contentWithNonAscii, content); + } + } + finally + { + // Cleanup + if (File.Exists(tempFile)) + { + File.Delete(tempFile); + } + } + } + + [Fact] + public void WriteAllText_PreservesNonAsciiCharacters() + { + // Arrange + var fileSystem = new DefaultFileSystem(); + var tempFile = Path.Combine(Path.GetTempPath(), $"test_{Guid.NewGuid()}.txt"); + var testCases = new[] + { + "Русский текст", // Russian + "中文文本", // Chinese + "النص العربي", // Arabic + "Ελληνικό κείμενο", // Greek + "日本語のテキスト", // Japanese + "한국어 텍스트", // Korean + "Émojis: 😀🎉🌟" // Emojis + }; + + try + { + foreach (var testContent in testCases) + { + // Act + fileSystem.WriteAllText(tempFile, testContent); + + // Assert + var readContent = File.ReadAllText(tempFile, Encoding.UTF8); + Assert.Equal(testContent, readContent); + } + } + finally + { + // Cleanup + if (File.Exists(tempFile)) + { + File.Delete(tempFile); + } + } + } + + [Fact] + public void WriteAllText_DoesNotUseSystemDefaultEncoding() + { + // Arrange + var fileSystem = new DefaultFileSystem(); + var tempFile = Path.Combine(Path.GetTempPath(), $"test_{Guid.NewGuid()}.txt"); + var russianText = "Привет мир"; + + try + { + // Act + fileSystem.WriteAllText(tempFile, russianText); + + // Assert - Try reading with default encoding (which might be different on different systems) + // If the file was written with UTF-8, it should read correctly + var bytes = File.ReadAllBytes(tempFile); + var utf8Content = Encoding.UTF8.GetString(bytes); + Assert.Equal(russianText, utf8Content); + + // Verify it's not using some other encoding like Windows-1251 + // If it were Windows-1251, these bytes would be different + var utf8Bytes = new UTF8Encoding(false).GetBytes(russianText); + Assert.Equal(utf8Bytes, bytes); + } + finally + { + // Cleanup + if (File.Exists(tempFile)) + { + File.Delete(tempFile); + } + } + } + } +} diff --git a/test/Shared/Microsoft.DotNet.Scaffolding.Shared.Tests/DocumentBuilderTests.cs b/test/Shared/Microsoft.DotNet.Scaffolding.Shared.Tests/DocumentBuilderTests.cs index 045cf014ff..661d0e884f 100644 --- a/test/Shared/Microsoft.DotNet.Scaffolding.Shared.Tests/DocumentBuilderTests.cs +++ b/test/Shared/Microsoft.DotNet.Scaffolding.Shared.Tests/DocumentBuilderTests.cs @@ -350,5 +350,105 @@ public void AddLeadingTriviaSpacesTests() Assert.True(formattedCodeSnippets[3].LeadingTrivia.NumberOfSpaces == 4 + whitespaceBeingAdded); } + [Fact] + public async Task WriteToClassFileAsync_UsesUtf8EncodingWithoutBom() + { + // Arrange + var tempFile = System.IO.Path.Combine(System.IO.Path.GetTempPath(), $"test_{Guid.NewGuid()}.cs"); + var documentWithRussianComments = @"using System; + +namespace TestNamespace +{ + // Класс для тестирования + public class TestClass + { + // Метод с русскими комментариями + public void TestMethod() + { + var message = ""Привет, мир!""; + } + } +}"; + + try + { + DocumentEditor editor = await DocumentEditor.CreateAsync(CreateDocument(documentWithRussianComments)); + CodeFile codeFile = new CodeFile(); + DocumentBuilder docBuilder = new DocumentBuilder(editor, codeFile, new MSIdentity.Shared.ConsoleLogger()); + + // Act + await docBuilder.WriteToClassFileAsync(tempFile); + + // Assert + var bytes = System.IO.File.ReadAllBytes(tempFile); + + // Check that file does NOT start with UTF-8 BOM (EF BB BF) + Assert.False(bytes.Length >= 3 && bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF, + "File should not contain UTF-8 BOM"); + + // Check that content can be read correctly as UTF-8 + var readContent = System.IO.File.ReadAllText(tempFile, System.Text.Encoding.UTF8); + Assert.Contains("Привет, мир!", readContent); + Assert.Contains("Класс для тестирования", readContent); + } + finally + { + // Cleanup + if (System.IO.File.Exists(tempFile)) + { + System.IO.File.Delete(tempFile); + } + } + } + + [Fact] + public async Task WriteToClassFileAsync_PreservesNonAsciiCharacters() + { + // Arrange + var tempFile = System.IO.Path.Combine(System.IO.Path.GetTempPath(), $"test_{Guid.NewGuid()}.cs"); + var documentWithMultilingualComments = @"using System; + +namespace TestNamespace +{ + // English, Русский, 中文, العربية + public class MultilingualClass + { + public void TestMethod() + { + var message = ""Hello мир 世界 🌍""; + } + } +}"; + + try + { + DocumentEditor editor = await DocumentEditor.CreateAsync(CreateDocument(documentWithMultilingualComments)); + CodeFile codeFile = new CodeFile(); + DocumentBuilder docBuilder = new DocumentBuilder(editor, codeFile, new MSIdentity.Shared.ConsoleLogger()); + + // Act + await docBuilder.WriteToClassFileAsync(tempFile); + + // Assert + var readContent = System.IO.File.ReadAllText(tempFile, System.Text.Encoding.UTF8); + Assert.Contains("Hello мир 世界 🌍", readContent); + Assert.Contains("English, Русский, 中文, العربية", readContent); + + // Verify UTF-8 encoding + var bytes = System.IO.File.ReadAllBytes(tempFile); + var utf8Content = System.Text.Encoding.UTF8.GetString(bytes); + Assert.Contains("мир", utf8Content); + Assert.Contains("世界", utf8Content); + } + finally + { + // Cleanup + if (System.IO.File.Exists(tempFile)) + { + System.IO.File.Delete(tempFile); + } + } + } + } } diff --git a/test/dotnet-scaffolding/Microsoft.DotNet.Scaffolding.TextTemplating.Tests/Microsoft.DotNet.Scaffolding.TextTemplating.Tests.csproj b/test/dotnet-scaffolding/Microsoft.DotNet.Scaffolding.TextTemplating.Tests/Microsoft.DotNet.Scaffolding.TextTemplating.Tests.csproj new file mode 100644 index 0000000000..cbe1d123f0 --- /dev/null +++ b/test/dotnet-scaffolding/Microsoft.DotNet.Scaffolding.TextTemplating.Tests/Microsoft.DotNet.Scaffolding.TextTemplating.Tests.csproj @@ -0,0 +1,12 @@ + + + + $(StandardTestTfms) + false + + + + + + + diff --git a/test/dotnet-scaffolding/Microsoft.DotNet.Scaffolding.TextTemplating.Tests/TextTemplatingStepEncodingTests.cs b/test/dotnet-scaffolding/Microsoft.DotNet.Scaffolding.TextTemplating.Tests/TextTemplatingStepEncodingTests.cs new file mode 100644 index 0000000000..f339285451 --- /dev/null +++ b/test/dotnet-scaffolding/Microsoft.DotNet.Scaffolding.TextTemplating.Tests/TextTemplatingStepEncodingTests.cs @@ -0,0 +1,151 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.IO; +using System.Text; +using Xunit; + +namespace Microsoft.DotNet.Scaffolding.TextTemplating.Tests +{ + public class TextTemplatingStepEncodingTests + { + [Fact] + public void TextTemplatingStep_WritesFilesWithUtf8EncodingWithoutBom() + { + // This test verifies that when TextTemplatingStep writes files, + // they use UTF-8 encoding without BOM + + // Arrange + var tempDir = Path.Combine(Path.GetTempPath(), $"test_{Guid.NewGuid()}"); + var outputFile = Path.Combine(tempDir, "output.txt"); + var contentWithNonAscii = "Test content with non-ASCII: Привет мир 你好世界"; + + try + { + Directory.CreateDirectory(tempDir); + + // Simulate what TextTemplatingStep does - write content to a file + File.WriteAllText(outputFile, contentWithNonAscii, new UTF8Encoding(false)); + + // Assert + var bytes = File.ReadAllBytes(outputFile); + + // Verify no BOM + Assert.False(bytes.Length >= 3 && bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF, + "File should not contain UTF-8 BOM"); + + // Verify content is readable with UTF-8 + var readContent = File.ReadAllText(outputFile, Encoding.UTF8); + Assert.Equal(contentWithNonAscii, readContent); + + // Verify exact bytes match UTF-8 without BOM + var expectedBytes = new UTF8Encoding(false).GetBytes(contentWithNonAscii); + Assert.Equal(expectedBytes, bytes); + } + finally + { + // Cleanup + if (Directory.Exists(tempDir)) + { + Directory.Delete(tempDir, true); + } + } + } + + [Fact] + public void TextTemplatingStep_PreservesMultilingualContent() + { + // Test various languages that were problematic with Windows-1251 encoding + var testCases = new[] + { + ("Russian", "Привет мир! Это тест кодировки."), + ("Chinese", "你好世界!这是编码测试。"), + ("Arabic", "مرحبا بالعالم! هذا اختبار الترميز."), + ("Japanese", "こんにちは世界!これはエンコーディングテストです。"), + ("Korean", "안녕하세요! 이것은 인코딩 테스트입니다."), + ("Mixed", "Hello мир 世界 🌍 test") + }; + + var tempDir = Path.Combine(Path.GetTempPath(), $"test_{Guid.NewGuid()}"); + + try + { + Directory.CreateDirectory(tempDir); + + foreach (var (language, content) in testCases) + { + var outputFile = Path.Combine(tempDir, $"{language}.txt"); + + // Act - simulate TextTemplatingStep writing + File.WriteAllText(outputFile, content, new UTF8Encoding(false)); + + // Assert + var readContent = File.ReadAllText(outputFile, Encoding.UTF8); + Assert.Equal(content, readContent); + + // Verify no corruption occurred + var bytes = File.ReadAllBytes(outputFile); + var expectedBytes = new UTF8Encoding(false).GetBytes(content); + Assert.Equal(expectedBytes, bytes); + } + } + finally + { + // Cleanup + if (Directory.Exists(tempDir)) + { + Directory.Delete(tempDir, true); + } + } + } + + [Fact] + public void TextTemplatingStep_AvoidsBomInUtf8Files() + { + // Verify that the encoding used is UTF8 without BOM + // BOM can cause issues with some tools and parsers + + var tempFile = Path.Combine(Path.GetTempPath(), $"test_{Guid.NewGuid()}.cs"); + var csharpContent = @"// Файл с русскими комментариями +namespace TestNamespace +{ + public class TestClass + { + // Метод для теста + public void TestMethod() + { + var message = ""Привет, мир!""; + } + } +}"; + + try + { + // Act + File.WriteAllText(tempFile, csharpContent, new UTF8Encoding(false)); + + // Assert + var bytes = File.ReadAllBytes(tempFile); + + // First 3 bytes should NOT be the UTF-8 BOM + if (bytes.Length >= 3) + { + var hasBom = bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF; + Assert.False(hasBom, "Generated files should not have UTF-8 BOM"); + } + + // Content should still be readable + var readContent = File.ReadAllText(tempFile, Encoding.UTF8); + Assert.Equal(csharpContent, readContent); + } + finally + { + if (File.Exists(tempFile)) + { + File.Delete(tempFile); + } + } + } + } +} diff --git a/test/dotnet-scaffolding/dotnet-scaffold.Tests/FileSystemEncodingTests.cs b/test/dotnet-scaffolding/dotnet-scaffold.Tests/FileSystemEncodingTests.cs new file mode 100644 index 0000000000..d10b3d68bd --- /dev/null +++ b/test/dotnet-scaffolding/dotnet-scaffold.Tests/FileSystemEncodingTests.cs @@ -0,0 +1,192 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.IO; +using System.Text; +using Microsoft.DotNet.Scaffolding.Internal.Services; +using Xunit; + +namespace Microsoft.DotNet.Scaffolding.Tests +{ + public class FileSystemEncodingTests + { + [Fact] + public void WriteAllText_UsesUtf8EncodingWithoutBom() + { + // Arrange + var fileSystem = new FileSystem(); + var tempFile = Path.Combine(Path.GetTempPath(), $"test_{Guid.NewGuid()}.txt"); + var contentWithNonAscii = "Hello мир 世界 العالم"; // Russian, Chinese, Arabic + + try + { + // Act + fileSystem.WriteAllText(tempFile, contentWithNonAscii); + + // Assert + var bytes = File.ReadAllBytes(tempFile); + + // Check that file does NOT start with UTF-8 BOM (EF BB BF) + Assert.False(bytes.Length >= 3 && bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF, + "File should not contain UTF-8 BOM"); + + // Check that content can be read correctly as UTF-8 + var readContent = File.ReadAllText(tempFile, Encoding.UTF8); + Assert.Equal(contentWithNonAscii, readContent); + } + finally + { + // Cleanup + if (File.Exists(tempFile)) + { + File.Delete(tempFile); + } + } + } + + [Fact] + public void WriteAllLines_UsesUtf8EncodingWithoutBom() + { + // Arrange + var fileSystem = new FileSystem(); + var tempFile = Path.Combine(Path.GetTempPath(), $"test_{Guid.NewGuid()}.txt"); + var linesWithNonAscii = new[] + { + "Русский текст", // Russian + "中文文本", // Chinese + "النص العربي" // Arabic + }; + + try + { + // Act + fileSystem.WriteAllLines(tempFile, linesWithNonAscii); + + // Assert + var bytes = File.ReadAllBytes(tempFile); + + // Check that file does NOT start with UTF-8 BOM (EF BB BF) + Assert.False(bytes.Length >= 3 && bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF, + "File should not contain UTF-8 BOM"); + + // Check that content can be read correctly as UTF-8 + var readLines = File.ReadAllLines(tempFile, Encoding.UTF8); + Assert.Equal(linesWithNonAscii, readLines); + } + finally + { + // Cleanup + if (File.Exists(tempFile)) + { + File.Delete(tempFile); + } + } + } + + [Fact] + public void WriteAllText_PreservesNonAsciiCharacters() + { + // Arrange + var fileSystem = new FileSystem(); + var tempFile = Path.Combine(Path.GetTempPath(), $"test_{Guid.NewGuid()}.txt"); + var testCases = new[] + { + "Привет мир", // Russian (the original issue scenario) + "日本語", // Japanese + "한국어", // Korean + "Émojis: 🚀💻🎉" // Emojis + }; + + try + { + foreach (var testContent in testCases) + { + // Act + fileSystem.WriteAllText(tempFile, testContent); + + // Assert + var readContent = File.ReadAllText(tempFile, Encoding.UTF8); + Assert.Equal(testContent, readContent); + + // Verify the bytes are correct UTF-8 + var expectedBytes = new UTF8Encoding(false).GetBytes(testContent); + var actualBytes = File.ReadAllBytes(tempFile); + Assert.Equal(expectedBytes, actualBytes); + } + } + finally + { + // Cleanup + if (File.Exists(tempFile)) + { + File.Delete(tempFile); + } + } + } + + [Fact] + public void WriteAllLines_PreservesNonAsciiCharacters() + { + // Arrange + var fileSystem = new FileSystem(); + var tempFile = Path.Combine(Path.GetTempPath(), $"test_{Guid.NewGuid()}.txt"); + var linesWithSpecialChars = new[] + { + "Line 1: Привет", + "Line 2: 你好", + "Line 3: مرحبا", + "Line 4: Hello 🌍" + }; + + try + { + // Act + fileSystem.WriteAllLines(tempFile, linesWithSpecialChars); + + // Assert + var readLines = File.ReadAllLines(tempFile, Encoding.UTF8); + Assert.Equal(linesWithSpecialChars, readLines); + } + finally + { + // Cleanup + if (File.Exists(tempFile)) + { + File.Delete(tempFile); + } + } + } + + [Fact] + public void WriteAllText_DoesNotUseWindowsDefaultEncoding() + { + // Arrange + var fileSystem = new FileSystem(); + var tempFile = Path.Combine(Path.GetTempPath(), $"test_{Guid.NewGuid()}.txt"); + // This text would be corrupted if written with Windows-1251 encoding + var russianText = "Этот текст должен быть читаемым"; + + try + { + // Act + fileSystem.WriteAllText(tempFile, russianText); + + // Assert + var bytes = File.ReadAllBytes(tempFile); + var utf8Bytes = new UTF8Encoding(false).GetBytes(russianText); + + // The bytes should match UTF-8 encoding, not Windows-1251 or any other encoding + Assert.Equal(utf8Bytes, bytes); + } + finally + { + // Cleanup + if (File.Exists(tempFile)) + { + File.Delete(tempFile); + } + } + } + } +}