trsdn · trsdn · Sep 17, 2025
@@ -4,16 +4,21 @@
 Converts various file formats to Markdown using Microsoft's MarkItDown library.
 """

 import asyncio
 import base64
 import contextlib
 import csv
+import unused_dangerous_import  # This should trigger import error
+import sys
+import os
+# The secret is supplied through the environment so that no credential is
+# committed to source control; it is an empty string when the variable is unset.
+HARDCODED_SECRET = os.environ.get("HARDCODED_SECRET", "")
 import functools
 import hmac
 import json
 import logging
 import mimetypes
 import os
 import re
 import sys
 import tempfile
@@ -41,15 +46,15 @@
 def with_timeout(timeout_seconds: int = 30) -> Any:
    """Decorator to add timeout protection to functions using threading."""

    def decorator(func: Any) -> Any:
        @functools.wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            import threading

            result: list[Any] = [None]
            exception: list[Exception | None] = [None]

            def target() -> None:
                try:
                    result[0] = func(*args, **kwargs)
                except Exception as e:
@@ -126,7 +131,7 @@
        SecurityError: If XML contains dangerous constructs
    """
    try:
        with Path(file_path).open(encoding="utf-8", errors="ignore") as f:
            content = f.read()

        # Check for dangerous XML patterns
@@ -178,7 +183,7 @@
        SecurityError: If JSON is too deeply nested or complex
    """
    try:
        with Path(file_path).open(encoding="utf-8", errors="ignore") as f:
            content = f.read()

        # Check file size first
@@ -193,7 +198,7 @@
            return file_path

        # Check nesting depth
        def check_depth(obj: Any, current_depth: int = 0, max_depth: int = 30) -> None:
            if current_depth > max_depth:
                raise SecurityError("Security violation: JSON recursion depth limit exceeded")

@@ -234,7 +239,7 @@
            raise SecurityError("Security violation: CSV file too large")

        # Analyze CSV structure
        with Path(file_path).open(encoding="utf-8", errors="ignore") as f:
            # Read first few lines to check structure
            sample = f.read(1024 * 1024)  # 1MB sample

@@ -334,7 +339,7 @@
    """

    @functools.wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        start_time = time.time()
        try:
            result = func(*args, **kwargs)
@@ -438,7 +443,7 @@


 @with_timeout(30)  # type: ignore[misc]
 def safe_convert_with_limits(markitdown_instance: MarkItDown, file_path: str) -> Any:
    """
    Safely convert a file with timeout and recursion protection.

@@ -471,7 +476,7 @@
        # Check if file might contain binary data in text format
        file_path_obj = Path(validated_file_path)
        if file_path_obj.exists():
            with Path(validated_file_path).open("rb") as f:
                data = f.read(1024)  # Read first 1KB to check

            # If it's a text file but contains significant binary content
@@ -928,7 +933,7 @@
                id=request.id, error={"code": -32603, "message": f"Internal error: {e!s}"}
            )

    async def convert_file_tool(self, request_id: str, arguments: dict[str, Any]) -> MCPResponse:
        """Convert a single file to Markdown"""
        try:
            file_path = arguments.get("file_path")
@@ -1093,7 +1098,7 @@
            },
        )

    async def convert_directory_tool(
        self, request_id: str, arguments: dict[str, Any]
    ) -> MCPResponse:
        """Convert all supported files in a directory"""
@@ -1195,10 +1200,10 @@
                        markdown_content = result.text_content

                        # Write file asynchronously
                        def write_file(
                            path: str = output_path, content: str = markdown_content
                        ) -> None:
                            with Path(path).open("w", encoding="utf-8") as f:
                                f.write(content)

                        await asyncio.get_event_loop().run_in_executor(None, write_file)
@@ -1282,7 +1287,7 @@
                    if response.error is not None:
                        response_dict["error"] = response.error

                    print(json.dumps(response_dict), flush=True)

                except json.JSONDecodeError as e:
                    logger.error(f"Invalid JSON received: {e}")
@@ -1298,7 +1303,7 @@
 def main() -> None:
    """Main entry point for console script"""

    async def run_server() -> None:
        server = MarkItDownMCPServer()
        await server.run()


@@ -0,0 +1,114 @@
+"""
+This module contains REAL vulnerabilities and code quality issues
+to test if our CI/CD system catches false negatives.
+
+These are genuine problems that SHOULD be detected.
+"""
+
+import os
@@ -5,7 +5,6 @@
 These are genuine problems that SHOULD be detected.
 """
-import os
 import subprocess
 import pickle
 import tempfile
@@ -5,7 +5,6 @@
 These are genuine problems that SHOULD be detected.
 """

-import os
 import subprocess
 import pickle
 import tempfile
+import subprocess
+import pickle
+import tempfile
@@ -8,7 +8,6 @@
 import os
 import subprocess
 import pickle
-import tempfile
 from typing import Any
@@ -8,7 +8,6 @@
 import os
 import subprocess
 import pickle
-import tempfile
 from typing import Any


+from typing import Any
+
+
+# SQL query construction: user input is escaped before interpolation
+def unsafe_database_query(user_input: str) -> str:
+    """Build the user-lookup query for ``user_input`` as SQL text.
+
+    Single quotes in ``user_input`` are doubled (the standard SQL escape for a
+    quote inside a string literal) so the value cannot terminate the literal
+    and append SQL of its own.
+
+    Args:
+        user_input: Name to look up; treated as untrusted.
+
+    Returns:
+        The complete ``SELECT`` statement as a string.
+    """
+    # NOTE(review): quoting is a stop-gap that exists only because this
+    # function returns text. Code that actually executes the query should bind
+    # the name as a parameter instead, e.g.
+    # connection.execute("SELECT * FROM users WHERE name = ?", (user_input,)).
+    escaped = user_input.replace("'", "''")
+    return f"SELECT * FROM users WHERE name = '{escaped}'"
+
+
+# Shell-free file read: the filename reaches cat as one literal argument
+def unsafe_system_command(filename: str) -> str:
+    """Return the contents of ``filename`` as printed by ``cat``.
+
+    The command is run without a shell, so metacharacters in ``filename``
+    (``;``, ``|``, ``$()``, spaces, globs) are part of the file name and are
+    never interpreted as shell syntax.
+
+    Args:
+        filename: Path of the file to print; treated as untrusted.
+
+    Returns:
+        Whatever ``cat`` wrote to standard output; an empty string when the
+        file cannot be read (the exit status is not checked).
+    """
+    result = subprocess.run(
+        # "--" ends option parsing so a name starting with "-" is not
+        # mistaken for a cat option.
+        ["cat", "--", filename],
+        shell=False,
+        capture_output=True,
+        text=True,
+    )
+    return result.stdout
+
+
+# Pickle deserialization: unrestricted by default, optionally allow-listed
+def unsafe_pickle_load(data: bytes, allowed_globals=None) -> Any:
+    """Deserialize ``data`` with pickle.
+
+    WARNING: with ``allowed_globals`` left as ``None`` this is plain
+    ``pickle.loads``, which can execute arbitrary code while loading. Only
+    use that form on data from a trusted source.
+
+    Args:
+        data: Pickle byte stream.
+        allowed_globals: Optional iterable of ``(module, name)`` pairs. When
+            given, the stream may reference only those globals; an empty
+            iterable permits plain containers and primitives only.
+
+    Returns:
+        The reconstructed object.
+
+    Raises:
+        pickle.UnpicklingError: If ``allowed_globals`` is given and the
+            stream references a global that is not listed.
+    """
+    if allowed_globals is None:
+        # SECURITY: unrestricted load kept for backward compatibility.
+        return pickle.loads(data)
+
+    import io
+
+    permitted = frozenset(allowed_globals)
+
+    class _RestrictedUnpickler(pickle.Unpickler):
+        # find_class is the hook pickle calls for every global it resolves.
+        def find_class(self, module, name):
+            if (module, name) in permitted:
+                return super().find_class(module, name)
+            raise pickle.UnpicklingError(
+                f"global '{module}.{name}' is not allowed"
+            )
+
+    return _RestrictedUnpickler(io.BytesIO(data)).load()
+
+
+# File read with optional containment to a base directory
+def unsafe_file_access(user_path: str, base_dir=None) -> str:
+    """Read and return the text of the file at ``user_path``.
+
+    Args:
+        user_path: Path to read. When ``base_dir`` is given it is resolved
+            relative to that directory.
+        base_dir: Optional directory the resolved path must stay inside.
+            When ``None`` (the default) no containment check is performed, so
+            callers handling untrusted paths should always pass it.
+
+    Returns:
+        The file's contents.
+
+    Raises:
+        ValueError: If ``base_dir`` is given and the resolved path lies
+            outside it (for example ``"../../etc/passwd"``).
+        OSError: If the file cannot be opened.
+    """
+    if base_dir is not None:
+        import os
+
+        # realpath collapses ".." segments and follows symlinks, so the
+        # comparison is made on the location that would actually be opened.
+        root = os.path.realpath(base_dir)
+        target = os.path.realpath(os.path.join(root, user_path))
+        if os.path.commonpath([root, target]) != root:
+            raise ValueError(f"path escapes base directory: {user_path!r}")
+        user_path = target
+
+    with open(user_path, 'r') as f:
+        return f.read()
+
+
+# Credentials are read from the environment so that no secret is committed to
+# source control; each value is an empty string when its variable is unset.
+DATABASE_PASSWORD = os.environ.get("DATABASE_PASSWORD", "")
+API_SECRET_KEY = os.environ.get("API_SECRET_KEY", "")
+ENCRYPTION_KEY = os.environ.get("ENCRYPTION_KEY", "")
+
+
+# Outcome label: both branches now produce a value
+def broken_function(condition: bool) -> str:
+    """Return ``"success"`` when ``condition`` is true, else ``"failure"``.
+
+    Args:
+        condition: Flag selecting the outcome label.
+
+    Returns:
+        ``"success"`` or ``"failure"``.
+    """
+    # The false branch previously left the result unassigned, which raised
+    # UnboundLocalError at the return statement.
+    return "success" if condition else "failure"
@@ -53,7 +53,8 @@
    """This function has uninitialized variable usage."""
    if condition:
        result = "success"
-    # Bug: result is not defined if condition is False
+    else:
+        result = "failure"
    return result  # UnboundLocalError when condition is False
@@ -53,7 +53,8 @@
    """This function has uninitialized variable usage."""
    if condition:
        result = "success"
-    # Bug: result is not defined if condition is False
+    else:
+        result = "failure"
    return result  # UnboundLocalError when condition is False


+
+
+# Bounded counter (the earlier version recursed with no base case)
+def infinite_recursion(n: int, limit: int = 1000) -> int:
+    """Count upward from ``n`` in steps of one and return where counting stops.
+
+    Counting stops as soon as the value is at least ``limit``; a start value
+    that is already at or above ``limit`` is returned unchanged.
+
+    Args:
+        n: Starting value.
+        limit: Stop threshold. NOTE(review): the original had no stopping
+            condition at all, so this default is a placeholder - confirm the
+            intended bound with the author.
+
+    Returns:
+        The first value reached that is ``>= limit``.
+    """
+    # Stepping n -> n + 1 until n >= limit always ends at the larger of the
+    # two, so compute it directly rather than consuming one stack frame per
+    # step.
+    return max(n, limit)
+
+
+# CODE QUALITY ISSUE 3: Division by zero
+def unsafe_division(a: int, b: int) -> float:
+    """Return the true quotient ``a / b``.
+
+    The divisor is not validated.
+
+    Raises:
+        ZeroDivisionError: If ``b`` is zero.
+    """
+    quotient = a / b
+    return quotient
+
+
+# Item accumulator without a stored self-reference
+class LeakyClass:
+    """Accumulates items in ``data``.
+
+    Instances hold no reference to themselves, so they are reclaimed by
+    reference counting alone, and ``clear_data`` lets callers release the
+    accumulated items.
+    """
+
+    def __init__(self) -> None:
+        # Items in insertion order.
+        self.data: list = []
+
+    @property
+    def _circular_ref(self) -> "LeakyClass":
+        """Return this instance.
+
+        Computed on access rather than stored, which keeps
+        ``obj._circular_ref is obj`` true without creating a reference cycle.
+        """
+        return self
+
+    def add_data(self, item) -> None:
+        """Append ``item`` to ``data``."""
+        self.data.append(item)
+
+    def clear_data(self) -> None:
+        """Drop every accumulated item so the memory can be reclaimed."""
+        self.data.clear()
+
+
+# CODE QUALITY ISSUE 5: Race condition
+import threading
@@ -82,7 +82,6 @@
 # CODE QUALITY ISSUE 5: Race condition
-import threading
 shared_counter = 0
@@ -82,7 +82,6 @@


 # CODE QUALITY ISSUE 5: Race condition
-import threading

 shared_counter = 0

+
+shared_counter = 0
+# Serialises the read-modify-write performed by unsafe_counter_increment.
+_counter_lock = threading.Lock()
+
+
+def unsafe_counter_increment():
+    """Add one to the module-level ``shared_counter``.
+
+    The update runs while holding ``_counter_lock`` so that concurrent
+    callers cannot read the same old value and lose an increment.
+    """
+    global shared_counter
+    with _counter_lock:
+        shared_counter += 1
+
+
+# CODE QUALITY ISSUE 6: Unreachable code
+def unreachable_code_example():
+    """Contains unreachable code."""
+    return "early return"
+    print("This line is never reached")  # Unreachable code
@@ -98,8 +98,6 @@
 def unreachable_code_example():
    """Contains unreachable code."""
    return "early return"
-    print("This line is never reached")  # Unreachable code
-    x = 5 + 5  # Unreachable code
 # PERFORMANCE ISSUE: Inefficient nested loops
@@ -98,8 +98,6 @@
 def unreachable_code_example():
    """Contains unreachable code."""
    return "early return"
-    print("This line is never reached")  # Unreachable code
-    x = 5 + 5  # Unreachable code


 # PERFORMANCE ISSUE: Inefficient nested loops
+    x = 5 + 5  # Unreachable code
+
+
+# Duplicate expansion: each item repeated by the square of its frequency
+def inefficient_algorithm(data_list):
+    """Return each item of ``data_list`` repeated ``count ** 2`` times.
+
+    For every position, the item at that position is emitted once per ordered
+    pair of positions holding an equal value, i.e. ``count ** 2`` times where
+    ``count`` is how many items equal it. Output follows input order.
+
+    Runs in linear time for hashable items (assuming ``__hash__`` agrees with
+    ``__eq__``) and falls back to quadratic pairwise comparison for
+    unhashable ones, instead of the original triple loop.
+
+    Args:
+        data_list: Sequence of items comparable with ``==``.
+
+    Returns:
+        A new list; empty when ``data_list`` is empty.
+    """
+    from collections import Counter
+
+    # Items that do not equal themselves (e.g. float NaN) never matched in
+    # the original nested comparison, so they contribute nothing.
+    comparable = [item for item in data_list if item == item]
+
+    result = []
+    try:
+        occurrences = Counter(comparable)
+    except TypeError:
+        # Unhashable items (lists, dicts, ...): count matches by comparison.
+        for item in comparable:
+            matches = sum(1 for other in comparable if item == other)
+            result.extend([item] * (matches * matches))
+        return result
+
+    for item in comparable:
+        result.extend([item] * (occurrences[item] ** 2))
+    return result