strands-agents · mkmeral · May 30, 2025 · Jun 10, 2025 · Jun 11, 2025
@@ -178,6 +178,13 @@ response = agent.tool.http_request(
     auth_type="Bearer",
     auth_token="your_token_here"
 )
+
+# Convert HTML webpages to markdown for better readability
+response = agent.tool.http_request(
+    method="GET",
+    url="https://example.com/article",
+    convert_to_markdown=True
+)
 ```
 
 ### Python Code Execution

@@ -38,6 +38,8 @@ dependencies = [
     "tenacity>=9.1.2,<10.0.0",
     "watchdog>=6.0.0,<7.0.0",
     "slack_bolt>=1.23.0,<2.0.0",
+    "markdownify>=1.0.0,<2.0.0",
+    "readabilipy>=0.2.0,<1.0.0",
     # Note: Always want the latest tzdata
     "tzdata ; platform_system == 'Windows'",
 ]

@@ -26,6 +26,8 @@
 from typing import Any, Dict, Optional, Union
 from urllib.parse import urlparse
 
+import markdownify
+import readabilipy.simple_json
 import requests
 from aws_requests_auth.aws_auth import AWSRequestsAuth
 from requests.adapters import HTTPAdapter
@@ -50,7 +52,7 @@
         "JWT, AWS SigV4, Digest auth, and enterprise authentication patterns. Automatically reads tokens from "
         "environment variables (GITHUB_TOKEN, GITLAB_TOKEN, AWS credentials, etc.) when auth_env_var is specified. "
         "Use environment(action='list') to view available variables. Includes session management, metrics, "
-        "streaming support, cookie handling, and redirect control."
+        "streaming support, cookie handling, redirect control, and optional HTML to markdown conversion."
     ),
     "inputSchema": {
         "json": {
@@ -133,6 +135,10 @@
                     "type": "integer",
                     "description": "Maximum number of redirects to follow (default: 30)",
                 },
+                "convert_to_markdown": {
+                    "type": "boolean",
+                    "description": "Convert HTML responses to markdown format (default: False).",
+                },
                 "aws_auth": {
                     "type": "object",
                     "description": "AWS auth configuration for SigV4",
@@ -185,6 +191,30 @@
 REQUEST_METRICS = collections.defaultdict(list)
 
 
+def extract_content_from_html(html: str) -> str:
+    """Extract the readable content of an HTML document and render it as Markdown.
+
+    The document is passed to ``readabilipy.simple_json.simple_json_from_html_string``
+    (with ``use_readability=True``); the ``"content"`` HTML it returns is then
+    converted by ``markdownify`` using ATX (``#``-prefixed) headings.
+
+    This is a best-effort helper: it never raises. Whenever no usable markdown
+    can be produced, the input is handed back unchanged so the caller still has
+    the full response body.
+
+    Args:
+        html: Raw HTML content to process.
+
+    Returns:
+        The markdown rendering of the extracted content, or the original ``html``
+        value when the input is empty/blank, no content could be extracted, the
+        conversion produced only whitespace, or any step raised an exception.
+    """
+    # Function-scope import keeps this block self-contained.
+    import logging
+
+    # Nothing to convert: skip the (comparatively expensive) parser entirely.
+    if not html or (isinstance(html, str) and not html.strip()):
+        return html
+
+    try:
+        ret = readabilipy.simple_json.simple_json_from_html_string(html, use_readability=True)
+        article_html = ret.get("content") if ret else None
+        if not article_html:
+            return html
+
+        markdown = markdownify.markdownify(
+            article_html,
+            heading_style=markdownify.ATX,
+        )
+        # A blank result would silently discard the whole response body; treat
+        # it as a failed conversion and keep the original HTML instead.
+        if not markdown or not markdown.strip():
+            return html
+        return markdown
+    except Exception:
+        # Deliberately broad: conversion is optional and must never break the
+        # request. Record the failure so it can be diagnosed, then fall back.
+        logging.getLogger(__name__).debug(
+            "HTML to markdown conversion failed; returning original HTML", exc_info=True
+        )
+        return html
+
+
 def create_session(config: Dict[str, Any]) -> requests.Session:
     """Create and configure a requests Session object."""
     session = requests.Session()
@@ -569,6 +599,15 @@ def http_request(tool: ToolUse, **kwargs: Any) -> ToolResult:
         )
         ```
 
+    6. Convert HTML responses to markdown:
+        ```python
+        http_request(
+            method="GET",
+            url="https://example.com/article",
+            convert_to_markdown=True,  # Converts HTML content to readable markdown
+        )
+        ```
+
     Environment Variables:
     - Authentication tokens are read from environment when auth_env_var is specified
     - AWS credentials are automatically loaded from environment variables or credentials file
@@ -798,6 +837,24 @@ def http_request(tool: ToolUse, **kwargs: Any) -> ToolResult:
         else:
             content = response.text
 
+        # Convert HTML to markdown if requested
+        convert_to_markdown = tool_input.get("convert_to_markdown", False)
+        if convert_to_markdown:
+            content_type = response.headers.get("content-type", "")
+            is_html_content = (
+                "text/html" in content_type.lower()
+                or "<html" in content[:100].lower()
+                or "<!doctype html" in content[:100].lower()
+            )
+
+            if is_html_content:
+                original_content = content
+                content = extract_content_from_html(content)
+
+                # Add a note if conversion was successful
+                if content != original_content:
+                    console.print(Text("✓ Converted HTML content to markdown", style="green"))
+
         # Format and display the response
         response_panel = format_response_preview(response, content, metrics if metrics is not None else None)
         console.print(response_panel)

@@ -961,3 +961,88 @@ def test_http_request_via_agent(agent):
     result_text = extract_result_text(result)
     assert "Status Code: 200" in result_text
     assert "success via agent" in result_text
+
+
+@responses.activate
+def test_markdown_conversion():
+    """Request the same HTML page with and without ``convert_to_markdown``.
+
+    Without the flag the raw HTML must appear in the result text; with the flag
+    the HTML tags must be gone while the visible text is still present.
+    """
+    article_url = "https://example.com/article"
+    page_html = """
+    <!DOCTYPE html>
+    <html>
+    <head>
+        <title>Test Page</title>
+    </head>
+    <body>
+        <h1>Main Heading</h1>
+        <p>This is a paragraph with <strong>bold text</strong> and <em>italic text</em>.</p>
+        <ul>
+            <li>List item 1</li>
+            <li>List item 2</li>
+        </ul>
+        <a href="https://example.com">Link to example</a>
+    </body>
+    </html>
+    """
+
+    # One registered mock response is used by both requests below.
+    responses.add(responses.GET, article_url, body=page_html, status=200, content_type="text/html")
+
+    # Case 1: flag omitted -> the HTML is returned as-is.
+    plain_request = {"input": {"method": "GET", "url": article_url}, "toolUseId": "test1"}
+    with patch("strands_tools.http_request.get_user_input", return_value="y"):
+        plain_result = http_request.http_request(plain_request)
+
+    plain_text = extract_result_text(plain_result)
+    assert "Status Code: 200" in plain_text
+    assert "<html>" in plain_text
+
+    # Case 2: flag set -> the body is converted to markdown.
+    markdown_request = {
+        "input": {"method": "GET", "url": article_url, "convert_to_markdown": True},
+        "toolUseId": "test2",
+    }
+    with patch("strands_tools.http_request.get_user_input", return_value="y"):
+        markdown_result = http_request.http_request(markdown_request)
+
+    markdown_text = extract_result_text(markdown_result)
+    assert "Status Code: 200" in markdown_text
+
+    # Markup must have been stripped by the conversion ...
+    for html_tag in ("<html>", "<h1>", "<p>"):
+        assert html_tag not in markdown_text
+
+    # ... while the human-readable text survives.
+    for visible_text in ("Main Heading", "bold text", "italic text", "List item 1", "List item 2"):
+        assert visible_text in markdown_text
+
+
+@responses.activate
+def test_markdown_conversion_non_html():
+    """A JSON response must be left untouched even when ``convert_to_markdown`` is set."""
+    api_url = "https://example.com/api/data"
+
+    # Mocked endpoint that answers with a JSON body rather than HTML.
+    responses.add(responses.GET, api_url, json={"message": "hello", "data": [1, 2, 3]}, status=200)
+
+    tool_use = {
+        "input": {"method": "GET", "url": api_url, "convert_to_markdown": True},
+        "toolUseId": "test3",
+    }
+    with patch("strands_tools.http_request.get_user_input", return_value="y"):
+        result = http_request.http_request(tool_use)
+
+    result_text = extract_result_text(result)
+    assert "Status Code: 200" in result_text
+    # The payload is still JSON: the conversion step did not alter it.
+    assert '"message": "hello"' in result_text