Skip to content

Commit 47faba0

Browse files
feat: claude citation support with BedrockModel (#631)
* feat: add citations to document content * feat: adds citation types * chore: remove uv.lock * test: add letter.pdf for test-integ * feat: working bedrock citations feature * feat: fail early for citations with incompatible models * fix: validates model ids with cross region inference ids * Apply suggestion from @Unshure Co-authored-by: Nick Clegg <[email protected]> * fix: addresses comments * removes client exception handling * moves citation into text elif * puts relative imports back * fix: tests failing * Update src/strands/models/bedrock.py Removes old comment Co-authored-by: Nick Clegg <[email protected]> * Update src/strands/models/bedrock.py Removes old comment Co-authored-by: Nick Clegg <[email protected]> * Update imports in bedrock.py Refactor imports in bedrock.py to include CitationsDelta. * feat: typed citation events --------- Co-authored-by: Nick Clegg <[email protected]>
1 parent 6dadbce commit 47faba0

File tree

13 files changed

+332
-10
lines changed

13 files changed

+332
-10
lines changed

src/strands/agent/agent_result.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,5 +42,4 @@ def __str__(self) -> str:
4242
for item in content_array:
4343
if isinstance(item, dict) and "text" in item:
4444
result += item.get("text", "") + "\n"
45-
4645
return result

src/strands/event_loop/streaming.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
from ..models.model import Model
88
from ..types._events import (
9+
CitationStreamEvent,
910
ModelStopReason,
1011
ModelStreamChunkEvent,
1112
ModelStreamEvent,
@@ -15,6 +16,7 @@
1516
ToolUseStreamEvent,
1617
TypedEvent,
1718
)
19+
from ..types.citations import CitationsContentBlock
1820
from ..types.content import ContentBlock, Message, Messages
1921
from ..types.streaming import (
2022
ContentBlockDeltaEvent,
@@ -140,6 +142,13 @@ def handle_content_block_delta(
140142
state["text"] += delta_content["text"]
141143
typed_event = TextStreamEvent(text=delta_content["text"], delta=delta_content)
142144

145+
elif "citation" in delta_content:
146+
if "citationsContent" not in state:
147+
state["citationsContent"] = []
148+
149+
state["citationsContent"].append(delta_content["citation"])
150+
typed_event = CitationStreamEvent(delta=delta_content, citation=delta_content["citation"])
151+
143152
elif "reasoningContent" in delta_content:
144153
if "text" in delta_content["reasoningContent"]:
145154
if "reasoningText" not in state:
@@ -178,6 +187,7 @@ def handle_content_block_stop(state: dict[str, Any]) -> dict[str, Any]:
178187
current_tool_use = state["current_tool_use"]
179188
text = state["text"]
180189
reasoning_text = state["reasoningText"]
190+
citations_content = state["citationsContent"]
181191

182192
if current_tool_use:
183193
if "input" not in current_tool_use:
@@ -202,6 +212,10 @@ def handle_content_block_stop(state: dict[str, Any]) -> dict[str, Any]:
202212
elif text:
203213
content.append({"text": text})
204214
state["text"] = ""
215+
if citations_content:
216+
citations_block: CitationsContentBlock = {"citations": citations_content}
217+
content.append({"citationsContent": citations_block})
218+
state["citationsContent"] = []
205219

206220
elif reasoning_text:
207221
content_block: ContentBlock = {
@@ -275,6 +289,8 @@ async def process_stream(chunks: AsyncIterable[StreamEvent]) -> AsyncGenerator[T
275289
"text": "",
276290
"current_tool_use": {},
277291
"reasoningText": "",
292+
"signature": "",
293+
"citationsContent": [],
278294
}
279295
state["content"] = state["message"]["content"]
280296

src/strands/models/bedrock.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import json
88
import logging
99
import os
10-
from typing import Any, AsyncGenerator, Callable, Iterable, Literal, Optional, Type, TypeVar, Union
10+
from typing import Any, AsyncGenerator, Callable, Iterable, Literal, Optional, Type, TypeVar, Union, cast
1111

1212
import boto3
1313
from botocore.config import Config as BotocoreConfig
@@ -18,8 +18,11 @@
1818
from ..event_loop import streaming
1919
from ..tools import convert_pydantic_to_tool_spec
2020
from ..types.content import ContentBlock, Message, Messages
21-
from ..types.exceptions import ContextWindowOverflowException, ModelThrottledException
22-
from ..types.streaming import StreamEvent
21+
from ..types.exceptions import (
22+
ContextWindowOverflowException,
23+
ModelThrottledException,
24+
)
25+
from ..types.streaming import CitationsDelta, StreamEvent
2326
from ..types.tools import ToolResult, ToolSpec
2427
from .model import Model
2528

@@ -510,7 +513,7 @@ def _convert_non_streaming_to_streaming(self, response: dict[str, Any]) -> Itera
510513
yield {"messageStart": {"role": response["output"]["message"]["role"]}}
511514

512515
# Process content blocks
513-
for content in response["output"]["message"]["content"]:
516+
for content in cast(list[ContentBlock], response["output"]["message"]["content"]):
514517
# Yield contentBlockStart event if needed
515518
if "toolUse" in content:
516519
yield {
@@ -553,6 +556,24 @@ def _convert_non_streaming_to_streaming(self, response: dict[str, Any]) -> Itera
553556
}
554557
}
555558
}
559+
elif "citationsContent" in content:
560+
# For non-streaming citations, emit text and metadata deltas in sequence
561+
# to match streaming behavior where they flow naturally
562+
if "content" in content["citationsContent"]:
563+
text_content = "".join([content["text"] for content in content["citationsContent"]["content"]])
564+
yield {
565+
"contentBlockDelta": {"delta": {"text": text_content}},
566+
}
567+
568+
for citation in content["citationsContent"]["citations"]:
569+
# Then emit citation metadata (for structure)
570+
571+
citation_metadata: CitationsDelta = {
572+
"title": citation["title"],
573+
"location": citation["location"],
574+
"sourceContent": citation["sourceContent"],
575+
}
576+
yield {"contentBlockDelta": {"delta": {"citation": citation_metadata}}}
556577

557578
# Yield contentBlockStop event
558579
yield {"contentBlockStop": {}}

src/strands/types/_events.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from typing_extensions import override
1111

1212
from ..telemetry import EventLoopMetrics
13+
from .citations import Citation
1314
from .content import Message
1415
from .event_loop import Metrics, StopReason, Usage
1516
from .streaming import ContentBlockDelta, StreamEvent
@@ -152,6 +153,14 @@ def __init__(self, delta: ContentBlockDelta, text: str) -> None:
152153
super().__init__({"data": text, "delta": delta})
153154

154155

156+
class CitationStreamEvent(ModelStreamEvent):
    """Stream event that carries one citation delta emitted by the model."""

    def __init__(self, delta: ContentBlockDelta, citation: Citation) -> None:
        """Wrap a citation and the delta it arrived in.

        Args:
            delta: The content block delta that contained the citation.
            citation: The citation taken from ``delta``.
        """
        # NOTE(review): TextStreamEvent in this module hands the base class a
        # flat {"data", "delta"} dict, whereas this payload is nested under a
        # "callback" key — confirm the extra nesting is intended.
        payload = {"citation": citation, "delta": delta}
        super().__init__({"callback": payload})
162+
163+
155164
class ReasoningTextStreamEvent(ModelStreamEvent):
156165
"""Event emitted during reasoning text streaming."""
157166

src/strands/types/citations.py

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
"""Citation type definitions for the SDK.
2+
3+
These types are modeled after the Bedrock API.
4+
"""
5+
6+
from typing import List, Union
7+
8+
from typing_extensions import TypedDict
9+
10+
11+
class CitationsConfig(TypedDict):
    """Per-document switch controlling whether citations are produced.

    Attributes:
        enabled: Whether citations are enabled for this document.
    """

    # Sole key; required because this TypedDict is declared total.
    enabled: bool
19+
20+
21+
class DocumentCharLocation(TypedDict, total=False):
    """Character-level span of cited content inside a document.

    The span is expressed as a pair of character indices into one of the
    documents supplied with the request.

    Attributes:
        documentIndex: Index of the document within the array of documents
            provided in the request. Minimum value of 0.
        start: Character position at which the cited content begins within
            the document. Minimum value of 0.
        end: Character position at which the cited content ends within
            the document. Minimum value of 0.
    """

    # All keys are optional (total=False).
    documentIndex: int
    start: int
    end: int
39+
40+
41+
class DocumentChunkLocation(TypedDict, total=False):
    """Chunk-level span of cited content inside a document.

    The span is expressed in terms of logical document segments (chunks)
    rather than raw character offsets.

    Attributes:
        documentIndex: Index of the document within the array of documents
            provided in the request. Minimum value of 0.
        start: Chunk identifier or index at which the cited content begins
            within the document. Minimum value of 0.
        end: Chunk identifier or index at which the cited content ends
            within the document. Minimum value of 0.
    """

    # All keys are optional (total=False).
    documentIndex: int
    start: int
    end: int
59+
60+
61+
class DocumentPageLocation(TypedDict, total=False):
    """Page-level span of cited content inside a document.

    The span is expressed as a range of page numbers.

    Attributes:
        documentIndex: Index of the document within the array of documents
            provided in the request. Minimum value of 0.
        start: Page number at which the cited content begins within
            the document. Minimum value of 0.
        end: Page number at which the cited content ends within
            the document. Minimum value of 0.
    """

    # All keys are optional (total=False).
    documentIndex: int
    start: int
    end: int
78+
79+
80+
# Union type for citation locations: a citation may point at a character,
# chunk, or page span.
# NOTE(review): the three member types declare identical keys (documentIndex,
# start, end), so the union by itself cannot tell them apart — confirm how
# consumers are expected to discriminate between location kinds.
CitationLocation = Union[DocumentCharLocation, DocumentChunkLocation, DocumentPageLocation]
82+
83+
84+
class CitationSourceContent(TypedDict, total=False):
    """Content taken from a source document that the response cites.

    Holds the actual source-document content that is being cited or
    referenced in the model's response.

    Note:
        This is a UNION type, so only one of the members can be specified.

    Attributes:
        text: The text content from the source document that is being cited.
    """

    # Optional key (total=False); currently the only declared member.
    text: str
98+
99+
100+
class CitationGeneratedContent(TypedDict, total=False):
    """Model-generated content that a citation backs up.

    Holds the generated text that corresponds to, or is supported by, a
    citation from a source document.

    Note:
        This is a UNION type, so only one of the members can be specified.

    Attributes:
        text: The text content that was generated by the model and is
            supported by the associated citation.
    """

    # Optional key (total=False); currently the only declared member.
    text: str
115+
116+
117+
class Citation(TypedDict, total=False):
    """A single reference from generated output back to a source document.

    Citations provide traceability between the model's generated response
    and the source documents that informed that response.

    Attributes:
        location: Where in the source document the cited content can be
            found, expressed as character positions, page numbers, or chunk
            identifiers.
        sourceContent: The specific content from the source document that was
            referenced or cited in the generated response.
        title: The title or identifier of the source document being cited.
    """

    # All keys are optional (total=False).
    location: CitationLocation
    sourceContent: List[CitationSourceContent]
    title: str
135+
136+
137+
class CitationsContentBlock(TypedDict, total=False):
    """Content block pairing generated text with the citations behind it.

    This block type is returned when document citations are enabled, providing
    traceability between the generated content and the source documents that
    informed the response.

    Attributes:
        citations: An array of citations that reference the source documents
            used to generate the associated content.
        content: The generated content that is supported by the associated
            citations.
    """

    # Both keys are optional (total=False).
    citations: List[Citation]
    content: List[CitationGeneratedContent]

src/strands/types/content.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
from typing_extensions import TypedDict
1212

13+
from .citations import CitationsContentBlock
1314
from .media import DocumentContent, ImageContent, VideoContent
1415
from .tools import ToolResult, ToolUse
1516

@@ -83,6 +84,7 @@ class ContentBlock(TypedDict, total=False):
8384
toolResult: The result for a tool request that a model makes.
8485
toolUse: Information about a tool use request from a model.
8586
video: Video to include in the message.
87+
citationsContent: Contains the citations for a document.
8688
"""
8789

8890
cachePoint: CachePoint
@@ -94,6 +96,7 @@ class ContentBlock(TypedDict, total=False):
9496
toolResult: ToolResult
9597
toolUse: ToolUse
9698
video: VideoContent
99+
citationsContent: CitationsContentBlock
97100

98101

99102
class SystemContentBlock(TypedDict, total=False):

src/strands/types/media.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,12 @@
55
- Bedrock docs: https://docs.aws.amazon.com/bedrock/latest/APIReference/API_Types_Amazon_Bedrock_Runtime.html
66
"""
77

8-
from typing import Literal
8+
from typing import Literal, Optional
99

1010
from typing_extensions import TypedDict
1111

12+
from .citations import CitationsConfig
13+
1214
DocumentFormat = Literal["pdf", "csv", "doc", "docx", "xls", "xlsx", "html", "txt", "md"]
1315
"""Supported document formats."""
1416

@@ -23,7 +25,7 @@ class DocumentSource(TypedDict):
2325
bytes: bytes
2426

2527

26-
class DocumentContent(TypedDict):
28+
class DocumentContent(TypedDict, total=False):
2729
"""A document to include in a message.
2830
2931
Attributes:
@@ -35,6 +37,8 @@ class DocumentContent(TypedDict):
3537
format: Literal["pdf", "csv", "doc", "docx", "xls", "xlsx", "html", "txt", "md"]
3638
name: str
3739
source: DocumentSource
40+
citations: Optional[CitationsConfig]
41+
context: Optional[str]
3842

3943

4044
ImageFormat = Literal["png", "jpeg", "gif", "webp"]

0 commit comments

Comments
 (0)