Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 12 additions & 8 deletions api/api_tests/internal/transform/test_embed_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,10 +369,12 @@ def test_async_request_handler_empty_prompts_list(mock_make_async_request):
assert result == []


@patch("nv_ingest_api.internal.transform.embed_text.infer_microservice")
def test_make_async_request_happy_path(im_mock):
@patch("nv_ingest_api.util.nim.infer_microservice")
@patch(f"{MODULE_UNDER_TEST}.infer_microservice", create=True)
def test_make_async_request_happy_path(module_im_mock, nim_im_mock):
# Assign
im_mock.return_value = [[0.1, 0.2, 0.3]]
nim_im_mock.return_value = [[0.1, 0.2, 0.3]]
module_im_mock.return_value = [[0.1, 0.2, 0.3]]
# Act
result = module_under_test._make_async_request(
prompts=["Hello world"],
Expand All @@ -385,8 +387,8 @@ def test_make_async_request_happy_path(im_mock):
filter_errors=False,
dimensions=None,
)
# Assert: client called as expected
im_mock.assert_called_once_with(
# Assert: client called as expected (module-level import is used in embed_text)
module_im_mock.assert_called_once_with(
["Hello world"],
"dummy_model",
embedding_endpoint="http://dummy-endpoint",
Expand All @@ -403,10 +405,12 @@ def test_make_async_request_happy_path(im_mock):
assert result == {"embedding": [[0.1, 0.2, 0.3]], "info_msg": None}


@patch("nv_ingest_api.internal.transform.embed_text.infer_microservice")
def test_make_async_request_failure_returns_none_embedding_and_info_message(im_mock):
@patch("nv_ingest_api.util.nim.infer_microservice")
@patch(f"{MODULE_UNDER_TEST}.infer_microservice", create=True)
def test_make_async_request_failure_returns_none_embedding_and_info_message(module_im_mock, nim_im_mock):
# Arrange
im_mock.side_effect = RuntimeError("Simulated client failure")
nim_im_mock.side_effect = RuntimeError("Simulated client failure")
module_im_mock.side_effect = RuntimeError("Simulated client failure")

# Act & Assert
with pytest.raises(RuntimeError) as excinfo:
Expand Down
10 changes: 5 additions & 5 deletions api/src/nv_ingest_api/internal/primitives/tracing/tagging.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

def traceable(trace_name: Optional[str] = None):
"""
A decorator that adds entry and exit trace timestamps to a IngestControlMessage's metadata
A decorator that adds entry and exit trace timestamps to an IngestControlMessage's metadata
based on the presence of a 'config::add_trace_tagging' flag.

This decorator checks if the 'config::add_trace_tagging' flag is set to True in the
Expand All @@ -37,7 +37,7 @@ def traceable(trace_name: Optional[str] = None):

Notes
-----
The decorated function must accept a IngestControlMessage object as one of its arguments.
The decorated function must accept an IngestControlMessage object as one of its arguments.
For a regular function, this is expected to be the first argument; for a class method,
this is expected to be the second argument (after 'self'). The IngestControlMessage object
must implement `has_metadata`, `get_metadata`, and `set_metadata` methods used by the decorator
Expand All @@ -51,7 +51,7 @@ def traceable(trace_name: Optional[str] = None):
--------
Automatic stage name detection (recommended):

>>> @traceable() # Uses self.stage_name automatically
>>> @traceable() # Uses self.stage_name automatically
... def process_message(self, message):
... pass

Expand Down Expand Up @@ -253,14 +253,14 @@ def set_trace_timestamps_with_parent_context(control_message, execution_trace_lo
--------
Basic usage in a stage:

>>> execution_trace_log = {"trace::entry::yolox_inference": ts1, "trace::exit::yolox_inference": ts2}
>>> execution_trace_log = {"trace::entry::yolox_inference": ts1, "trace::exit::yolox_inference": ts2} # noqa
>>> set_trace_timestamps_with_parent_context(
... control_message, execution_trace_log, "pdf_extractor", logger
... )

This transforms:
- trace::entry::yolox_inference -> trace::entry::pdf_extractor::yolox_inference
- trace::exit::yolox_inference -> trace::exit::pdf_extractor::yolox_inference
- trace::exit::yolox_inference -> trace::exit::pdf_extractor::yolox_inference
"""
if not execution_trace_log:
return
Expand Down
111 changes: 111 additions & 0 deletions assets/style.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
/* Global fonts and tokens */
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600&display=swap');

/* Design tokens. The values on :root are the dark palette and act as the default. */
:root {
  /* Surfaces */
  --bg: #0f1115;
  --surface: #151821;
  --surface-2: #1b2030;

  /* Text */
  --text: #e5e7eb;
  --text-muted: #9aa3b2;

  /* Lines and accents */
  --border: #2a2f3d;
  --accent: #4f46e5;
  --accent-2: #06b6d4;

  /* Status colours */
  --success: #10b981;
  --danger: #ef4444;
  --warning: #f59e0b;

  /* Shape and elevation */
  --radius: 8px;
  --shadow: 0 1px 2px rgba(0,0,0,0.25), 0 8px 24px rgba(0,0,0,0.18);

  /* Spacing scale */
  --gap-xs: 4px;
  --gap-sm: 8px;
  --gap-md: 12px;
  --gap-lg: 16px;
  --gap-xl: 24px;
}

/*
 * Optional light theme: put data-theme="light" on <body> to override the
 * surface, text and border tokens. Tokens not listed here keep their :root values.
 */
body[data-theme="light"] {
  /* Surfaces */
  --bg: #ffffff;
  --surface: #f6f7fb;
  --surface-2: #eef1f7;

  /* Text */
  --text: #111827;
  --text-muted: #6b7280;

  /* Lines */
  --border: #e5e7eb;
}

/* Both root elements take the full available height. */
html,
body {
  height: 100%;
}

/* Page defaults: no outer margin, themed colours, Inter-first font stack. */
body {
  margin: 0;
  color: var(--text);
  background: var(--bg);
  font-family: Inter, system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, sans-serif;
  font-size: 14px;
  line-height: 1.5;
}

/* ---------- Layout ---------- */

/* Two columns: a fixed 300px first column and a flexible second column. */
.grid {
  display: grid;
  grid-template-columns: 300px 1fr;
  gap: var(--gap-lg);
  align-items: start;
}

/* Card-styled sidebar that sticks below the top edge while scrolling. */
.sidebar {
  position: sticky;
  top: var(--gap-lg);
  padding: var(--gap-lg);
  background: var(--surface);
  border: 1px solid var(--border);
  border-radius: var(--radius);
  box-shadow: var(--shadow);
}

.section-title {
  font-size: 16px;
  font-weight: 600;
  margin: var(--gap-sm) 0;
}

/* Small uppercase caption rendered on its own line. */
.label {
  display: block;
  margin-bottom: 4px;
  color: var(--text-muted);
  font-size: 12px;
  text-transform: uppercase;
  letter-spacing: 0.04em;
}

/* Secondary / helper text. */
.help,
.muted {
  color: var(--text-muted);
  font-size: 12px;
}

.control {
  margin: var(--gap-md) 0;
}

/* Auto-fitting grid whose columns are at least 220px wide. */
.kpis {
  display: grid;
  grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
  gap: var(--gap-md);
  margin: var(--gap-sm) 0 var(--gap-lg);
}

/* Bordered card around a graph. */
.graph {
  margin-bottom: var(--gap-lg);
  padding: var(--gap-sm);
  background: var(--surface);
  border: 1px solid var(--border);
  border-radius: var(--radius);
}

/* ---------- Inputs & Buttons ---------- */

/*
 * Text inputs. Form controls do not inherit the page font by default, so
 * font-family is inherited explicitly to pick up the stack set on <body>.
 */
input,
textarea {
  background: var(--surface-2);
  color: var(--text);
  border: 1px solid var(--border);
  border-radius: 6px;
  padding: 8px 10px;
  font-family: inherit;
}

input::placeholder,
textarea::placeholder {
  color: var(--text-muted);
}

/*
 * Buttons. The .button class gives non-<button> elements the same look,
 * so every state below is declared for both selectors.
 */
button,
.button {
  appearance: none;
  border: 1px solid var(--border);
  background: var(--surface-2);
  color: var(--text);
  padding: 6px 12px;
  border-radius: 6px;
  cursor: pointer;
  font-family: inherit;
}

/* Hover: tint the border towards the accent colour. */
button:hover,
.button:hover {
  border-color: color-mix(in srgb, var(--accent) 35%, var(--border));
}

/*
 * Primary variant. Declared after the hover rule so that, at equal
 * specificity, the accent border also applies while hovered.
 */
button.primary,
.button.primary {
  background: var(--accent);
  border-color: var(--accent);
  color: #fff;
}

/* Primary hover: darken the accent fill by mixing in black. */
button.primary:hover,
.button.primary:hover {
  background: color-mix(in srgb, var(--accent) 85%, #000);
}

/* ---------- Dash core components ---------- */

/* RadioItems / Checklist: tint native radios and checkboxes with the accent colour. */
input[type="radio"],
input[type="checkbox"] {
  accent-color: var(--accent);
}

/* Dropdown (react-select): themed control, menu and option states. */
.Select-control {
  background: var(--surface-2);
  border-color: var(--border);
  color: var(--text);
}

/* Selected value and placeholder share the main text colour. */
.Select--single > .Select-control .Select-value,
.Select-placeholder {
  color: var(--text);
}

/* The open menu gets a z-index of 1000. */
.Select-menu-outer {
  background: var(--surface-2);
  border-color: var(--border);
  color: var(--text);
  z-index: 1000;
}

.Select-option {
  background: var(--surface-2);
  color: var(--text);
}

/* Focused and selected options mix increasing amounts of accent into the surface. */
.Select-option.is-focused {
  background: color-mix(in srgb, var(--accent) 14%, var(--surface-2));
}

.Select-option.is-selected {
  background: color-mix(in srgb, var(--accent) 28%, var(--surface-2));
}

/* Slider (rc-slider): border-coloured rail, accent track, white handle. */
.rc-slider {
  padding: 6px 0;
}

.rc-slider-rail {
  background: var(--border);
}

.rc-slider-track {
  background: var(--accent);
}

.rc-slider-handle {
  background: #fff;
  border-color: var(--accent);
}

/* Tabs: transparent strip holding tabs with rounded top corners. */
.tabs,
.dash-tabs {
  background: transparent;
}

/* The border is forced with !important. */
.tab {
  margin-right: 4px;
  padding: 8px 12px;
  color: var(--text);
  background: var(--surface);
  border: 1px solid var(--border) !important;
  border-radius: 6px 6px 0 0;
}

/* Selected tab: transparent bottom border and the raised surface colour. */
.tab--selected {
  background: var(--surface-2);
  border-bottom-color: transparent !important;
}

/* Upload: dashed outline with muted text. */
.dccUpload {
  padding: 6px 10px;
  color: var(--text-muted);
  border: 1px dashed var(--border);
  border-radius: 6px;
}

/* Cytoscape: the container and the graph element share the surface background. */
#proctree-cyto-container,
#proctree-graph {
  background: var(--surface);
}

/* Only the outer container carries the card border. */
#proctree-cyto-container {
  border: 1px solid var(--border);
  border-radius: var(--radius);
}

/* Helper spacing: horizontal rules become a thin, slightly faded themed line. */
hr {
  margin: var(--gap-md) 0;
  border: none;
  border-top: 1px solid var(--border);
  opacity: 0.7;
}

/* Plotly: force figure backgrounds transparent so the page surface shows through. */
.js-plotly-plot .plotly .bg,
.js-plotly-plot .plotly .bglayer,
.js-plotly-plot .plotly .plot {
  background: transparent !important;
}

/* The info layer uses the themed text colour. */
.js-plotly-plot .plotly .infolayer {
  color: var(--text);
}
53 changes: 53 additions & 0 deletions client/src/nv_ingest_client/client/ingest_job_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,59 @@ def _save_response_data(
clean_doc_name = os.path.basename(doc_name)
output_name = f"{clean_doc_name}.metadata.json"

# Additionally, write out parallel files for trace timings and annotations (if present)
try:
os.makedirs(output_directory, exist_ok=True)
except Exception:
# Best-effort; directory should generally exist already
pass

# Build primitive breakdown from response_data
primitive_total: int = 0
primitive_counts_by_type: Dict[str, int] = defaultdict(int)
structured_by_subtype: Dict[str, int] = defaultdict(int)
try:
for document in response_data:
# Each document is treated as one primitive
primitive_total += 1
meta: Dict[str, Any] = document.get("metadata", {})
content_meta: Dict[str, Any] = meta.get("content_metadata", {})
doc_type: str = content_meta.get("type", "unknown")
primitive_counts_by_type[doc_type] += 1
if doc_type == "structured":
subtype: str = content_meta.get("subtype", "unknown")
structured_by_subtype[subtype] += 1
except Exception:
# Be resilient; don't let counting failures block output
pass

# Merge trace (if any) with primitive counts and always write a traces file
try:
trace_obj = response.get("trace") or response.get("traces") or {}
trace_out = dict(trace_obj)
trace_out["primitive_counts"] = {
"total": primitive_total,
"by_type": dict(primitive_counts_by_type),
"structured_by_subtype": dict(structured_by_subtype),
}

trace_path = os.path.join(output_directory, f"{clean_doc_name}.traces.json")
with open(trace_path, "w") as f:
f.write(json.dumps(trace_out, indent=2))
logger.debug("Wrote trace output to %s", trace_path)
except Exception as e:
logger.error("Failed to write traces for %s: %s", clean_doc_name, e)

annotations_obj = response.get("annotations")
if annotations_obj:
try:
annotations_path = os.path.join(output_directory, f"{clean_doc_name}.annotations.json")
with open(annotations_path, "w") as f:
f.write(json.dumps(annotations_obj, indent=2))
logger.debug("Wrote annotations output to %s", annotations_path)
except Exception as e:
logger.error("Failed to write annotations for %s: %s", clean_doc_name, e)

# Organize by document type
doc_map: Dict[str, List[Dict[str, Any]]] = {}
for document in response_data:
Expand Down
Empty file added scripts/support/__init__.py
Empty file.
Loading
Loading