diff --git a/ocaml/libs/tracing/tracing.ml b/ocaml/libs/tracing/tracing.ml index d320fd6061b..338f6b9e91d 100644 --- a/ocaml/libs/tracing/tracing.ml +++ b/ocaml/libs/tracing/tracing.ml @@ -222,6 +222,8 @@ module TraceContext = struct let empty = {traceparent= None; baggage= None} + let depth_key = "span.depth" + let with_traceparent traceparent ctx = {ctx with traceparent} let with_baggage baggage ctx = {ctx with baggage} @@ -230,6 +232,20 @@ module TraceContext = struct let baggage_of ctx = ctx.baggage + let baggage_depth_of ctx = + Option.bind (baggage_of ctx) (List.assoc_opt depth_key) + |> Option.value ~default:"1" + |> int_of_string + + let update_with_baggage k v ctx = + let new_baggage = + baggage_of ctx + |> Option.value ~default:[] + |> List.remove_assoc k + |> List.cons (k, v) + in + with_baggage (Some new_baggage) ctx + let parse input = let open Astring.String in let trim_pair (key, value) = (trim key, trim value) in @@ -322,22 +338,36 @@ module Span = struct let start ?(attributes = Attributes.empty) ?(trace_context : TraceContext.t option) ~name ~parent ~span_kind () = - let trace_id, extra_context = + let trace_id, extra_context, depth = match parent with | None -> - (Trace_id.make (), TraceContext.empty) + (Trace_id.make (), TraceContext.empty, 1) | Some span_parent -> - (span_parent.context.trace_id, span_parent.context.trace_context) + ( span_parent.context.trace_id + , span_parent.context.trace_context + , TraceContext.baggage_depth_of span_parent.context.trace_context + 1 + ) in let span_id = Span_id.make () in + let extra_context_with_depth = + TraceContext.( + update_with_baggage depth_key (string_of_int depth) extra_context + ) + in let context : SpanContext.t = - {trace_id; span_id; trace_context= extra_context} + {trace_id; span_id; trace_context= extra_context_with_depth} in let context = - (* If trace_context is provided to the call, override any inherited trace context. *) - trace_context - |> Option.fold ~none:context - ~some:(Fun.flip SpanContext.with_trace_context context) + (* If trace_context is provided to the call, override any inherited trace + context except span.depth which should still be maintained. *) + match trace_context with + | Some tc -> + let tc_with_depth = + TraceContext.(update_with_baggage depth_key (string_of_int depth) tc) + in + SpanContext.with_trace_context tc_with_depth context + | None -> + context in (* Using gettimeofday over Mtime as it is better for sharing timestamps between the systems *) let begin_time = Unix.gettimeofday () in @@ -473,6 +503,11 @@ module Spans = struct let set_max_traces x = Atomic.set max_traces x + (* Default is much larger than the largest current traces, so effectively off *) + let max_depth = Atomic.make 100 + + let set_max_depth x = Atomic.set max_depth x + let finished_spans = Atomic.make ([], 0) let span_hashtbl_is_empty () = TraceMap.is_empty (Atomic.get spans) @@ -713,12 +748,18 @@ module Tracer = struct let get_tracer ~name:_ = TracerProvider.get_current () let span_of_span_context context name : Span.t = + let tc = SpanContext.context_of_span_context context in + let new_depth = TraceContext.baggage_depth_of tc in + let new_tc = + TraceContext.(update_with_baggage depth_key (string_of_int new_depth) tc) + in + let context = SpanContext.with_trace_context new_tc context in { context ; status= {status_code= Status.Unset; _description= None} ; name ; parent= None - ; span_kind= SpanKind.Client (* This will be the span of the client call*) + ; span_kind= SpanKind.Client (* This will be the span of the client call *) ; begin_time= Unix.gettimeofday () ; end_time= None ; links= [] @@ -730,10 +771,23 @@ module Tracer = struct ?(span_kind = SpanKind.Internal) ~name ~parent () : (Span.t option, exn) result = let open TracerProvider in - (* Do not start span if the TracerProvider is disabled*) + let parent_depth = + Option.fold ~none:1 + ~some:(fun parent -> + parent.Span.context + |> SpanContext.context_of_span_context + |> TraceContext.baggage_depth_of + ) + parent + in + (* Do not start span if the TracerProvider is disabled *) if not t.enabled then + ok_none (* Do not start span if the max depth has been reached *) + else if parent_depth >= Atomic.get Spans.max_depth then ( + let parent_trace_id = Option.fold ~none:"None" ~some:(fun p -> p.Span.context |> SpanContext.span_id_of_span_context |> Span_id.to_string) parent in + debug "Max_span_depth limit reached, not creating span %s (parent %s)" name parent_trace_id ; ok_none - else + ) else let attributes = Attributes.merge_into t.attributes attributes in let span = Span.start ~attributes ?trace_context ~name ~parent ~span_kind () @@ -750,8 +804,17 @@ module Tracer = struct |> Spans.remove_from_spans |> Option.map (fun existing_span -> let old_context = Span.get_context existing_span in + let parent_trace_context = Span.get_trace_context parent in + let new_depth = + TraceContext.baggage_depth_of parent_trace_context + 1 + in let new_context : SpanContext.t = - let trace_context = span.Span.context.trace_context in + let trace_context = + TraceContext.( + update_with_baggage depth_key (string_of_int new_depth) + span.Span.context.trace_context + ) + in SpanContext.context (SpanContext.trace_id_of_span_context parent.context) old_context.span_id @@ -759,7 +822,6 @@ module Tracer = struct in let updated_span = {existing_span with parent= Some parent} in let updated_span = {updated_span with context= new_context} in - let () = Spans.add_to_spans ~span:updated_span in updated_span ) @@ -926,7 +988,15 @@ module Propagator = struct let trace_context' = TraceContext.with_traceparent (Some traceparent) trace_context in - let carrier' = P.inject_into trace_context' carrier in + let new_depth = + TraceContext.baggage_depth_of trace_context' + 1 |> string_of_int + in + let trace_context'' = + TraceContext.( + update_with_baggage depth_key new_depth trace_context' + ) + in + let carrier' = P.inject_into trace_context'' carrier in f carrier' | _ -> f carrier diff --git a/ocaml/libs/tracing/tracing.mli b/ocaml/libs/tracing/tracing.mli index 8323346a443..ec33f4ac5ff 100644 --- a/ocaml/libs/tracing/tracing.mli +++ b/ocaml/libs/tracing/tracing.mli @@ -165,6 +165,8 @@ module Spans : sig val set_max_traces : int -> unit + val set_max_depth : int -> unit + val span_count : unit -> int val since : unit -> Span.t list * int diff --git a/ocaml/libs/tracing/tracing_export.ml b/ocaml/libs/tracing/tracing_export.ml index 1162202b611..3da46fe1d90 100644 --- a/ocaml/libs/tracing/tracing_export.ml +++ b/ocaml/libs/tracing/tracing_export.ml @@ -24,6 +24,10 @@ let export_interval = ref 30. let set_export_interval t = export_interval := t +let export_chunk_size = Atomic.make 10000 + +let set_export_chunk_size x = Atomic.set export_chunk_size x + let host_id = ref "localhost" let set_host_id id = host_id := id @@ -289,6 +293,22 @@ module Destination = struct with exn -> debug "Tracing: unable to export span : %s" (Printexc.to_string exn) + (*TODO move this into Spans.since itself? *) + let rec span_info_chunks span_info batch_size = + let rec list_to_chunks_inner l n curr chunks = + if n = 0 then + if List.length l > 0 then + list_to_chunks_inner l batch_size [] ((curr, batch_size) :: chunks) + else + (curr, batch_size) :: chunks + else + match l with + | [] -> (curr, List.length curr) :: chunks + | h :: t -> + list_to_chunks_inner t (n-1) (h :: curr) chunks + in + list_to_chunks_inner (fst span_info) batch_size [] [] + let flush_spans () = let ((_span_list, span_count) as span_info) = Spans.since () in let attributes = [("export.traces.count", string_of_int span_count)] in @@ -296,10 +316,14 @@ module Destination = struct with_tracing ~span_kind:Server ~trace_context:TraceContext.empty ~parent:None ~attributes ~name:"Tracing.flush_spans" in - TracerProvider.get_tracer_providers () + let endpoints = TracerProvider.get_tracer_providers () |> List.filter TracerProvider.get_enabled |> List.concat_map TracerProvider.get_endpoints - |> List.iter (export_to_endpoint parent span_info) + in + let span_info_chunks = span_info_chunks span_info (Atomic.get export_chunk_size) in + List.iter (fun s_i -> + List.iter (export_to_endpoint parent s_i) endpoints + ) span_info_chunks let delay = Delay.make () diff --git a/ocaml/libs/tracing/tracing_export.mli b/ocaml/libs/tracing/tracing_export.mli index f322bd2404c..a857a4f523d 100644 --- a/ocaml/libs/tracing/tracing_export.mli +++ b/ocaml/libs/tracing/tracing_export.mli @@ -23,6 +23,13 @@ val set_export_interval : float -> unit Default is every [30.] seconds. *) +val set_export_chunk_size : int -> unit +(** [set_export_chunk_size size] sets the maximum number of finished spans that + can be exported in one chunk to [size]. + + Default is 10000 spans. + *) + val set_host_id : string -> unit (** [set_host_id id] sets the id of the host to [id]. diff --git a/ocaml/tests/test_observer.ml b/ocaml/tests/test_observer.ml index 2e2f8e6aa29..07d746e81c0 100644 --- a/ocaml/tests/test_observer.ml +++ b/ocaml/tests/test_observer.ml @@ -305,6 +305,7 @@ let verify_json_fields_and_values ~json = ; ("xs.host.uuid", `String _) ; ("xs.host.name", `String _) ; ("service.name", `String _) + ; ("span.depth", `String _) ] ) ; ("annotations", `List _) diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index 490045f871f..f4b8c155157 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1051,10 +1051,14 @@ let trace_log_dir = ref "/var/log/dt/zipkinv2/json" let export_interval = ref 30. +let export_chunk_size = ref 10000 + let max_spans = ref 10000 let max_traces = ref 10000 +let max_span_depth = ref 100 + let use_xmlrpc = ref true let compress_tracing_files = ref true @@ -1671,6 +1675,11 @@ let other_options = , (fun () -> string_of_float !export_interval) , "The interval for exports in Tracing" ) + ; ( "export-chunk-size" + , Arg.Set_int export_chunk_size + , (fun () -> string_of_int !export_chunk_size) + , "The span chunk size for exports in Tracing" + ) ; ( "max-spans" , Arg.Set_int max_spans , (fun () -> string_of_int !max_spans) @@ -1778,6 +1787,11 @@ let other_options = , (fun () -> string_of_float !vm_sysprep_wait) , "Time in seconds to wait for VM to recognise inserted CD" ) + ; ( "max-span-depth" + , Arg.Set_int max_span_depth + , (fun () -> string_of_int !max_span_depth) + , "The maximum depth to which spans are recorded in a trace in Tracing" + ) ] (* The options can be set with the variable xapiflags in /etc/sysconfig/xapi. diff --git a/ocaml/xapi/xapi_observer.ml b/ocaml/xapi/xapi_observer.ml index 62d3ea4359c..7a7163ff42f 100644 --- a/ocaml/xapi/xapi_observer.ml +++ b/ocaml/xapi/xapi_observer.ml @@ -599,6 +599,8 @@ let initialise_observer ~__context component = initialise_observer_component ~__context component let initialise ~__context = + Tracing.Spans.set_max_depth !Xapi_globs.max_span_depth ; + Tracing_export.set_export_chunk_size !Xapi_globs.export_chunk_size ; List.iter (initialise_observer_meta ~__context) (startup_components ()) ; Db.Observer.get_all ~__context |> List.iter (fun self ->