From 1b6224563519cd08d8367c4d80e08c8661470741 Mon Sep 17 00:00:00 2001
From: MadivB <67230306+MadivB@users.noreply.github.com>
Date: Tue, 6 Jan 2026 15:59:09 -0800
Subject: [PATCH 1/2] Create flow_validation_v2.py

---
 run-validation/flow_validation_v2.py | 693 +++++++++++++++++++++++++++
 1 file changed, 693 insertions(+)
 create mode 100644 run-validation/flow_validation_v2.py

diff --git a/run-validation/flow_validation_v2.py b/run-validation/flow_validation_v2.py
new file mode 100644
index 0000000..cd5708d
--- /dev/null
+++ b/run-validation/flow_validation_v2.py
@@ -0,0 +1,693 @@
+#!/usr/bin/env python
+
+import matplotlib as mlp
+import matplotlib.pyplot as plt
+import numpy as np
+import h5py
+import argparse
+from matplotlib.backends.backend_pdf import PdfPages
+
+from validation_utils import rasterize_plots
+rasterize_plots()
+
+SPILL_PERIOD = 1.2e7 # units = ticks
+RESET_PERIOD = 1.0e7 # units = ticks
+
+def main(flow_file, charge_only):
+
+    flow_h5 = h5py.File(flow_file,'r')
+    plt.rcParams["figure.figsize"] = (10,8)
+
+    print('\n----------------- File content -----------------')
+    print('File:',flow_file)
+    print('Keys in file:',list(flow_h5.keys()))
+    for key in flow_h5.keys():
+        print('Number of',key,'entries in file:', len(flow_h5[key]))
+        if isinstance(flow_h5[key],h5py.Group):
+            for key2 in flow_h5[key].keys():
+                full_key = key+'/'+key2+'/data'
+                if full_key in flow_h5:
+                    print('  ** ',full_key,'entries in file:', len(flow_h5[full_key]))
+    print('------------------------------------------------\n')
+
+    output_pdf_name = flow_file.split('.h5')[0]+'_validations.pdf'
+    # temperarily, put output in this directory, not the same as the
+    # simulation file itself
+    output_pdf_name = output_pdf_name.split('/')[-1] # !!
+    with PdfPages(output_pdf_name) as output:
+
+        hits = flow_h5['/charge/calib_prompt_hits/data']
+        final_hits = flow_h5['/charge/calib_final_hits/data']
+
+        print("Making light plots")
+        if not charge_only:
+           ### Event display
+           sipm_hits_data = flow_h5['light/sipm_hits/data']
+
+           # Extracting channel IDs and maximum values
+           channel_ids = sipm_hits_data['chan'][:]
+           max_values = sipm_hits_data['max'][:]
+           pos_data = sipm_hits_data['pos'][:]
+           z_coordinates = pos_data[:, 2]
+           x_coordinates = pos_data[:, 0]
+           y_coordinates = pos_data[:, 1]
+
+           unique_x, xcounts = np.unique(x_coordinates, return_counts=True)
+           unique_y, ycounts = np.unique(y_coordinates, return_counts=True)
+           unique_z, zcounts = np.unique(z_coordinates, return_counts=True)
+
+           plt.figure(figsize=(10, 6))
+           plt.bar(unique_x, xcounts)
+           plt.xlabel('X-coordinate')
+           plt.ylabel('Counts')
+           plt.title('X-coordinate')
+           plt.grid(True)
+           output.savefig()  
+           plt.close()
+
+           plt.figure(figsize=(10, 6))
+           plt.bar(unique_y, ycounts)
+           plt.xlabel('Y-coordinate')
+           plt.ylabel('Counts')
+           plt.title('Y-coordinate')
+           plt.grid(True)
+           output.savefig()  
+           plt.close()
+ 
+           plt.figure(figsize=(10, 6))
+           plt.bar(unique_z, zcounts)
+           plt.xlabel('Z-coordinate')
+           plt.ylabel('Counts')
+           plt.title('Z-coordinate')
+           plt.grid(True)
+           output.savefig()  
+           plt.close()
+
+
+           # Plot scatter plot for channel IDs versus maximum values
+           plt.figure(figsize=(10, 6))
+           plt.scatter(channel_ids, max_values, marker='.', color='blue')
+           plt.xlabel('Channel ID')
+           plt.ylabel('Maximum Value')
+           plt.title('Maximum Values vs. Channel IDs')
+           plt.grid(True)
+           plt.xlim(0, 64)
+           output.savefig()  
+           plt.close()    
+
+           # Plot density of channel IDs versus maximum values
+           plt.figure(figsize=(10, 6))
+           plt.hist2d(channel_ids, max_values, bins=[64, 70], range=[[0, 64], [np.min(max_values), np.max(max_values)]], cmap='viridis', cmin=1)
+           plt.colorbar(label='Counts')
+           plt.xlabel('Channel ID')
+           plt.ylabel('Maximum Value')
+           plt.title('Density of Maximum Values vs. Channel IDs')
+           # plt.grid(True) # Grid can be distracting on 2D hist, but user likes grids. Leaving it off for cleaner density map usually, or overlay. I will omit grid for density map clarity unless requested.
+           output.savefig()
+           plt.close()
+
+           sum_hits_data = flow_h5['light/sum_hits/data']
+           tpc_values = sum_hits_data['tpc'][:]
+           det_values = sum_hits_data['det'][:]
+           unique_tpc, counts_tpc = np.unique(tpc_values, return_counts=True)
+           unique_det, counts_det = np.unique(det_values, return_counts=True)
+
+           plt.figure(figsize=(8, 6))
+           plt.bar(unique_tpc, counts_tpc, color='purple')
+           plt.xlabel('tpc_values')
+           plt.ylabel('Counts')
+           plt.title(f'Histogram of TPC index')
+           plt.grid(True)
+           output.savefig()  
+           plt.close()    
+
+           plt.figure(figsize=(8, 6))
+           plt.bar(unique_det, counts_det, color='purple')
+           plt.xlabel('det_values')
+           plt.ylabel('Counts')
+           plt.title(f'Histogram of detector index')
+           plt.grid(True)
+           output.savefig()  
+           plt.close()   
+
+
+        print("Making a 3D plot of all spills")
+        # 3D - all spills
+        fig = plt.figure(figsize=(10,10))
+        ax = fig.add_subplot(projection='3d')
+        ax.set_facecolor('none')
+        fig.tight_layout()        
+        dat = ax.scatter(hits['z'],hits['x'],
+                   hits['y'],c=hits['Q'],
+                   s=1,cmap='viridis',norm=mlp.colors.LogNorm())
+                   #norm=mpl.colors.LogNorm())#, cmap='Greys')
+        fig.colorbar(dat,ax=ax,label="detected charge",shrink=0.5)
+        ax.set_title("charge hits",fontsize=20)
+        ax.set_xlabel('z [cm]')
+        ax.set_ylabel('x [cm]')
+        ax.set_zlabel('y [cm]')
+        #ax.set_xlim([-65.,65])
+        #ax.set_ylim([-65.,65])
+        #ax.set_zlim([-65.,65])
+        del ax, fig
+        output.savefig()
+        plt.close()
+
+        print("Making 2D plots of all spills")
+        # 2D hit projections
+        fig = plt.figure(figsize=(10,6))
+        gs  = fig.add_gridspec(1,3)
+        ax1 = fig.add_subplot(gs[0,0],aspect=1.0)
+        ax2 = fig.add_subplot(gs[0,1],aspect=1.0)
+        ax3 = fig.add_subplot(gs[0,2],aspect=1.0)
+
+        io_group_count = 1
+        io_groups_uniq = set(hits['io_group'])
+        for iog in io_groups_uniq:
+            iog_mask = hits['io_group'] == iog
+            iog_hits = hits[iog_mask]      
+            ax1.scatter(iog_hits['z'],iog_hits['y'],s=0.5,alpha=0.1,label='IO Group '+str(iog))
+            ax1.set_xlabel(r'z [cm]')
+            ax1.set_ylabel(r'y [cm]')
+
+            ax2.scatter(iog_hits['x'],iog_hits['z'],s=0.5,alpha=0.1)#,label='IO Group'+str(iog))
+            ax2.set_xlabel(r'x [cm]')
+            ax2.set_ylabel(r'z [cm]')
+
+            ax3.scatter(iog_hits['x'],iog_hits['y'],s=0.5,alpha=0.1)#,label='IO Group'+str(iog))
+            ax3.set_xlabel(r'x [cm]')
+            ax3.set_ylabel(r'y [cm]')
+
+            if io_group_count % 8 == 0 or io_group_count == len(io_groups_uniq):
+                leg = fig.legend(bbox_to_anchor=(0.2, 0.8), loc='lower left', ncols=4, markerscale=10.,fontsize=13)
+                for lh in leg.legend_handles:
+                    lh.set_alpha(1)
+                plt.tight_layout()
+                output.savefig()
+                plt.close()
+
+                fig = plt.figure(figsize=(10,6))
+                gs  = fig.add_gridspec(1,3)
+                ax1 = fig.add_subplot(gs[0,0],aspect=1.0)
+                ax2 = fig.add_subplot(gs[0,1],aspect=1.0)
+                ax3 = fig.add_subplot(gs[0,2],aspect=1.0)
+                
+            io_group_count += 1
+
+        print("Making 1D hit distributions plots")
+        ### Hit level 1D position distributions
+        fig = plt.figure(figsize=(10,10),layout="constrained")
+        gs = fig.add_gridspec(2,2)
+        ax_x = fig.add_subplot(gs[0,0])
+        ax_y = fig.add_subplot(gs[0,1])
+        ax_z = fig.add_subplot(gs[1,0])
+        #fig.title('/charge/calib_prompt_hits/data')
+
+        ax_z.hist(hits['z'],bins=298) # assuming hits at min/max z, each bin is approx 1 pixel = 0.434 cm
+        ax_z.set_xlabel('z [cm]',fontsize=16)
+        ax_z.set_ylabel('number of hits per bin',fontsize=16)
+        ax_z.set_yscale('log')
+        #ax_z.set_ylim([1,2e3])
+
+        ax_y.hist(hits['y'],bins=287) # assuming hits at min/max y, each bin is approx 1 pixel = 0.434 cm
+        ax_y.set_xlabel('y [cm]',fontsize=16)
+        ax_y.set_ylabel('number of hits per bin',fontsize=16)
+        ax_y.set_yscale('log')
+        #ax_y.set_ylim([1,2e3])
+
+        ax_x.hist(hits['x'],bins=256) # assuming hits at min/max x, each bin is approx 0.5 cm
+        ax_x.set_xlabel('x [cm]',fontsize=16)
+        ax_x.set_ylabel('number of hits per bin',fontsize=16)
+        ax_x.set_yscale('log')
+        #ax_x.set_ylim([1,2e3])
+        output.savefig()
+        plt.close()
+
+        print("Making plot of io_group contributions from each spill")
+        # io_group contribution for each spill
+        n_evts = len(flow_h5['charge/events/ref/charge/calib_final_hits/ref_region'])
+        io_group_contrib = np.zeros(shape=(n_evts,len(io_groups_uniq)))
+        for a in range(n_evts):
+            hit_ref_slice = flow_h5['charge/events/ref/charge/calib_final_hits/ref_region'][a]
+            spill_hits = final_hits[hit_ref_slice[0]:hit_ref_slice[1]]
+            event_charge = np.sum(spill_hits['Q'])
+            if event_charge==0:
+                plt.close()
+                continue
+            for iog in io_groups_uniq:
+                iog_mask = spill_hits['io_group'] == iog
+                iog_hits = spill_hits[iog_mask]
+                iog_evt_charge = 0.
+                if len(iog_hits) > 0:
+                    iog_evt_charge = np.sum(iog_hits['Q'])
+                io_group_contrib[a][int(iog-1)] = float(iog_evt_charge)/float(event_charge)
+
+        fig = plt.figure(figsize=(10,8))
+        ax = fig.add_subplot()
+        ax.set_facecolor('none')
+        bottom = np.zeros(n_evts)
+        for iog in io_groups_uniq:
+            iog_cont = (io_group_contrib[:,int(iog-1):iog]).flatten()
+            ax.bar(range(n_evts), iog_cont, bottom=bottom, label='IO Group '+str(iog), width=1.0)
+            bottom += iog_cont
+        #ax.legend(ncols=4,fontsize=13)
+        ax.set_xlabel('charge event number',fontsize=18)
+        ax.set_ylabel('io_group contribution',fontsize=18)
+        #ax.set_ylim([-0.1,1.5])
+        del ax, fig
+        output.savefig()
+        plt.close()
+
+        print("Making plot comparing reco event ID vs true event ID")
+        # true spill ID vs. reconstructed charge event
+        fig = plt.figure(figsize=(10,8))
+        ax = fig.add_subplot()
+        flow_evt_to_hit = flow_h5['/charge/events/ref/charge/calib_final_hits/ref_region']
+        flow_evts = flow_h5['/charge/events/data']
+        final_hits = flow_h5['/charge/calib_final_hits/data']
+        hits_bt = flow_h5['mc_truth/calib_final_hit_backtrack/data']
+        segments = flow_h5['mc_truth/segments/data']
+
+        # Construct a lookup array from segment ID to segment index :`(
+        min_segment_id = np.min(segments['segment_id'])
+        max_segment_id = np.max(segments['segment_id'])
+        n_segment_ids = max_segment_id - min_segment_id + 1
+        segment_indices = np.array([-1]*n_segment_ids, dtype=np.int32)
+        for i, seg_id in enumerate(segments['segment_id']):
+            segment_indices[seg_id - min_segment_id] = i
+
+        true_ids = []
+        reco_ids = []
+        for c_evt in flow_evts['id']:
+            hit_ref_slice = flow_evt_to_hit[c_evt]
+            hts = final_hits[hit_ref_slice[0]:hit_ref_slice[1]]
+            hts_bt = hits_bt[hit_ref_slice[0]:hit_ref_slice[1]]
+            spill_ids = {}
+            for h in hts_bt:
+                for cont in range(len(h['fraction'])):
+                    if abs(h['fraction'][cont]) > 0.0001:
+                        seg_id = h['segment_ids'][cont]
+                        seg = segments[segment_indices[seg_id - min_segment_id]]
+                        if not seg['segment_id'] == seg_id:
+                            print('WARNING: segment id not the same as segment index!')
+                        sid = seg['event_id']
+                        if not sid in spill_ids.keys():
+                            spill_ids[sid] = 1
+                        else: spill_ids[sid] += 1
+            reco_ids.extend([c_evt]*len(spill_ids))
+            true_ids.extend(spill_ids)
+        ax.scatter(reco_ids,true_ids)
+        ax.set_xlabel('reco event ID',fontsize=18)
+        ax.set_ylabel('true event ID',fontsize=18)
+        del ax, fig
+        output.savefig()
+        plt.close()
+
+        print("Making 3D plot of selected spills")
+        # 3D - several individual spills
+        for i in range(int(np.ceil(n_evts/9))):
+            fig = plt.figure(figsize=(10,15),layout="constrained")
+            gs = fig.add_gridspec(3,3)
+            ax = []
+            for a in range(9):
+                if 9*i + a >= n_evts:
+                    break
+                hit_ref_slice = flow_h5['charge/events/ref/charge/calib_final_hits/ref_region'][9*i + a]
+                spill_hits = final_hits[hit_ref_slice[0]:hit_ref_slice[1]]
+                ax.append(fig.add_subplot(gs[a//3,a%3],projection='3d'))
+                dat = ax[a].scatter(spill_hits['z'],spill_hits['x'],
+                           spill_hits['y'],c=spill_hits['Q'],
+                           s=1,cmap='viridis',norm=mlp.colors.LogNorm())
+                #cb = fig.colorbar(dat, ax=ax[a], label="detected charge",
+                #                  shrink=0.5, location='left', pad = 0.)
+                #cb.ax.yaxis.set_ticks([matplotlib.ticker.FixedLocator([])])
+                ax[a].set_title(f"spill {9*i + a + 1}",fontsize=12)
+                ax[a].set_xlabel('z [cm]')
+                ax[a].set_ylabel('x [cm]')
+                ax[a].set_zlabel('y [cm]')
+                #ax[a].set_xlim([-65.,65])
+                #ax[a].set_ylim([-65.,65])
+                #ax[a].set_zlim([-65.,65])
+            output.savefig()
+            plt.close()
+
+
+        print("Making event timing and nhits plot")
+        ### charge/event information
+        fig = plt.figure(figsize=(10,6))
+        gs  = fig.add_gridspec(3,1)
+        # share x-axis = event id
+        ax1 = fig.add_subplot(gs[0,0])
+        ax2 = fig.add_subplot(gs[1,0],sharex=ax1)
+        ax3 = fig.add_subplot(gs[2,0],sharex=ax1)
+        fig.subplots_adjust(left=0.15,bottom=0.1,wspace=None, hspace=0.)
+        event_data = flow_h5['charge/events/data']
+        ax1.plot(event_data['id'],event_data['unix_ts'],linestyle='None',marker='o',ms=3)
+        ax1.set_ylim([0,500])
+        ax1.set_ylabel('unix_ts',fontsize=18)
+        ax1.set_title('charge/events/data',fontsize=18)
+        ax1.grid()
+        ax2.plot(event_data['id'],event_data['ts_start'],linestyle='None',marker='o',ms=3)
+        ax2.set_ylabel('ts_start',fontsize=18)
+        ax2.grid()
+        ax3.plot(event_data['id'],event_data['nhit'],linestyle='None',marker='o',ms=3)
+        ax3.set_ylabel('nhit',fontsize=18)
+        ax3.set_xlabel('event ID',fontsize=18)
+        ax3.grid()
+        ax1.tick_params(labelbottom=False)
+        ax2.tick_params(labelbottom=False)
+        output.savefig()
+        plt.close()
+
+        # get the packet data, references to calib_prompt_hits, and create some masks:
+        packets = flow_h5['/charge/packets/data']
+        packets_hits_ref = flow_h5['charge/calib_prompt_hits/ref/charge/packets/ref']
+        packets_hits = packets[:][packets_hits_ref[:,1]]
+        packet_index = np.array(list(range(0,len(packets))))
+        data_packet_mask = packets['packet_type'] == 0
+        trig_packet_mask = packets['packet_type'] == 7
+        sync_packet_mask = packets['packet_type'] == 4
+        other_packet_mask= ~(data_packet_mask | trig_packet_mask | sync_packet_mask)
+
+        print ("Plotting time structure of packets")
+        ### Plot time structure of packets: 
+        plt.clf()
+        plt.plot(packets['timestamp'][data_packet_mask],packet_index[data_packet_mask],'o',label='data packets',linestyle='None')
+        plt.plot(packets['timestamp'][trig_packet_mask],packet_index[trig_packet_mask],'o',label='lrs triggers',linestyle='None')
+        plt.plot(packets['timestamp'][sync_packet_mask],packet_index[sync_packet_mask],'o',label='PPS packets',linestyle='None')
+        plt.plot(packets['timestamp'][other_packet_mask],packet_index[other_packet_mask],'o',label='other',linestyle='None')
+        plt.title('/charge/packets/data')
+        plt.xlabel('timestamp')
+        plt.ylabel('packet index')
+        plt.legend()
+        output.savefig()
+        plt.close()
+
+        plt.hist(packets['timestamp'],bins=100)
+        plt.title('/charge/packets/data')
+        plt.xlabel('timestamp')
+        plt.ylabel('number of packets per bin')
+        output.savefig()
+        plt.close()
+
+        # New 2D plot: Packet Type vs Timestamp
+        plt.figure(figsize=(10, 8))
+        plt.hist2d(packets['timestamp'], packets['packet_type'], bins=[100, 8], range=[[min(packets['timestamp']), max(packets['timestamp'])], [0, 8]], cmap='viridis', cmin=1)
+        plt.colorbar(label='Number of Packets')
+        plt.title('Packet Type vs Timestamp')
+        plt.xlabel('Timestamp')
+        plt.ylabel('Packet Type')
+        plt.yticks(range(8))
+        output.savefig()
+        plt.close()
+
+        plt.plot(packets['receipt_timestamp'][data_packet_mask],packet_index[data_packet_mask],'o',label='data packets',linestyle='None')
+        plt.plot(packets['receipt_timestamp'][trig_packet_mask],packet_index[trig_packet_mask],'o',label='lrs triggers',linestyle='None')
+        plt.plot(packets['receipt_timestamp'][sync_packet_mask],packet_index[sync_packet_mask],'o',label='PPS packets',linestyle='None')
+        plt.plot(packets['receipt_timestamp'][other_packet_mask],packet_index[other_packet_mask],'o',label='other',linestyle='None')
+        plt.title('/charge/packets/data')
+        plt.xlabel('receipt_timestamp')
+        plt.ylabel('packet index')
+        plt.legend()
+        output.savefig()
+        plt.close()
+
+        plt.hist(packets['receipt_timestamp'],bins=100)
+        plt.title('/charge/packets/data')
+        plt.xlabel('receipt_timestamp')
+        plt.ylabel('number of packets per bin')
+        output.savefig()
+        plt.close()
+
+        ### Plot charge vs. time per io_group/tpc
+        packets_stack = []
+        weights_stack = []
+        for iog in io_groups_uniq:
+            iog_mask = (packets['io_group'] == iog) & data_packet_mask
+            packets_stack.append(packets['timestamp'][iog_mask]%(SPILL_PERIOD%RESET_PERIOD))
+            weights_stack.append(packets['dataword'][iog_mask])
+        plt.hist(packets_stack, weights=weights_stack, stacked=True, bins=200, label='io_group '+str(iog),alpha=0.5)
+        plt.title('/charge/packets/data')
+        plt.xlabel('timestamp%(spill_period%reset_period)')
+        plt.ylabel('charge [ADC]')
+        #plt.legend(ncol=4,bbox_to_anchor=(-0.05,1.00),loc='lower left')
+        output.savefig()
+        plt.close()
+
+        print("Plotting comparisons of prompt vs final hits")
+        # comparisons of prompt hits and final hits
+        # currently correspondes to comparisons before and after merging multi-hits
+        fig = plt.figure(figsize=(10,8),layout='constrained')
+        gs = fig.add_gridspec(2,3)
+        ax1 = fig.add_subplot(gs[0,0])
+        ax2 = fig.add_subplot(gs[0,1])
+        ax3 = fig.add_subplot(gs[0,2])
+        ax4 = fig.add_subplot(gs[1,0])
+        ax5 = fig.add_subplot(gs[1,1])
+        ax6 = fig.add_subplot(gs[1,2])
+        
+        ax1.hist(flow_h5['charge/calib_prompt_hits/data']['x'],bins=100,alpha=0.5,label='prompt hits')
+        ax1.hist(flow_h5['charge/calib_final_hits/data']['x'],bins=100,alpha=0.5,label='merged hits')
+        ax1.set_xlabel('x [cm]')
+        ax1.set_ylabel('N hits')
+        #ax1.legend()
+        
+        ax2.hist(flow_h5['charge/calib_prompt_hits/data']['y'],bins=100,alpha=0.5,label='prompt hits')
+        ax2.hist(flow_h5['charge/calib_final_hits/data']['y'],bins=100,alpha=0.5,label='merged hits')
+        ax2.set_xlabel('y [cm]')
+        ax2.set_ylabel('N hits')
+        ax2.legend()
+        
+        ax3.hist(flow_h5['charge/calib_prompt_hits/data']['z'],bins=100,alpha=0.5,label='prompt hits')
+        ax3.hist(flow_h5['charge/calib_final_hits/data']['z'],bins=100,alpha=0.5,label='merged hits')
+        ax3.set_xlabel('z [cm]')
+        ax3.set_ylabel('N hits')
+        #ax3.legend()
+        
+        ax4.hist(flow_h5['charge/calib_prompt_hits/data']['x'],weights=flow_h5['charge/calib_prompt_hits/data']['E'],bins=100,alpha=0.5,label='prompt hits')
+        ax4.hist(flow_h5['charge/calib_final_hits/data']['x'],weights=flow_h5['charge/calib_final_hits/data']['E'],bins=100,alpha=0.5,label='merged hits')
+        ax4.set_xlabel('x [cm]')
+        ax4.set_ylabel('Energy [MeV]')
+        #ax4.legend()
+        
+        ax5.hist(flow_h5['charge/calib_prompt_hits/data']['y'],weights=flow_h5['charge/calib_prompt_hits/data']['E'],bins=100,alpha=0.5,label='prompt hits')
+        ax5.hist(flow_h5['charge/calib_final_hits/data']['y'],weights=flow_h5['charge/calib_final_hits/data']['E'],bins=100,alpha=0.5,label='merged hits')
+        ax5.set_xlabel('y [cm]')
+        ax5.set_ylabel('Energy [MeV]')
+        #ax5.legend()
+        
+        ax6.hist(flow_h5['charge/calib_prompt_hits/data']['z'],weights=flow_h5['charge/calib_prompt_hits/data']['E'],bins=100,alpha=0.5,label='prompt hits')
+        ax6.hist(flow_h5['charge/calib_final_hits/data']['z'],weights=flow_h5['charge/calib_final_hits/data']['E'],bins=100,alpha=0.5,label='merged hits')
+        ax6.set_xlabel('z [cm]')
+        ax6.set_ylabel('Energy [MeV]')
+        #ax6.legend()
+        
+        output.savefig()
+        plt.close()
+        
+        #ax4.hist(flow_h5['charge/calib_prompt_hits/data']['ts_pps'],bins=100,alpha=0.5,label='prompt hits')
+        #ax4.hist(flow_h5['charge/calib_final_hits/data']['ts_pps'],bins=100,alpha=0.5,label='merged hits')
+        #ax4.set_xlabel('ts_pps [ticks = 0.1 us]')
+        #ax4.set_ylabel('N hits')
+        #ax4.legend()
+
+        # Add if statement for io_group hit dataset validations for backwards compatibility
+        if 'io_group' in list(hits.dtype.names):
+            fig = plt.figure(figsize=(10,8), layout='constrained')
+            gs = fig.add_gridspec(2,1)
+            ax1 = fig.add_subplot(gs[0,0])
+            ax2 = fig.add_subplot(gs[1,0])
+
+            # Histograms of io_group in hits and packet datasets
+            #ax1.set_ylim(0,30000)
+            ax1.hist(packets_hits['io_group'], 
+                     label="packets", bins=len(io_groups_uniq), range=(1, len(io_groups_uniq)+1), 
+                     alpha=1.0, color='#377eb8', 
+                     edgecolor='#377eb8', linestyle='-')
+            ax1.hist(hits['io_group'], 
+                     label="calib_prompt_hits", bins=len(io_groups_uniq), range=(1, len(io_groups_uniq)+1),
+                     alpha=1.0, color='#ff7f00', 
+                     edgecolor='#ff7f00', linestyle='-', 
+                     linewidth=1.5,fill=False)
+            ax1.hist(final_hits['io_group'], 
+                     label="calib_final_hits", bins=len(io_groups_uniq), range=(1, len(io_groups_uniq)+1),
+                     alpha=0.8, color='#4daf4a', 
+                     edgecolor='#4daf4a', linestyle='--', 
+                     linewidth=1.5,fill=False)
+            ax1.set_title("Hits per IO Group Distribution in Different Datasets")
+            ax1.set_xlabel("IO Group")
+            ax1.set_ylabel("Hits / IO Group")
+            ax1.legend()
+
+            p_iog, p_iog_bins = np.histogram(hits['io_group'], bins=len(io_groups_uniq), range=(1, len(io_groups_uniq)+1),)
+            f_iog, f_iog_bins = np.histogram(final_hits['io_group'], bins=len(io_groups_uniq), range=(1, len(io_groups_uniq)+1),)
+            iog_resid = 100*(p_iog - f_iog)/p_iog
+            mean_iog_resid = np.mean(iog_resid)
+            final_prompt_resid = 100*(len(hits['io_group']) - len(final_hits['io_group']))/len(hits['io_group'])
+
+            ax2.set_ylim(0,np.max(iog_resid)+10)
+            ax2.plot(np.arange(len(io_groups_uniq)+1)+1, np.ones(len(io_groups_uniq)+1)*mean_iog_resid, \
+                     linestyle='--', color='blue', \
+                     label="Mean % Decr. over IO Groups ("+str(round(mean_iog_resid, 1))+"%)")
+            ax2.plot(np.arange(len(io_groups_uniq)+1)+1, np.ones(len(io_groups_uniq)+1)*final_prompt_resid, \
+                     linestyle='--', color='black', \
+                     label="Total % Decr. Prompt to Final Hits ("+str(round(final_prompt_resid, 1))+"%)")
+            ax2.hist(np.arange(len(io_groups_uniq))+1, weights=iog_resid, bins=len(io_groups_uniq), range=(1, len(io_groups_uniq)+1),
+                    alpha=1.0, color='#f781bf', 
+                    edgecolor='#f781bf', linestyle='-', 
+                    linewidth=1.5)
+            ax2.set_title("Percent Decrease in Hits per IO Group from Prompt to Final Hits")
+            ax2.set_xlabel("IO Group")
+            ax2.set_ylabel("Percent Decrease in Hits / IO Group")
+            ax2.legend()
+
+            output.savefig()
+            plt.close()
+
+        # Add if statement for io_channel hit dataset validations for backwards compatibility
+        if 'io_channel' in list(hits.dtype.names):
+            fig = plt.figure(figsize=(10,8), layout='constrained')
+            gs = fig.add_gridspec(2,1)
+            ax1 = fig.add_subplot(gs[0,0])
+            ax2 = fig.add_subplot(gs[1,0])
+
+            # Histograms of io_channel in hits datasets
+            io_channel_uniq = set(packets_hits['io_channel'])
+            #ax1.set_ylim(0,12000)
+            ax1.hist(packets_hits['io_channel'], 
+                     label="packets", bins=len(io_channel_uniq), range=(1, len(io_channel_uniq)+1),
+                     alpha=1.0, color='#377eb8', 
+                     edgecolor='#377eb8', linestyle='-')
+            ax1.hist(hits['io_channel'], 
+                     label="calib_prompt_hits", bins=len(io_channel_uniq), range=(1, len(io_channel_uniq)+1),
+                     alpha=1.0, color='#ff7f00', 
+                     edgecolor='#ff7f00', linestyle='-', 
+                     linewidth=1.5,fill=False)
+            ax1.hist(final_hits['io_channel'], 
+                     label="calib_final_hits", bins=len(io_channel_uniq), range=(1, len(io_channel_uniq)+1), 
+                     alpha=0.8, color='#4daf4a', 
+                     edgecolor='#4daf4a', linestyle='--', 
+                     linewidth=1.5,fill=False)
+            ax1.set_title("Hits per IO Channel Distribution in Different Datasets")
+            ax1.set_xlabel("IO Channel")
+            ax1.set_ylabel("Hits / IO Channel")
+            ax1.legend()
+
+            p_io_channel, p_io_channel_bins = np.histogram(hits['io_channel'], bins=len(io_channel_uniq), range=(1, len(io_channel_uniq)+1))
+            f_io_channel, f_io_channel_bins = np.histogram(final_hits['io_channel'], bins=len(io_channel_uniq), range=(1, len(io_channel_uniq)+1))
+            io_channel_resid = 100* (p_io_channel - f_io_channel)/p_io_channel
+            mean_io_channel_resid = np.mean(io_channel_resid)
+
+            ax2.set_ylim(0,np.max(io_channel_resid)+10)
+            ax2.plot(np.arange(len(io_channel_uniq)+1)+1, np.ones(len(io_channel_uniq)+1)*mean_io_channel_resid, \
+                     linestyle='--', color='blue', \
+                     label="Mean % Decr. over IO Channels ("+str(round(mean_io_channel_resid, 1))+"%)")
+            ax2.plot(np.arange(len(io_channel_uniq)+1)+1, np.ones(len(io_channel_uniq)+1)*final_prompt_resid, \
+                     linestyle='--', color='black', \
+                     label="Total % Decr. Prompt to Final Hits ("+str(round(final_prompt_resid, 1))+"%)")
+            ax2.hist(np.arange(len(io_channel_uniq))+1, weights=io_channel_resid, bins=len(io_channel_uniq), range=(1, len(io_channel_uniq)+1),
+                     alpha=1.0, color='#f781bf', 
+                     edgecolor='#f781bf', linestyle='-', 
+                     linewidth=1.5)
+            ax2.set_title("Percent Decrease in Hits per IO Channel from Prompt to Final Hits")
+            ax2.set_xlabel("IO Channel")
+            ax2.set_ylabel("Percent Decrease in Hits / IO Channel")
+            ax2.legend()
+
+            output.savefig()
+            plt.close()
+
+        # Check the sums of the fraction field for charge deposition. They should add to 1.
+        if 'mc_truth' in flow_h5.keys():
+            print("Plotting packet fractions sum for each calib hit")
+            fractions = flow_h5['mc_truth/calib_prompt_hit_backtrack/data']['fraction']
+            summed_fractions = fractions.sum(axis=-1)
+            fig, ax = plt.subplots(constrained_layout = True)
+            ax.hist(summed_fractions, bins= np.arange(-0.05, 10, 0.1))
+            ax.set_title("Sum of packet fractions for each calib hit, range = [-0.05, 10]")
+            ax.set_yscale('log')
+            ax.set_xlabel("Sum")
+            ax.set_ylabel("Count")
+
+            output.savefig()
+            plt.close()
+
+            fig, ax = plt.subplots(constrained_layout = True)
+            ax.hist(summed_fractions, bins= 50)
+            ax.set_title("Sum of packet fractions for each calib hit")
+            ax.set_yscale('log')
+            ax.set_xlabel("Sum")
+            ax.set_ylabel("Count")
+
+            output.savefig()
+            plt.close() 
+
+            # Pie chart for packet fractions
+            epsilon = 1e-5
+            less_than_one = np.sum(summed_fractions < (1.0 - epsilon))
+            greater_than_one = np.sum(summed_fractions > (1.0 + epsilon))
+            exactly_one = len(summed_fractions) - less_than_one - greater_than_one
+            
+            labels = [f'< 1 ({less_than_one})', f'> 1 ({greater_than_one})', f'= 1 ({exactly_one})']
+            sizes = [less_than_one, greater_than_one, exactly_one]
+            colors = ['gold', 'lightcoral', 'lightskyblue']
+            explode = (0.1, 0.1, 0.1)  # explode all slices
+
+            fig, ax = plt.subplots(figsize=(8, 6))
+            ax.pie(sizes, explode=explode, labels=labels, colors=colors, autopct='%1.1f%%',
+                    shadow=True, startangle=140)
+            ax.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
+            ax.set_title("Proportion of hits with fraction sum < 1, > 1, and = 1")
+            output.savefig()
+            plt.close()
+
+            print("Plotting number of backtracked segments per calib hit")
+            bt = flow_h5['/mc_truth/calib_prompt_hit_backtrack/data']
+            n_backtracked = (bt['segment_ids']!=-1).sum(axis=-1)
+
+            fig, ax = plt.subplots(constrained_layout = True)
+            ax.hist(n_backtracked, bins= np.arange(-0.5, n_backtracked.max()+0.5, 1))
+            ax.set_title("Number of backtracked segments in each calib hit")
+            ax.set_xlabel("Number of backtracked segments")
+            ax.set_ylabel("Count")
+
+            output.savefig()
+            plt.close()
+
+            print("Plotting comparison of true segment position vs hit position")
+            segs = flow_h5['/mc_truth/segments/data']
+            bt = flow_h5['/mc_truth/calib_prompt_hit_backtrack/data']
+
+            bt_ids = bt['segment_ids'][:,0]
+            bt_mask = bt_ids != -1
+            hits = hits[bt_mask]
+            bt = bt[bt_mask]
+            bt_ids = bt_ids[bt_mask]
+
+            seg_ids = segs['segment_id']
+            min_seg_id = seg_ids.min()
+            nkeys = seg_ids.max() - min_seg_id + 1
+            seg_id2idx = np.full(nkeys, -1)
+            for idx, seg_id in enumerate(seg_ids):
+                seg_id2idx[seg_id - min_seg_id] = idx 
+
+            for field in ['x', 'y', 'z']:
+                fig, ax = plt.subplots(constrained_layout = True)
+                x_true = segs[field][seg_id2idx[bt_ids - min_seg_id]]
+                x_hits = hits[field]
+                ax.hist(x_true - x_hits, bins=100)
+                ax.set_yscale('linear')
+                ax.set_xlabel(f'$\Delta${field} [cm]')
+                ax.set_ylabel("Count")
+                ax.set_title("True backtracked segment position - Reco hit position: {}".format(field))
+                output.savefig()
+                plt.close()
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--flow_file', default=None, type=str,help='''string corresponding to the path of the ndlar_flow output file to be considered''')
+    parser.add_argument('--charge_only', action='store_true', help='''boolean to flag that light has not been simualted''')
+    args = parser.parse_args()
+    main(**vars(args))
+
+

From 42c6894b45b88f298b84f07af2b1c0da2e16a58b Mon Sep 17 00:00:00 2001
From: MadivB <67230306+MadivB@users.noreply.github.com>
Date: Tue, 6 Jan 2026 16:00:57 -0800
Subject: [PATCH 2/2] Update flow_validation_v2.py

---
 run-validation/flow_validation_v2.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/run-validation/flow_validation_v2.py b/run-validation/flow_validation_v2.py
index cd5708d..ece1446 100644
--- a/run-validation/flow_validation_v2.py
+++ b/run-validation/flow_validation_v2.py
@@ -1,4 +1,6 @@
 #!/usr/bin/env python
+#This script can be used to validate flow outputs for either 2x2 or ND-LAr full.
+#Example run: python flow_validation_v2.py --flow_file /global/cfs/cdirs/dune/www/data/2x2/simulation/productions/MiniRun6.4_1E19_RHC/MiniRun6.4_1E19_RHC.flow/FLOW/0000000/MiniRun6.4_1E19_RHC.flow.0000000.FLOW.hdf5
 
 import matplotlib as mlp
 import matplotlib.pyplot as plt