Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions fixedTarget/batch/ganga.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# -----------------------------------------------------
# Put the functions defined here into your ~/.ganga.py
# and they will be available in the Ganga namespace.
# Then you can just call mergeOutput(jobNo) and it will
# submit the merge job.
# -----------------------------------------------------

def args_list_to_dict(args_list):
    """Convert a flat command-line style argument list into a dictionary.

    Each item is read as an option name with its leading dashes stripped.
    If the item after it is a value, the two form a key/value pair;
    otherwise the option is stored as a flag set to ``True``.

    An item counts as a value unless it is a string starting with ``-``.
    Non-string values, such as the integer in ``['-n', 10000]``, are
    therefore kept as values instead of being mistaken for the next option.

    Args:
        args_list: Sequence of option names and values, e.g.
            ``['--site', 'CERN', '-n', 10000, '--verbose']``.

    Returns:
        dict mapping each option name (without leading dashes) to its value,
        or to ``True`` for an option that has no value.
    """
    result = {}
    n_args = len(args_list)
    i = 0
    while i < n_args:
        key = str(args_list[i]).lstrip("-")
        has_value = False
        if i + 1 < n_args:
            candidate = args_list[i + 1]
            # Only a string beginning with "-" is treated as another option.
            has_value = not (isinstance(candidate, str) and candidate.startswith("-"))
        if has_value:
            result[key] = args_list[i + 1]
            i += 2
        else:
            # Option without a value: record it as a boolean flag.
            result[key] = True
            i += 1
    return result

def mergeOutput(jNo, script_path="/vols/lhcb/masmith/SHiP/htcondor_submission_scripts/fixedTarget/batch/"):
    """Build and submit a Condor job that merges the output of job ``jNo``.

    The merge job runs ``wn_script.py`` with ``merger.py`` shipped as an
    input file. It reuses the production job's application arguments, with
    ``run_fixedTarget.py`` swapped for ``merger.py``, and appends the list
    of files to merge after ``-i``.

    This function relies on names from the Ganga namespace (``Job``,
    ``jobs``, ``config``, ``Executable``, ...) and on ``args_list_to_dict``.

    Args:
        jNo: Ganga job number of the production job whose output is merged.
        script_path: Directory holding ``wn_script.py`` and ``merger.py``.
            Set this to wherever you checked out the scripts package.
    """
    import os  # local import so the function is self-contained in ~/.ganga.py

    # Look the production job up once, before creating anything, so that a
    # wrong job number does not leave an empty merge job behind.
    source_job = jobs(jNo)
    source_args = source_job.application.args
    this_site = args_list_to_dict(source_args)["site"]

    j = Job(name = f"Merge output of production job {jNo}")
    j.virtualization = Apptainer(image="/cvmfs/unpacked.cern.ch/registry.cern.ch/ship/gha-runner:latest/")
    j.virtualization.mounts = {'/cvmfs':'/cvmfs', '/home/hep':'/home/hep'} # Adjust for your own setup
    # os.path.join works whether or not script_path ends with a slash.
    j.application = Executable(exe = File(os.path.join(script_path, 'wn_script.py')))
    j.inputfiles = [LocalFile(os.path.join(script_path, "merger.py"))]

    # Copy over the args but change the run file from run_fixedTarget.py to merger
    myargs = ["--useLocalFile"]
    myargs.extend('merger.py' if _a == 'run_fixedTarget.py' else _a for _a in source_args)
    myargs.extend(['-j', jNo, '--prodSite', this_site])
    myargs.append('-i')

    # Now grab the files
    if this_site == "GRIDPP": # Do something different for GRIDPP where the files are on Dirac
        inputfiles = source_job.backend.getOutputDataAccessURLs(protocol='"root"')
        config["DIRAC"]["userVO"] = 'gridpp'
        config["DIRAC"]["allDiracSE"] = ["UKI-LT2-IC-HEP-disk"]
    else:
        inputfiles = []
        for sj in source_job.subjobs.select(status="completed"):
            for _f in sj.outputfiles:
                if not isinstance(_f, MassStorageFile):
                    continue
                if not _f.locations:
                    # No location recorded for this file: there is nothing to merge.
                    print(f"WARNING: skipping an output file of job {jNo} with no recorded storage location")
                    continue
                inputfiles.append(_f.locations[0])
    myargs.extend(inputfiles)
    j.application.args = myargs

    j.outputfiles = [DiracFile('pythia8_*.root')]
    j.backend = Condor()
    j.backend.cdf_options["+MaxRuntime"] = '3000' # Set this to something reasonable
    j.submit()
32 changes: 19 additions & 13 deletions fixedTarget/batch/gangaScript.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,34 +19,40 @@
# NOTE(review): this span looks like a rendered diff hunk whose +/- markers
# were lost, so superseded and replacement lines are interleaved (several
# settings below are assigned twice). Confirm against the real file before
# editing.
# Bounds passed to random.randint() below when drawing startRun.
run_min = 600000000
run_max = 900000000

evtsPerJob = 800000 # 200000
nJ = 70 # in total want 14000 subjobs per week as a first try
nSJ = 200 # fixed number of subjobs per job to register all subjobs on rucio at the same time
# NOTE(review): the next three assignments override the three above.
evtsPerJob =10000# 500000# 800000 # 200000
nJ = 1# 70 # in total want 14000 subjobs per week as a first try
nSJ = 1#400# 200 # fixed number of subjobs per job to register all subjobs on rucio at the same time
ecut = 30 # 5

# The upper bound leaves room for the nJ * nSJ consecutive run numbers used below.
startRun = random.randint(run_min, run_max - nJ * nSJ)
# Events summed over the nSJ subjobs of one job.
totalEvts = evtsPerJob * nSJ

# NOTE(review): startRun is drawn a second time with the same expression; presumably only one of the two lines is meant to remain.
startRun = random.randint(run_min, run_max - nJ * nSJ)

# Create nJ jobs; subjob _i of job J is given the run number startRun + J * nSJ + _i via the splitter.
for J in range(nJ):
j = Job(name = f'run fixed target production number {J} - {nSJ * evtsPerJob} events')
j.application = Executable(exe = File('wn_script.py'), args = ['--runfile', 'run_fixedTarget.py', '--cvmfs_version', '26.03', '--site', SITE, '--', '-o', '"./"', '-n', evtsPerJob, '-e', str(ecut)])

j.virtualization = Apptainer(image="/cvmfs/unpacked.cern.ch/registry.cern.ch/ship/gha-runner:latest/")
j.virtualization.mounts = {'/cvmfs':'/cvmfs'}
# NOTE(review): j.application is assigned again here; the only difference is '-o' './' instead of '-o' '"./"'.
j.application = Executable(exe = File('wn_script.py'), args = ['--runfile', 'run_fixedTarget.py', '--cvmfs_version', '26.03', '--site', SITE, '--', '-o', './', '-n', evtsPerJob, '-e', str(ecut)])
# IMPORTANT: Only put the run seed in the splitter arguments
j.splitter = ArgSplitter(args = [['-r', startRun + J * nSJ + _i] for _i in range(nSJ)], append = True)
j.outputfiles = [MassStorageFile('pythia8_evtgen_Geant4_*.root')]
j.backend = Condor()
j.backend.cdf_options['+MaxRuntime'] = '86000'
# NOTE(review): outputfiles and backend are set again below, replacing the three lines above.
j.outputfiles = [DiracFile('pythia8_evtgen_Geant4_*.root')]
# Dirac backend for GRIDPP, Condor for every other site.
if SITE == 'GRIDPP':
j.backend = Dirac()
j.backend.settings['CPUTime'] = '40000'
else:
j.backend = Condor()
j.backend.cdf_options['+MaxRuntime'] = '86000'

# For running at CERN only
if SITE == 'CERN':
j.backend.env['EOS_MGM_URL'] = "root://eospublic.cern.ch"
j.backend.cdf_options['accounting_group'] = 'group_u_SHIP.u_ship_cg'

# Add in the postprocessor to do the file registration
# cc = CustomChecker(module = 'postprocessor.py')
cc = CustomChecker(module = 'postprocessor_master.py', checkSubjobs=False)
# Searches each subjob's stdout for the success message (checkMaster=False).
fc = FileChecker(files = ['stdout'], searchStrings = ['Macro finished successfully.'], failIfFound = False, checkMaster=False)
j.postprocessors.append(fc)
j.postprocessors.append(cc)
j.comment = f'{evtsPerJob} events in each of {nSJ} subjobs'
# NOTE(review): cc is created and appended again under the CERN check; how far this `if` extends cannot be told from this view.
if SITE == 'CERN':
cc = CustomChecker(module = 'postprocessor_master.py', checkSubjobs=False)
j.postprocessors.append(cc)
# NOTE(review): the f prefix is redundant here; the string has no {} fields and is filled by % formatting.
j.comment = f'%s events in each of %s subjobs, %.2f million total' % (evtsPerJob, nSJ, totalEvts/1.e6)
j.submit()
19 changes: 19 additions & 0 deletions fixedTarget/batch/gangaScript_merger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Ganga job number of the production job whose output is merged.
jNo = 61

j = Job(name = f"Merge output of production job {jNo}")
j.virtualization = Apptainer(image="/cvmfs/unpacked.cern.ch/registry.cern.ch/ship/gha-runner:latest/")
j.virtualization.mounts = {'/cvmfs':'/cvmfs', '/home/hep/mesmith':'/home/hep/mesmith' }
j.application = Executable(exe = File('wn_script.py'))
# merger.py is shipped with the job as an input file.
j.inputfiles = [LocalFile("merger.py")]

# Reuse the production job's arguments, swapping the run file for merger.py.
myargs = ['merger.py' if _a=='run_fixedTarget.py' else _a for _a in jobs(jNo).application.args]
# Without --useLocalFile, wn_script.py looks for the run file under
# FairShip/macro instead of using the merger.py shipped with this job.
myargs.insert(0, "--useLocalFile")
myargs.extend(['-j', jNo, '--prodSite', 'GRIDPP'])
# Everything after -i is the list of files to merge.
myargs.append('-i')
inputfiles = jobs(jNo).backend.getOutputDataAccessURLs(protocol='"root"')
myargs.extend(inputfiles)
j.application.args = myargs
# merger.py names its output "pythia8_Geant4_...", so the pattern has to
# start with "pythia8_" to pick the merged file up.
j.outputfiles = [LocalFile('pythia8_*.root')]

j.backend = Condor()
j.backend.cdf_options["+MaxRuntime"] = '3000'

j.submit()
85 changes: 85 additions & 0 deletions fixedTarget/batch/merger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#!/usr/bin/env python
import os
import argparse
import ROOT
import uproot
import json
from datetime import datetime

def mergeFiles(myfiles, tmpFile):
    """Merge the ``cbmsim`` trees of several files into one output file.

    Args:
        myfiles: Iterable of input file names or URLs added to the chain.
        tmpFile: Name of the file the merged chain is written to.
    """
    chain = ROOT.TChain("cbmsim")
    for path in myfiles:
        chain.Add(path)
    chain.Merge(tmpFile)


# Command-line interface of the merger. Unknown options are tolerated
# (parse_known_args), so arguments copied from the production job are ignored.
parser = argparse.ArgumentParser(description="Run FairShip file merger")

# One (flags, keyword-arguments) entry per option, registered in order.
_OPTION_TABLE = (
    (("-i", "--inFiles"),
     {"nargs": "*", "help": "input files to merge"}),
    (("-n", "--genEvents"),
     {"default": 500000, "type": int,
      "help": "How many events were generated to make this file"}),
    (("-e", "--eCut"),
     {"default": 30, "type": float,
      "help": "What was the energy cut to produce the file"}),
    (("-j", "--gangaJob"),
     {"type": int, "help": "Which ganga job produced these files"}),
    (("-s", "--site"),
     {"default": "CERN", "help": "Which site was this run at"}),
    (("-o", "--outputPath"),
     {"help": "A placeholder"}),
    (("--prodSite",),
     {"default": "CERN", "help": "Which site was this run at"}),
)
for _flags, _settings in _OPTION_TABLE:
    parser.add_argument(*_flags, **_settings)

args, _ = parser.parse_known_args()


# When the inputs were produced at GRIDPP, point the X509 environment
# variables at the grid-security configuration on CVMFS.
if args.prodSite == "GRIDPP":
    os.environ["X509_VOMSES"] = "/cvmfs/grid.cern.ch/etc/grid-security/vomses"
    os.environ["X509_VOMS_DIR"] = "/cvmfs/grid.cern.ch/etc/grid-security/vomsdir"
    os.environ["X509_CERT_DIR"] = "/cvmfs/grid.cern.ch/etc/grid-security/certificates"

# args.inFiles is None when -i is not given at all, so normalise it to a
# list before testing for emptiness.
files_to_merge = args.inFiles or []
if not files_to_merge:
    print("ERROR: no files to merge! Doing nothing")
    # SystemExit is a builtin, so no extra import is needed; the non-zero
    # status reports the failure to the caller.
    raise SystemExit(1)
print("INFO: Merging %s files from job %s" % (len(files_to_merge), args.gangaJob))

# Every input file is taken to hold args.genEvents generated events.
total_pot = len(files_to_merge) * args.genEvents
outName = f"pythia8_Geant4_eCut_{args.eCut}_PoT_{total_pot}_{args.prodSite}_j{args.gangaJob}.root"

mergeFiles(files_to_merge, outName)

# Summary record stored alongside the merged data.
fsr = {
    "PoT" : total_pot,
    "EnergyCut": args.eCut,
    "PoTperFile": args.genEvents,
    "nFilesMerged": len(files_to_merge),
    "filesMerged": files_to_merge,
    "prodSite": args.prodSite,
    "mergeDate": datetime.today().strftime('%Y-%m-%d')
}

print("INFO: Adding the file summary")

# The file summary is written as a JSON string: it is not FairRoot
# specific and is easy to structure, merge and add to.
with uproot.update(outName) as _f:
    _f["FileSummary"] = json.dumps(fsr)

print("INFO: All done")
10 changes: 9 additions & 1 deletion fixedTarget/batch/wn_script.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@ def main():
default = "run_fixedTarget.py"
)

parser.add_argument(
"--useLocalFile",
help="Use a file on the WN, rather than defaulting to FairShip/macro",
default=False,
action="store_true",
)

# Everything after this is passed through to the FairShip script
parser.add_argument(
"script_args",
Expand All @@ -61,7 +68,8 @@ def main():
WORK_DIR = args.work_dir or f"{FS_INSTALL}/sw/"
INIT_SCRIPT = args.init_script or f"{FS_INSTALL}/sw/slc9_x86-64/FairShip/latest/etc/profile.d/init.sh"
RUN_SCRIPT = f"{FS_INSTALL}/sw/slc9_x86-64/FairShip/latest/macro/{args.runfile}"

if args.useLocalFile:
RUN_SCRIPT=args.runfile
fs_version = args.cvmfs_version
fs_tag = args.FairShip_tag or args.cvmfs_version
print(f"INFO: Running at the site {args.site}")
Expand Down