#!/bin/sh

# MUST ENSURE that the application executable is built with the appropriate MPI
# . /usr/share/modules/init/bash
# module switch mpi mpi/openmpi-1.3.4_intel-11.1-f046-c046  

# script to create working directory, populate, and run text_book_simple_par
# in parallel on a subset of processors

#-----------------------------------
# CREATE TEMPORARY WORKING DIRECTORY
#
# This prevents file trampling when running concurrent jobs.
#-----------------------------------

# Evaluation tag: whatever follows the last '.' in the parameters file
# name passed as $1 (e.g. params.in.3 -> 3).
num=${1##*.}
topdir=$(pwd)
workdir="$topdir/workdir.$num"

# -p keeps a rerun working when the directory is left over from an earlier
# evaluation; any other failure is fatal because nothing below can work
# without the sandbox.
mkdir -p "$workdir" || {
  printf '%s: cannot create %s\n' "$0" "$workdir" >&2
  exit 1
}
cp "$topdir/$1" "$workdir/dakota_vars"

# Stop if the sandbox cannot be entered: continuing would run the simulation
# in the launch directory and later move the results file to its parent.
cd "$workdir" || {
  printf '%s: cannot cd to %s\n' "$0" "$workdir" >&2
  exit 1
}


# -------------------------
# INPUT FILE PRE-PROCESSING
# -------------------------

# This demo does not need file pre-processing, but normally (see
# below) APREPRO or DPREPRO is used to "cut-and-paste" data from the
# params.in.# file written by DAKOTA into the template input file for
# the user's simulation code.

# aprepro run6crh_rigid_template.i temp_rigid.new
# grep -vi aprepro temp_rigid.new > run6crh_rigid.i

# dprepro $1 application_input.template application.in 

# No template substitution is required for this example, so the
# application input is just a copy of the parameters file:
cp ./dakota_vars ./application.in


# -------------------
# RUN SIMULATION CODE
# TODO: instead of repeating the driver in every directory, could we just have
#       a separate mpi_launch_app.sh in each dir?
# -------------------

# !!! Requires that APPLIC_PROCS either divide evenly into PPN or be
# !!! an integer multiple of it

# number of concurrent jobs (must agree with DAKOTA evaluation_concurrency)
# CONCURRENCY=`grep concurr dakota_pstudy.in | cut -d "=" -f2`
CONCURRENCY=4

# number of processors per node (with SLURM could use
# $SLURM_CPUS_ON_NODE or $SLURM_TASKS_PER_NODE if appropriate)
PPN=16

# number of processors per application job
APPLIC_PROCS=2

# Status message; the processor count is taken from APPLIC_PROCS so the
# message cannot drift from the value actually used for the launch.
echo "$0 running text_book_simple_par on $APPLIC_PROCS processors."

# number of nodes needed per application job ( ceil(APPLIC_PROCS/PPN) )
applic_nodes=$(( (APPLIC_PROCS + PPN - 1) / PPN ))

# relative node on which the job should schedule (floor)
# this is the first of the node block for this job:
#   slot       = (num - 1) % CONCURRENCY    zero-based concurrent-job slot
#   first node = slot * APPLIC_PROCS / PPN  (integer division)
relative_node=$(( ((num - 1) % CONCURRENCY) * APPLIC_PROCS / PPN ))

# RESERVE a node for DAKOTA (recommended, but assumes DAKOTA starts on
# the zeroth node in the allocation (true for SLURM with OpenMPI);
# some MPI start DAKOTA on the last node in which case you need not do
# anything special to these calcs, just add a node to the batch request)

# NOTE: it's not trivial to reserve a processor for DAKOTA in this
# case (due to -N1-1), though easy to reserve a _node_.  (must allow
# one extra node in submission).  It is easy to reserve one CPU for
# DAKOTA if the analysis requires only one CPU.
##relative_node=$(( (num - 1) % CONCURRENCY * APPLIC_PROCS / PPN + 1 ))

# build a node list
#
# build_node_list FIRST COUNT
#   Prints a comma-separated list of COUNT consecutive relative-node
#   entries starting at FIRST, e.g. "build_node_list 3 2" -> "+n3,+n4".
#   A COUNT of 1 yields just "+nFIRST".
#   The body runs in a subshell ( ... ) so its scratch variables do not leak
#   into the script. A plain while loop is used instead of `seq`, which is
#   not POSIX and counts downwards on BSD/macOS when the last value is
#   smaller than the first (as happens for a single-node job).
build_node_list() (
  first=$(( $1 ))
  last=$(( first + $2 - 1 ))
  list="+n${first}"
  node=$(( first + 1 ))
  while [ "$node" -le "$last" ]; do
    list="${list},+n${node}"
    node=$(( node + 1 ))
  done
  printf '%s\n' "$list"
)
node_list=$(build_node_list "$relative_node" "$applic_nodes")

# Launch the simulation under MPI: APPLIC_PROCS processes restricted to
# the hosts in node_list, whose "+n<k>" entries are (zero-based) relative
# node indices beginning at relative_node (OpenMPI >= 1.3.3)
mpirun \
    -np "$APPLIC_PROCS" \
    -host "$node_list" \
    text_book_simple_par application.in application.out

# TODO: openmpi and/or srun exclusive mode should allow mpiexec-like
# behavior, but we haven't gotten it working yet

# use sleep command if file I/O timing is a problem
#sleep 10


# ---------------------------
# OUTPUT FILE POST PROCESSING
# ---------------------------

# Normally any application-specific post-processing to prepare the
# results.out file for DAKOTA would go here. Here we'll substitute a
# copy command:

cp application.out results2dakota

# for demo, append the node name to see on which node this task ran
# (comment out for actual application)
uname -n >> results2dakota

# When using DAKOTA's fork interface, the application can directly
# write its output (if in the right format) to results.out.$num
# (../$2) for DAKOTA, however for the system interface, use the
# following move to avoid a race condition: the results file only
# appears under its final name once it has been completely written.
# The destination is quoted so that a results file name containing
# whitespace or glob characters reaches mv as a single path.

mv results2dakota "../$2"
cd ..

# -------------
# CLEANUP
# -------------

# uncomment to cleanup work directories as evaluations progress
#rm -rf ./workdir.$num
#rm ./results.out.$num
