Skip to content
Open
Show file tree
Hide file tree
Changes from 36 commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
6000775
Start updating nemo README
sergisiso Oct 15, 2025
ac6df07
Merge branch 'unresolved_as_calls' into single_nemo_script
sergisiso Nov 10, 2025
85e7249
Merge branch 'unresolved_as_calls' into single_nemo_script
sergisiso Nov 10, 2025
48f0a8c
Update NEMO readmes
sergisiso Nov 11, 2025
a9618c0
Add initial version of the unified insert_loop_parallelism.py script
sergisiso Nov 11, 2025
525e111
Fixes to NEMO insert_loop_parallelism.py
sergisiso Nov 11, 2025
8cc4582
Merge branch 'unresolved_as_calls' into single_nemo_script
sergisiso Nov 11, 2025
d037cc0
Remove NEMO omp_cpu_trans.py
sergisiso Nov 11, 2025
394f8f3
Replace NEMO omp_gpu_trans.py
sergisiso Nov 11, 2025
6fd4de1
Fix some issues with new NEMO script
sergisiso Nov 11, 2025
6a89eb9
Attempt to fix issues with NEMO OpenACC
sergisiso Nov 11, 2025
2252a48
Add allow_strings validation option to ACCParallel
sergisiso Nov 11, 2025
eeb2b10
Merge remote-tracking branch 'origin/3188_tighten_privatisation_valid…
sergisiso Nov 11, 2025
73fb247
Exclude icb from NEMO insert_loop_parallelism and skip failing async
sergisiso Nov 11, 2025
6607aec
Reorder NEMO readme and add section about identifying problems
sergisiso Nov 11, 2025
1e57470
Update NEMO README
sergisiso Nov 11, 2025
aea4c0e
Update NEMO script to only exclude files with offloading issues in NE…
sergisiso Nov 12, 2025
782a163
Replace acc_loop_trans in NEMOv4 test
sergisiso Nov 12, 2025
ca2c06e
Remove unneeded NEMO flag
sergisiso Nov 12, 2025
1e52822
Fix wrong path in NEMO integration test
sergisiso Nov 12, 2025
d767908
Bring unresolved_as_calls changes
sergisiso Nov 13, 2025
0a7c24f
Small change of location of NEMO utils.py conditional
sergisiso Nov 13, 2025
46980be
Bring unresolved_as_calls changes
sergisiso Nov 13, 2025
0b6366d
Merge branch 'unresolved_as_calls' into single_nemo_script
sergisiso Nov 14, 2025
b29c66f
Merge branch 'unresolved_as_calls' into single_nemo_script
sergisiso Dec 1, 2025
98f5dca
Fix issues with CI and integration tests
sergisiso Dec 1, 2025
71270ce
In ECMWF NEMOv4 include mpif.h during cpp
sergisiso Dec 2, 2025
0b80c98
Test ACCParallelTrans allow_strings
sergisiso Dec 2, 2025
9125e6a
Add no-vectorize and no-fma to the NEMOv5 gcc reproducible checks
sergisiso Dec 2, 2025
17eb6c5
Reintroduce NEMOv4 compile-time MPI include
sergisiso Dec 2, 2025
bab0524
Clean up NEMO README and scripts
sergisiso Dec 2, 2025
2c1f468
Uncomment integration test for NEMO ORCA2 async
sergisiso Dec 2, 2025
3ae2c17
Update NEMOv4 with mpi include flag
sergisiso Dec 2, 2025
882c146
Try removing exclusions from NEMO tests
sergisiso Dec 3, 2025
dfb6b0a
Make NEMOv4 dynspg_ts exclusion exclusive to the ArrayAssignment2Loop…
sergisiso Dec 3, 2025
628a1cc
Merge remote-tracking branch 'origin/master' into single_nemo_script
sergisiso Dec 3, 2025
4f92604
#3244 Fix typos and add TODO
sergisiso Dec 9, 2025
1a177d2
#2144 Try to improve hybrid performance
sergisiso Dec 15, 2025
32057f1
#3244 Revert last changes and don't collapse CPU loops
sergisiso Dec 15, 2025
f0f5413
Bring to master
sergisiso Dec 15, 2025
1fc199e
Merge remote-tracking branch 'origin/master' into single_nemo_script
sergisiso Dec 27, 2025
d1dba5a
Merge branch 'master' into single_nemo_script
sergisiso Jan 12, 2026
cccb9f6
Bring to master
sergisiso Jan 20, 2026
55d44fc
Remove old statements brought by the last merge
sergisiso Jan 20, 2026
355a801
Clean up previous merge conflicts
sergisiso Jan 20, 2026
69ad36f
Revert some OpenACC changes
sergisiso Jan 22, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 38 additions & 18 deletions .github/workflows/nemo_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,8 @@ jobs:
module load perl/${PERL_VERSION}
make clean
export NEMOV4=1 # Enables specific NEMOV4 exclusions in the PSyclone transformation script
export PARALLEL_DIRECTIVES="omp_offloading+omp_threading"
export REPRODUCIBLE=1
make -j ${NUM_PARALLEL} openmp_gpu
make -j ${NUM_PARALLEL} compile-openmp_gpu
export NV_ACC_POOL_THRESHOLD=75
Expand Down Expand Up @@ -171,27 +173,39 @@ jobs:
- name: NEMO MetOffice OpenACC loops for GPU
id: nemo_acc_loops
run: |
source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
spack unload && spack load nemo-build-environment%nvhpc
. .runner_venv/bin/activate

# Set up envvars
export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
export PROFILE_HOME=${GITHUB_WORKSPACE}/lib/profiling/nvidia/
export NEMO_DIR=${PREFIX}/UKMO-NEMOv4
cd $PSYCLONE_NEMO_DIR
module load nvidia-hpcsdk/${NVFORTRAN_VERSION}
module load hdf5/${HDF5_VERSION} netcdf-c/${NETCDF_C_VERSION} netcdf-fortran/${NETCDF_FORTRAN_VERSION}
module load perl/${PERL_VERSION}
make clean
make -j ${NUM_PARALLEL} openacc_loops
COMPILER_ARCH=linux_nvidia_acc_gpu make -j ${NUM_PARALLEL} compile-openacc_loops
export TEST_DIR=SPITZ12_ACC_LOOPS_NVHPC
export PSYCLONE_COMPILER=$MPIF90
export MPIF90=psyclonefc
export PARALLEL_DIRECTIVES="acc_offloading"
export REPRODUCIBLE=1
export PSYCLONE_OPTS="--enable-cache -l output -s ${PSYCLONE_NEMO_DIR}/insert_loop_parallelism.py"
export FCFLAGS="-i4 -Mr8 -O2 -Mnovect -Mnofma -g -acc -mp=gpu -gpu=mem:managed,math_uniform"
export NEMOV4=1 # Enables specific NEMOV4 exclusions in the PSyclone transformation script

# Compile
cd ${PREFIX}/UKMO-NEMOv4
cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
rm -rf cfgs/${TEST_DIR}
./makenemo -r SPITZ12 -m linux_spack -n ${TEST_DIR} \
add_key "IEEE_IS_NAN=ieee_is_nan key_nosignedzero" \
del_key "key_iomput key_mpp_mpi key_si3" -j ${NUM_PARALLEL}

# Run test
export NV_ACC_POOL_THRESHOLD=75
export CUDA_VISIBLE_DEVICES=1
make run-openacc_loops
# Check the output is as expected (TODO #2895: improve numerical reproducibility)
make output-openacc_loops | grep -q " it : 10" || (echo "Error: 'it : 10' not found!" & false)
make output-openacc_loops | grep -q "|ssh|_max: 0.259483" || (echo "Error: '|ssh|_max: 0.259483' not found!" & false)
make output-openacc_loops | grep -q "|U|_max: 0.458515" || (echo "Error: '|U|_max: 0.458515' not found!" & false)
make output-openacc_loops | grep -q "S_min: 0.482686" || (echo "Error: 'S_min: 0.482686' not found!" & false)
make output-openacc_loops | grep -q "S_max: 0.407622" || (echo "Error: 'S_max: 0.407622' not found!" & false)
export VAR_TIME=$(grep -A 1 "Elapsed Time" <(make -s time-openacc_loops) | head -n 2 | tail -n 1 | awk '{print $1}')
cd cfgs/${TEST_DIR}/EXP00/
ln -sf /archive/psyclone-tests/nemo-inputs/UKMO-eORCA1_GO8_NEMOv4/*.nc .
./nemo
diff run.stat $PSYCLONE_NEMO_DIR/KGOs/run.stat.nemo4.spitz12.nvhpc.10steps
export VAR_TIME=$(grep -A 1 "Elapsed Time" timing.output | head -n 2 | tail -n 1 | awk '{print $1}')
echo "time=${VAR_TIME}" >> "${GITHUB_OUTPUT}"

# PSyclone, compile and run ECMWF NEMO with OpenMP for CPUs. This uses
Expand All @@ -205,21 +219,25 @@ jobs:
source .runner_venv/bin/activate

# Set up envvars
export TEST_DIR=SPITZ12_ACC_LOOPS_NVHPC
export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
export PSYCLONE_COMPILER=$MPIF90
export MPIF90=psyclonefc
export PSYCLONE_OPTS="--enable-cache -l output -s ${PSYCLONE_NEMO_DIR}/omp_cpu_trans.py -I ${MPI_HOME}/include"
export PARALLEL_DIRECTIVES="omp_threading"
export PSYCLONE_OPTS="--enable-cache -l output -s ${PSYCLONE_NEMO_DIR}/insert_loop_parallelism.py -I ${MPI_HOME}/include"
export FCFLAGS="-i4 -r8 -O2 -heap-arrays -fp-model=precise -g -qopenmp"
export NEMOV4=1 # Enables specific NEMOV4 exclusions in the PSyclone transformation script

# Compile
cd ${PREFIX}/ECMWF-NEMOv4
./makenemo -r SPITZ12 -m linux_spack -n SPITZ12_psyclone \
cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
rm -rf cfgs/${TEST_DIR}
./makenemo -r SPITZ12 -m linux_spack -n ${TEST_DIR} \
add_key "IEEE_IS_NAN=ieee_is_nan key_nosignedzero" \
del_key "key_iomput" -j ${NUM_PARALLEL}

# Run NEMO
cd cfgs/SPITZ12_psyclone/EXP00/
cd cfgs/${TEST_DIR}/EXP00/
ln -sf /archive/psyclone-tests/nemo-inputs/ECMWF-eORCA1_GO8/* .
export OMP_NUM_THREADS=12
./nemo
Expand All @@ -240,6 +258,8 @@ jobs:
module load perl/${PERL_VERSION}
make clean
export NEMOV4=1 # Enables specific NEMOV4 exclusions in the PSyclone transformation script
export PARALLEL_DIRECTIVES="omp_offloading+omp_threading"
export REPRODUCIBLE=1
export ASYNC_PARALLEL=1
make -j ${NUM_PARALLEL} openmp_gpu
make -j ${NUM_PARALLEL} compile-openmp_gpu
Expand Down
49 changes: 29 additions & 20 deletions .github/workflows/nemo_v5_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ jobs:
bench_gfortran_omp_cpu: ${{ steps.bench_gfortran_omp_cpu.outputs.time }}
bench_nvfortran_omp_offload: ${{ steps.bench_nvfortran_omp_offload.outputs.time }}
bench_nvfortran_omp_offload_build: ${{ steps.bench_nvfortran_omp_offload.outputs.build_time }}
orca1_nvfortran_omp_offload: ${{ steps.orca1_nvfortran_omp_offload.outputs.time }}
orca1_nvfortran_acc_offload: ${{ steps.orca1_nvfortran_acc_offload.outputs.time }}
orca2_nvfortran_omp_offload: ${{ steps.orca2_nvfortran_omp_offload.outputs.time }}
bench_nvfortran_omp_offload_async: ${{ steps.bench_nvfortran_omp_offload_async.outputs.time }}
orca2_nvfortran_omp_offload_async: ${{ steps.orca2_nvfortran_omp_offload_async.outputs.time }}
Expand Down Expand Up @@ -107,7 +107,7 @@ jobs:
# Set up FCM: PATHs are loaded from SPACK, we only need to set the FCFLAGS
cd $NEMO_DIR
cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
export FCFLAGS="-fdefault-real-8 -O2 -fcray-pointer -ffree-line-length-none -g"
export FCFLAGS="-fdefault-real-8 -O2 -mno-fma -fno-tree-vectorize -fcray-pointer -ffree-line-length-none -g"

# Clean up and compile
rm -rf tests/${TEST_DIR}
Expand Down Expand Up @@ -202,8 +202,9 @@ jobs:
cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
export PSYCLONE_COMPILER=$MPIF90
export MPIF90=psyclonefc
export PSYCLONE_OPTS="--enable-cache -l output -s ${PSYCLONE_NEMO_DIR}/omp_cpu_trans.py"
export FCFLAGS="-fdefault-real-8 -O2 -fcray-pointer -ffree-line-length-none -g -fopenmp"
export PARALLEL_DIRECTIVES="omp_threading"
export PSYCLONE_OPTS="--enable-cache -l output -s ${PSYCLONE_NEMO_DIR}/insert_loop_parallelism.py"
export FCFLAGS="-fdefault-real-8 -O2 -mno-fma -fno-tree-vectorize -fcray-pointer -ffree-line-length-none -g -fopenmp"

# Clean up and compile
rm -rf tests/${TEST_DIR}
Expand Down Expand Up @@ -245,7 +246,8 @@ jobs:
export REPRODUCIBLE=1
export PSYCLONE_COMPILER=$MPIF90
export MPIF90=psyclonefc
export PSYCLONE_OPTS="--enable-cache -l output -s ${PSYCLONE_NEMO_DIR}/omp_gpu_trans.py"
export PARALLEL_DIRECTIVES="omp_offloading+omp_threading"
export PSYCLONE_OPTS="--enable-cache -l output -s ${PSYCLONE_NEMO_DIR}/insert_loop_parallelism.py"
# Clean up and compile
rm -rf tests/${TEST_DIR}
./makenemo -r BENCH -m linux_spack_profile -n ${TEST_DIR} -j ${NUM_PARALLEL} -v 1
Expand Down Expand Up @@ -280,7 +282,7 @@ jobs:
echo "build_time=${BUILD_ELAPSED}" >> "${GITHUB_OUTPUT}"

- name: NEMO 5.0 nvidia OpenMP for GPUs (UKMO ORCA1 - managed memory)
id: orca1_nvfortran_omp_offload
id: orca1_nvfortran_acc_offload
run: |
# Set up environment
source /archive/psyclone-spack/psyclone-spack-Jun25/spack-repo/share/spack/setup-env.sh
Expand All @@ -289,19 +291,20 @@ jobs:
export PSYCLONE_NEMO_DIR=${GITHUB_WORKSPACE}/examples/nemo/scripts
export PSYCLONE_HOME=${PWD}/.runner_venv
export NEMO_DIR=/archive/psyclone-tests/latest-run/UKMO-NEMOv5
export TEST_DIR=ORCA1_OMP_OFFLOAD_NVHPC
export TEST_DIR=ORCA1_ACC_OFFLOAD_NVHPC

# Set up FCM: PATHs are loaded from SPACK, we only need to set the FCFLAGS
# We compile at "-O2 -Mnofma -Mnovect -gpu=math_uniform" to permit comparison of the results.
cd $NEMO_DIR
cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
export FCFLAGS="-i4 -Mr8 -O2 -Mnofma -Mnovect -g -mp=gpu -gpu=mem:managed,math_uniform"
export FCFLAGS="-i4 -Mr8 -O2 -Mnofma -Mnovect -g -acc -mp=gpu -gpu=mem:managed,math_uniform"
export PARALLEL_DIRECTIVES="acc_offloading"
export REPRODUCIBLE=1

# Clean up and compile
rm -rf cfgs/${TEST_DIR}
./makenemo -r GOSI10p0.0_like_eORCA1 -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/omp_gpu_trans.py \
-j ${NUM_PARALLEL} -v 1
./makenemo -r GOSI10p0.0_like_eORCA1 -m linux_spack -n ${TEST_DIR} \
-p ${PSYCLONE_NEMO_DIR}/insert_loop_parallelism.py -j ${NUM_PARALLEL} -v 1

# Run test
cd $NEMO_DIR/cfgs/${TEST_DIR}/EXP00
Expand Down Expand Up @@ -331,11 +334,13 @@ jobs:
cd $NEMO_DIR
cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
export FCFLAGS="-i4 -Mr8 -O2 -Mnofma -Mnovect -g -mp=gpu -gpu=mem:managed,math_uniform"
export PARALLEL_DIRECTIVES="omp_offloading+omp_threading"
export REPRODUCIBLE=1

# Clean up and compile
rm -rf cfgs/${TEST_DIR}
./makenemo -r ORCA2_ICE_PISCES -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/omp_gpu_trans.py \
./makenemo -r ORCA2_ICE_PISCES -m linux_spack -n ${TEST_DIR} \
-p ${PSYCLONE_NEMO_DIR}/insert_loop_parallelism.py \
add_key "key_mpi_off key_nosignedzero" -j ${NUM_PARALLEL} -v 1

# Run test
Expand Down Expand Up @@ -374,12 +379,13 @@ jobs:
export ENABLE_PROFILING=1
# We compile with "-O2 -Mnofma -Mnovect -gpu=math_uniform" to permit comparison of the results.
export FCFLAGS="-i4 -Mr8 -O2 -Mnofma -Mnovect -g -mp=gpu -gpu=mem:managed,math_uniform"
export PARALLEL_DIRECTIVES="omp_offloading+omp_threading"
export REPRODUCIBLE=1
export ASYNC_PARALLEL=1
# Clean up and compile
rm -rf tests/${TEST_DIR}
./makenemo -r BENCH -m linux_spack_profile -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/omp_gpu_trans.py \
-j ${NUM_PARALLEL} -v 1
./makenemo -r BENCH -m linux_spack_profile -n ${TEST_DIR} \
-p ${PSYCLONE_NEMO_DIR}/insert_loop_parallelism.py -j ${NUM_PARALLEL} -v 1

# Run reproducible test
cd $NEMO_DIR/tests/${TEST_DIR}/EXP00
Expand All @@ -398,8 +404,8 @@ jobs:
rm -rf tests/${TEST_DIR}
export NV_ACC_POOL_THRESHOLD=75
export CUDA_VISIBLE_DEVICES=1
./makenemo -r BENCH -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/omp_gpu_trans.py \
-j ${NUM_PARALLEL} -v 1
./makenemo -r BENCH -m linux_spack -n ${TEST_DIR} \
-p ${PSYCLONE_NEMO_DIR}/insert_loop_parallelism.py -j ${NUM_PARALLEL} -v 1
# Run non-reproducible test
cd $NEMO_DIR/tests/${TEST_DIR}/EXP00
cp $PSYCLONE_NEMO_DIR/KGOs/namelist_cfg_bench_small namelist_cfg
Expand All @@ -423,13 +429,15 @@ jobs:
# We compile at "-O2 -Mnofma -Mnovect -gpu=math_uniform" to permit comparison of the results.
cd $NEMO_DIR
cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
export PARALLEL_DIRECTIVES="omp_offloading+omp_threading"
export FCFLAGS="-i4 -Mr8 -O2 -Mnofma -Mnovect -g -mp=gpu -gpu=mem:managed,math_uniform"
export REPRODUCIBLE=1
export ASYNC_PARALLEL=1

# Clean up and compile
rm -rf cfgs/${TEST_DIR}
./makenemo -r ORCA2_ICE_PISCES -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/omp_gpu_trans.py \
./makenemo -r ORCA2_ICE_PISCES -m linux_spack -n ${TEST_DIR} \
-p ${PSYCLONE_NEMO_DIR}/insert_loop_parallelism.py \
add_key "key_mpi_off key_nosignedzero" -j ${NUM_PARALLEL} -v 1

# Run test
Expand Down Expand Up @@ -459,14 +467,15 @@ jobs:
# We compile at "-O2 -Mnofma -Mnovect -gpu=math_uniform" to permit comparison of the results.
cd $NEMO_DIR
cp $PSYCLONE_NEMO_DIR/KGOs/arch-linux_spack.fcm arch/arch-linux_spack.fcm
export PARALLEL_DIRECTIVES="omp_offloading"
export FCFLAGS="-i4 -Mr8 -O2 -Mnofma -Mnovect -g -mp=gpu -gpu=mem:managed,math_uniform"
export REPRODUCIBLE=1
export ASYNC_PARALLEL=1

# Clean up and compile
rm -rf cfgs/${TEST_DIR}
./makenemo -r GOSI10p0.0_like_eORCA1 -m linux_spack -n ${TEST_DIR} -p ${PSYCLONE_NEMO_DIR}/omp_gpu_trans.py \
-j ${NUM_PARALLEL} -v 1
./makenemo -r GOSI10p0.0_like_eORCA1 -m linux_spack -n ${TEST_DIR} \
-p ${PSYCLONE_NEMO_DIR}/insert_loop_parallelism.py -j ${NUM_PARALLEL} -v 1

# Run test
cd $NEMO_DIR/cfgs/${TEST_DIR}/EXP00
Expand Down Expand Up @@ -522,10 +531,10 @@ jobs:
'"$COMMON_FIELDS"'
},
{
ci_test: "NEMOv5 OpenMP for GPU (ORCA1)",
ci_test: "NEMOv5 OpenACC for GPU (ORCA1)",
nemo_version: "NEMO 5.0-RC MO patch",
compiler:"nvhpc-'"$NVFORTRAN_VERSION"'",
elapsed_time: '"${{needs.run_if_on_mirror.outputs.orca1_nvfortran_omp_offload}}"',
elapsed_time: '"${{needs.run_if_on_mirror.outputs.orca1_nvfortran_acc_offload}}"',
'"$COMMON_FIELDS"'
},
{
Expand Down
60 changes: 3 additions & 57 deletions examples/nemo/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ POSSIBILITY OF SUCH DAMAGE.
Author A. R. Porter, STFC Daresbury Lab
Modified by R. W. Ford, STFC Daresbury Lab
Modified by J. Henrichs, Bureau of Meteorology
Modified by S. Siso, STFC Daresbury Lab

-->

Expand All @@ -54,63 +55,8 @@ Contains:

## Scripts

Contains the scripts used to process the NEMO code base and to add profiling
instrumentation (https://psyclone.readthedocs.io/en/latest/user_guide/profiling.html)
and OpenACC or OpenMP directives:

1. `process_nemo.py` is a driver script that allows the user to specify
which files to process with PSyclone, the transformation script to use
and where to put the outputs:

$ ./process_nemo.py -h
usage: process_nemo.py [-h] [-o OUT_DIR] [-s SCRIPT_FILE] [-x]
input_file [input_file ...]

Process the specified NEMO source files using PSyclone

positional arguments:
input_file One or more NEMO pre-processed source files

optional arguments:
-h, --help show this help message and exit
-o OUT_DIR Destination directory for processed source files
-s SCRIPT_FILE PSyclone transformation script
-x exit immediately if PSyclone fails
-p add profiling instrumentation to the PROFILE_ONLY file
list. Note that files processed by the SCRIPT_FILE may
be introducing profiling instrumentation as part of
that script.

In addition to the command-line flags, the script itself contains two
variables that may be used to control its behaviour:

- `EXCLUDED_FILES`: list of filenames that PSyclone will not attempt to process.
- `PROFILE_ONLY`: list of filenames to add profiling instrumentation but
do not attempt to further process by PSyclone.

Finally, the precise invocation to use when running PSyclone may be
specified by setting the `PSYCLONE` environment variable. If this is not set
then `psyclone` must be in the user's PATH.

2. PSyclone transformation scripts:
- `kernels_trans.py` adds OpenACC kernel directives and places fine-grained
profiling instrumentation around any regions that haven't had OpenACC
added.
- `omp_cpu_trans.py` adds OpenMP directives for CPU threading parallelism.
- `omp_gpu_trans.py` adds OpenMP offloading directives for GPU acceleration.

These scripts are a *work in progress* and are being developed to work on the
MO_GO8 configuration of NEMO supplied by the Met Office. This configuration is
based on version 4.0.2 of NEMO and is compiled using:

./makenemo -n MO_GO8_GPU -r SPITZ12 -m linux_nvfortran_gpu \
del_key "key_iomput key_mpp_mpi" add_key "key_nosignedzero"

(where you will need an `arch/arch-linux_nvfortran_gpu.fcm` FCM configuration
file specifying how to use the NVIDIA compiler).

If you are applying PSyclone to any other version or configuration of NEMO then
these scripts should serve as a useful starting point.
Contains a collection of example scripts and the instructions to process the NEMO code. These
are tested in our integration tests against NEMOv4.0.2 and NEMOv5.0.

## Example 1

Expand Down
4 changes: 2 additions & 2 deletions examples/nemo/eg1/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ include ../../common.mk
transform:
${PSYCLONE} -s ./openmp_cpu_levels_trans.py ../code/tra_adv.F90
${PSYCLONE} -s ./openmp_gpu_levels_trans.py ../code/tra_adv.F90
${PSYCLONE} -s ../scripts/omp_cpu_trans.py ../code/tra_adv.F90
${PSYCLONE} -s ../scripts/omp_gpu_trans.py ../code/tra_adv.F90
PARALLEL_DIRECTIVES="omp_threading" ${PSYCLONE} -s ../scripts/insert_loop_parallelism.py ../code/tra_adv.F90
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be PARALLEL_DIRECTIVES I think.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ups, fixed

PARALLEL_DIRECTIVES="omp_offloading" ${PSYCLONE} -s ../scripts/insert_loop_parallelism.py ../code/tra_adv.F90

compile: transform
@echo "No compilation supported for nemo/eg1"
Expand Down
4 changes: 2 additions & 2 deletions examples/nemo/eg2/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ transform: omp_levels

omp_levels:
${PSYCLONE} -s ./omp_levels_trans.py ../code/traldf_iso.F90
${PSYCLONE} -s ../scripts/omp_cpu_trans.py ../code/traldf_iso.F90
${PSYCLONE} -s ../scripts/omp_gpu_trans.py ../code/traldf_iso.F90
PARALLEL_DIRECTIVES="omp_threading" ${PSYCLONE} -s ../scripts/insert_loop_parallelism.py ../code/traldf_iso.F90
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also should be DIRECTIVES

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed

PARALLEL_DIRECTIVES="omp_offloading" ${PSYCLONE} -s ../scripts/insert_loop_parallelism.py ../code/traldf_iso.F90


compile: transform
Expand Down
2 changes: 1 addition & 1 deletion examples/nemo/scripts/KGOs/arch-linux_spack.fcm
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
%NCDF_INC -I${NCDF_F_HOME}/include -I${NCDF_C_HOME}/include -I${HDF5_HOME}/include
%NCDF_LIB -L${NCDF_F_HOME}/lib -lnetcdff -L${NCDF_C_HOME}/lib -lnetcdf

%CPP cpp -Dkey_nosignedzero
%CPP cpp -Dkey_nosignedzero -I${MPI_HOME}/include
%FC ${MPIF90} -c
%FCFLAGS ${FCFLAGS}
%FFLAGS %FCFLAGS
Expand Down
4 changes: 1 addition & 3 deletions examples/nemo/scripts/KGOs/arch-linux_spack_profile.fcm
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,7 @@
%PROFILE_INC -I${PROFILING_DIR}
%PROFILE_LIB -L${PROFILING_DIR} -lnvtx_prof -L${CUDA_HOME}/lib64 -cuda -lnvToolsExt



%CPP cpp -Dkey_nosignedzero
%CPP cpp -Dkey_nosignedzero -I${MPI_HOME}/include
%FC ${MPIF90} -c
%FCFLAGS ${FCFLAGS}
%FFLAGS %FCFLAGS
Expand Down
Loading
Loading