Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
52b962a
Changes to OpenMP scripts to extract arguments from iom_put
LonelyCat124 Mar 13, 2026
dae4e86
transformation can't always work so catch exception
LonelyCat124 Mar 13, 2026
34aeaac
Merge branch 'master' into iom_put_to_temp_
LonelyCat124 Mar 13, 2026
a95ece0
fix to datanode to temp trans to handle case sensitivity correctly vi…
LonelyCat124 Mar 16, 2026
d79fae1
Merge branch 'iom_put_to_temp_' of github.com:stfc/PSyclone into iom_…
LonelyCat124 Mar 16, 2026
46d6dfa
Merge branch 'master' into iom_put_to_temp_
LonelyCat124 Mar 16, 2026
6339e55
Merge branch 'master' into iom_put_to_temp_
LonelyCat124 Mar 17, 2026
6ce8a38
Try extending the iom_put transformation
LonelyCat124 Mar 17, 2026
7c98fd4
Merge branch 'iom_put_to_temp_' of github.com:stfc/PSyclone into iom_…
LonelyCat124 Mar 17, 2026
dd3e62a
fix
LonelyCat124 Mar 17, 2026
e06176b
Fix datanodetotemptrans for ifblock statements and similar
LonelyCat124 Mar 17, 2026
ed69361
Use elemental_type in intrinsic_call
LonelyCat124 Mar 17, 2026
af9ea3d
Merge master and fix intrinsic call change
LonelyCat124 Apr 1, 2026
a912e88
Fixed the TypeError branch
LonelyCat124 Apr 2, 2026
78127da
Changed the InternalError to be a fallthrough
LonelyCat124 Apr 2, 2026
c3804eb
Store the error to fallthrough to internal error correctly
LonelyCat124 Apr 2, 2026
0420aa4
precision handling for the TypeError
LonelyCat124 Apr 2, 2026
470ed98
Added an if allocated test and check that we don't block potential lo…
LonelyCat124 Apr 7, 2026
cf4947e
[skip-ci] Some changes to add test (that fails) for the datanode_to_t…
LonelyCat124 Apr 7, 2026
c6c718c
Don't try to move the allocate statement which wasn't very feasible f…
LonelyCat124 Apr 7, 2026
03e0ffa
Hoist the allocate statement if we think it's safe
LonelyCat124 Apr 8, 2026
93d814c
Merge branch 'master' into iom_put_to_temp_
LonelyCat124 Apr 8, 2026
f147e94
updated script error
LonelyCat124 Apr 8, 2026
339f8b4
Revert to only apply to iom_put
LonelyCat124 Apr 16, 2026
d9cced4
Merge branch 'master' into iom_put_to_temp_
LonelyCat124 Apr 16, 2026
51f5e71
Merge branch 'master' into iom_put_to_temp_
LonelyCat124 Apr 21, 2026
4fb5d7b
Remaining test coverage and fixed a missing fstring in fparser2 frontend
LonelyCat124 Apr 21, 2026
65fa992
Merge master
LonelyCat124 Apr 21, 2026
19d77ef
Merge branch 'master' into iom_put_to_temp_
LonelyCat124 Apr 21, 2026
fa35a58
Some changes towards review
LonelyCat124 Apr 23, 2026
78b4345
Revert changes due to issue with structure reference's datatype
LonelyCat124 Apr 24, 2026
93c0b20
linting
LonelyCat124 Apr 24, 2026
51c250a
DataNodeToTempTrans will sometimes not make an allocatable if the com…
LonelyCat124 Apr 24, 2026
5ad2f3e
Merge branch 'master' into iom_put_to_temp_
LonelyCat124 Apr 24, 2026
18dae4e
Changes for review
LonelyCat124 Apr 27, 2026
b89289f
changes for review
LonelyCat124 Apr 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion examples/nemo/scripts/omp_cpu_trans.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@
import os
from utils import (
insert_explicit_loop_parallelism, normalise_loops, add_profiling,
iom_put_argument_to_temporary,
PARALLELISATION_ISSUES, NEMO_MODULES_TO_IMPORT)
from psyclone.psyir.nodes import Routine
from psyclone.psyir.nodes import Routine, Call
from psyclone.transformations import OMPLoopTrans

# Enable the insertion of profiling hooks during the transformation script
Expand Down Expand Up @@ -107,6 +108,10 @@ def trans(psyir):
for subroutine in psyir.walk(Routine):
print(f"Adding OpenMP threading to subroutine: {subroutine.name}")

# Extract any array operations from iom_put calls to temporary
# expressions that can be parallelised.
iom_put_argument_to_temporary(subroutine.walk(Call))

if PROFILING_ENABLED:
add_profiling(subroutine.children)

Expand Down
9 changes: 7 additions & 2 deletions examples/nemo/scripts/omp_gpu_trans.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@
import os
from utils import (
add_profiling, inline_calls, insert_explicit_loop_parallelism,
normalise_loops, PARALLELISATION_ISSUES, NEMO_MODULES_TO_IMPORT)
from psyclone.psyir.nodes import Routine, Loop
normalise_loops, iom_put_argument_to_temporary,
PARALLELISATION_ISSUES, NEMO_MODULES_TO_IMPORT)
from psyclone.psyir.nodes import Routine, Loop, Call
from psyclone.psyir.transformations import (
OMPTargetTrans, OMPDeclareTargetTrans)
from psyclone.transformations import (
Expand Down Expand Up @@ -199,6 +200,10 @@ def trans(psyir):
if "pp_len" not in symtab:
symtab.add(symtab.lookup("pp_len"))

# Extract any array operations from iom_put calls to temporary
# expressions that can be parallelised.
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Delete this now?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah.

iom_put_argument_to_temporary(subroutine.walk(Call))

Comment thread
sergisiso marked this conversation as resolved.
Outdated
normalise_loops(
subroutine,
hoist_local_arrays=False,
Expand Down
23 changes: 19 additions & 4 deletions examples/nemo/scripts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,15 @@
from psyclone.domain.common.transformations import KernelModuleInlineTrans
from psyclone.psyir.nodes import (
Assignment, Loop, Directive, Node, Reference, CodeBlock, Call,
Routine, Schedule, IntrinsicCall, StructureReference, IfBlock)
from psyclone.psyir.symbols import DataSymbol
Routine, Schedule, IntrinsicCall, StructureReference, IfBlock,
Operation)
from psyclone.psyir.symbols import DataSymbol, ArrayType
from psyclone.psyir.transformations import (
ArrayAssignment2LoopsTrans, HoistLoopBoundExprTrans, HoistLocalArraysTrans,
HoistTrans, InlineTrans, Maxval2LoopTrans, ProfileTrans,
OMPMinimiseSyncTrans, Reference2ArrayRangeTrans,
ScalarisationTrans, IncreaseRankLoopArraysTrans, MaximalRegionTrans)
from psyclone.transformations import TransformationError
ScalarisationTrans, IncreaseRankLoopArraysTrans, MaximalRegionTrans,
DataNodeToTempTrans, TransformationError)

# USE statements to chase to gather additional symbol information.
NEMO_MODULES_TO_IMPORT = [
Expand Down Expand Up @@ -526,3 +527,17 @@ def _satisfies_minimum_region_rules(self, region: list[Node]) -> bool:
routine_name = parent_routine.name if parent_routine else ""
if routine_name not in PROFILING_IGNORE:
MaximalProfilingOutsideDirectivesTrans().apply(children)


def iom_put_argument_to_temporary(calls: list[Call]):
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"param:" missing

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added. I have also slightly changed the behaviour, so I will rerun the integration tests.

'''Extracts the second argument of all iom_put calls and puts them
in a temporary if they are an Operation with an array datatype.'''
for call in calls:
if call.symbol.name == "iom_put":
arg = call.arguments[1]
dtype = arg.datatype
if isinstance(dtype, ArrayType) and isinstance(arg, Operation):
try:
DataNodeToTempTrans().apply(arg)
except TransformationError:
pass
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The integration tests don't show any performance advantage, which is not what we expected (we are changing from multiple GPU->CPU reads to one, and possibly preventing the data touched on the GPU from being brought back):

  • I can check with a grep how many more loops are offloaded, but for the places where it was not applied, could you add a preceding comment giving the reason why not? (If not all transformation errors provide useful information, a verbose option like the one other transformations have could help.)
  • There is nothing specific to iom_put, other than that we know it is a common pattern. We want to avoid touching things from the CPU as much as possible, so could this be applied to all subroutine calls (not functions)?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added a preceding comment now. I'll try generalising it as well.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This currently causes stuff to fail; I'll try to get my VPN working again and then try manually building NEMO5 to find the cause.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This has shown a few issues with the DataNodeToTempTrans (partly because some things are Statements that I didn't think were, e.g. an IfBlock's condition).

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed the bugs now

22 changes: 13 additions & 9 deletions src/psyclone/psyir/transformations/datanode_to_temp_trans.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,11 @@ def validate(self, node: DataNode, **kwargs):
symbols.update(element.upper.get_all_accessed_symbols())
# Compare the symbols in the array bounds with the symbols
# already in the scope.
scope_symbols = node.scope.symbol_table.get_symbols()
scope_table = node.scope.symbol_table
for sym in symbols:
scoped_name_sym = scope_symbols.get(sym.name, None)
scoped_name_sym = scope_table.lookup(
sym.name, otherwise=None
)
# If sym is not scoped_name_sym, then there is a
# symbol collision from an imported symbol.
if scoped_name_sym and sym is not scoped_name_sym:
Expand All @@ -176,9 +178,9 @@ def validate(self, node: DataNode, **kwargs):
# If it's an imported symbol we need to check if it's
# the same import interface.
if isinstance(sym.interface, ImportInterface):
scoped_name_sym = scope_symbols.get(
sym.interface.container_symbol.name,
None
scoped_name_sym = scope_table.lookup(
sym.interface.container_symbol.name,
otherwise=None
)
if scoped_name_sym and not isinstance(
scoped_name_sym, ContainerSymbol):
Expand Down Expand Up @@ -248,18 +250,20 @@ def apply(self, node: DataNode, storage_name: str = "", **kwargs):
symbols.update(element.lower.get_all_accessed_symbols())
if isinstance(element.upper, DataNode):
symbols.update(element.upper.get_all_accessed_symbols())
scope_symbols = node.scope.symbol_table.get_symbols()
scope_table = node.scope.symbol_table
for sym in symbols:
scoped_name_sym = scope_symbols.get(sym.name, None)
scoped_name_sym = scope_table.lookup(
sym.name, otherwise=None
)
# If no symbol with the name exists then create one.
if not scoped_name_sym:
sym_copy = sym.copy()
if isinstance(sym_copy.interface, ImportInterface):
# Check if the ContainerSymbol is already in the
# interface
container = scope_symbols.get(
container = scope_table.lookup(
sym_copy.interface.container_symbol.name,
None
otherwise=None
)
if container is None:
# Add the container symbol to the symbol table
Expand Down
Loading