Skip to content

Commit fd3ec9c

Browse files
committed
Improve CFP by removing rawNewInfos
For the purposes of CFP, there's nothing fundamentally different between a set on an exact reference and a value set by allocation. CFP's use of the allocation values without considering all exact sets was therefore an unnecessary complication that restricted CFP's optimizing power. Expand optimizeUsingRefTest to optimize mutable fields, including those that have been set, by using the full available information instead of just the allocation values. Handle copies more judiciously by propagating once to find copied values and then propagating again while taking those copied values into account. This scheme can be extended in the future to precisely handle copies between different fields and types as well. Also optimize siblings better by propagating first down and then up rather than propagating in both directions at once. This avoids unnecessarily propagating set values to siblings.
1 parent c567e37 commit fd3ec9c

3 files changed

Lines changed: 321 additions & 113 deletions

File tree

src/passes/ConstantFieldPropagation.cpp

Lines changed: 63 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -91,15 +91,15 @@ struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> {
9191
// subtyping and new infos (information about struct.news).
9292
std::unique_ptr<Pass> create() override {
9393
return std::make_unique<FunctionOptimizer>(
94-
propagatedInfos, subTypes, rawNewInfos, refTest);
94+
propagatedInfos, refTestInfos, subTypes, refTest);
9595
}
9696

9797
FunctionOptimizer(const PCVStructValuesMap& propagatedInfos,
98+
const PCVStructValuesMap& refTestInfos,
9899
const SubTypes& subTypes,
99-
const PCVStructValuesMap& rawNewInfos,
100100
bool refTest)
101-
: propagatedInfos(propagatedInfos), subTypes(subTypes),
102-
rawNewInfos(rawNewInfos), refTest(refTest) {}
101+
: propagatedInfos(propagatedInfos), refTestInfos(refTestInfos),
102+
subTypes(subTypes), refTest(refTest) {}
103103

104104
template<typename T> std::optional<HeapType> getRelevantHeapType(T* ref) {
105105
auto type = ref->type;
@@ -210,7 +210,7 @@ struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> {
210210
// on simply applying a constant. However, we can try to use a ref.test, if
211211
// that is allowed.
212212
if (!info.isConstant()) {
213-
if (refTest) {
213+
if (refTest && !ref->type.isExact()) {
214214
optimizeUsingRefTest(curr, ref, index);
215215
}
216216
return;
@@ -233,22 +233,6 @@ struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> {
233233
auto refType = ref->type;
234234
auto refHeapType = refType.getHeapType();
235235

236-
// We only handle immutable fields in this function, as we will be looking
237-
// at |rawNewInfos|. That is, we are trying to see when a type and its
238-
// subtypes have different values (so that we can differentiate between them
239-
// using a ref.test), and those differences are lost in |propagatedInfos|,
240-
// which has propagated to relevant types so that we can do a single check
241-
// to see what value could be there. So we need to use something more
242-
// precise, |rawNewInfos|, which tracks the values written to struct.news,
243-
// where we know the type exactly (unlike with a struct.set). But for that
244-
// reason the field must be immutable, so that it is valid to only look at
245-
// the struct.news. (A more complex flow analysis could do better here, but
246-
// would be far beyond the scope of this pass.)
247-
if (index != StructUtils::DescriptorIndex &&
248-
GCTypeUtils::getField(refType, index)->mutable_ == Mutable) {
249-
return;
250-
}
251-
252236
// We seek two possible constant values. For each we track the constant and
253237
// the types that have that constant. For example, if we have types A, B, C
254238
// and A and B have 42 in their field, and C has 1337, then we'd have this:
@@ -283,13 +267,17 @@ struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> {
283267
return;
284268
}
285269

286-
auto iter = rawNewInfos.find({type, Exact});
287-
if (iter == rawNewInfos.end()) {
288-
// This type has no struct.news, so we can ignore it: it is abstract.
270+
auto iter = refTestInfos.find({type, Exact});
271+
if (iter == refTestInfos.end()) {
272+
// This type has no allocations, so we can ignore it: it is abstract.
289273
return;
290274
}
291275

292276
auto value = iter->second[index];
277+
if (!value.hasNoted()) {
278+
// Also abstract and ignorable.
279+
return;
280+
}
293281
if (!value.isConstant()) {
294282
// The value here is not constant, so give up entirely.
295283
fail = true;
@@ -409,8 +397,8 @@ struct FunctionOptimizer : public WalkerPass<PostWalker<FunctionOptimizer>> {
409397

410398
private:
411399
const PCVStructValuesMap& propagatedInfos;
400+
const PCVStructValuesMap& refTestInfos;
412401
const SubTypes& subTypes;
413-
const PCVStructValuesMap& rawNewInfos;
414402
const bool refTest;
415403

416404
bool changed = false;
@@ -492,20 +480,13 @@ struct ConstantFieldPropagation : public Pass {
492480
scanner.runOnModuleCode(runner, module);
493481

494482
// Combine the data from the functions.
495-
PCVStructValuesMap combinedNewInfos, combinedSetInfos;
496-
functionNewInfos.combineInto(combinedNewInfos);
483+
PCVStructValuesMap combinedSetInfos;
484+
functionNewInfos.combineInto(combinedSetInfos);
497485
functionSetInfos.combineInto(combinedSetInfos);
498486
BoolStructValuesMap combinedCopyInfos;
499487
functionCopyInfos.combineInto(combinedCopyInfos);
500488

501-
// Prepare data we will need later.
502-
SubTypes subTypes(*module);
503-
504-
// Copy the unpropagated data before we propagate. We use this in precise
505-
// lookups.
506-
auto rawNewInfos = combinedNewInfos;
507-
508-
// Handle subtyping. |combinedInfo| so far contains data that represents
489+
// Handle subtyping. |combinedSetInfos| so far contains data that represents
509490
// each struct.new and struct.set's operation on the struct type used in
510491
// that instruction. That is, if we do a struct.set to type T, the value was
511492
// noted for type T. But our actual goal is to answer questions about
@@ -532,10 +513,11 @@ struct ConstantFieldPropagation : public Pass {
532513
// efficient, we therefore propagate information about the possible values
533514
// in each field to both subtypes and supertypes.
534515
//
535-
// struct.new on the other hand knows exactly what type is being written to,
536-
// and so given a get of $A and a new of $B, the new is relevant for the get
537-
// iff $A is a subtype of $B, so we only need to propagate in one direction
538-
// there, to supertypes.
516+
// Values written in struct.news are equivalent to values written to exact
517+
// references. In both cases, the propagation to subtypes will not do
518+
// anything because an exact reference has no non-trivial subtypes. This
519+
// works out because a set of a field of an exact reference (or an
520+
// allocation) cannot ever affect the value read out of a subtype's field.
539521
//
540522
// An exception to the above are copies. If a field is copied then even
541523
// struct.new information cannot be assumed to be precise:
@@ -549,36 +531,57 @@ struct ConstantFieldPropagation : public Pass {
549531
// foo(A->f0); // These can contain 20,
550532
// foo(C->f0); // if the copy read from B.
551533
//
552-
// To handle that, copied fields are treated like struct.set ones (by
553-
// copying the struct.new data to struct.set). Note that we must propagate
554-
// copying to subtypes first, as in the example above the struct.new values
555-
// of subtypes must be taken into account (that is, A or a subtype is being
556-
// copied, so we want to do the same thing for B and C as well as A, since
557-
// a copy of A means it could be a copy of B or C).
558-
StructUtils::TypeHierarchyPropagator<StructUtils::CombinableBool>
559-
boolPropagator(subTypes);
560-
boolPropagator.propagateToSubTypesWithExact(combinedCopyInfos);
534+
// The handling of copies is explained below.
535+
SubTypes subTypes(*module);
536+
StructUtils::TypeHierarchyPropagator<PossibleConstantValues> propagator(
537+
subTypes);
538+
539+
// Compute the values without accounting for copies.
540+
PCVStructValuesMap noCopySetInfos = combinedSetInfos;
541+
propagator.propagateToSubTypesWithExact(noCopySetInfos);
542+
propagator.propagateToSuperTypes(noCopySetInfos);
543+
544+
// Now account for copies. A copy takes a value from any subtype
545+
// of the copy source to any subtype of the copy destination. Since we last
546+
// propagated to supertypes, we know the propagated values increase
547+
// monotonically as you go up the type hierarchy. The propagated value in a
548+
// field therefore overapproximates the values in the corresponding field in
549+
// all the subtypes. So for each copy, we can use the propagated value as
550+
// the copied value. Then we will propagate set values again, this time
551+
// including the copied values. We only need to repeat the propagation once;
552+
// if the second propagation discovers greater values in the copied fields,
553+
// it can only be because those greater values were propagated from a
554+
// supertype. In that case, the greater value has also been propagated to
555+
// all subtypes, so repeating the process will not further change anything.
556+
//
557+
// TODO: Track separate sources and destinations of copies rather than
558+
// special-casing copies to self. This would let propagation discover
559+
// greater copied values from unrelated types or even different field
560+
// indices, so we would have to repeatedly propagate taking into account the
561+
// latest discovered copied values until reaching a fixed point.
561562
for (auto& [type, copied] : combinedCopyInfos) {
562-
for (Index i = 0; i < copied.size(); i++) {
563+
for (Index i = 0; i < copied.size(); ++i) {
563564
if (copied[i]) {
564-
combinedSetInfos[type][i].combine(combinedNewInfos[type][i]);
565+
combinedSetInfos[type][i].combine(noCopySetInfos[type][i]);
565566
}
566567
}
567568
}
568569

569-
StructUtils::TypeHierarchyPropagator<PossibleConstantValues> propagator(
570-
subTypes);
571-
propagator.propagateToSuperTypes(combinedNewInfos);
572-
propagator.propagateToSuperAndSubTypesWithExact(combinedSetInfos);
573-
574-
// Combine both sources of information to the final information that gets
575-
// care about.
576-
PCVStructValuesMap combinedInfos = std::move(combinedNewInfos);
577-
combinedSetInfos.combineInto(combinedInfos);
570+
// Propagate the values again, now including values readable by copies.
571+
// RefTest optimization manually checks the values in every subtype to
572+
// make sure they match, so there's no need to propagate values up for that.
573+
// Snapshot the info before propagating up for use in RefTest
574+
// optimization.
575+
PCVStructValuesMap refTestInfos;
576+
propagator.propagateToSubTypesWithExact(combinedSetInfos);
577+
if (refTest) {
578+
refTestInfos = combinedSetInfos;
579+
}
580+
propagator.propagateToSuperTypes(combinedSetInfos);
578581

579582
// Optimize.
580583
// TODO: Skip this if we cannot optimize anything
581-
FunctionOptimizer(combinedInfos, subTypes, rawNewInfos, refTest)
584+
FunctionOptimizer(combinedSetInfos, refTestInfos, subTypes, refTest)
582585
.run(runner, module);
583586
}
584587
};

test/lit/passes/cfp-reftest.wast

Lines changed: 165 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -279,18 +279,18 @@
279279
)
280280
)
281281

282-
;; Almost optimizable, but the field is mutable, so we can't.
282+
;; The field is mutable, but we can still optimize.
283283
(module
284284
;; CHECK: (type $struct (sub (struct (field (mut i32)))))
285285
(type $struct (sub (struct (mut i32))))
286-
;; CHECK: (type $1 (func))
287-
288286
;; CHECK: (type $substruct (sub $struct (struct (field (mut i32)) (field f64))))
289287
(type $substruct (sub $struct (struct (mut i32) f64)))
290288

289+
;; CHECK: (type $2 (func))
290+
291291
;; CHECK: (type $3 (func (param (ref null $struct)) (result i32)))
292292

293-
;; CHECK: (func $create (type $1)
293+
;; CHECK: (func $create (type $2)
294294
;; CHECK-NEXT: (drop
295295
;; CHECK-NEXT: (struct.new $struct
296296
;; CHECK-NEXT: (i32.const 10)
@@ -317,6 +317,167 @@
317317
)
318318
)
319319
;; CHECK: (func $get (type $3) (param $struct (ref null $struct)) (result i32)
320+
;; CHECK-NEXT: (select
321+
;; CHECK-NEXT: (i32.const 20)
322+
;; CHECK-NEXT: (i32.const 10)
323+
;; CHECK-NEXT: (ref.test (ref $substruct)
324+
;; CHECK-NEXT: (ref.as_non_null
325+
;; CHECK-NEXT: (local.get $struct)
326+
;; CHECK-NEXT: )
327+
;; CHECK-NEXT: )
328+
;; CHECK-NEXT: )
329+
;; CHECK-NEXT: )
330+
(func $get (param $struct (ref null $struct)) (result i32)
331+
;; We can optimize here using a ref.test, even though the field is mutable.
332+
(struct.get $struct 0
333+
(local.get $struct)
334+
)
335+
)
336+
)
337+
338+
;; No-op sets do not inhibit optimization.
339+
(module
340+
;; CHECK: (type $struct (sub (struct (field (mut i32)))))
341+
(type $struct (sub (struct (mut i32))))
342+
;; CHECK: (type $substruct (sub $struct (struct (field (mut i32)) (field f64))))
343+
(type $substruct (sub $struct (struct (mut i32) f64)))
344+
345+
;; CHECK: (type $2 (func))
346+
347+
;; CHECK: (type $3 (func (param (ref null (exact $struct)) (ref null $substruct))))
348+
349+
;; CHECK: (type $4 (func (param (ref null $struct)) (result i32)))
350+
351+
;; CHECK: (func $create (type $2)
352+
;; CHECK-NEXT: (drop
353+
;; CHECK-NEXT: (struct.new $struct
354+
;; CHECK-NEXT: (i32.const 10)
355+
;; CHECK-NEXT: )
356+
;; CHECK-NEXT: )
357+
;; CHECK-NEXT: (drop
358+
;; CHECK-NEXT: (struct.new $substruct
359+
;; CHECK-NEXT: (i32.const 20)
360+
;; CHECK-NEXT: (f64.const 3.14159)
361+
;; CHECK-NEXT: )
362+
;; CHECK-NEXT: )
363+
;; CHECK-NEXT: )
364+
(func $create
365+
(drop
366+
(struct.new $struct
367+
(i32.const 10)
368+
)
369+
)
370+
(drop
371+
(struct.new $substruct
372+
(i32.const 20)
373+
(f64.const 3.14159)
374+
)
375+
)
376+
)
377+
378+
;; CHECK: (func $sets (type $3) (param $struct-exact (ref null (exact $struct))) (param $substruct (ref null $substruct))
379+
;; CHECK-NEXT: (struct.set $struct 0
380+
;; CHECK-NEXT: (local.get $struct-exact)
381+
;; CHECK-NEXT: (i32.const 10)
382+
;; CHECK-NEXT: )
383+
;; CHECK-NEXT: (struct.set $substruct 0
384+
;; CHECK-NEXT: (local.get $substruct)
385+
;; CHECK-NEXT: (i32.const 20)
386+
;; CHECK-NEXT: )
387+
;; CHECK-NEXT: )
388+
(func $sets (param $struct-exact (ref null (exact $struct))) (param $substruct (ref null $substruct))
389+
(struct.set $struct 0
390+
(local.get $struct-exact)
391+
(i32.const 10)
392+
)
393+
(struct.set $substruct 0
394+
(local.get $substruct)
395+
(i32.const 20)
396+
)
397+
)
398+
399+
;; CHECK: (func $get (type $4) (param $struct (ref null $struct)) (result i32)
400+
;; CHECK-NEXT: (select
401+
;; CHECK-NEXT: (i32.const 20)
402+
;; CHECK-NEXT: (i32.const 10)
403+
;; CHECK-NEXT: (ref.test (ref $substruct)
404+
;; CHECK-NEXT: (ref.as_non_null
405+
;; CHECK-NEXT: (local.get $struct)
406+
;; CHECK-NEXT: )
407+
;; CHECK-NEXT: )
408+
;; CHECK-NEXT: )
409+
;; CHECK-NEXT: )
410+
(func $get (param $struct (ref null $struct)) (result i32)
411+
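;; We can still optimize here using a ref.test: the sets write the same
;; values as the allocations, so they do not change the possible values.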
412+
(struct.get $struct 0
413+
(local.get $struct)
414+
)
415+
)
416+
)
417+
418+
;; Same as above, except now the set to $struct is inexact so we cannot
419+
;; optimize.
420+
(module
421+
;; CHECK: (type $struct (sub (struct (field (mut i32)))))
422+
(type $struct (sub (struct (mut i32))))
423+
;; CHECK: (type $substruct (sub $struct (struct (field (mut i32)) (field f64))))
424+
(type $substruct (sub $struct (struct (mut i32) f64)))
425+
426+
;; CHECK: (type $2 (func))
427+
428+
;; CHECK: (type $3 (func (param (ref null $struct) (ref null $substruct))))
429+
430+
;; CHECK: (type $4 (func (param (ref null $struct)) (result i32)))
431+
432+
;; CHECK: (func $create (type $2)
433+
;; CHECK-NEXT: (drop
434+
;; CHECK-NEXT: (struct.new $struct
435+
;; CHECK-NEXT: (i32.const 10)
436+
;; CHECK-NEXT: )
437+
;; CHECK-NEXT: )
438+
;; CHECK-NEXT: (drop
439+
;; CHECK-NEXT: (struct.new $substruct
440+
;; CHECK-NEXT: (i32.const 20)
441+
;; CHECK-NEXT: (f64.const 3.14159)
442+
;; CHECK-NEXT: )
443+
;; CHECK-NEXT: )
444+
;; CHECK-NEXT: )
445+
(func $create
446+
(drop
447+
(struct.new $struct
448+
(i32.const 10)
449+
)
450+
)
451+
(drop
452+
(struct.new $substruct
453+
(i32.const 20)
454+
(f64.const 3.14159)
455+
)
456+
)
457+
)
458+
459+
;; CHECK: (func $sets (type $3) (param $struct (ref null $struct)) (param $substruct (ref null $substruct))
460+
;; CHECK-NEXT: (struct.set $struct 0
461+
;; CHECK-NEXT: (local.get $struct)
462+
;; CHECK-NEXT: (i32.const 10)
463+
;; CHECK-NEXT: )
464+
;; CHECK-NEXT: (struct.set $substruct 0
465+
;; CHECK-NEXT: (local.get $substruct)
466+
;; CHECK-NEXT: (i32.const 20)
467+
;; CHECK-NEXT: )
468+
;; CHECK-NEXT: )
469+
(func $sets (param $struct (ref null $struct)) (param $substruct (ref null $substruct))
470+
(struct.set $struct 0
471+
(local.get $struct)
472+
(i32.const 10)
473+
)
474+
(struct.set $substruct 0
475+
(local.get $substruct)
476+
(i32.const 20)
477+
)
478+
)
479+
480+
;; CHECK: (func $get (type $4) (param $struct (ref null $struct)) (result i32)
320481
;; CHECK-NEXT: (struct.get $struct 0
321482
;; CHECK-NEXT: (local.get $struct)
322483
;; CHECK-NEXT: )

0 commit comments

Comments
 (0)