@@ -1524,7 +1524,7 @@ void SplitPHIs(llvm::Function &F) {
1524
1524
// Returns true if F is newly legal, subject to the pending calls in calls_todo
1525
1525
bool DetectReadonlyOrThrowFn (llvm::Function &F,
1526
1526
SmallPtrSetImpl<Function *> &calls_todo,
1527
- llvm::TargetLibraryInfo &TLI) {
1527
+ llvm::TargetLibraryInfo &TLI, bool &local ) {
1528
1528
if (isReadOnlyOrThrow (&F))
1529
1529
return false ;
1530
1530
if (F.empty ())
@@ -1539,8 +1539,10 @@ bool DetectReadonlyOrThrowFn(llvm::Function &F,
1539
1539
continue ;
1540
1540
if (hasMetadata (&I, " enzyme_ReadOnlyOrThrow" ))
1541
1541
continue ;
1542
+ if (hasMetadata (&I, " enzyme_LocalReadOnlyOrThrow" ))
1543
+ continue ;
1542
1544
if (auto CI = dyn_cast<CallBase>(&I)) {
1543
- if (isReadOnlyOrThrow (CI)) {
1545
+ if (isLocalReadOnlyOrThrow (CI)) {
1544
1546
continue ;
1545
1547
}
1546
1548
if (isAllocationCall (CI, TLI)) {
@@ -1574,26 +1576,80 @@ bool DetectReadonlyOrThrowFn(llvm::Function &F,
1574
1576
// seen outside the function. Note, even if one stored into x =
1575
1577
// malloc(..), and stored x into a global/arg pointer, that second store
1576
1578
// would trigger not readonly.
1577
- if (isa<AllocaInst>(Obj) || isAllocationCall (Obj, TLI))
1579
+ if (isa<AllocaInst>(Obj))
1580
+ continue ;
1581
+ if (isAllocationCall (Obj, TLI)) {
1582
+ if (local)
1583
+ continue ;
1584
+ if (notCaptured (Obj))
1585
+ continue ;
1586
+ local = true ;
1578
1587
continue ;
1588
+ }
1589
+ if (auto arg = dyn_cast<Argument>(Obj)) {
1590
+ if (arg->hasStructRetAttr () ||
1591
+ arg->getParent ()
1592
+ ->getAttribute (arg->getArgNo () + AttributeList::FirstArgIndex,
1593
+ " enzymejl_returnRoots" )
1594
+ .isValid ()) {
1595
+ local = true ;
1596
+ continue ;
1597
+ }
1598
+ }
1579
1599
}
1580
1600
if (auto MTI = dyn_cast<MemTransferInst>(&I)) {
1581
1601
auto Obj = getBaseObject (MTI->getOperand (0 ));
1582
1602
// Storing into local memory is fine since it definitionally will not be
1583
1603
// seen outside the function. Note, even if one stored into x =
1584
1604
// malloc(..), and stored x into a global/arg pointer, that second store
1585
1605
// would trigger not readonly.
1586
- if (isa<AllocaInst>(Obj) || isAllocationCall (Obj, TLI))
1606
+ if (isa<AllocaInst>(Obj))
1607
+ continue ;
1608
+ if (isAllocationCall (Obj, TLI)) {
1609
+ if (local)
1610
+ continue ;
1611
+ if (notCaptured (Obj))
1612
+ continue ;
1613
+ local = true ;
1587
1614
continue ;
1615
+ }
1616
+ if (auto arg = dyn_cast<Argument>(Obj)) {
1617
+ if (arg->hasStructRetAttr () ||
1618
+ arg->getParent ()
1619
+ ->getAttribute (arg->getArgNo () + AttributeList::FirstArgIndex,
1620
+ " enzymejl_returnRoots" )
1621
+ .isValid ()) {
1622
+ local = true ;
1623
+ continue ;
1624
+ }
1625
+ }
1588
1626
}
1589
1627
if (auto MSI = dyn_cast<MemSetInst>(&I)) {
1590
1628
auto Obj = getBaseObject (MSI->getOperand (0 ));
1591
1629
// Storing into local memory is fine since it definitionally will not be
1592
1630
// seen outside the function. Note, even if one stored into x =
1593
1631
// malloc(..), and stored x into a global/arg pointer, that second store
1594
1632
// would trigger not readonly.
1595
- if (isa<AllocaInst>(Obj) || isAllocationCall (Obj, TLI))
1633
+ if (isa<AllocaInst>(Obj))
1634
+ continue ;
1635
+ if (isAllocationCall (Obj, TLI)) {
1636
+ if (local)
1637
+ continue ;
1638
+ if (notCaptured (Obj))
1639
+ continue ;
1640
+ local = true ;
1596
1641
continue ;
1642
+ }
1643
+ if (auto arg = dyn_cast<Argument>(Obj)) {
1644
+ if (arg->hasStructRetAttr () ||
1645
+ arg->getParent ()
1646
+ ->getAttribute (arg->getArgNo () + AttributeList::FirstArgIndex,
1647
+ " enzymejl_returnRoots" )
1648
+ .isValid ()) {
1649
+ local = true ;
1650
+ continue ;
1651
+ }
1652
+ }
1597
1653
}
1598
1654
// ignore atomic load impacts
1599
1655
if (isa<LoadInst>(&I))
@@ -1620,7 +1676,10 @@ bool DetectReadonlyOrThrowFn(llvm::Function &F,
1620
1676
}
1621
1677
1622
1678
if (calls_todo.size () == 0 ) {
1623
- F.addFnAttr (" enzyme_ReadOnlyOrThrow" );
1679
+ if (local)
1680
+ F.addFnAttr (" enzyme_LocalReadOnlyOrThrow" );
1681
+ else
1682
+ F.addFnAttr (" enzyme_ReadOnlyOrThrow" );
1624
1683
}
1625
1684
return true ;
1626
1685
}
@@ -1629,6 +1688,17 @@ bool DetectReadonlyOrThrow(Module &M) {
1629
1688
1630
1689
bool changed = false ;
1631
1690
1691
+ PassBuilder PB;
1692
+ LoopAnalysisManager LAM;
1693
+ FunctionAnalysisManager FAM;
1694
+ CGSCCAnalysisManager CGAM;
1695
+ ModuleAnalysisManager MAM;
1696
+ PB.registerModuleAnalyses (MAM);
1697
+ PB.registerFunctionAnalyses (FAM);
1698
+ PB.registerLoopAnalyses (LAM);
1699
+ PB.registerCGSCCAnalyses (CGAM);
1700
+ PB.crossRegisterProxies (LAM, FAM, CGAM, MAM);
1701
+
1632
1702
// Set of functions newly deduced readonlyorthrow by this pass
1633
1703
SmallVector<llvm::Function *> todo;
1634
1704
@@ -1640,21 +1710,15 @@ bool DetectReadonlyOrThrow(Module &M) {
1640
1710
// prerequisite for being readonly. Inverse of `todo_map`
1641
1711
DenseMap<llvm::Function *, SmallPtrSet<Function *, 1 >> inverse_todo_map;
1642
1712
1643
- PassBuilder PB;
1644
- LoopAnalysisManager LAM;
1645
- FunctionAnalysisManager FAM;
1646
- CGSCCAnalysisManager CGAM;
1647
- ModuleAnalysisManager MAM;
1648
- PB.registerModuleAnalyses (MAM);
1649
- PB.registerFunctionAnalyses (FAM);
1650
- PB.registerLoopAnalyses (LAM);
1651
- PB.registerCGSCCAnalyses (CGAM);
1652
- PB.crossRegisterProxies (LAM, FAM, CGAM, MAM);
1713
+ SmallPtrSet<Function *, 1 > LocalReadOnlyFunctions;
1653
1714
1654
1715
for (Function &F : M) {
1655
1716
SmallPtrSet<Function *, 1 > calls_todo;
1656
1717
auto &TLI = FAM.getResult <TargetLibraryAnalysis>(F);
1657
- if (DetectReadonlyOrThrowFn (F, calls_todo, TLI)) {
1718
+ bool local = false ;
1719
+ if (DetectReadonlyOrThrowFn (F, calls_todo, TLI, local)) {
1720
+ if (local)
1721
+ LocalReadOnlyFunctions.insert (&F);
1658
1722
if (calls_todo.size () == 0 ) {
1659
1723
changed = true ;
1660
1724
todo.push_back (&F);
@@ -1681,7 +1745,10 @@ bool DetectReadonlyOrThrow(Module &M) {
1681
1745
auto &fwd_set = found2->second ;
1682
1746
fwd_set.erase (cur);
1683
1747
if (fwd_set.size () == 0 ) {
1684
- F2->addFnAttr (" enzyme_ReadOnlyOrThrow" );
1748
+ if (LocalReadOnlyFunctions.contains (F2))
1749
+ F2->addFnAttr (" enzyme_LocalReadOnlyOrThrow" );
1750
+ else
1751
+ F2->addFnAttr (" enzyme_ReadOnlyOrThrow" );
1685
1752
todo.push_back (F2);
1686
1753
todo_map.erase (F2);
1687
1754
}
@@ -2340,8 +2407,9 @@ Function *PreProcessCache::preprocessForClone(Function *F,
2340
2407
2341
2408
{
2342
2409
SmallPtrSet<Function *, 1 > calls_todo;
2410
+ bool local = false ;
2343
2411
DetectReadonlyOrThrowFn (*NewF, calls_todo,
2344
- FAM.getResult <TargetLibraryAnalysis>(*NewF));
2412
+ FAM.getResult <TargetLibraryAnalysis>(*NewF), local );
2345
2413
}
2346
2414
2347
2415
if (EnzymePrint)
0 commit comments