@@ -566,7 +566,7 @@ static int CeedOperatorAssembleLinearQFunction_Ref(CeedOperator op,
566
566
// LCOV_EXCL_STOP
567
567
568
568
// Create output restriction
569
- CeedInt strides [3 ] = {1 , Q , numactivein * numactiveout * Q };
569
+ CeedInt strides [3 ] = {1 , Q , numactivein * numactiveout * Q }; /* *NOPAD* */
570
570
ierr = CeedElemRestrictionCreateStrided (ceedparent , numelements , Q ,
571
571
numactivein * numactiveout ,
572
572
numactivein * numactiveout * numelements * Q ,
@@ -664,11 +664,51 @@ static inline void CeedOperatorGetBasisPointer_Ref(const CeedScalar **basisptr,
664
664
}
665
665
}
666
666
667
+ //------------------------------------------------------------------------------
668
+ // Create point block restriction
669
+ //------------------------------------------------------------------------------
670
+ static int CreatePBRestriction (CeedElemRestriction rstr ,
671
+ CeedElemRestriction * pbRstr ) {
672
+ int ierr ;
673
+ Ceed ceed ;
674
+ ierr = CeedElemRestrictionGetCeed (rstr , & ceed ); CeedChk (ierr );
675
+ const CeedInt * offsets ;
676
+ ierr = CeedElemRestrictionGetOffsets (rstr , CEED_MEM_HOST , & offsets );
677
+ CeedChk (ierr );
678
+
679
+ // Expand offsets
680
+ CeedInt nelem , ncomp , elemsize , compstride , max = 1 , * pbOffsets ;
681
+ ierr = CeedElemRestrictionGetNumElements (rstr , & nelem ); CeedChk (ierr );
682
+ ierr = CeedElemRestrictionGetNumComponents (rstr , & ncomp ); CeedChk (ierr );
683
+ ierr = CeedElemRestrictionGetElementSize (rstr , & elemsize ); CeedChk (ierr );
684
+ ierr = CeedElemRestrictionGetCompStride (rstr , & compstride ); CeedChk (ierr );
685
+ CeedInt shift = ncomp ;
686
+ if (compstride != 1 )
687
+ shift *= ncomp ;
688
+ ierr = CeedCalloc (nelem * elemsize , & pbOffsets ); CeedChk (ierr );
689
+ for (CeedInt i = 0 ; i < nelem * elemsize ; i ++ ) {
690
+ pbOffsets [i ] = offsets [i ]* shift ;
691
+ if (pbOffsets [i ] > max )
692
+ max = pbOffsets [i ];
693
+ }
694
+
695
+ // Create new restriction
696
+ ierr = CeedElemRestrictionCreate (ceed , nelem , elemsize , ncomp * ncomp , 1 ,
697
+ max + ncomp * ncomp , CEED_MEM_HOST ,
698
+ CEED_OWN_POINTER , pbOffsets , pbRstr );
699
+ CeedChk (ierr );
700
+
701
+ // Cleanup
702
+ ierr = CeedElemRestrictionRestoreOffsets (rstr , & offsets ); CeedChk (ierr );
703
+
704
+ return 0 ;
705
+ }
706
+
667
707
//------------------------------------------------------------------------------
668
708
// Assemble Linear Diagonal
669
709
//------------------------------------------------------------------------------
670
- static int CeedOperatorAssembleLinearDiagonal_Ref (CeedOperator op ,
671
- CeedVector * assembled , CeedRequest * request ) {
710
+ static int CeedOperatorAssembleDiagonalCore_Ref (CeedOperator op ,
711
+ CeedVector * assembled , CeedRequest * request , const bool pointBlock ) {
672
712
int ierr ;
673
713
674
714
// Assemble QFunction
@@ -764,9 +804,15 @@ static int CeedOperatorAssembleLinearDiagonal_Ref(CeedOperator op,
764
804
}
765
805
}
766
806
807
+ // Assemble point-block diagonal restriction, if needed
808
+ CeedElemRestriction diagrstr = rstrout ;
809
+ if (pointBlock ) {
810
+ ierr = CreatePBRestriction (rstrout , & diagrstr ); CeedChk (ierr );
811
+ }
812
+
767
813
// Create diagonal vector
768
814
CeedVector elemdiag ;
769
- ierr = CeedElemRestrictionCreateVector (rstrin , assembled , & elemdiag );
815
+ ierr = CeedElemRestrictionCreateVector (diagrstr , assembled , & elemdiag );
770
816
CeedChk (ierr );
771
817
772
818
// Assemble element operator diagonals
@@ -777,7 +823,7 @@ static int CeedOperatorAssembleLinearDiagonal_Ref(CeedOperator op,
777
823
ierr = CeedVectorGetArray (assembledqf , CEED_MEM_HOST , & assembledqfarray );
778
824
CeedChk (ierr );
779
825
CeedInt nelem , nnodes , nqpts ;
780
- ierr = CeedElemRestrictionGetNumElements (rstrin , & nelem ); CeedChk (ierr );
826
+ ierr = CeedElemRestrictionGetNumElements (diagrstr , & nelem ); CeedChk (ierr );
781
827
ierr = CeedBasisGetNumNodes (basisin , & nnodes ); CeedChk (ierr );
782
828
ierr = CeedBasisGetNumQuadraturePoints (basisin , & nqpts ); CeedChk (ierr );
783
829
// Basis matrices
@@ -816,17 +862,30 @@ static int CeedOperatorAssembleLinearDiagonal_Ref(CeedOperator op,
816
862
CeedOperatorGetBasisPointer_Ref (& b , emodein [ein ], identity , interpin ,
817
863
& gradin [din * nqpts * nnodes ]);
818
864
// Each component
819
- for (CeedInt comp = 0 ; comp < ncomp ; comp ++ )
865
+ for (CeedInt compOut = 0 ; compOut < ncomp ; compOut ++ )
820
866
// Each qpoint/node pair
821
- for (CeedInt q = 0 ; q < nqpts ; q ++ ) {
822
- const CeedScalar qfvalue =
823
- assembledqfarray [((((e * numemodein + ein )* ncomp + comp )*
824
- numemodeout + eout )* ncomp + comp )* nqpts + q ];
825
- if (fabs (qfvalue ) > maxnorm * 1e-12 )
826
- for (CeedInt n = 0 ; n < nnodes ; n ++ )
827
- elemdiagarray [(e * ncomp + comp )* nnodes + n ] += bt [q * nnodes + n ] *
828
- qfvalue * b [q * nnodes + n ];
829
- }
867
+ for (CeedInt q = 0 ; q < nqpts ; q ++ )
868
+ if (pointBlock ) {
869
+ // Point Block Diagonal
870
+ for (CeedInt compIn = 0 ; compIn < ncomp ; compIn ++ ) {
871
+ const CeedScalar qfvalue =
872
+ assembledqfarray [((((e * numemodein + ein )* ncomp + compIn )*
873
+ numemodeout + eout )* ncomp + compOut )* nqpts + q ];
874
+ if (fabs (qfvalue ) > maxnorm * 1e-12 )
875
+ for (CeedInt n = 0 ; n < nnodes ; n ++ )
876
+ elemdiagarray [((e * ncomp + compOut )* ncomp + compIn )* nnodes + n ] +=
877
+ bt [q * nnodes + n ] * qfvalue * b [q * nnodes + n ];
878
+ }
879
+ } else {
880
+ // Diagonal Only
881
+ const CeedScalar qfvalue =
882
+ assembledqfarray [((((e * numemodein + ein )* ncomp + compOut )*
883
+ numemodeout + eout )* ncomp + compOut )* nqpts + q ];
884
+ if (fabs (qfvalue ) > maxnorm * 1e-12 )
885
+ for (CeedInt n = 0 ; n < nnodes ; n ++ )
886
+ elemdiagarray [(e * ncomp + compOut )* nnodes + n ] +=
887
+ bt [q * nnodes + n ] * qfvalue * b [q * nnodes + n ];
888
+ }
830
889
}
831
890
}
832
891
}
@@ -835,10 +894,13 @@ static int CeedOperatorAssembleLinearDiagonal_Ref(CeedOperator op,
835
894
836
895
// Assemble local operator diagonal
837
896
ierr = CeedVectorSetValue (* assembled , 0.0 ); CeedChk (ierr );
838
- ierr = CeedElemRestrictionApply (rstrout , CEED_TRANSPOSE , elemdiag ,
897
+ ierr = CeedElemRestrictionApply (diagrstr , CEED_TRANSPOSE , elemdiag ,
839
898
* assembled , request ); CeedChk (ierr );
840
899
841
900
// Cleanup
901
+ if (pointBlock ) {
902
+ ierr = CeedElemRestrictionDestroy (& diagrstr ); CeedChk (ierr );
903
+ }
842
904
ierr = CeedVectorDestroy (& assembledqf ); CeedChk (ierr );
843
905
ierr = CeedVectorDestroy (& elemdiag ); CeedChk (ierr );
844
906
ierr = CeedFree (& emodein ); CeedChk (ierr );
@@ -848,6 +910,22 @@ static int CeedOperatorAssembleLinearDiagonal_Ref(CeedOperator op,
848
910
return 0 ;
849
911
}
850
912
913
+ //------------------------------------------------------------------------------
914
+ // Assemble Linear Diagonal
915
+ //------------------------------------------------------------------------------
916
+ static int CeedOperatorAssembleLinearDiagonal_Ref (CeedOperator op ,
917
+ CeedVector * assembled , CeedRequest * request ) {
918
+ return CeedOperatorAssembleDiagonalCore_Ref (op , assembled , request , false);
919
+ }
920
+
921
+ //------------------------------------------------------------------------------
922
+ // Assemble Linear Point Block Diagonal
923
+ //------------------------------------------------------------------------------
924
+ static int CeedOperatorAssembleLinearPointBlockDiagonal_Ref (CeedOperator op ,
925
+ CeedVector * assembled , CeedRequest * request ) {
926
+ return CeedOperatorAssembleDiagonalCore_Ref (op , assembled , request , true);
927
+ }
928
+
851
929
//------------------------------------------------------------------------------
852
930
// Create FDM Element Inverse
853
931
//------------------------------------------------------------------------------
@@ -1101,6 +1179,10 @@ int CeedOperatorCreate_Ref(CeedOperator op) {
1101
1179
ierr = CeedSetBackendFunction (ceed , "Operator" , op , "AssembleLinearDiagonal" ,
1102
1180
CeedOperatorAssembleLinearDiagonal_Ref );
1103
1181
CeedChk (ierr );
1182
+ ierr = CeedSetBackendFunction (ceed , "Operator" , op ,
1183
+ "AssembleLinearPointBlockDiagonal" ,
1184
+ CeedOperatorAssembleLinearPointBlockDiagonal_Ref );
1185
+ CeedChk (ierr );
1104
1186
ierr = CeedSetBackendFunction (ceed , "Operator" , op , "CreateFDMElementInverse" ,
1105
1187
CeedOperatorCreateFDMElementInverse_Ref );
1106
1188
CeedChk (ierr );
0 commit comments