@@ -35,7 +35,7 @@ CREATE_SAMPLER(SampleTempTex0, TempTex0_RGBA8, LINEAR, MIRROR, MIRROR, MIRROR)
35
35
36
36
float GetIntensity (float3 Color)
37
37
{
38
- return dot (Color, CColor_Rec709_Coefficients );
38
+ return dot (Color, 1.0 / 3.0 );
39
39
}
40
40
41
41
float4 PS_Prefilter (CShade_VS2PS_Quad Input) : SV_TARGET0
@@ -45,21 +45,21 @@ float4 PS_Prefilter(CShade_VS2PS_Quad Input) : SV_TARGET0
45
45
float4 EdgeTex1 = Input.Tex0.xyxy + (float4 (0.0 , -1.0 , 0.0 , 1.0 ) * Delta.xyxy);
46
46
47
47
float3 Neighborhood[4 ];
48
- float3 Center = tex2D (CShade_SampleGammaTex, Input.Tex0).rgb;
49
- Neighborhood[0 ] = tex2D (CShade_SampleGammaTex, EdgeTex0.xy).rgb;
50
- Neighborhood[1 ] = tex2D (CShade_SampleGammaTex, EdgeTex0.zw).rgb;
51
- Neighborhood[2 ] = tex2D (CShade_SampleGammaTex, EdgeTex1.xy).rgb;
52
- Neighborhood[3 ] = tex2D (CShade_SampleGammaTex, EdgeTex1.zw).rgb;
48
+ float3 Center = tex2Dlod (CShade_SampleGammaTex, float4 ( Input.Tex0, 0.0 , 0.0 ) ).rgb;
49
+ Neighborhood[0 ] = tex2Dlod (CShade_SampleGammaTex, float4 ( EdgeTex0.xy, 0.0 , 0.0 ) ).rgb;
50
+ Neighborhood[1 ] = tex2Dlod (CShade_SampleGammaTex, float4 ( EdgeTex0.zw, 0.0 , 0.0 ) ).rgb;
51
+ Neighborhood[2 ] = tex2Dlod (CShade_SampleGammaTex, float4 ( EdgeTex1.xy, 0.0 , 0.0 ) ).rgb;
52
+ Neighborhood[3 ] = tex2Dlod (CShade_SampleGammaTex, float4 ( EdgeTex1.zw, 0.0 , 0.0 ) ).rgb;
53
53
54
54
// Compass edge detection on N/S/E/W
55
55
float3 Edges = 0.0 ;
56
56
Edges = max (Edges, abs (Center - Neighborhood[0 ]));
57
57
Edges = max (Edges, abs (Center - Neighborhood[1 ]));
58
58
Edges = max (Edges, abs (Center - Neighborhood[2 ]));
59
59
Edges = max (Edges, abs (Center - Neighborhood[3 ]));
60
- float EdgesLuma = smoothstep (0.0 , 0.25 , GetIntensity (Edges));
61
60
62
- return float4 (Center, EdgesLuma);
61
+ // It costs more ALU, but we should do the multiplication in the sampling pass for precision reasons
62
+ return float4 (Center, smoothstep (0.0 , 0.25 , GetIntensity (Edges)));
63
63
}
64
64
65
65
float4 PS_DLAA (CShade_VS2PS_Quad Input) : SV_TARGET0
@@ -77,17 +77,17 @@ float4 PS_DLAA(CShade_VS2PS_Quad Input) : SV_TARGET0
77
77
float4 ShortEdgeTex2 = Input.Tex0.xyxy + (float4 (-1.0 , 0.0 , 1.0 , 0.0 ) * Delta.xyxy);
78
78
float4 ShortEdgeTex3 = Input.Tex0.xyxy + (float4 (0.0 , -1.0 , 0.0 , 1.0 ) * Delta.xyxy);
79
79
80
- float4 Center = tex2D (CShade_SampleGammaTex, Input.Tex0);
80
+ float4 Center = tex2Dlod (CShade_SampleGammaTex, float4 ( Input.Tex0, 0.0 , 0.0 ) );
81
81
82
- float4 Left01 = tex2D (SampleTempTex0, ShortEdgeTex0.xy);
83
- float4 Right01 = tex2D (SampleTempTex0, ShortEdgeTex0.zw);
84
- float4 Top01 = tex2D (SampleTempTex0, ShortEdgeTex1.xy);
85
- float4 Bottom01 = tex2D (SampleTempTex0, ShortEdgeTex1.zw);
82
+ float4 Left01 = tex2Dlod (SampleTempTex0, float4 ( ShortEdgeTex0.xy, 0.0 , 0.0 ) );
83
+ float4 Right01 = tex2Dlod (SampleTempTex0, float4 ( ShortEdgeTex0.zw, 0.0 , 0.0 ) );
84
+ float4 Top01 = tex2Dlod (SampleTempTex0, float4 ( ShortEdgeTex1.xy, 0.0 , 0.0 ) );
85
+ float4 Bottom01 = tex2Dlod (SampleTempTex0, float4 ( ShortEdgeTex1.zw, 0.0 , 0.0 ) );
86
86
87
- float4 Left = tex2D (SampleTempTex0, ShortEdgeTex2.xy);
88
- float4 Right = tex2D (SampleTempTex0, ShortEdgeTex2.zw);
89
- float4 Top = tex2D (SampleTempTex0, ShortEdgeTex3.xy);
90
- float4 Bottom = tex2D (SampleTempTex0, ShortEdgeTex3.zw);
87
+ float4 Left = tex2Dlod (SampleTempTex0, float4 ( ShortEdgeTex2.xy, 0.0 , 0.0 ) );
88
+ float4 Right = tex2Dlod (SampleTempTex0, float4 ( ShortEdgeTex2.zw, 0.0 , 0.0 ) );
89
+ float4 Top = tex2Dlod (SampleTempTex0, float4 ( ShortEdgeTex3.xy, 0.0 , 0.0 ) );
90
+ float4 Bottom = tex2Dlod (SampleTempTex0, float4 ( ShortEdgeTex3.zw, 0.0 , 0.0 ) );
91
91
92
92
float4 WH = 2.0 * (Left01 + Right01);
93
93
float4 WV = 2.0 * (Top01 + Bottom01);
@@ -126,23 +126,23 @@ float4 PS_DLAA(CShade_VS2PS_Quad Input) : SV_TARGET0
126
126
float4 LTex6 = Input.Tex0.xyxy + (float4 (-5.5 , 0.0 , 0.0 , -5.5 ) * Delta.xyxy);
127
127
float4 LTex7 = Input.Tex0.xyxy + (float4 (-7.5 , 0.0 , 0.0 , -7.5 ) * Delta.xyxy);
128
128
129
- float4 H0 = tex2D (SampleTempTex0, LTex0.xy);
130
- float4 H1 = tex2D (SampleTempTex0, LTex1.xy);
131
- float4 H2 = tex2D (SampleTempTex0, LTex2.xy);
132
- float4 H3 = tex2D (SampleTempTex0, LTex3.xy);
133
- float4 H4 = tex2D (SampleTempTex0, LTex4.xy);
134
- float4 H5 = tex2D (SampleTempTex0, LTex5.xy);
135
- float4 H6 = tex2D (SampleTempTex0, LTex6.xy);
136
- float4 H7 = tex2D (SampleTempTex0, LTex7.xy);
137
-
138
- float4 V0 = tex2D (SampleTempTex0, LTex0.zw);
139
- float4 V1 = tex2D (SampleTempTex0, LTex1.zw);
140
- float4 V2 = tex2D (SampleTempTex0, LTex2.zw);
141
- float4 V3 = tex2D (SampleTempTex0, LTex3.zw);
142
- float4 V4 = tex2D (SampleTempTex0, LTex4.zw);
143
- float4 V5 = tex2D (SampleTempTex0, LTex5.zw);
144
- float4 V6 = tex2D (SampleTempTex0, LTex6.zw);
145
- float4 V7 = tex2D (SampleTempTex0, LTex7.zw);
129
+ float4 H0 = tex2Dlod (SampleTempTex0, float4 ( LTex0.xy, 0.0 , 0.0 ) );
130
+ float4 H1 = tex2Dlod (SampleTempTex0, float4 ( LTex1.xy, 0.0 , 0.0 ) );
131
+ float4 H2 = tex2Dlod (SampleTempTex0, float4 ( LTex2.xy, 0.0 , 0.0 ) );
132
+ float4 H3 = tex2Dlod (SampleTempTex0, float4 ( LTex3.xy, 0.0 , 0.0 ) );
133
+ float4 H4 = tex2Dlod (SampleTempTex0, float4 ( LTex4.xy, 0.0 , 0.0 ) );
134
+ float4 H5 = tex2Dlod (SampleTempTex0, float4 ( LTex5.xy, 0.0 , 0.0 ) );
135
+ float4 H6 = tex2Dlod (SampleTempTex0, float4 ( LTex6.xy, 0.0 , 0.0 ) );
136
+ float4 H7 = tex2Dlod (SampleTempTex0, float4 ( LTex7.xy, 0.0 , 0.0 ) );
137
+
138
+ float4 V0 = tex2Dlod (SampleTempTex0, float4 ( LTex0.zw, 0.0 , 0.0 ) );
139
+ float4 V1 = tex2Dlod (SampleTempTex0, float4 ( LTex1.zw, 0.0 , 0.0 ) );
140
+ float4 V2 = tex2Dlod (SampleTempTex0, float4 ( LTex2.zw, 0.0 , 0.0 ) );
141
+ float4 V3 = tex2Dlod (SampleTempTex0, float4 ( LTex3.zw, 0.0 , 0.0 ) );
142
+ float4 V4 = tex2Dlod (SampleTempTex0, float4 ( LTex4.zw, 0.0 , 0.0 ) );
143
+ float4 V5 = tex2Dlod (SampleTempTex0, float4 ( LTex5.zw, 0.0 , 0.0 ) );
144
+ float4 V6 = tex2Dlod (SampleTempTex0, float4 ( LTex6.zw, 0.0 , 0.0 ) );
145
+ float4 V7 = tex2Dlod (SampleTempTex0, float4 ( LTex7.zw, 0.0 , 0.0 ) );
146
146
147
147
// In CShade, we take .rgb out of branch
148
148
float4 LongBlurH = (H0 + H1 + H2 + H3 + H4 + H5 + H6 + H7) / 8.0 ;
@@ -186,17 +186,17 @@ float4 PS_DLAA(CShade_VS2PS_Quad Input) : SV_TARGET0
186
186
187
187
// Preserve high frequencies
188
188
float4 RTex = Input.Tex0.xyxy + (Delta.xyxy * float4 (-1.5 , -1.5 , 1.5 , 1.5 ));
189
- float4 R0 = tex2D (SampleTempTex0, RTex.xw);
190
- float4 R1 = tex2D (SampleTempTex0, RTex.zw);
191
- float4 R2 = tex2D (SampleTempTex0, RTex.xy);
192
- float4 R3 = tex2D (SampleTempTex0, RTex.zy);
189
+ float4 R0 = tex2Dlod (SampleTempTex0, float4 ( RTex.xw, 0.0 , 0.0 ) );
190
+ float4 R1 = tex2Dlod (SampleTempTex0, float4 ( RTex.zw, 0.0 , 0.0 ) );
191
+ float4 R2 = tex2Dlod (SampleTempTex0, float4 ( RTex.xy, 0.0 , 0.0 ) );
192
+ float4 R3 = tex2Dlod (SampleTempTex0, float4 ( RTex.zy, 0.0 , 0.0 ) );
193
193
194
194
float4 R = (4.0 * (R0 + R1 + R2 + R3) + Center + Top01 + Bottom01 + Left01 + Right01) / 25.0 ;
195
195
Color = lerp (Color, Center, saturate (R.a * 3.0 - 1.5 ));
196
196
197
197
if (_RenderMode == 1 )
198
198
{
199
- return tex2D (SampleTempTex0, Input.Tex0).a;
199
+ return tex2Dlod (SampleTempTex0, float4 ( Input.Tex0, 0.0 , 0.0 ) ).a;
200
200
}
201
201
202
202
return CBlend_OutputChannels (float4 (Color.rgb, _CShadeAlphaFactor));
0 commit comments