Skip to content

Commit

Permalink
Use buffer pool on avs+ MT.
Browse files Browse the repository at this point in the history
also, disable AVX2 always on Avisynth2.6.
  • Loading branch information
chikuzen committed May 25, 2016
1 parent 254a502 commit 9196b41
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 27 deletions.
29 changes: 22 additions & 7 deletions README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ GENERAL INFO:

Syntax=>

TMM2(clip, int mode, int order, int field, int length, int mtype,
TMM2(clip, int mode, int order, int field, int length, int mtype,
int ttype, int mtqL, int mthL, int mtqC, int mthC, int nt,
int minthresh, int maxthresh, int cstr, int opt)

Expand Down Expand Up @@ -173,20 +173,35 @@ PARAMETERS:

Controls which CPU optimizations are used to create motion masks.

0 - Use C++ routine.
1 - Use SSE2 routine if possible. When SSE2 can't be used, fallback to 0.
others - Use AVX2 routine if possible. When AVX2 can't be used, fallback to 1.

Default: -1 (int)
0 - Use C++ routine.
1 - Use SSE2 routine if possible. When SSE2 can't be used, fall back to 0.
others - Use AVX2 routine if possible. When AVX2 can't be used, fall back to 1.

Default: -1 (int)


NOTE:
- TMM2_avx2.dll is compiled with /arch:AVX2.

- On Avisynth2.6, AVX2 is always disabled even if you use TMM2_avx2.dll.

- On Avisynth+ MT, TMM2 is registered as MT_NICE_FILTER automatically.
You don't have to call SetFilterMTMode() yourself for this filter.

- This filter requires appropriate memory alignments.
Thus, if you want to crop the left side of your source clip before this filter,
you have to set crop(align=true).


CHANGE LIST:


v0.0 - (2015-05-20)
v0.1 - (2016-05-25)
+ Use buffer pool on avs+ MT.
+ Always disable AVX2 on Avisynth2.6.


v0.0 - (2016-05-20)
+ initial release


Expand Down
27 changes: 20 additions & 7 deletions src/CreateMM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,8 +171,9 @@ combine_masks_simd(uint8_t* dstp, const uint8_t* sqp, const uint8_t* shp,
}


CreateMM::CreateMM(PClip mm1, PClip mm2, int _cstr, arch_t arch) :
GVFmod(mm1, arch), mmask2(mm2), cstr(_cstr), simd(arch != NO_SIMD)
CreateMM::CreateMM(PClip mm1, PClip mm2, int _cstr, arch_t arch, bool ip) :
GVFmod(mm1, arch), mmask2(mm2), cstr(_cstr), simd(arch != NO_SIMD),
isPlus(ip)
{
vi.height /= 2;

Expand All @@ -197,21 +198,33 @@ CreateMM::CreateMM(PClip mm1, PClip mm2, int _cstr, arch_t arch) :


// Scratch buffer that owns a single aligned allocation of
// pitch * (height * 2 + 1) bytes and exposes two row-planes (am0, am1) in it.
// The destructor releases the allocation through the same path that made it.
//
// NOTE(review): this listing is a commit diff rendered without +/- markers, so
// pre-commit and post-commit lines appear side by side; the comments below say
// which is which.
struct AndBuff {
// Script environment; only dereferenced (as IScriptEnvironment2) when isPlus.
ise_t* env;
// true  -> allocate/free via IScriptEnvironment2::Allocate()/Free()
// false -> allocate/free via _mm_malloc()/_mm_free()
bool isPlus;
// Base address of the allocation (null if the allocation failed).
void* orig;
// First plane: starts one pitch past orig.
uint8_t* am0;
// Second plane: starts pitch * height bytes past am0.
uint8_t* am1;
// Row stride in bytes: (width + 2) rounded up to a multiple of align
// (the mask arithmetic assumes align is a power of two).
const int pitch;

// Pre-commit constructor header (replaced by the five-argument one below).
AndBuff(int width, int height, size_t align) :
pitch((width + 2 + align - 1) & ~(align - 1))
// Post-commit constructor header: additionally records which allocator to
// use (is_plus) and the environment that provides the pooled allocator (e).
// Members are initialised in declaration order regardless of the order
// written in this initializer list.
AndBuff(int width, int height, size_t align, bool is_plus, ise_t* e) :
pitch((width + 2 + align - 1) & ~(align - 1)), env(e), isPlus(is_plus)
{
// Pre-commit allocation: always _mm_malloc.
orig = _mm_malloc(pitch * (height * 2 + 1), align);
// Post-commit allocation: one leading row plus two planes of `height` rows.
size_t size = pitch * (height * 2 + 1);
if (isPlus) {
// Request the block with the AVS_POOLED_ALLOC flag from the environment.
orig = static_cast<IScriptEnvironment2*>(
env)->Allocate(size, align, AVS_POOLED_ALLOC);
} else {
orig = _mm_malloc(size, align);
}
// The plane pointers are computed even when orig is null; the caller is
// expected to test orig before touching them.
am0 = reinterpret_cast<uint8_t*>(orig) + pitch;
am1 = am0 + pitch * height;
}
~AndBuff()
{
// Pre-commit release: always _mm_free.
_mm_free(orig);
// Post-commit release: must mirror the allocator chosen in the constructor.
if (isPlus) {
static_cast<IScriptEnvironment2*>(env)->Free(orig);
} else {
_mm_free(orig);
}
orig = nullptr;
}
};
Expand All @@ -224,7 +237,7 @@ PVideoFrame __stdcall CreateMM::GetFrame(int n, ise_t* env)
auto src0 = child->GetFrame(n, env);
auto dst = env->NewVideoFrame(vi, align);

auto buff = AndBuff(vi.width, vi.height, align);
auto buff = AndBuff(vi.width, vi.height, align, isPlus, env);

if (!buff.orig) {
env->ThrowError("TMM: failed to allocate AndBuff.");
Expand Down
6 changes: 5 additions & 1 deletion src/TMM2.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#include <avisynth.h>


#define TMM2_VERSION "0.1"



typedef IScriptEnvironment ise_t;

Expand Down Expand Up @@ -104,6 +107,7 @@ class CreateMM : public GVFmod {
PClip mmask2;
const int cstr;
const bool simd;
const bool isPlus;

void(__stdcall *and_masks)(
uint8_t* dstp0, uint8_t* dstp1, const uint8_t* srcp0,
Expand All @@ -117,7 +121,7 @@ class CreateMM : public GVFmod {
const int cstr);

public:
CreateMM(PClip mm1, PClip mm2, int cstr, arch_t arch);
CreateMM(PClip mm1, PClip mm2, int cstr, arch_t arch, bool is_avsplus);
~CreateMM() {}
PVideoFrame __stdcall GetFrame(int n, ise_t* env);
};
Expand Down
22 changes: 13 additions & 9 deletions src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,23 +28,23 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
extern bool has_sse2();
extern bool has_avx2();

// Maps the user's `opt` argument and the detected CPU features to the SIMD
// level the filter will use:
//   - opt == 0, or no SSE2 on this CPU            -> NO_SIMD
//   - binary built without __AVX2__               -> USE_SSE2
//   - opt == 1, no AVX2, or host is not Avisynth+ -> USE_SSE2
//   - otherwise                                   -> USE_AVX2
//
// NOTE(review): this listing is a commit diff rendered without +/- markers;
// the first signature and the first AVX2 condition are the pre-commit lines.

// Pre-commit signature.
static inline arch_t get_arch(int opt)
// Post-commit signature: is_avsplus reports whether the host is Avisynth+.
static inline arch_t get_arch(int opt, bool is_avsplus)
{
if (opt == 0 || !has_sse2()) {
return NO_SIMD;
}
#if !defined(__AVX2__)
// This build contains no AVX2 code path, so SSE2 is the ceiling.
return USE_SSE2;
#else
// Pre-commit condition.
if (opt == 1 || !has_avx2()) {
// Post-commit condition: also falls back to SSE2 when not on Avisynth+.
if (opt == 1 || !has_avx2() || !is_avsplus) {
return USE_SSE2;
}
return USE_AVX2;
#endif
}


static AVSValue __cdecl create_tmm(AVSValue args, void*, ise_t* env)
static AVSValue __cdecl create_tmm(AVSValue args, void* user_data, ise_t* env)
{
try {
PClip orig = args[0].AsClip();
Expand Down Expand Up @@ -93,7 +93,8 @@ static AVSValue __cdecl create_tmm(AVSValue args, void*, ise_t* env)
int minth = clamp(args[12].AsInt(4), 0, 255);
int maxth = clamp(args[13].AsInt(75), 0, 255);
int cstr = clamp(args[14].AsInt(4), 0, 8);
arch_t arch = get_arch(args[15].AsInt(-1));
bool is_avsplus = user_data != nullptr;
arch_t arch = get_arch(args[15].AsInt(-1), is_avsplus);

orig = env->Invoke("SeparateFields", orig).AsClip();
const char* filter[] = { "SelectEven", "SelectOdd" };
Expand All @@ -116,10 +117,10 @@ static AVSValue __cdecl create_tmm(AVSValue args, void*, ise_t* env)
btmf0 = env->Invoke("InternalCache", btmf0).AsClip();
PClip btmf1 = new MotionMask(btmf, minth, maxth, nt, 2, arch);

topf = new CreateMM(topf0, topf1, cstr, arch);
topf = new CreateMM(topf0, topf1, cstr, arch, is_avsplus);
topf = env->Invoke("InternalCache", topf).AsClip();

btmf = new CreateMM(btmf0, btmf1, cstr, arch);
btmf = new CreateMM(btmf0, btmf1, cstr, arch, is_avsplus);
btmf = env->Invoke("InternalCache", btmf).AsClip();

return new BuildMM(topf, btmf, mode, order, field, length, mtype, arch, env);
Expand All @@ -138,6 +139,9 @@ extern "C" __declspec(dllexport) const char* __stdcall
AvisynthPluginInit3(ise_t* env, const AVS_Linkage* const vectors)
{
AVS_linkage = vectors;

void* is_avsplus = env->FunctionExists("SetFilterMTMode") ? "true" : nullptr;

const char* args =
"c" // 0
"[mode]i" // 1
Expand All @@ -156,12 +160,12 @@ AvisynthPluginInit3(ise_t* env, const AVS_Linkage* const vectors)
"[cstr]i" //14
"[opt]i"; //15

env->AddFunction("TMM2", args, create_tmm, nullptr);
env->AddFunction("TMM2", args, create_tmm, is_avsplus);

if (env->FunctionExists("SetFilterMTMode")) {
if (is_avsplus != nullptr) {
static_cast<IScriptEnvironment2*>(
env)->SetFilterMTMode("TMM2", MT_NICE_FILTER, true);
}

return "TMM for avs2.6/avs+";
return "TMM for avs2.6/avs+ ver. " TMM2_VERSION;
}
4 changes: 2 additions & 2 deletions src/proc_thmask.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ proc_c(uint8_t* dqp, const uint8_t* srcp, const int pitch,
}
s0 = s1;
s1 = s2;
s2 += (y < height - 1) ? pitch : -pitch;
s2 += (y < height - 2) ? pitch : -pitch;
dqp += pitch;
dhp += pitch;
}
Expand Down Expand Up @@ -257,7 +257,7 @@ proc_simd(uint8_t* dqp, const uint8_t* srcp, const int pitch,
}
s0 = s1;
s1 = s2;
s2 += (y < height - 1) ? pitch : -pitch;
s2 += (y < height - 2) ? pitch : -pitch;
dqp += pitch;
dhp += pitch;
}
Expand Down
2 changes: 1 addition & 1 deletion src/simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ static F_INLINE V cmpge(const V& x, const V& y)
// Per-lane "not equal" comparison, built from cmpeq because no direct
// not-equal helper is used here.
//
// NOTE(review): this listing is a commit diff rendered without +/- markers;
// the or_reg line is the pre-commit version, the xor_reg line replaces it.
template <typename V>
static F_INLINE V cmpneq(const V& x, const V& y)
{
// Pre-commit line: OR-ing with cmpeq(x, x) (presumably an all-ones mask)
// would give the same result regardless of whether x and y are equal.
return or_reg(cmpeq(x, y), cmpeq(x, x));
// Post-commit line: XOR-ing the equality mask with cmpeq(x, x) inverts it,
// giving a mask that is set where x != y (assuming cmpeq(x, x) is all-ones).
return xor_reg(cmpeq(x, y), cmpeq(x, x));
}

template <typename V>
Expand Down

0 comments on commit 9196b41

Please sign in to comment.