diff --git a/pkg/driver/camera/camera_windows.cpp b/pkg/driver/camera/camera_windows.cpp index 061f21d6..3e22bc00 100644 --- a/pkg/driver/camera/camera_windows.cpp +++ b/pkg/driver/camera/camera_windows.cpp @@ -33,6 +33,27 @@ void printErr(HRESULT hr) // returned pointer must be released by free() after use. char* getCameraName(IMoniker* moniker) { + IPropertyBag* pPropBag = nullptr; + VARIANT varName; + VariantInit(&varName); + + // Try to get FriendlyName + if (SUCCEEDED(moniker->BindToStorage(nullptr, nullptr, IID_IPropertyBag, (void**)&pPropBag))) + { + if (SUCCEEDED(pPropBag->Read(L"FriendlyName", &varName, 0))) + { + std::string nameStr = utf16Decode(varName.bstrVal); + char* ret = (char*)malloc(nameStr.size() + 1); + memcpy(ret, nameStr.c_str(), nameStr.size() + 1); + VariantClear(&varName); + pPropBag->Release(); + return ret; + } + pPropBag->Release(); + } + VariantClear(&varName); + + // Fallback to display name LPOLESTR name; if (FAILED(moniker->GetDisplayName(nullptr, nullptr, &name))) return nullptr; @@ -312,20 +333,10 @@ int openCamera(camera* cam, const char** errstr) AM_MEDIA_TYPE mediaType; memset(&mediaType, 0, sizeof(mediaType)); mediaType.majortype = MEDIATYPE_Video; - mediaType.subtype = MEDIASUBTYPE_YUY2; + // Accept any format by leaving subtype as zeros (equivalent to GUID_NULL) + memset(&mediaType.subtype, 0, sizeof(GUID)); mediaType.formattype = FORMAT_VideoInfo; - mediaType.bFixedSizeSamples = 1; - mediaType.cbFormat = sizeof(VIDEOINFOHEADER); - - VIDEOINFOHEADER videoInfoHdr; - memset(&videoInfoHdr, 0, sizeof(VIDEOINFOHEADER)); - videoInfoHdr.bmiHeader.biSize = sizeof(BITMAPINFOHEADER); - videoInfoHdr.bmiHeader.biWidth = cam->width; - videoInfoHdr.bmiHeader.biHeight = cam->height; - videoInfoHdr.bmiHeader.biPlanes = 1; - videoInfoHdr.bmiHeader.biBitCount = 16; - videoInfoHdr.bmiHeader.biCompression = MAKEFOURCC('Y', 'U', 'Y', '2'); - mediaType.pbFormat = (BYTE*)&videoInfoHdr; + // Let DirectShow negotiate format automatically if (FAILED(grabber->SetMediaType(&mediaType))) { *errstr = errGrabber; @@ -366,6 +377,9 @@ int openCamera(camera* cam, const char** errstr) safeRelease(&src); safeRelease(&dst); + // FIX: Don't connect to null renderer + // Null renderer causes DirectShow to pause after one frame + /* end = getPin(grabberFilter, PINDIR_OUTPUT); nul = getPin(nullFilter, PINDIR_INPUT); if (end == nullptr || nul == nullptr || @@ -377,6 +391,7 @@ int openCamera(camera* cam, const char** errstr) safeRelease(&end); safeRelease(&nul); + */ safeRelease(&nullFilter); safeRelease(&captureFilter); @@ -422,30 +437,75 @@ HRESULT SampleGrabberCallback::BufferCB(double sampleTime, BYTE* buf, LONG len) { BYTE* gobuf = (BYTE*)cam_->buf; const int nPix = cam_->width * cam_->height; - if (len > nPix * 2) + const int expectedNV12 = nPix + nPix / 2; + const int expectedYUY2 = nPix * 2; + + if (abs(len - expectedNV12) <= 10) { - fprintf(stderr, "Wrong frame buffer size: %d > %d\n", len, nPix * 2); - return S_OK; + // NV12 → I420: Copy Y, de-interleave UV + memcpy(gobuf, buf, nPix); + const BYTE* uv = buf + nPix; + BYTE* u = gobuf + nPix; + BYTE* v = u + nPix / 4; + + int uvSize = nPix / 4; + int i = 0; + + // Process 4 pixels at a time + for (; i < uvSize - 3; i += 4) + { + u[i] = uv[i * 2]; + v[i] = uv[i * 2 + 1]; + u[i + 1] = uv[(i + 1) * 2]; + v[i + 1] = uv[(i + 1) * 2 + 1]; + u[i + 2] = uv[(i + 2) * 2]; + v[i + 2] = uv[(i + 2) * 2 + 1]; + u[i + 3] = uv[(i + 3) * 2]; + v[i + 3] = uv[(i + 3) * 2 + 1]; + } + + // Handle remaining pixels + for (; i < uvSize; i++) + { + u[i] = uv[i * 2]; + v[i] = uv[i * 2 + 1]; + } } - int yi = 0; - int cbi = cam_->width * cam_->height; - int cri = cbi + cbi / 2; - // Pack as I422 - for (int y = 0; y < cam_->height; ++y) + else if (abs(len - expectedYUY2) <= 10) { - int j = y * cam_->width * 2; - for (int x = 0; x < cam_->width / 2; ++x) + // YUY2 → I420: Extract Y, subsample and average UV vertically + BYTE* y = gobuf; + BYTE* u = gobuf + nPix; + BYTE* v = u + nPix / 4; + + for (int row = 0; row < cam_->height; row += 2) { - gobuf[yi] = buf[j]; - gobuf[cbi] = buf[j + 1]; - gobuf[yi + 1] = buf[j + 2]; - gobuf[cri] = buf[j + 3]; - j += 4; - yi += 2; - cbi++; - cri++; + const BYTE* src1 = buf + row * cam_->width * 2; + const BYTE* src2 = src1 + cam_->width * 2; + + for (int col = 0; col < cam_->width; col += 2) + { + y[0] = src1[0]; + y[1] = src1[2]; + y[cam_->width] = src2[0]; + y[cam_->width + 1] = src2[2]; + y += 2; + + *u++ = (src1[1] + src2[1]) / 2; + *v++ = (src1[3] + src2[3]) / 2; + + src1 += 4; + src2 += 4; + } + y += cam_->width; } } + else + { + fprintf(stderr, "Unexpected buffer size: %d (expected NV12=%d or YUY2=%d)\n", + len, expectedNV12, expectedYUY2); + return S_OK; + } imageCallback((size_t)cam_); return S_OK; diff --git a/pkg/driver/camera/camera_windows.go b/pkg/driver/camera/camera_windows.go index c8d9a43c..3d8b974e 100644 --- a/pkg/driver/camera/camera_windows.go +++ b/pkg/driver/camera/camera_windows.go @@ -1,6 +1,6 @@ package camera -// #cgo LDFLAGS: -lstrmiids -lole32 -lquartz +// #cgo LDFLAGS: -lstrmiids -lole32 -loleaut32 -lquartz // #include // #include "camera_windows.hpp" import "C" @@ -59,13 +59,14 @@ func Initialize() { } func (c *camera) Open() error { - c.ch = make(chan []byte) + c.ch = make(chan []byte, 2) c.cam = &C.camera{ name: C.CString(c.name), } var errStr *C.char if C.listResolution(c.cam, &errStr) != 0 { + C.free(unsafe.Pointer(c.cam.name)) return fmt.Errorf("failed to open device: %s", C.GoString(errStr)) } @@ -75,36 +76,45 @@ func (c *camera) Open() error { //export imageCallback func imageCallback(cam uintptr) { callbacksMu.RLock() - cb, ok := callbacks[uintptr(unsafe.Pointer(cam))] + cb, ok := callbacks[cam] callbacksMu.RUnlock() if !ok { return } copy(cb.bufGo, cb.buf) - cb.ch <- cb.bufGo + select { + case cb.ch <- cb.bufGo: + default: + // Channel closed or full, skip frame + } } func (c *camera) Close() error { + // Remove from callbacks first to stop receiving frames callbacksMu.Lock() key := uintptr(unsafe.Pointer(c.cam)) - if _, ok := callbacks[key]; ok { - delete(callbacks, key) - } + delete(callbacks, key) callbacksMu.Unlock() - close(c.ch) + // Stop camera before closing channel if c.cam != nil { - C.free(unsafe.Pointer(c.cam.name)) C.freeCamera(c.cam) + C.free(unsafe.Pointer(c.cam.name)) c.cam = nil } + + // Now safe to close channel + if c.ch != nil { + close(c.ch) + } + return nil } func (c *camera) VideoRecord(p prop.Media) (video.Reader, error) { nPix := p.Width * p.Height - c.buf = make([]byte, nPix*2) // for YUY2 + c.buf = make([]byte, nPix*2) c.bufGo = make([]byte, nPix*2) c.cam.width = C.int(p.Width) c.cam.height = C.int(p.Height) @@ -127,11 +137,11 @@ func (c *camera) VideoRecord(p prop.Media) (video.Reader, error) { return nil, func() {}, io.EOF } img.Y = b[:nPix] - img.Cb = b[nPix : nPix+nPix/2] - img.Cr = b[nPix+nPix/2 : nPix*2] + img.Cb = b[nPix : nPix+nPix/4] + img.Cr = b[nPix+nPix/4 : nPix+nPix/2] img.YStride = p.Width img.CStride = p.Width / 2 - img.SubsampleRatio = image.YCbCrSubsampleRatio422 + img.SubsampleRatio = image.YCbCrSubsampleRatio420 img.Rect = image.Rect(0, 0, p.Width, p.Height) return img, func() {}, nil }) @@ -142,8 +152,8 @@ func (c *camera) Properties() []prop.Media { properties := []prop.Media{} for i := 0; i < int(c.cam.numProps); i++ { p := C.getProp(c.cam, C.int(i)) - // TODO: support other FOURCC - if p.fcc == fourccYUY2 { + // Support both YUY2 and NV12 formats + if p.fcc == fourccYUY2 || p.fcc == fourccNV12 { properties = append(properties, prop.Media{ Video: prop.Video{ Width: int(p.width), @@ -157,5 +167,6 @@ func (c *camera) Properties() []prop.Media { } const ( - fourccYUY2 = 0x32595559 + fourccYUY2 = 0x32595559 // 'YUY2' + fourccNV12 = 0x3231564E // 'NV12' )