Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 91 additions & 31 deletions pkg/driver/camera/camera_windows.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,27 @@ void printErr(HRESULT hr)
// returned pointer must be released by free() after use.
char* getCameraName(IMoniker* moniker)
{
IPropertyBag* pPropBag = nullptr;
VARIANT varName;
VariantInit(&varName);

// Try to get FriendlyName
if (SUCCEEDED(moniker->BindToStorage(nullptr, nullptr, IID_IPropertyBag, (void**)&pPropBag)))
{
if (SUCCEEDED(pPropBag->Read(L"FriendlyName", &varName, 0)))
{
std::string nameStr = utf16Decode(varName.bstrVal);
char* ret = (char*)malloc(nameStr.size() + 1);
memcpy(ret, nameStr.c_str(), nameStr.size() + 1);
VariantClear(&varName);
pPropBag->Release();
return ret;
}
pPropBag->Release();
}
VariantClear(&varName);

// Fallback to display name
LPOLESTR name;
if (FAILED(moniker->GetDisplayName(nullptr, nullptr, &name)))
return nullptr;
Expand Down Expand Up @@ -312,20 +333,10 @@ int openCamera(camera* cam, const char** errstr)
AM_MEDIA_TYPE mediaType;
memset(&mediaType, 0, sizeof(mediaType));
mediaType.majortype = MEDIATYPE_Video;
mediaType.subtype = MEDIASUBTYPE_YUY2;
// Accept any format by leaving subtype as zeros (equivalent to GUID_NULL)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible that an incompatible format is chosen?

memset(&mediaType.subtype, 0, sizeof(GUID));
mediaType.formattype = FORMAT_VideoInfo;
mediaType.bFixedSizeSamples = 1;
mediaType.cbFormat = sizeof(VIDEOINFOHEADER);

VIDEOINFOHEADER videoInfoHdr;
memset(&videoInfoHdr, 0, sizeof(VIDEOINFOHEADER));
videoInfoHdr.bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
videoInfoHdr.bmiHeader.biWidth = cam->width;
videoInfoHdr.bmiHeader.biHeight = cam->height;
videoInfoHdr.bmiHeader.biPlanes = 1;
videoInfoHdr.bmiHeader.biBitCount = 16;
videoInfoHdr.bmiHeader.biCompression = MAKEFOURCC('Y', 'U', 'Y', '2');
mediaType.pbFormat = (BYTE*)&videoInfoHdr;
// Let DirectShow negotiate format automatically
if (FAILED(grabber->SetMediaType(&mediaType)))
{
*errstr = errGrabber;
Expand Down Expand Up @@ -366,6 +377,9 @@ int openCamera(camera* cam, const char** errstr)
safeRelease(&src);
safeRelease(&dst);

// FIX: Don't connect to null renderer
// Null renderer causes DirectShow to pause after one frame
Comment on lines +380 to +381
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought output pins had to be terminated by connecting them to a null renderer when I wrote this long ago. (I'm not familiar with the Windows API.)
If it's not necessary, please remove the code instead of commenting it out.

/*
end = getPin(grabberFilter, PINDIR_OUTPUT);
nul = getPin(nullFilter, PINDIR_INPUT);
if (end == nullptr || nul == nullptr ||
Expand All @@ -377,6 +391,7 @@ int openCamera(camera* cam, const char** errstr)

safeRelease(&end);
safeRelease(&nul);
*/

safeRelease(&nullFilter);
safeRelease(&captureFilter);
Expand Down Expand Up @@ -422,30 +437,75 @@ HRESULT SampleGrabberCallback::BufferCB(double sampleTime, BYTE* buf, LONG len)
{
BYTE* gobuf = (BYTE*)cam_->buf;
const int nPix = cam_->width * cam_->height;
if (len > nPix * 2)
const int expectedNV12 = nPix + nPix / 2;
const int expectedYUY2 = nPix * 2;

if (abs(len - expectedNV12) <= 10)
{
fprintf(stderr, "Wrong frame buffer size: %d > %d\n", len, nPix * 2);
return S_OK;
// NV12 → I420: Copy Y, de-interleave UV
memcpy(gobuf, buf, nPix);
const BYTE* uv = buf + nPix;
BYTE* u = gobuf + nPix;
BYTE* v = u + nPix / 4;

int uvSize = nPix / 4;
int i = 0;

// Process 4 pixels at a time
for (; i < uvSize - 3; i += 4)
{
u[i] = uv[i * 2];
v[i] = uv[i * 2 + 1];
u[i + 1] = uv[(i + 1) * 2];
v[i + 1] = uv[(i + 1) * 2 + 1];
u[i + 2] = uv[(i + 2) * 2];
v[i + 2] = uv[(i + 2) * 2 + 1];
u[i + 3] = uv[(i + 3) * 2];
v[i + 3] = uv[(i + 3) * 2 + 1];
}

// Handle remaining pixels
for (; i < uvSize; i++)
{
u[i] = uv[i * 2];
v[i] = uv[i * 2 + 1];
}
}
int yi = 0;
int cbi = cam_->width * cam_->height;
int cri = cbi + cbi / 2;
// Pack as I422
for (int y = 0; y < cam_->height; ++y)
else if (abs(len - expectedYUY2) <= 10)
{
int j = y * cam_->width * 2;
for (int x = 0; x < cam_->width / 2; ++x)
// YUY2 → I420: Extract Y, subsample and average UV vertically
BYTE* y = gobuf;
BYTE* u = gobuf + nPix;
BYTE* v = u + nPix / 4;

for (int row = 0; row < cam_->height; row += 2)
{
gobuf[yi] = buf[j];
gobuf[cbi] = buf[j + 1];
gobuf[yi + 1] = buf[j + 2];
gobuf[cri] = buf[j + 3];
j += 4;
yi += 2;
cbi++;
cri++;
const BYTE* src1 = buf + row * cam_->width * 2;
const BYTE* src2 = src1 + cam_->width * 2;

for (int col = 0; col < cam_->width; col += 2)
{
y[0] = src1[0];
y[1] = src1[2];
y[cam_->width] = src2[0];
y[cam_->width + 1] = src2[2];
y += 2;

*u++ = (src1[1] + src2[1]) / 2;
*v++ = (src1[3] + src2[3]) / 2;

src1 += 4;
src2 += 4;
}
y += cam_->width;
}
}
else
{
fprintf(stderr, "Unexpected buffer size: %d (expected NV12=%d or YUY2=%d)\n",
len, expectedNV12, expectedYUY2);
return S_OK;
Comment on lines +505 to +507
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the whole program should fail instead of ignoring the data if an incompatible format is selected.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree

}

imageCallback((size_t)cam_);
return S_OK;
Expand Down
43 changes: 27 additions & 16 deletions pkg/driver/camera/camera_windows.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package camera

// #cgo LDFLAGS: -lstrmiids -lole32 -lquartz
// #cgo LDFLAGS: -lstrmiids -lole32 -loleaut32 -lquartz
// #include <dshow.h>
// #include "camera_windows.hpp"
import "C"
Expand Down Expand Up @@ -59,13 +59,14 @@ func Initialize() {
}

func (c *camera) Open() error {
c.ch = make(chan []byte)
c.ch = make(chan []byte, 2)
c.cam = &C.camera{
name: C.CString(c.name),
}

var errStr *C.char
if C.listResolution(c.cam, &errStr) != 0 {
C.free(unsafe.Pointer(c.cam.name))
return fmt.Errorf("failed to open device: %s", C.GoString(errStr))
}

Expand All @@ -75,36 +76,45 @@ func (c *camera) Open() error {
//export imageCallback
func imageCallback(cam uintptr) {
// Looks up the callback state registered under the key `cam`, copies the
// frame from cb.buf into cb.bufGo, and hands cb.bufGo to the channel cb.ch.
// Returns without doing anything when no entry is registered for `cam`.
//
// NOTE(review): this text is a rendered diff without +/- markers, so two
// variants of the map lookup and two variants of the channel send appear
// below. Only one of each can belong to the real file (two consecutive
// `cb, ok :=` declarations in one scope do not compile).
callbacksMu.RLock()
cb, ok := callbacks[uintptr(unsafe.Pointer(cam))]
cb, ok := callbacks[cam]
callbacksMu.RUnlock()
if !ok {
return
}

// The read lock is already released here, so the copy and the send run
// without holding callbacksMu.
copy(cb.bufGo, cb.buf)
// Blocking variant: waits until a receiver takes the frame.
cb.ch <- cb.bufGo
// Non-blocking variant: try to send, otherwise fall through to default.
select {
case cb.ch <- cb.bufGo:
default:
// Channel is full (no receiver ready): drop this frame instead of blocking.
// This branch does NOT cover a closed channel: in Go a send on a closed
// channel panics, even inside a select that has a default case.
}
}

func (c *camera) Close() error {
// Unregisters this camera from the callbacks map, releases the C-side camera
// and its name string, and closes the frame channel c.ch. Always returns nil.
//
// NOTE(review): this text is a rendered diff without +/- markers, so the map
// delete, close(c.ch) and C.free(name) each appear twice below. Only one of
// each can belong to the real file; executing both close(c.ch) calls would
// panic, and both C.free calls would free the same pointer twice.

// Remove from callbacks first to stop receiving frames
callbacksMu.Lock()
// The map key is the address of the C camera struct.
key := uintptr(unsafe.Pointer(c.cam))
if _, ok := callbacks[key]; ok {
delete(callbacks, key)
}
// delete on a missing key is a no-op, so no presence check is needed.
delete(callbacks, key)
callbacksMu.Unlock()
close(c.ch)

// Stop camera before closing channel
if c.cam != nil {
C.free(unsafe.Pointer(c.cam.name))
C.freeCamera(c.cam)
C.free(unsafe.Pointer(c.cam.name))
// Cleared so a later call skips the C-side cleanup.
c.cam = nil
}

// Now safe to close channel
// NOTE(review): "safe" assumes C.freeCamera does not return while a frame
// callback is still running - not visible here, confirm. imageCallback sends
// after releasing callbacksMu, so a callback that fetched its entry before the
// delete above could otherwise send on the closed channel and panic.
// NOTE(review): c.ch is not set to nil after close, so this nil check does not
// make a second Close() call safe; closing an already-closed channel panics.
if c.ch != nil {
close(c.ch)
}

return nil
}

func (c *camera) VideoRecord(p prop.Media) (video.Reader, error) {
nPix := p.Width * p.Height
c.buf = make([]byte, nPix*2) // for YUY2
c.buf = make([]byte, nPix*2)
c.bufGo = make([]byte, nPix*2)
c.cam.width = C.int(p.Width)
c.cam.height = C.int(p.Height)
Expand All @@ -127,11 +137,11 @@ func (c *camera) VideoRecord(p prop.Media) (video.Reader, error) {
return nil, func() {}, io.EOF
}
img.Y = b[:nPix]
img.Cb = b[nPix : nPix+nPix/2]
img.Cr = b[nPix+nPix/2 : nPix*2]
img.Cb = b[nPix : nPix+nPix/4]
img.Cr = b[nPix+nPix/4 : nPix+nPix/2]
img.YStride = p.Width
img.CStride = p.Width / 2
img.SubsampleRatio = image.YCbCrSubsampleRatio422
img.SubsampleRatio = image.YCbCrSubsampleRatio420
img.Rect = image.Rect(0, 0, p.Width, p.Height)
return img, func() {}, nil
})
Expand All @@ -142,8 +152,8 @@ func (c *camera) Properties() []prop.Media {
properties := []prop.Media{}
for i := 0; i < int(c.cam.numProps); i++ {
p := C.getProp(c.cam, C.int(i))
// TODO: support other FOURCC
if p.fcc == fourccYUY2 {
// Support both YUY2 and NV12 formats
if p.fcc == fourccYUY2 || p.fcc == fourccNV12 {
properties = append(properties, prop.Media{
Video: prop.Video{
Width: int(p.width),
Expand All @@ -157,5 +167,6 @@ func (c *camera) Properties() []prop.Media {
}

const (
fourccYUY2 = 0x32595559
fourccYUY2 = 0x32595559 // 'YUY2'
fourccNV12 = 0x3231564E // 'NV12'
)
Loading