From 58fd583d8e5ad40e8d40aa463b6e88f30f9b1308 Mon Sep 17 00:00:00 2001 From: quang_neo Date: Wed, 7 Sep 2022 10:05:41 +0700 Subject: [PATCH] Allow option to tolerate PixelData VL and expected length mismatches. (#245) This change introduces a ParseOption that allows PixelData element VL and expected lengths to mismatch without returning an error. If the given ParseOption is set, the PixelData with the aforementioned mismatch will have its raw bytes read, dropped into an encapsulated Frame, and have that Frame wrapped in a PixelDataInfo with a ParseErr property set to an error describing this scenario. (Commit description/summary set by suyashkumar@). Co-authored-by: quang18 --- cmd/dicomutil/main.go | 18 +++++--- dataset.go | 4 +- dataset_test.go | 4 +- element.go | 20 ++++++-- parse.go | 27 +++++++---- read.go | 103 +++++++++++++++++++++++++++++------------- read_test.go | 47 +++++++++++++++++-- 7 files changed, 166 insertions(+), 57 deletions(-) diff --git a/cmd/dicomutil/main.go b/cmd/dicomutil/main.go index 79797817..00d52651 100644 --- a/cmd/dicomutil/main.go +++ b/cmd/dicomutil/main.go @@ -21,10 +21,11 @@ import ( var GitVersion = "unknown" var ( - version = flag.Bool("version", false, "print current version and exit") - filepath = flag.String("path", "", "path") - extractImagesStream = flag.Bool("extract-images-stream", false, "Extract images using frame streaming capability") - printJSON = flag.Bool("json", false, "Print dataset as JSON") + version = flag.Bool("version", false, "print current version and exit") + filepath = flag.String("path", "", "path") + extractImagesStream = flag.Bool("extract-images-stream", false, "Extract images using frame streaming capability") + printJSON = flag.Bool("json", false, "Print dataset as JSON") + allowPixelDataVLMismatch = flag.Bool("allow-pixel-data-mismatch", false, "Allows the pixel data mismatch") ) // FrameBufferSize represents the size of the *Frame buffered channel for streaming calls @@ -57,7 +58,11 @@ func main() { if *extractImagesStream { ds = parseWithStreaming(f, info.Size()) } else { - data, err := dicom.Parse(f, info.Size(), nil) + var opts []dicom.ParseOption + if *allowPixelDataVLMismatch { + opts = append(opts, dicom.AllowMismatchPixelDataLength()) + } + data, err := dicom.Parse(f, info.Size(), nil, opts...) if err != nil { log.Fatalf("error parsing data: %v", err) } @@ -131,7 +136,8 @@ func writeStreamingFrames(frameChan chan *frame.Frame, doneWG *sync.WaitGroup) { func generateImage(fr *frame.Frame, frameIndex int, frameSuffix string, wg *sync.WaitGroup) { i, err := fr.GetImage() if err != nil { - log.Fatal("Error while getting image") + fmt.Printf("Error while getting image: %v\n", err) + return } ext := ".jpg" diff --git a/dataset.go b/dataset.go index 6009f5e0..c31a6ee4 100644 --- a/dataset.go +++ b/dataset.go @@ -23,6 +23,8 @@ var ErrorElementNotFound = errors.New("element not found") // within this Dataset (including Elements nested within Sequences). type Dataset struct { Elements []*Element `json:"elements"` + + opts parseOptSet } // FindElementByTag searches through the dataset and returns a pointer to the matching element. @@ -183,7 +185,7 @@ func (d *Dataset) String() string { b.WriteString(fmt.Sprintf("%s VR: %s\n", tabs, elem.e.ValueRepresentation)) b.WriteString(fmt.Sprintf("%s VR Raw: %s\n", tabs, elem.e.RawValueRepresentation)) b.WriteString(fmt.Sprintf("%s VL: %d\n", tabs, elem.e.ValueLength)) - b.WriteString(fmt.Sprintf("%s Value: %d\n", tabs, elem.e.Value)) + b.WriteString(fmt.Sprintf("%s Value: %s\n", tabs, elem.e.Value.String())) b.WriteString(fmt.Sprintf("%s]\n\n", tabs)) } return b.String() diff --git a/dataset_test.go b/dataset_test.go index 0607e21e..30cf9269 100644 --- a/dataset_test.go +++ b/dataset_test.go @@ -268,7 +268,7 @@ func ExampleDataset_String() { // VR: VRInt32List // VR Raw: UL // VL: 0 - // Value: &{[100]} + // Value: [100] // ] // // [ @@ -277,7 +277,7 @@ func ExampleDataset_String() { // VR: VRInt32List // VR Raw: UL // VL: 0 - // Value: &{[200]} + // Value: [200] // ] } diff --git a/element.go b/element.go index 4732f740..9512ed6f 100644 --- a/element.go +++ b/element.go @@ -296,8 +296,12 @@ func (s *sequencesValue) MarshalJSON() ([]byte, error) { // PixelDataInfo is a representation of DICOM PixelData. type PixelDataInfo struct { - Frames []frame.Frame - IsEncapsulated bool `json:"isEncapsulated"` + Frames []frame.Frame + // ParseErr indicates if there was an error when reading this Frame from the DICOM. + // If this is set, this means fallback behavior was triggered to blindly write the PixelData bytes to an encapsulated frame. + // The ParseErr will contain details about the specific error encountered. + ParseErr error `json:"parseErr"` + IsEncapsulated bool `json:"isEncapsulated"` Offsets []uint32 } @@ -310,8 +314,16 @@ func (e *pixelDataValue) isElementValue() {} func (e *pixelDataValue) ValueType() ValueType { return PixelData } func (e *pixelDataValue) GetValue() interface{} { return e.PixelDataInfo } func (e *pixelDataValue) String() string { - // TODO: consider adding more sophisticated formatting - return "" + if len(e.Frames) == 0 { + return "empty pixel data" + } + if e.IsEncapsulated { + return fmt.Sprintf("encapsulated FramesLength=%d Frame[0] size=%d", len(e.Frames), len(e.Frames[0].EncapsulatedData.Data)) + } + if e.ParseErr != nil { + return fmt.Sprintf("parseErr err=%s FramesLength=%d Frame[0] size=%d", e.ParseErr.Error(), len(e.Frames), len(e.Frames[0].EncapsulatedData.Data)) + } + return fmt.Sprintf("FramesLength=%d FrameSize rows=%d cols=%d", len(e.Frames), e.Frames[0].NativeData.Rows, e.Frames[0].NativeData.Cols) } func (e *pixelDataValue) MarshalJSON() ([]byte, error) { diff --git a/parse.go b/parse.go index 677e63ab..b2fd4b34 100644 --- a/parse.go +++ b/parse.go @@ -50,12 +50,15 @@ var ( // has been fully parsed. Users using one of the other Parse APIs should not // need to use this. ErrorEndOfDICOM = errors.New("this indicates to the caller of Next() that the DICOM has been fully parsed") + + // ErrorMismatchPixelDataLength indicates that the size calculated from DICOM mismatch the VL. + ErrorMismatchPixelDataLength = errors.New("the size calculated from DICOM elements and the PixelData element's VL are mismatched") ) // Parse parses the entire DICOM at the input io.Reader into a Dataset of DICOM Elements. Use this if you are // looking to parse the DICOM all at once, instead of element-by-element. -func Parse(in io.Reader, bytesToRead int64, frameChan chan *frame.Frame) (Dataset, error) { - p, err := NewParser(in, bytesToRead, frameChan) +func Parse(in io.Reader, bytesToRead int64, frameChan chan *frame.Frame, opts ...ParseOption) (Dataset, error) { + p, err := NewParser(in, bytesToRead, frameChan, opts...) if err != nil { return Dataset{}, err } @@ -76,7 +79,7 @@ func Parse(in io.Reader, bytesToRead int64, frameChan chan *frame.Frame) (Datase // ParseFile parses the entire DICOM at the given filepath. See dicom.Parse as // well for a more generic io.Reader based API. -func ParseFile(filepath string, frameChan chan *frame.Frame) (Dataset, error) { +func ParseFile(filepath string, frameChan chan *frame.Frame, opts ...ParseOption) (Dataset, error) { f, err := os.Open(filepath) if err != nil { return Dataset{}, err @@ -88,7 +91,7 @@ func ParseFile(filepath string, frameChan chan *frame.Frame) (Dataset, error) { return Dataset{}, err } - return Parse(f, info.Size(), frameChan) + return Parse(f, info.Size(), frameChan, opts...) } // Parser is a struct that allows a user to parse Elements from a DICOM element-by-element using Next(), which may be @@ -131,7 +134,7 @@ func NewParser(in io.Reader, bytesToRead int64, frameChannel chan *frame.Frame, debug.Log("NewParser: readHeader complete") } - p.dataset = Dataset{Elements: elems} + p.dataset = Dataset{Elements: elems, opts: optSet} // TODO(suyashkumar): avoid storing the metadata pointers twice (though not that expensive) p.metadata = Dataset{Elements: elems} @@ -258,16 +261,24 @@ type ParseOption func(*parseOptSet) // parseOptSet represents the flattened option set after all ParseOptions have been applied. type parseOptSet struct { skipMetadataReadOnNewParserInit bool + allowMismatchPixelDataLength bool } -func toParseOptSet(opts ...ParseOption) *parseOptSet { - optSet := &parseOptSet{} +func toParseOptSet(opts ...ParseOption) parseOptSet { + optSet := parseOptSet{} for _, opt := range opts { - opt(optSet) + opt(&optSet) } return optSet } +// AllowMismatchPixelDataLength allows parser to ignore an error when the length calculated from elements do not match with value length. +func AllowMismatchPixelDataLength() ParseOption { + return func(set *parseOptSet) { + set.allowMismatchPixelDataLength = true + } +} + // SkipMetadataReadOnNewParserInit makes NewParser skip trying to parse metadata. This will make the Parser default to implicit little endian byte order. // Any metatata tags found in the dataset will still be available when parsing. func SkipMetadataReadOnNewParserInit() ParseOption { diff --git a/read.go b/read.go index d42058ed..9e12b123 100644 --- a/read.go +++ b/read.go @@ -110,9 +110,9 @@ func readValue(r dicomio.Reader, t tag.Tag, vr string, vl uint32, isImplicit boo case tag.VRUInt16List, tag.VRUInt32List, tag.VRInt16List, tag.VRInt32List, tag.VRTagList: return readInt(r, t, vr, vl) case tag.VRSequence: - return readSequence(r, t, vr, vl) + return readSequence(r, t, vr, vl, d) case tag.VRItem: - return readSequenceItem(r, t, vr, vl) + return readSequenceItem(r, t, vr, vl, d) case tag.VRPixelData: return readPixelData(r, t, vr, vl, d, fc) case tag.VRFloat32List, tag.VRFloat64List: @@ -221,14 +221,33 @@ func fillBufferSingleBitAllocated(pixelData []int, d dicomio.Reader, bo binary.B return nil } -// readNativeFrames reads NativeData frames from a Decoder based on already parsed pixel information -// that should be available in parsedData (elements like NumberOfFrames, rows, columns, etc) -func readNativeFrames(d dicomio.Reader, parsedData *Dataset, fc chan<- *frame.Frame, vl uint32) (pixelData *PixelDataInfo, - bytesRead int, err error) { +func makeErrorPixelData(reader io.Reader, vl uint32, fc chan<- *frame.Frame, parseErr error) (*PixelDataInfo, error) { + data := make([]byte, vl) + _, err := io.ReadFull(reader, data) + if err != nil { + return nil, fmt.Errorf("makeErrorPixelData: read pixelData: %w", err) + } + + f := frame.Frame{ + EncapsulatedData: frame.EncapsulatedFrame{ + Data: data, + }, + } + + if fc != nil { + fc <- &f + } image := PixelDataInfo{ - IsEncapsulated: false, + ParseErr: parseErr, + Frames: []frame.Frame{f}, } + return &image, nil +} +// readNativeFrames reads NativeData frames from a Decoder based on already parsed pixel information +// that should be available in parsedData (elements like NumberOfFrames, rows, columns, etc) +func readNativeFrames(d dicomio.Reader, parsedData *Dataset, fc chan<- *frame.Frame, vl uint32) (pixelData *PixelDataInfo, + bytesToRead int, err error) { // Parse information from previously parsed attributes that are needed to parse NativeData Frames: rows, err := parsedData.FindElementByTag(tag.Rows) if err != nil { @@ -269,10 +288,38 @@ func readNativeFrames(d dicomio.Reader, parsedData *Dataset, fc chan<- *frame.Fr debug.Logf("readNativeFrames:\nRows: %d\nCols:%d\nFrames::%d\nBitsAlloc:%d\nSamplesPerPixel:%d", MustGetInts(rows.Value)[0], MustGetInts(cols.Value)[0], nFrames, bitsAllocated, samplesPerPixel) + bytesAllocated := bitsAllocated / 8 + bytesToRead = bytesAllocated * samplesPerPixel * pixelsPerFrame * nFrames + if bitsAllocated == 1 { + bytesToRead = pixelsPerFrame * samplesPerPixel / 8 * nFrames + } + + skipFinalPaddingByte := false + if uint32(bytesToRead) != vl { + switch { + case uint32(bytesToRead) == vl-1 && vl%2 == 0: + skipFinalPaddingByte = true + case uint32(bytesToRead) == vl-1 && vl%2 != 0: + return nil, 0, fmt.Errorf("vl=%d: %w", vl, ErrorExpectedEvenLength) + default: + // calculated bytesToRead and actual VL mismatch + if !parsedData.opts.allowMismatchPixelDataLength { + return nil, 0, fmt.Errorf("expected_vl=%d actual_vl=%d %w", bytesToRead, vl, ErrorMismatchPixelDataLength) + } + image, err := makeErrorPixelData(d, vl, fc, ErrorMismatchPixelDataLength) + if err != nil { + return nil, 0, err + } + return image, int(vl), nil + } + } + // Parse the pixels: + image := PixelDataInfo{ + IsEncapsulated: false, + } image.Frames = make([]frame.Frame, nFrames) bo := d.ByteOrder() - bytesAllocated := bitsAllocated / 8 pixelBuf := make([]byte, bytesAllocated) for frameIdx := 0; frameIdx < nFrames; frameIdx++ { // Init current frame @@ -282,28 +329,27 @@ func readNativeFrames(d dicomio.Reader, parsedData *Dataset, fc chan<- *frame.Fr BitsPerSample: bitsAllocated, Rows: MustGetInts(rows.Value)[0], Cols: MustGetInts(cols.Value)[0], - Data: make([][]int, int(pixelsPerFrame)), + Data: make([][]int, pixelsPerFrame), }, } - buf := make([]int, int(pixelsPerFrame)*samplesPerPixel) + buf := make([]int, pixelsPerFrame*samplesPerPixel) if bitsAllocated == 1 { if err := fillBufferSingleBitAllocated(buf, d, bo); err != nil { - return nil, bytesRead, err + return nil, bytesToRead, err } - for pixel := 0; pixel < int(pixelsPerFrame); pixel++ { + for pixel := 0; pixel < pixelsPerFrame; pixel++ { for value := 0; value < samplesPerPixel; value++ { currentFrame.NativeData.Data[pixel] = buf[pixel*samplesPerPixel : (pixel+1)*samplesPerPixel] } } } else { - for pixel := 0; pixel < int(pixelsPerFrame); pixel++ { + for pixel := 0; pixel < pixelsPerFrame; pixel++ { for value := 0; value < samplesPerPixel; value++ { _, err := io.ReadFull(d, pixelBuf) if err != nil { - return nil, bytesRead, + return nil, bytesToRead, fmt.Errorf("could not read uint%d from input: %w", bitsAllocated, err) } - if bitsAllocated == 8 { buf[(pixel*samplesPerPixel)+value] = int(pixelBuf[0]) } else if bitsAllocated == 16 { @@ -311,7 +357,7 @@ func readNativeFrames(d dicomio.Reader, parsedData *Dataset, fc chan<- *frame.Fr } else if bitsAllocated == 32 { buf[(pixel*samplesPerPixel)+value] = int(bo.Uint32(pixelBuf)) } else { - return nil, bytesRead, fmt.Errorf("unsupported BitsAllocated value of: %d : %w", bitsAllocated, ErrorUnsupportedBitsAllocated) + return nil, bytesToRead, fmt.Errorf("bitsAllocated=%d : %w", bitsAllocated, ErrorUnsupportedBitsAllocated) } } currentFrame.NativeData.Data[pixel] = buf[pixel*samplesPerPixel : (pixel+1)*samplesPerPixel] @@ -322,31 +368,26 @@ func readNativeFrames(d dicomio.Reader, parsedData *Dataset, fc chan<- *frame.Fr fc <- ¤tFrame // write the current frame to the frame channel } } - - bytesRead = bytesAllocated * samplesPerPixel * pixelsPerFrame * nFrames - if vl > 0 && uint32(bytesRead) == vl-1 { - if vl%2 != 0 { - // this error should never happen if the file conforms to the DICOM spec - return nil, bytesRead, fmt.Errorf("odd number of bytes specified for PixelData violates DICOM spec: %d : %w", vl, ErrorExpectedEvenLength) - } + if skipFinalPaddingByte { err := d.Skip(1) if err != nil { - return nil, bytesRead, fmt.Errorf("could not read padding byte: %w", err) + return nil, bytesToRead, fmt.Errorf("could not read padding byte: %w", err) } - bytesRead++ + bytesToRead++ } - return &image, bytesRead, nil + return &image, bytesToRead, nil } // readSequence reads a sequence element (VR = SQ) that contains a subset of Items. Each item contains // a set of Elements. // See http://dicom.nema.org/medical/dicom/current/output/chtml/part05/sect_7.5.2.html#table_7.5-1 -func readSequence(r dicomio.Reader, t tag.Tag, vr string, vl uint32) (Value, error) { +func readSequence(r dicomio.Reader, t tag.Tag, vr string, vl uint32, d *Dataset) (Value, error) { var sequences sequencesValue + seqElements := &Dataset{opts: d.opts} if vl == tag.VLUndefinedLength { for { - subElement, err := readElement(r, nil, nil) + subElement, err := readElement(r, seqElements, nil) if err != nil { // Stop reading due to error log.Println("error reading subitem, ", err) @@ -373,7 +414,7 @@ func readSequence(r dicomio.Reader, t tag.Tag, vr string, vl uint32) (Value, err return nil, err } for !r.IsLimitExhausted() { - subElement, err := readElement(r, nil, nil) + subElement, err := readElement(r, seqElements, nil) if err != nil { // TODO: option to ignore errors parsing subelements? return nil, err @@ -390,12 +431,12 @@ func readSequence(r dicomio.Reader, t tag.Tag, vr string, vl uint32) (Value, err // readSequenceItem reads an item component of a sequence dicom element and returns an Element // with a SequenceItem value. -func readSequenceItem(r dicomio.Reader, t tag.Tag, vr string, vl uint32) (Value, error) { +func readSequenceItem(r dicomio.Reader, t tag.Tag, vr string, vl uint32, d *Dataset) (Value, error) { var sequenceItem SequenceItemValue // seqElements holds items read so far. // TODO: deduplicate with sequenceItem above - var seqElements Dataset + seqElements := Dataset{opts: d.opts} if vl == tag.VLUndefinedLength { for { diff --git a/read_test.go b/read_test.go index 563d589d..895ddd22 100644 --- a/read_test.go +++ b/read_test.go @@ -5,17 +5,19 @@ import ( "bytes" "encoding/binary" "errors" - "io" "math/rand" "strconv" "testing" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/suyashkumar/dicom/pkg/vrraw" "github.com/suyashkumar/dicom/pkg/dicomio" "github.com/suyashkumar/dicom/pkg/frame" "github.com/google/go-cmp/cmp" + "github.com/suyashkumar/dicom/pkg/tag" ) @@ -339,7 +341,42 @@ func TestReadNativeFrames(t *testing.T) { }}, data: []uint16{1, 2, 3, 2, 1, 2, 3, 2, 1, 2, 3, 2, 1, 2, 3}, expectedPixelData: nil, - expectedError: io.ErrUnexpectedEOF, + expectedError: ErrorMismatchPixelDataLength, + }, + { + Name: "redundant bytes, uint32", + existingData: Dataset{Elements: []*Element{ + mustNewElement(tag.Rows, []int{2}), + mustNewElement(tag.Columns, []int{2}), + mustNewElement(tag.NumberOfFrames, []string{"1"}), + mustNewElement(tag.BitsAllocated, []int{32}), + mustNewElement(tag.SamplesPerPixel, []int{2}), + }}, + data: []uint16{1, 2, 3, 2, 1, 2, 3, 2, 1, 2, 3, 2, 1, 2, 3, 2, 2}, + expectedPixelData: nil, + expectedError: ErrorMismatchPixelDataLength, + }, + { + Name: "redundant bytes, uint32 with allowing mismatch length", + existingData: Dataset{Elements: []*Element{ + mustNewElement(tag.Rows, []int{2}), + mustNewElement(tag.Columns, []int{2}), + mustNewElement(tag.NumberOfFrames, []string{"1"}), + mustNewElement(tag.BitsAllocated, []int{32}), + mustNewElement(tag.SamplesPerPixel, []int{2}), + }, opts: parseOptSet{allowMismatchPixelDataLength: true}}, + data: []uint16{1, 2, 3, 2, 1, 2, 3, 2, 1, 2, 3, 2, 1, 2, 3, 2, 2}, + expectedPixelData: &PixelDataInfo{ + ParseErr: ErrorMismatchPixelDataLength, + Frames: []frame.Frame{ + { + EncapsulatedData: frame.EncapsulatedFrame{ + Data: []byte{1, 0, 2, 0, 3, 0, 2, 0, 1, 0, 2, 0, 3, 0, 2, 0, 1, 0, 2, 0, 3, 0, 2, 0, 1, 0, 2, 0, 3, 0, 2, 0, 2, 0}, + }, + }, + }, + }, + expectedError: nil, }, { Name: "missing Columns", @@ -359,10 +396,10 @@ func TestReadNativeFrames(t *testing.T) { mustNewElement(tag.Rows, []int{5}), mustNewElement(tag.Columns, []int{2}), mustNewElement(tag.NumberOfFrames, []string{"1"}), - mustNewElement(tag.BitsAllocated, []int{2}), + mustNewElement(tag.BitsAllocated, []int{24}), mustNewElement(tag.SamplesPerPixel, []int{1}), }}, - data: []uint16{1, 2, 3, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + data: []uint16{1, 2, 3, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, expectedPixelData: nil, expectedError: ErrorUnsupportedBitsAllocated, }, @@ -513,7 +550,7 @@ func TestReadNativeFrames(t *testing.T) { t.Errorf("TestReadNativeFrames(%v): did not read expected number of bytes. got: %d, want: %d", tc.data, bytesRead, expectedBytes) } - if diff := cmp.Diff(tc.expectedPixelData, pixelData); diff != "" { + if diff := cmp.Diff(tc.expectedPixelData, pixelData, cmpopts.EquateErrors()); diff != "" { t.Errorf("TestReadNativeFrames(%v): unexpected diff: %v", tc.data, diff) } })