Skip to content

Commit

Permalink
Allow option to tolerate PixelData VL and expected length mismatches. (
Browse files Browse the repository at this point in the history
…#245)

This change introduces a ParseOption that allows PixelData element VL and expected lengths to mismatch without returning an error. If the given ParseOption is set, the PixelData with the aforementioned mismatch will have its raw bytes read, dropped into an encapsulated Frame, and have that Frame wrapped in a PixelDataInfo with a ParseErr property set to an error describing this scenario. 

(Commit description/summary set by suyashkumar@).

Co-authored-by: quang18 <[email protected]>
  • Loading branch information
ducquangkstn and quang18 authored Sep 7, 2022
1 parent 0528e8c commit 58fd583
Show file tree
Hide file tree
Showing 7 changed files with 166 additions and 57 deletions.
18 changes: 12 additions & 6 deletions cmd/dicomutil/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,11 @@ import (
var GitVersion = "unknown"

var (
version = flag.Bool("version", false, "print current version and exit")
filepath = flag.String("path", "", "path")
extractImagesStream = flag.Bool("extract-images-stream", false, "Extract images using frame streaming capability")
printJSON = flag.Bool("json", false, "Print dataset as JSON")
version = flag.Bool("version", false, "print current version and exit")
filepath = flag.String("path", "", "path")
extractImagesStream = flag.Bool("extract-images-stream", false, "Extract images using frame streaming capability")
printJSON = flag.Bool("json", false, "Print dataset as JSON")
allowPixelDataVLMismatch = flag.Bool("allow-pixel-data-mismatch", false, "Allows the pixel data mismatch")
)

// FrameBufferSize represents the size of the *Frame buffered channel for streaming calls
Expand Down Expand Up @@ -57,7 +58,11 @@ func main() {
if *extractImagesStream {
ds = parseWithStreaming(f, info.Size())
} else {
data, err := dicom.Parse(f, info.Size(), nil)
var opts []dicom.ParseOption
if *allowPixelDataVLMismatch {
opts = append(opts, dicom.AllowMismatchPixelDataLength())
}
data, err := dicom.Parse(f, info.Size(), nil, opts...)
if err != nil {
log.Fatalf("error parsing data: %v", err)
}
Expand Down Expand Up @@ -131,7 +136,8 @@ func writeStreamingFrames(frameChan chan *frame.Frame, doneWG *sync.WaitGroup) {
func generateImage(fr *frame.Frame, frameIndex int, frameSuffix string, wg *sync.WaitGroup) {
i, err := fr.GetImage()
if err != nil {
log.Fatal("Error while getting image")
fmt.Printf("Error while getting image: %v\n", err)
return
}

ext := ".jpg"
Expand Down
4 changes: 3 additions & 1 deletion dataset.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ var ErrorElementNotFound = errors.New("element not found")
// within this Dataset (including Elements nested within Sequences).
type Dataset struct {
Elements []*Element `json:"elements"`

opts parseOptSet
}

// FindElementByTag searches through the dataset and returns a pointer to the matching element.
Expand Down Expand Up @@ -183,7 +185,7 @@ func (d *Dataset) String() string {
b.WriteString(fmt.Sprintf("%s VR: %s\n", tabs, elem.e.ValueRepresentation))
b.WriteString(fmt.Sprintf("%s VR Raw: %s\n", tabs, elem.e.RawValueRepresentation))
b.WriteString(fmt.Sprintf("%s VL: %d\n", tabs, elem.e.ValueLength))
b.WriteString(fmt.Sprintf("%s Value: %d\n", tabs, elem.e.Value))
b.WriteString(fmt.Sprintf("%s Value: %s\n", tabs, elem.e.Value.String()))
b.WriteString(fmt.Sprintf("%s]\n\n", tabs))
}
return b.String()
Expand Down
4 changes: 2 additions & 2 deletions dataset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ func ExampleDataset_String() {
// VR: VRInt32List
// VR Raw: UL
// VL: 0
// Value: &{[100]}
// Value: [100]
// ]
//
// [
Expand All @@ -277,7 +277,7 @@ func ExampleDataset_String() {
// VR: VRInt32List
// VR Raw: UL
// VL: 0
// Value: &{[200]}
// Value: [200]
// ]

}
20 changes: 16 additions & 4 deletions element.go
Original file line number Diff line number Diff line change
Expand Up @@ -296,8 +296,12 @@ func (s *sequencesValue) MarshalJSON() ([]byte, error) {

// PixelDataInfo is a representation of DICOM PixelData.
type PixelDataInfo struct {
Frames []frame.Frame
IsEncapsulated bool `json:"isEncapsulated"`
Frames []frame.Frame
// ParseErr indicates if there was an error when reading this Frame from the DICOM.
// If this is set, this means fallback behavior was triggered to blindly write the PixelData bytes to an encapsulated frame.
// The ParseErr will contain details about the specific error encountered.
ParseErr error `json:"parseErr"`
IsEncapsulated bool `json:"isEncapsulated"`
Offsets []uint32
}

Expand All @@ -310,8 +314,16 @@ func (e *pixelDataValue) isElementValue() {}
func (e *pixelDataValue) ValueType() ValueType { return PixelData }
func (e *pixelDataValue) GetValue() interface{} { return e.PixelDataInfo }
func (e *pixelDataValue) String() string {
// TODO: consider adding more sophisticated formatting
return ""
if len(e.Frames) == 0 {
return "empty pixel data"
}
if e.IsEncapsulated {
return fmt.Sprintf("encapsulated FramesLength=%d Frame[0] size=%d", len(e.Frames), len(e.Frames[0].EncapsulatedData.Data))
}
if e.ParseErr != nil {
return fmt.Sprintf("parseErr err=%s FramesLength=%d Frame[0] size=%d", e.ParseErr.Error(), len(e.Frames), len(e.Frames[0].EncapsulatedData.Data))
}
return fmt.Sprintf("FramesLength=%d FrameSize rows=%d cols=%d", len(e.Frames), e.Frames[0].NativeData.Rows, e.Frames[0].NativeData.Cols)
}

func (e *pixelDataValue) MarshalJSON() ([]byte, error) {
Expand Down
27 changes: 19 additions & 8 deletions parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,15 @@ var (
// has been fully parsed. Users using one of the other Parse APIs should not
// need to use this.
ErrorEndOfDICOM = errors.New("this indicates to the caller of Next() that the DICOM has been fully parsed")

// ErrorMismatchPixelDataLength indicates that the size calculated from DICOM mismatch the VL.
ErrorMismatchPixelDataLength = errors.New("the size calculated from DICOM elements and the PixelData element's VL are mismatched")
)

// Parse parses the entire DICOM at the input io.Reader into a Dataset of DICOM Elements. Use this if you are
// looking to parse the DICOM all at once, instead of element-by-element.
func Parse(in io.Reader, bytesToRead int64, frameChan chan *frame.Frame) (Dataset, error) {
p, err := NewParser(in, bytesToRead, frameChan)
func Parse(in io.Reader, bytesToRead int64, frameChan chan *frame.Frame, opts ...ParseOption) (Dataset, error) {
p, err := NewParser(in, bytesToRead, frameChan, opts...)
if err != nil {
return Dataset{}, err
}
Expand All @@ -76,7 +79,7 @@ func Parse(in io.Reader, bytesToRead int64, frameChan chan *frame.Frame) (Datase

// ParseFile parses the entire DICOM at the given filepath. See dicom.Parse as
// well for a more generic io.Reader based API.
func ParseFile(filepath string, frameChan chan *frame.Frame) (Dataset, error) {
func ParseFile(filepath string, frameChan chan *frame.Frame, opts ...ParseOption) (Dataset, error) {
f, err := os.Open(filepath)
if err != nil {
return Dataset{}, err
Expand All @@ -88,7 +91,7 @@ func ParseFile(filepath string, frameChan chan *frame.Frame) (Dataset, error) {
return Dataset{}, err
}

return Parse(f, info.Size(), frameChan)
return Parse(f, info.Size(), frameChan, opts...)
}

// Parser is a struct that allows a user to parse Elements from a DICOM element-by-element using Next(), which may be
Expand Down Expand Up @@ -131,7 +134,7 @@ func NewParser(in io.Reader, bytesToRead int64, frameChannel chan *frame.Frame,
debug.Log("NewParser: readHeader complete")
}

p.dataset = Dataset{Elements: elems}
p.dataset = Dataset{Elements: elems, opts: optSet}
// TODO(suyashkumar): avoid storing the metadata pointers twice (though not that expensive)
p.metadata = Dataset{Elements: elems}

Expand Down Expand Up @@ -258,16 +261,24 @@ type ParseOption func(*parseOptSet)
// parseOptSet represents the flattened option set after all ParseOptions have been applied.
type parseOptSet struct {
skipMetadataReadOnNewParserInit bool
allowMismatchPixelDataLength bool
}

func toParseOptSet(opts ...ParseOption) *parseOptSet {
optSet := &parseOptSet{}
func toParseOptSet(opts ...ParseOption) parseOptSet {
optSet := parseOptSet{}
for _, opt := range opts {
opt(optSet)
opt(&optSet)
}
return optSet
}

// AllowMismatchPixelDataLength allows parser to ignore an error when the length calculated from elements do not match with value length.
func AllowMismatchPixelDataLength() ParseOption {
return func(set *parseOptSet) {
set.allowMismatchPixelDataLength = true
}
}

// SkipMetadataReadOnNewParserInit makes NewParser skip trying to parse metadata. This will make the Parser default to implicit little endian byte order.
// Any metatata tags found in the dataset will still be available when parsing.
func SkipMetadataReadOnNewParserInit() ParseOption {
Expand Down
103 changes: 72 additions & 31 deletions read.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,9 @@ func readValue(r dicomio.Reader, t tag.Tag, vr string, vl uint32, isImplicit boo
case tag.VRUInt16List, tag.VRUInt32List, tag.VRInt16List, tag.VRInt32List, tag.VRTagList:
return readInt(r, t, vr, vl)
case tag.VRSequence:
return readSequence(r, t, vr, vl)
return readSequence(r, t, vr, vl, d)
case tag.VRItem:
return readSequenceItem(r, t, vr, vl)
return readSequenceItem(r, t, vr, vl, d)
case tag.VRPixelData:
return readPixelData(r, t, vr, vl, d, fc)
case tag.VRFloat32List, tag.VRFloat64List:
Expand Down Expand Up @@ -221,14 +221,33 @@ func fillBufferSingleBitAllocated(pixelData []int, d dicomio.Reader, bo binary.B
return nil
}

// readNativeFrames reads NativeData frames from a Decoder based on already parsed pixel information
// that should be available in parsedData (elements like NumberOfFrames, rows, columns, etc)
func readNativeFrames(d dicomio.Reader, parsedData *Dataset, fc chan<- *frame.Frame, vl uint32) (pixelData *PixelDataInfo,
bytesRead int, err error) {
func makeErrorPixelData(reader io.Reader, vl uint32, fc chan<- *frame.Frame, parseErr error) (*PixelDataInfo, error) {
data := make([]byte, vl)
_, err := io.ReadFull(reader, data)
if err != nil {
return nil, fmt.Errorf("makeErrorPixelData: read pixelData: %w", err)
}

f := frame.Frame{
EncapsulatedData: frame.EncapsulatedFrame{
Data: data,
},
}

if fc != nil {
fc <- &f
}
image := PixelDataInfo{
IsEncapsulated: false,
ParseErr: parseErr,
Frames: []frame.Frame{f},
}
return &image, nil
}

// readNativeFrames reads NativeData frames from a Decoder based on already parsed pixel information
// that should be available in parsedData (elements like NumberOfFrames, rows, columns, etc)
func readNativeFrames(d dicomio.Reader, parsedData *Dataset, fc chan<- *frame.Frame, vl uint32) (pixelData *PixelDataInfo,
bytesToRead int, err error) {
// Parse information from previously parsed attributes that are needed to parse NativeData Frames:
rows, err := parsedData.FindElementByTag(tag.Rows)
if err != nil {
Expand Down Expand Up @@ -269,10 +288,38 @@ func readNativeFrames(d dicomio.Reader, parsedData *Dataset, fc chan<- *frame.Fr

debug.Logf("readNativeFrames:\nRows: %d\nCols:%d\nFrames::%d\nBitsAlloc:%d\nSamplesPerPixel:%d", MustGetInts(rows.Value)[0], MustGetInts(cols.Value)[0], nFrames, bitsAllocated, samplesPerPixel)

bytesAllocated := bitsAllocated / 8
bytesToRead = bytesAllocated * samplesPerPixel * pixelsPerFrame * nFrames
if bitsAllocated == 1 {
bytesToRead = pixelsPerFrame * samplesPerPixel / 8 * nFrames
}

skipFinalPaddingByte := false
if uint32(bytesToRead) != vl {
switch {
case uint32(bytesToRead) == vl-1 && vl%2 == 0:
skipFinalPaddingByte = true
case uint32(bytesToRead) == vl-1 && vl%2 != 0:
return nil, 0, fmt.Errorf("vl=%d: %w", vl, ErrorExpectedEvenLength)
default:
// calculated bytesToRead and actual VL mismatch
if !parsedData.opts.allowMismatchPixelDataLength {
return nil, 0, fmt.Errorf("expected_vl=%d actual_vl=%d %w", bytesToRead, vl, ErrorMismatchPixelDataLength)
}
image, err := makeErrorPixelData(d, vl, fc, ErrorMismatchPixelDataLength)
if err != nil {
return nil, 0, err
}
return image, int(vl), nil
}
}

// Parse the pixels:
image := PixelDataInfo{
IsEncapsulated: false,
}
image.Frames = make([]frame.Frame, nFrames)
bo := d.ByteOrder()
bytesAllocated := bitsAllocated / 8
pixelBuf := make([]byte, bytesAllocated)
for frameIdx := 0; frameIdx < nFrames; frameIdx++ {
// Init current frame
Expand All @@ -282,36 +329,35 @@ func readNativeFrames(d dicomio.Reader, parsedData *Dataset, fc chan<- *frame.Fr
BitsPerSample: bitsAllocated,
Rows: MustGetInts(rows.Value)[0],
Cols: MustGetInts(cols.Value)[0],
Data: make([][]int, int(pixelsPerFrame)),
Data: make([][]int, pixelsPerFrame),
},
}
buf := make([]int, int(pixelsPerFrame)*samplesPerPixel)
buf := make([]int, pixelsPerFrame*samplesPerPixel)
if bitsAllocated == 1 {
if err := fillBufferSingleBitAllocated(buf, d, bo); err != nil {
return nil, bytesRead, err
return nil, bytesToRead, err
}
for pixel := 0; pixel < int(pixelsPerFrame); pixel++ {
for pixel := 0; pixel < pixelsPerFrame; pixel++ {
for value := 0; value < samplesPerPixel; value++ {
currentFrame.NativeData.Data[pixel] = buf[pixel*samplesPerPixel : (pixel+1)*samplesPerPixel]
}
}
} else {
for pixel := 0; pixel < int(pixelsPerFrame); pixel++ {
for pixel := 0; pixel < pixelsPerFrame; pixel++ {
for value := 0; value < samplesPerPixel; value++ {
_, err := io.ReadFull(d, pixelBuf)
if err != nil {
return nil, bytesRead,
return nil, bytesToRead,
fmt.Errorf("could not read uint%d from input: %w", bitsAllocated, err)
}

if bitsAllocated == 8 {
buf[(pixel*samplesPerPixel)+value] = int(pixelBuf[0])
} else if bitsAllocated == 16 {
buf[(pixel*samplesPerPixel)+value] = int(bo.Uint16(pixelBuf))
} else if bitsAllocated == 32 {
buf[(pixel*samplesPerPixel)+value] = int(bo.Uint32(pixelBuf))
} else {
return nil, bytesRead, fmt.Errorf("unsupported BitsAllocated value of: %d : %w", bitsAllocated, ErrorUnsupportedBitsAllocated)
return nil, bytesToRead, fmt.Errorf("bitsAllocated=%d : %w", bitsAllocated, ErrorUnsupportedBitsAllocated)
}
}
currentFrame.NativeData.Data[pixel] = buf[pixel*samplesPerPixel : (pixel+1)*samplesPerPixel]
Expand All @@ -322,31 +368,26 @@ func readNativeFrames(d dicomio.Reader, parsedData *Dataset, fc chan<- *frame.Fr
fc <- &currentFrame // write the current frame to the frame channel
}
}

bytesRead = bytesAllocated * samplesPerPixel * pixelsPerFrame * nFrames
if vl > 0 && uint32(bytesRead) == vl-1 {
if vl%2 != 0 {
// this error should never happen if the file conforms to the DICOM spec
return nil, bytesRead, fmt.Errorf("odd number of bytes specified for PixelData violates DICOM spec: %d : %w", vl, ErrorExpectedEvenLength)
}
if skipFinalPaddingByte {
err := d.Skip(1)
if err != nil {
return nil, bytesRead, fmt.Errorf("could not read padding byte: %w", err)
return nil, bytesToRead, fmt.Errorf("could not read padding byte: %w", err)
}
bytesRead++
bytesToRead++
}
return &image, bytesRead, nil
return &image, bytesToRead, nil
}

// readSequence reads a sequence element (VR = SQ) that contains a subset of Items. Each item contains
// a set of Elements.
// See http://dicom.nema.org/medical/dicom/current/output/chtml/part05/sect_7.5.2.html#table_7.5-1
func readSequence(r dicomio.Reader, t tag.Tag, vr string, vl uint32) (Value, error) {
func readSequence(r dicomio.Reader, t tag.Tag, vr string, vl uint32, d *Dataset) (Value, error) {
var sequences sequencesValue

seqElements := &Dataset{opts: d.opts}
if vl == tag.VLUndefinedLength {
for {
subElement, err := readElement(r, nil, nil)
subElement, err := readElement(r, seqElements, nil)
if err != nil {
// Stop reading due to error
log.Println("error reading subitem, ", err)
Expand All @@ -373,7 +414,7 @@ func readSequence(r dicomio.Reader, t tag.Tag, vr string, vl uint32) (Value, err
return nil, err
}
for !r.IsLimitExhausted() {
subElement, err := readElement(r, nil, nil)
subElement, err := readElement(r, seqElements, nil)
if err != nil {
// TODO: option to ignore errors parsing subelements?
return nil, err
Expand All @@ -390,12 +431,12 @@ func readSequence(r dicomio.Reader, t tag.Tag, vr string, vl uint32) (Value, err

// readSequenceItem reads an item component of a sequence dicom element and returns an Element
// with a SequenceItem value.
func readSequenceItem(r dicomio.Reader, t tag.Tag, vr string, vl uint32) (Value, error) {
func readSequenceItem(r dicomio.Reader, t tag.Tag, vr string, vl uint32, d *Dataset) (Value, error) {
var sequenceItem SequenceItemValue

// seqElements holds items read so far.
// TODO: deduplicate with sequenceItem above
var seqElements Dataset
seqElements := Dataset{opts: d.opts}

if vl == tag.VLUndefinedLength {
for {
Expand Down
Loading

0 comments on commit 58fd583

Please sign in to comment.