Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow BioTek plates to be split across multiple subdirectories #223

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
144 changes: 129 additions & 15 deletions src/main/java/com/glencoesoftware/bioformats2raw/BioTekReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ public class BioTekReader extends FormatReader {
private static final String TIFF_REGEX_Z =
WELL_REGEX + "_.+\\[(.+)_" +
ALPHANUM + "\\]_(\\d+)_(\\d+)_([0-9-]+)?" + SUFFIX;
private static final String TIFF_REGEX_ROI =
"([A-Z]{1,2})(\\d{1,2})ROI(\\d+)_(\\d+)_(\\d+)_(\\d+)(Z(\\d+))?_" +
ALPHANUM + "_(-?\\d+)" + SUFFIX;
private static final String DATE_FORMAT = "MM/dd/yy HH:mm:ss";

// -- Fields --
Expand Down Expand Up @@ -211,22 +214,67 @@ protected void initFile(String id) throws FormatException, IOException {
findXPTFiles(parent);

String[] files = parent.list(true);
for (int i=0; i<files.length; i++) {
files[i] = new Location(parent, files[i]).getAbsolutePath();
}
Arrays.sort(files);

// is there only one well in the directory?
// compare the well identifiers (relative file name up to first _)
boolean sameWell = true;
int endIndex = files[0].indexOf("_");
if (endIndex > 0) {
String wellCheck = files[0].substring(0, endIndex);
LOGGER.debug("well check string = {}", wellCheck);
for (int i=0; i<files.length; i++) {
if (!files[i].startsWith(wellCheck)) {
sameWell = false;
break;
}
}
}
LOGGER.debug("single well in {}: {}", parent, sameWell);
// if only one well exists, look in other subdirectories of the parent
if (sameWell) {
Location plateDir = parent.getParentFile();
LOGGER.debug("plate directory = {}", plateDir);
String[] wellDirs = plateDir.list(true);
ArrayList<String> allFiles = new ArrayList<String>();
for (String well : wellDirs) {
Location wellDir = new Location(plateDir, well).getAbsoluteFile();
LOGGER.debug("looking in well directory = {}", wellDir);
String[] f = wellDir.list(true);
for (String file : f) {
LOGGER.debug(" adding well file {}", file);
allFiles.add(new Location(wellDir, file).getAbsolutePath());
}
}
LOGGER.debug("found files = {}", allFiles);
files = allFiles.toArray(new String[allFiles.size()]);
Arrays.sort(files);
}

Pattern regexA = Pattern.compile(TIFF_REGEX_A);
Pattern regexB = Pattern.compile(TIFF_REGEX_B);
Pattern regexZ = Pattern.compile(TIFF_REGEX_Z);
Pattern regexROI = Pattern.compile(TIFF_REGEX_ROI);
ArrayList<WellIndex> validWellRowCol = new ArrayList<WellIndex>();
int maxRow = 0;
int minRow = Integer.MAX_VALUE;
int maxCol = 0;
int minCol = Integer.MAX_VALUE;
int maxPlateAcq = 0;
Map<Integer, Integer> maxField = new HashMap<Integer, Integer>();

for (String f : files) {
int matchingROI = -1;
String matchingPath = new Location(currentId).getAbsolutePath();
LOGGER.trace("matching path = {}", matchingPath);

for (String absolutePath : files) {
String f = new Location(absolutePath).getName();
Matcher m = regexA.matcher(f);
int rowIndex = -1;
int colIndex = -1;
int fieldIndex = -1;
int roiIndex = -1;
int z = 0;
int t = 0;
String channelName = "";
Expand Down Expand Up @@ -261,28 +309,57 @@ protected void initFile(String id) throws FormatException, IOException {
t = (int) Math.max(0, Integer.parseInt(m.group(8)) - 1);
channelName += m.group(6);
}
else {
m = regexROI.matcher(f);
if (m.matches()) {
rowIndex = getWellRow(m.group(1));
colIndex = Integer.parseInt(m.group(2)) - 1;
roiIndex = Integer.parseInt(m.group(3)) - 1;

LOGGER.trace("absolutePath = {}, roiIndex = {}",
absolutePath, roiIndex);
if (matchingROI < 0 && absolutePath.equals(matchingPath)) {
matchingROI = roiIndex;
LOGGER.trace("matchingROI = {}, absolutePath = {}",
matchingROI, absolutePath);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This condition is only true for the first ROI file that matches, so the other ROI fields (Cytation beacons) never match; even when there are many ROI fields, only the first one appears in the final file.
I could work around this by commenting out these lines and assigning roiIndex to fieldIndex:

Suggested change
if (matchingROI < 0 && absolutePath.equals(matchingPath)) {
matchingROI = roiIndex;
LOGGER.trace("matchingROI = {}, absolutePath = {}",
matchingROI, absolutePath);
fieldIndex = roiIndex;

and by removing the `&& matchingROI == roiIndex` condition check.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the comments, @wapablos. If you're testing with a particular BioTek dataset, would you be able to share just the file names so we can compare against our existing data?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tested some file sets based on the default Cytation output, before the code change:

(Good) Set 1 (Default filename - cytation montage export) - Detect all wells, channels and fields
A3_01_1_1_DAPI_001.tif
A3_01_1_2_DAPI_001.tif
A3_01_1_3_DAPI_001.tif
A3_01_1_4_DAPI_001.tif
A3_01_2_1_GFP_001.tif
A3_01_2_2_GFP_001.tif
A3_01_2_3_GFP_001.tif
A3_01_2_4_GFP_001.tif
A3_01_3_1_CY5_001.tif
A3_01_3_2_CY5_001.tif
A3_01_3_3_CY5_001.tif
A3_01_3_4_CY5_001.tif
A11_01_1_1_DAPI_001.tif
A11_01_1_2_DAPI_001.tif
A11_01_1_3_DAPI_001.tif
A11_01_1_4_DAPI_001.tif
A11_01_2_1_GFP_001.tif
A11_01_2_2_GFP_001.tif
A11_01_2_3_GFP_001.tif
A11_01_2_4_GFP_001.tif
A11_01_3_1_CY5_001.tif
A11_01_3_2_CY5_001.tif
A11_01_3_3_CY5_001.tif
A11_01_3_4_CY5_001.tif

(Good) Set 2 (Split folder Image Multimode (IMM)) - Detect all folders, wells, channels and fields
230920_155559_B1 10X PL FL/B1_-1_1_1_DAPI_001.tif
230920_155559_B2 10X PL FL/B2_-1_1_1_DAPI_001.tif
230920_155559_B2 10X PL FL/B2_-1_2_1_CY5_001.tif
230920_155559_D3 10X PL FL/D3_-1_1_1_DAPI_001.tif
230920_155559_D3 10X PL FL/D3_-1_1_2_DAPI_001.tif
230920_155559_D3 10X PL FL/D3_-1_1_3_DAPI_001.tif
230920_155559_D3 10X PL FL/D3_-1_2_1_GFP_001.tif
230920_155559_D3 10X PL FL/D3_-1_2_2_GFP_001.tif
230920_155559_D3 10X PL FL/D3_-1_2_3_GFP_001.tif
230920_155559_D3 10X PL FL/D3_-1_3_1_CY5_001.tif
230920_155559_D3 10X PL FL/D3_-1_3_2_CY5_001.tif
230920_155559_D3 10X PL FL/D3_-1_3_3_CY5_001.tif
230920_161057_I3 40X PL FL/I3_-1_1_1_DAPI_001.tif
230920_161057_I3 40X PL FL/I3_-1_2_1_GFP_001.tif
230920_161057_I3 40X PL FL/I3_-1_3_1_CY5_001.tif
230920_161057_J3 60X PL FL/J3_-1_1_1_DAPI_001.tif
230920_161057_J3 60X PL FL/J3_-1_2_1_GFP_001.tif
230920_161057_J3 60X PL FL/J3_-1_3_1_CY5_001.tif
230920_161628_F3 20X PL FL/F3_-1_1_1_DAPI_001.tif
230920_161628_F3 20X PL FL/F3_-1_2_1_GFP_001.tif
230920_161628_F3 20X PL FL/F3_-1_3_1_CY5_001.tif

(Not all good) Set 3 (Default filename - cytation ROI export) - Detects all wells and channels, but not all fields (only ROI1)
B23ROI5_01_2_1_Bright Field_001.tif
B23ROI5_01_1_1_DAPI_001.tif
B23ROI4_01_2_1_Bright Field_001.tif
B23ROI4_01_1_1_DAPI_001.tif
B23ROI3_01_2_1_Bright Field_001.tif
B23ROI3_01_1_1_DAPI_001.tif
B23ROI2_01_2_1_Bright Field_001.tif
B23ROI2_01_1_1_DAPI_001.tif
B23ROI1_01_2_1_Bright Field_001.tif
B23ROI1_01_1_1_DAPI_001.tif
B22ROI5_01_2_1_Bright Field_001.tif
B22ROI5_01_1_1_DAPI_001.tif
B22ROI4_01_2_1_Bright Field_001.tif
B22ROI4_01_1_1_DAPI_001.tif
B22ROI3_01_2_1_Bright Field_001.tif
B22ROI3_01_1_1_DAPI_001.tif
B22ROI2_01_2_1_Bright Field_001.tif
B22ROI2_01_1_1_DAPI_001.tif
B22ROI1_01_2_1_Bright Field_001.tif
B22ROI1_01_1_1_DAPI_001.tif

(Bad) Set 4 (Default filename for slides) - Cannot read image files
Exception in thread "main" picocli.CommandLine$ExecutionException: Error while calling command (com.glencoesoftware.bioformats2raw.Converter@2d2ffcb7): java.lang.NullPointerException
Caused by: java.lang.NullPointerException
at com.glencoesoftware.bioformats2raw.BioTekReader$BioTekWell.getFile(BioTekReader.java:902)
at com.glencoesoftware.bioformats2raw.BioTekReader.initFile(BioTekReader.java:371)
A1_01_3_6_Blue_001.tif
A1_01_3_5_Blue_001.tif
A1_01_3_4_Blue_001.tif
A1_01_3_3_Blue_001.tif
A1_01_3_2_Blue_001.tif
A1_01_3_1_Blue_001.tif
A1_01_2_6_Green_001.tif
A1_01_2_5_Green_001.tif
A1_01_2_4_Green_001.tif
A1_01_2_3_Green_001.tif
A1_01_2_2_Green_001.tif
A1_01_2_1_Green_001.tif
A1_01_1_6_Red_001.tif
A1_01_1_5_Red_001.tif
A1_01_1_4_Red_001.tif
A1_01_1_3_Red_001.tif
A1_01_1_2_Red_001.tif
A1_01_1_1_Red_001.tif

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe the last 4 commits (b7d3286 through 7650721) now address all of these cases.

}

int channelIndex = Integer.parseInt(m.group(5)) - 1;
fieldIndex = Integer.parseInt(m.group(6)) - 1;
try {
z = Integer.parseInt(m.group(8));
// can have two channels with same name
// one with Z stack and one without
channelName = "Z";
}
catch (NumberFormatException e) {
}
channelName += m.group(9);
// recorded T index may be negative if no timepoints
t = (int) Math.max(0, Integer.parseInt(m.group(10)) - 1);
}
}
}

if (rowIndex >= 0 && colIndex >= 0 && fieldIndex >= 0) {
if (rowIndex >= 0 && colIndex >= 0 && fieldIndex >= 0 &&
matchingROI == roiIndex)
{
BioTekWell well = lookupWell(0, rowIndex, colIndex);
if (fieldIndex >= well.getFieldCount()) {
well.setFieldCount(fieldIndex + 1);
}
int c = well.addChannelName(fieldIndex, channelName);
well.addFile(new PlaneIndex(fieldIndex, z, c, t),
new Location(parent, f).getAbsolutePath());
well.addFile(new PlaneIndex(fieldIndex, z, c, t), absolutePath);

if (rowIndex > maxRow) {
maxRow = rowIndex;
}
if (rowIndex < minRow) {
minRow = rowIndex;
}
if (colIndex > maxCol) {
maxCol = colIndex;
}
if (colIndex < minCol) {
minCol = colIndex;
WellIndex rowColPair = new WellIndex(rowIndex, colIndex);
if (!validWellRowCol.contains(rowColPair)) {
validWellRowCol.add(rowColPair);
}
Integer maxFieldIndex = maxField.get(0);
if (maxFieldIndex == null) {
Expand All @@ -292,6 +369,7 @@ protected void initFile(String id) throws FormatException, IOException {
}
}
wells.sort(null);
validWellRowCol.sort(null);

// split brightfield channels into a separate plate acquisition
maxField.put(1, -1);
Expand All @@ -303,6 +381,8 @@ protected void initFile(String id) throws FormatException, IOException {
for (int f=0; f<w.getFieldCount(); f++) {
String[] fieldFiles = w.getFiles(f);
for (String file : fieldFiles) {
LOGGER.trace("found file {} for well index {}, field index {}",
file, well, f);
Element root = getXMLRoot(file);
boolean brightfield = isBrightField(root);

Expand Down Expand Up @@ -443,12 +523,14 @@ else if (pa == 1) {

int nextImage = 0;
int[] nextWellSample = new int[(maxRow + 1) * (maxCol + 1)];
int totalColumns = (maxCol - minCol) + 1;
for (int w=0; w<wells.size(); w++) {
BioTekWell well = wells.get(w);
int effectiveRow = well.getRowIndex() - minRow;
int effectiveColumn = well.getColumnIndex() - minCol;
int wellIndex = effectiveRow * totalColumns + effectiveColumn;
int wellIndex = validWellRowCol.indexOf(
new WellIndex(well.getRowIndex(), well.getColumnIndex()));
LOGGER.debug(
"well #{}, row = {}, col = {}, index = {}",
w, well.getRowIndex(), well.getColumnIndex(), wellIndex);

well.fillMetadataStore(store, 0, well.getPlateAcquisition(), wellIndex,
nextWellSample[wellIndex], nextImage);

Expand Down Expand Up @@ -976,4 +1058,36 @@ public Channel(String name) {
}
}

/**
 * A (row, column) pair identifying a well.
 *
 * Instances are ordered by row first and then by column. Equality and
 * hashing are both derived from the same (row, col) pair, so instances
 * can be looked up in lists (contains/indexOf) and hash-based collections.
 */
class WellIndex implements Comparable<WellIndex> {
  public int row;
  public int col;

  /**
   * @param r well row index
   * @param c well column index
   */
  public WellIndex(int r, int c) {
    this.row = r;
    this.col = c;
  }

  /**
   * Order by row, then by column.
   *
   * @param w the index to compare against
   * @return negative, zero, or positive if this index sorts before,
   *         equal to, or after the given index
   */
  @Override
  public int compareTo(WellIndex w) {
    // Integer.compare is used instead of subtraction, since subtraction
    // can overflow and report the wrong sign for widely separated values
    int byRow = Integer.compare(this.row, w.row);
    if (byRow != 0) {
      return byRow;
    }
    return Integer.compare(this.col, w.col);
  }

  /**
   * @param o object to compare against
   * @return true if o is a WellIndex with the same row and column
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (!(o instanceof WellIndex)) {
      return false;
    }
    WellIndex other = (WellIndex) o;
    return this.row == other.row && this.col == other.col;
  }

  /**
   * @return hash built from the low 16 bits of the row and column
   */
  @Override
  public int hashCode() {
    // only the low 16 bits of each index are used, so indexes above 65535
    // would collide; the result still stays consistent with equals()
    return (row & 0xffff) << 16 | (col & 0xffff);
  }
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,12 @@ void createPlate(String[] filenames) throws IOException {
Path testTiff = getTestFile("test.tiff");

for (String f : filenames) {
Files.copy(testTiff, input.resolve(f));
Path copyLocation = input.resolve(f);
if (!input.equals(copyLocation.getParent())) {
// supplied file path includes a subdirectory
copyLocation.getParent().toFile().mkdirs();
}
Files.copy(testTiff, copyLocation);
}

// reset the input path to the first file in the list
Expand All @@ -80,24 +85,23 @@ void createPlate(String[] filenames) throws IOException {
}

/**
* Create an artificial single well BioTek plate with the given
* list of file names.
* Create an artificial BioTek plate with the given list of file names.
* Checks that the correct well row/column and ZCT sizes are detected.
*
* This test will be run once for each Arguments object
* returned by getTestCases below.
*
* @param paths path to each file in the plate
* @param wellRow well row index (from 0)
* @param wellColumn well column index (from 0)
* @param wellRow row index for each well (from 0)
* @param wellColumn column index for each well (from 0)
* @param fields number of fields in the well
* @param sizeZ number of Z sections
* @param sizeC number of channels
* @param sizeT number of timepoints
*/
@ParameterizedTest
@MethodSource("getTestCases")
public void testBioTek(String[] paths, int wellRow, int wellColumn,
public void testBioTek(String[] paths, int[] wellRow, int[] wellColumn,
int fields, int sizeZ, int sizeC, int sizeT) throws Exception
{
// set up the artificial plate
Expand All @@ -112,29 +116,33 @@ public void testBioTek(String[] paths, int wellRow, int wellColumn,
// an exception which would fail the test
reader.setId(input.toString());

// the number of OME Images should match the expected field count
// the number of OME Images should match the expected well * field count
// this should be the same as the series count
assertEquals(metadata.getImageCount(), fields);
assertEquals(metadata.getImageCount(), fields * wellRow.length);
assertEquals(metadata.getImageCount(), reader.getSeriesCount());
// there should be exactly one plate, with exactly one well
// there should be exactly one plate
assertEquals(metadata.getPlateCount(), 1);
assertEquals(metadata.getWellCount(0), 1);
assertEquals(metadata.getWellCount(0), wellRow.length);
// the well's row and column indexes should match expectations
// this is especially important for "sparse" plates where the first
// row and/or column in the plate are missing
assertEquals(metadata.getWellRow(0, 0).getValue(), wellRow);
assertEquals(metadata.getWellColumn(0, 0).getValue(), wellColumn);
// all of the Images should be linked to the well
assertEquals(metadata.getWellSampleCount(0, 0), fields);
for (int f=0; f<fields; f++) {
// sanity check that the Images are linked to the
// well in the correct order
assertEquals(metadata.getWellSampleImageRef(0, 0, f), "Image:" + f);
// check that the number of Z sections, channels, and timepoints
// all match expectations
assertEquals(metadata.getPixelsSizeZ(f).getValue(), sizeZ);
assertEquals(metadata.getPixelsSizeC(f).getValue(), sizeC);
assertEquals(metadata.getPixelsSizeT(f).getValue(), sizeT);
for (int w=0; w<wellRow.length; w++) {
assertEquals(metadata.getWellRow(0, w).getValue(), wellRow[w]);
assertEquals(metadata.getWellColumn(0, w).getValue(), wellColumn[w]);
// all of the fields should be linked to the well
assertEquals(metadata.getWellSampleCount(0, w), fields);
for (int f=0; f<fields; f++) {
// sanity check that the Images are linked to the
// well in the correct order
int imageIndex = (fields * w) + f;
assertEquals(metadata.getWellSampleImageRef(0, w, f),
"Image:" + imageIndex);
// check that the number of Z sections, channels, and timepoints
// all match expectations
assertEquals(metadata.getPixelsSizeZ(imageIndex).getValue(), sizeZ);
assertEquals(metadata.getPixelsSizeC(imageIndex).getValue(), sizeC);
assertEquals(metadata.getPixelsSizeT(imageIndex).getValue(), sizeT);
}
}
}
}
Expand Down Expand Up @@ -175,7 +183,7 @@ static Stream<Arguments> getTestCases() {
return Stream.of(
Arguments.of(new String[] {
"A1_-1_1_1_Tsf[Phase Contrast]_001.tif"
}, 0, 0, 1, 1, 1, 1),
}, new int[] {0}, new int[] {0}, 1, 1, 1, 1),
Arguments.of(new String[] {
"A1_01_1_1_Phase Contrast_001.tif",
"A1_01_1_2_Phase Contrast_001.tif",
Expand All @@ -186,33 +194,37 @@ static Stream<Arguments> getTestCases() {
"A1_01_1_7_Phase Contrast_001.tif",
"A1_01_1_8_Phase Contrast_001.tif",
"A1_01_1_9_Phase Contrast_001.tif",
}, 0, 0, 9, 1, 1, 1),
}, new int[] {0}, new int[] {0}, 9, 1, 1, 1),
Arguments.of(new String[] {
"P24_1_Bright Field_1_001_02.tif"
}, 15, 23, 1, 1, 1, 1),
}, new int[] {15}, new int[] {23}, 1, 1, 1, 1),
Arguments.of(new String[] {
"B2_1_Bright Field_1_001_02.tif"
}, 1, 1, 1, 1, 1, 1),
}, new int[] {1}, new int[] {1}, 1, 1, 1, 1),
Arguments.of(new String[] {
"A1_1_Stitched[AandB_Phase Contrast]_1_001_-1.tif"
}, 0, 0, 1, 1, 1, 1),
}, new int[] {0}, new int[] {0}, 1, 1, 1, 1),
Arguments.of(new String[] {
"A1_1Z0_DAPI_1_001_.tif",
"A1_1Z1_DAPI_1_001_.tif",
"A1_1Z2_DAPI_1_001_.tif",
"A1_1Z3_DAPI_1_001_.tif",
"A1_1Z4_DAPI_1_001_.tif"
}, 0, 0, 1, 5, 1, 1),
}, new int[] {0}, new int[] {0}, 1, 5, 1, 1),
Arguments.of(new String[] {
"A1_-1_1_1_Stitched[Channel1 300,400]_001.tif",
"A1_-1_2_1_Stitched[Channel2 500,600]_001.tif",
"A1_-1_3_1_Stitched[Channel 3 600,650]_001.tif"
}, 0, 0, 1, 1, 3, 1),
}, new int[] {0}, new int[] {0}, 1, 1, 3, 1),
Arguments.of(new String[] {
"A1_-2_1_1_Tsf[Stitched[Channel1 300,400]]_001.tif",
"A1_-2_2_1_Tsf[Stitched[Channel2 500,600]]_001.tif",
"A1_-2_3_1_Tsf[Stitched[Channel 3 600,650]]_001.tif"
}, 0, 0, 1, 1, 3, 1)
}, new int[] {0}, new int[] {0}, 1, 1, 3, 1),
Arguments.of(new String[] {
"B2/B2_1_Bright Field_1_001_02.tif",
"D3/D3_1_Bright Field_1_001_02.tif",
}, new int[] {1, 3}, new int[] {1, 2}, 1, 1, 1, 1)
);
}

Expand Down
Loading