Release v0.5.1

lucalianas committed Aug 26, 2020
2 parents 85b9d18 + 0bc6756 commit cb667af
Showing 7 changed files with 523 additions and 47 deletions.
2 changes: 1 addition & 1 deletion promort/VERSION
@@ -1 +1 @@
0.5.0
0.5.1
@@ -24,7 +24,7 @@
from slides_manager.models import Case
from reviews_manager.models import ROIsAnnotation, ROIsAnnotationStep

from csv import DictReader
from csv import DictReader, DictWriter
from uuid import uuid4
import logging

@@ -39,6 +39,8 @@ def add_arguments(self, parser):
help='a CSV file containing the worklist, if not present reviews will be assigned randomly')
parser.add_argument('--allow-duplicated', action='store_true', dest='allow_duplicated',
help='create worklist even for cases and slides that already have a related review')
parser.add_argument('--report-file', dest='report_file', type=str, default=None,
help='a CSV file containing a report of the created ROIs annotation steps')

def _get_rois_manager_users(self):
rois_manager_group = Group.objects.get(name=DEFAULT_GROUPS['rois_manager']['name'])
@@ -85,32 +87,40 @@ def _create_rois_annotation_step(self, rois_annotation_obj, slide_obj):
)
try:
annotation_step_obj.save()
created = True
logger.info('Saved new ROIs Annotation Step with label %s', annotation_step_obj.label)
except IntegrityError:
annotation_step_obj = ROIsAnnotationStep.objects.get(rois_annotation=rois_annotation_obj, slide=slide_obj)
created = False
logger.info('There is already a ROIs Annotation Step object (label %s)', annotation_step_obj.label)
return annotation_step_obj
return annotation_step_obj, created

def _create_case_annotation(self, case, reviewer, allow_duplicated):
if allow_duplicated:
annotation_objs = [self._create_rois_annotation(case, reviewer)]
else:
annotation_objs = self._get_or_create_rois_annotation(case, reviewer)
case_annotation_report = list()
for slide in case.slides.all():
logger.info('Processing slide %s', slide.id)
for annotation_obj in annotation_objs:
logger.info('Creating steps for ROIs Annotation %s', annotation_obj.label)
self._create_rois_annotation_step(annotation_obj, slide)
step_obj, created = self._create_rois_annotation_step(annotation_obj, slide)
case_annotation_report.append({'slide_id': slide.id, 'reviewer': reviewer,
'step_label': step_obj.label, 'created': created})
return case_annotation_report

def create_random_worklist(self, allow_duplicated):
def create_random_worklist(self, allow_duplicated, report_file=None):
logger.info('Creating RANDOM worklist')
rois_managers = self._get_rois_manager_users()
cases = self._get_cases_list()
for i, case in enumerate(cases):
logger.info('Processing case %s', case.id)
self._create_case_annotation(case, rois_managers[i % len(rois_managers)], allow_duplicated)
case_report = self._create_case_annotation(case, rois_managers[i % len(rois_managers)], allow_duplicated)
if report_file:
report_file.writerows(case_report)

def create_worklist_from_file(self, worklist_file, allow_duplicated):
def create_worklist_from_file(self, worklist_file, allow_duplicated, report_file=None):
with open(worklist_file) as f:
reader = DictReader(f)
cases_map = self._get_cases_map()
@@ -123,15 +133,25 @@ def create_worklist_from_file(self, worklist_file, allow_duplicated):
if row['reviewer'] not in reviewers_map:
logger.error('There is no reviewer with username %s', row['reviewer'])
continue
self._create_case_annotation(cases_map[row['case_id']], reviewers_map[row['reviewer']],
allow_duplicated)
case_report = self._create_case_annotation(cases_map[row['case_id']], reviewers_map[row['reviewer']],
allow_duplicated)
if report_file:
report_file.writerows(case_report)

def handle(self, *args, **opts):
logger.info('=== Starting ROIs worklist creation ===')
worklist_file = opts['worklist']
allow_duplicated = opts['allow_duplicated']
if opts['report_file']:
report_file = open(opts['report_file'], 'w')
report_writer = DictWriter(report_file, ['slide_id', 'reviewer', 'step_label', 'created'])
report_writer.writeheader()
else:
report_writer = None
if worklist_file:
self.create_worklist_from_file(worklist_file, allow_duplicated)
self.create_worklist_from_file(worklist_file, allow_duplicated, report_writer)
else:
self.create_random_worklist(allow_duplicated)
self.create_random_worklist(allow_duplicated, report_writer)
if report_writer:
report_file.close()
logger.info('=== ROIs worklist creation completed ===')
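
For context (a sketch, not part of the commit): the command reads and writes two simple CSV layouts. The --worklist input is parsed with DictReader and must provide at least the case_id and reviewer columns used above, while the new --report-file output is written with the DictWriter fields declared in handle(). File names and values below are illustrative only.

worklist.csv (input):
case_id,reviewer
CASE-001,reviewer_a
CASE-002,reviewer_b

report.csv (output of the new --report-file option):
slide_id,reviewer,step_label,created
<slide id>,reviewer_a,<annotation step label>,True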
144 changes: 144 additions & 0 deletions promort/rois_manager/management/commands/dump_rois.py
@@ -0,0 +1,144 @@
# Copyright (c) 2020, CRS4
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

from django.core.management.base import BaseCommand, CommandError
from rois_manager.models import Slice, Core, FocusRegion
from rois_manager.serializers import SliceSerializer, CoreSerializer, FocusRegionSerializer

import csv, os, copy
try:
import simplejson as json
except ImportError:
import json

import logging

logger = logging.getLogger('promort_commands')


class Command(BaseCommand):
help = """
"""

def add_arguments(self, parser):
parser.add_argument('--rois-list', dest='rois_list', type=str, required=True,
help='A file containing the list of ROIs that will be extracted')
parser.add_argument('--out-folder', dest='out_folder', type=str, required=True,
help='The output folder for the extracted data')

def _load_rois_map(self, rois_file):
logger.info('Loading data from CSV file')
with open(rois_file) as f:
rois_map = dict()
reader = csv.DictReader(f)
for row in reader:
rois_map.setdefault(row['slide_id'], dict()).setdefault(row['roi_type'], set()).add(int(row['roi_id']))
return rois_map

def _get_related(self, rois):
related_rois = copy.copy(rois)
related_rois.setdefault('slice', set())
related_rois.setdefault('core', set())
related_rois.setdefault('focus_region', set())
# step 1: process slices
logger.info('Processing %d slices', len(related_rois['slice']))
for s in related_rois['slice']:
s_obj = Slice.objects.get(pk=s)
# get cores related to given slice
for c_obj in s_obj.cores.all():
related_rois['core'].add(c_obj.id)
# step 2: process focus regions
logger.info('Processing %d focus regions', len(related_rois['focus_region']))
for fr in related_rois['focus_region']:
fr_obj = FocusRegion.objects.get(pk=fr)
# get core related to given focus region
related_rois['core'].add(fr_obj.core.id)
# step 3: process cores
logger.info('Processing %d cores', len(related_rois['core']))
for c in related_rois['core']:
c_obj = Core.objects.get(pk=c)
# get slice related to given core
related_rois['slice'].add(c_obj.slice.id)
# get focus regions related to given core
for fr_obj in c_obj.focus_regions.all():
related_rois['focus_region'].add(fr_obj.id)
        logger.info('Retrieved %d slices, %d cores, %d focus regions',
len(related_rois['slice']), len(related_rois['core']),
len(related_rois['focus_region']))
return related_rois

def _dump_slide_rois(self, slide_id, rois, output_folder):
logger.info('Dumping ROIs for slide %s', slide_id)
rois = self._get_related(rois)
labels_map = {
'slice': dict(),
'core': dict()
}
to_be_saved = {
'slice': list(),
'core': list(),
'focus_region': list()
}
for s in rois['slice']:
ser_obj = SliceSerializer(Slice.objects.get(pk=s)).data
labels_map['slice'][ser_obj.get('id')] = ser_obj['label']
slice_obj = {
'label': ser_obj['label'],
'roi_json': ser_obj['roi_json'],
'total_cores': ser_obj['total_cores']
}
to_be_saved['slice'].append(slice_obj)
for c in rois['core']:
ser_obj = CoreSerializer(Core.objects.get(pk=c)).data
labels_map['core'][ser_obj.get('id')] = ser_obj['label']
core_obj = {
'label': ser_obj['label'],
'slice': labels_map['slice'].get(ser_obj['slice']),
'roi_json': ser_obj['roi_json'],
'length': ser_obj['length'],
'area': ser_obj['area'],
'tumor_length': ser_obj['tumor_length']
}
to_be_saved['core'].append(core_obj)
for fr in rois['focus_region']:
ser_obj = FocusRegionSerializer(FocusRegion.objects.get(pk=fr)).data
focus_region_obj = {
'label': ser_obj['label'],
'core': labels_map['core'].get(ser_obj['core']),
'roi_json': ser_obj['roi_json'],
'length': ser_obj['length'],
'area': ser_obj['area'],
'tissue_status': ser_obj['tissue_status']
}
to_be_saved['focus_region'].append(focus_region_obj)
with open(os.path.join(output_folder, '%s.json' % slide_id), 'w') as out_file:
json.dump(to_be_saved, out_file)

def _dump_rois(self, rois_map, output_folder):
logger.debug('Checking if folder %s exists' % output_folder)
if not os.path.isdir(output_folder):
raise CommandError('Output folder %s does not exist, exit' % output_folder)
for slide, rois in rois_map.iteritems():
self._dump_slide_rois(slide, rois, output_folder)

def handle(self, *args, **opts):
logger.info('== Starting job ==')
rois = self._load_rois_map(opts['rois_list'])
self._dump_rois(rois, opts['out_folder'])
logger.info('== Job completed ==')
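
A possible input file for dump_rois (a sketch, not part of the commit), based on the columns read in _load_rois_map (slide_id, roi_type, roi_id) and the roi_type keys handled by _get_related (slice, core, focus_region); the slide and ROI identifiers below are invented:

rois_list.csv:
slide_id,roi_type,roi_id
SLIDE-001,slice,12
SLIDE-001,core,45
SLIDE-002,focus_region,78

Assuming the standard Django management-command layout implied by the file path, the command would be invoked as:

python manage.py dump_rois --rois-list rois_list.csv --out-folder /path/to/output

The output folder must already exist (otherwise a CommandError is raised), and one <slide_id>.json file is written per slide, containing the selected ROIs together with the related slices, cores and focus regions resolved by _get_related.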
147 changes: 147 additions & 0 deletions promort/rois_manager/management/commands/extract_cores.py
@@ -0,0 +1,147 @@
# Copyright (c) 2019, CRS4
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

from django.core.management.base import BaseCommand, CommandError
from reviews_manager.models import ROIsAnnotationStep
from promort.settings import OME_SEADRAGON_BASE_URL

from csv import DictWriter
try:
import simplejson as json
except ImportError:
import json

import logging, sys, os, requests
from urlparse import urljoin
from shapely.geometry import Polygon

logger = logging.getLogger('promort_commands')


class Command(BaseCommand):
help = """
    Extract cores as JSON objects
"""

def add_arguments(self, parser):
parser.add_argument('--output_folder', dest='out_folder', type=str, required=True,
help='path of the output folder for the extracted JSON objects')
parser.add_argument('--exclude_empty_cores', dest='exclude_empty', action='store_true',
help='exclude cores with 0 focus regions')
parser.add_argument('--exclude_rejected', dest='exclude_rejected', action='store_true',
help='exclude cores from review steps rejected by the user')
parser.add_argument('--limit-bounds', dest='limit_bounds', action='store_true',
help='extract ROIs considering only the non-empty slide region')

def _load_rois_annotation_steps(self, exclude_rejected):
steps = ROIsAnnotationStep.objects.filter(completion_date__isnull=False)
if exclude_rejected:
steps = [s for s in steps if s.slide_evaluation.adequate_slide]
return steps

def _get_slide_bounds(self, slide):
if slide.image_type == 'OMERO_IMG':
url = urljoin(OME_SEADRAGON_BASE_URL, 'deepzoom/slide_bounds/%d.dzi' % slide.omero_id)
elif slide.image_type == 'MIRAX':
url = urljoin(OME_SEADRAGON_BASE_URL, 'mirax/deepzoom/slide_bounds/%s.dzi' % slide.id)
else:
logger.error('Unknown image type %s for slide %s', slide.image_type, slide.id)
return None
response = requests.get(url)
if response.status_code == requests.codes.OK:
return response.json()
else:
logger.error('Error while loading slide bounds %s', slide.id)
return None

def _extract_points(self, roi_json, slide_bounds):
points = list()
shape = json.loads(roi_json)
segments = shape['segments']
for x in segments:
points.append(
(
x['point']['x'] + int(slide_bounds['bounds_x']),
x['point']['y'] + int(slide_bounds['bounds_y'])
)
)
return points

def _extract_bounding_box(self, roi_points):
polygon = Polygon(roi_points)
bounds = polygon.bounds
return [(bounds[0], bounds[1]), (bounds[2], bounds[3])]

def _dump_core(self, core, slide_id, slide_bounds, out_folder):
file_path = os.path.join(out_folder, 'c_%d.json' % core.id)
points = self._extract_points(core.roi_json, slide_bounds)
bbox = self._extract_bounding_box(points)
with open(file_path, 'w') as ofile:
json.dump(points, ofile)
return {
'slide_id': slide_id,
'core_id': core.id,
'core_label': core.label,
'file_name': 'c_%d.json' % core.id,
'bbox': bbox,
'focus_regions_count': core.focus_regions.count()
}

def _dump_details(self, details, out_folder):
with open(os.path.join(out_folder, 'cores.csv'), 'w') as ofile:
writer = DictWriter(ofile, ['slide_id', 'core_id', 'core_label', 'focus_regions_count',
'bbox', 'file_name'])
writer.writeheader()
writer.writerows(details)

def _dump_cores(self, step, out_folder, exclude_empty, limit_bounds):
cores = step.cores
if exclude_empty:
cores = [c for c in cores if c.focus_regions.count() > 0]
slide = step.slide
logger.info('Loading info for slide %s', slide.id)
if not limit_bounds:
slide_bounds = self._get_slide_bounds(slide)
else:
slide_bounds = {'bounds_x': 0, 'bounds_y': 0}
if slide_bounds:
logger.info('Dumping %d cores for step %s', len(cores), step.label)
if len(cores) > 0:
out_path = os.path.join(out_folder, step.slide.id, step.label)
try:
os.makedirs(out_path)
except OSError:
pass
cores_details = list()
for c in cores:
cores_details.append(
self._dump_core(c, step.slide.id, slide_bounds, out_path)
)
self._dump_details(cores_details, out_path)

def _export_data(self, out_folder, exclude_empty=False, exclude_rejected=False, limit_bounds=False):
steps = self._load_rois_annotation_steps(exclude_rejected)
logger.info('Loaded %d ROIs Annotation Steps', len(steps))
for s in steps:
self._dump_cores(s, out_folder, exclude_empty, limit_bounds)

def handle(self, *args, **opts):
logger.info('=== Starting export job ===')
self._export_data(opts['out_folder'], opts['exclude_empty'], opts['exclude_rejected'], opts['limit_bounds'])
logger.info('=== Export completed ===')
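
A possible invocation of extract_cores (paths are illustrative; the command name follows from the standard Django management-command layout of the file path):

python manage.py extract_cores --output_folder /path/to/cores --exclude_empty_cores --exclude_rejected

For each completed ROIs annotation step this writes a <out_folder>/<slide_id>/<step_label>/ directory containing one c_<core_id>.json file per core plus a cores.csv index with the slide_id, core_id, core_label, focus_regions_count, bbox and file_name columns. When --limit-bounds is passed, slide bounds are assumed to be (0, 0) and no request is made to the OME Seadragon slide_bounds endpoint.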