Merge pull request #682 from xchem/m2ms-1527-RHS-download

RHS data download (issue 1527, partial)
xchem · Oct 16, 2024 · 72826d1 · 72826d1
2 parents ca913cf + fb568e4
commit 72826d1
Show file tree

Hide file tree

Showing 5 changed files with 120 additions and 5 deletions.
diff --git a/api/urls.py b/api/urls.py
@@ -124,6 +124,11 @@
 router.register(
     "metadata_upload", viewer_views.UploadMetadataView, basename='metadata_upload'
 )
+# Computed set download: registers DownloadComputedSetView under the
+# "computedset_download" prefix
+router.register(
+    "computedset_download",
+    viewer_views.DownloadComputedSetView,
+    basename='computedset_download',
+)
 
 # Squonk Jobs
 router.register(

diff --git a/viewer/cset_upload.py b/viewer/cset_upload.py
@@ -168,8 +168,13 @@ def process_pdb(self, pdb_code, zfile, zfile_hashvals) -> str | None:
 
         new_filename = Path(settings.MEDIA_ROOT).joinpath(pdb_field)
         old_filename = Path(settings.MEDIA_ROOT).joinpath(pdb_fp)
-        old_filename.rename(new_filename)
-        os.chmod(new_filename, 0o755)
+
+        # There may be a case where two or more molfiles reference the
+        # same pdb; in that case the old pdb has already been renamed
+        # to the new name.
+        if old_filename.exists() and not new_filename.exists():
+            old_filename.rename(new_filename)
+            os.chmod(new_filename, 0o755)
 
         return str(pdb_field)
 
@@ -419,8 +424,8 @@ def set_mol(
             if groups is None or len(groups.groups()) != 3:
                 # just a quick sanity check
                 raise ValueError(f'Non-standard ComputedMolecule.name: {latest.name}')
-            number = groups.groups[1]  # type: ignore [index]
-            suffix = next(alphanumerator(start_from=groups.groups[2]))  # type: ignore [index]
+            number = groups.groups()[1]  # type: ignore [index]
+            suffix = next(alphanumerator(start_from=groups.groups()[2]))  # type: ignore [index]
         else:
             suffix = 'a'
             number = 1
@@ -675,6 +680,9 @@ def task(self) -> ComputedSet:
         # Process the molecules
         logger.info('%s mols_to_process=%s', computed_set, len(mols_to_process))
         for i in range(len(mols_to_process)):
+            logger.debug(
+                'processing mol %s: %s', i, mols_to_process[i].GetProp('_Name')
+            )
             _ = self.process_mol(
                 mols_to_process[i],
                 self.target_id,

diff --git a/viewer/models.py b/viewer/models.py
@@ -1078,6 +1078,20 @@ def __repr__(self) -> str:
             self.site_observation_code,
         )
 
+    def get_filename(self):
+        # strip the original filename from the auto-assigned name
+        # filename is stored in field like:
+        # computed_set_data/A0486a#c2b8d13c94bb40bb9bf9d244b05516d3.pdb_2e2245e16cca4961919c7f5fdd1d0ece
+        if self.pdb:
+            return Path(self.pdb_info.name).name
+        else:
+            fname = Path(self.pdb_info.name).name
+            if fname.find('#') > 0:
+                name = fname.split('#')[0]
+                return f'{name}.pdb'
+
+            return fname
+
 
 class ComputedSetComputedMolecule(models.Model):
     computed_set = models.ForeignKey(ComputedSet, null=False, on_delete=models.CASCADE)

diff --git a/viewer/serializers.py b/viewer/serializers.py
@@ -635,6 +635,13 @@ class Meta:
         fields = '__all__'
 
 
+class ComputedSetDownloadSerializer(serializers.ModelSerializer):
+    # Declares only the ``name`` field of ComputedSet.
+    # Validation is not called on this serializer, so there is no
+    # reason to rely on it.
+    class Meta:
+        model = models.ComputedSet
+        fields = ('name',)
+
+
 class ComputedMoleculeSerializer(serializers.ModelSerializer):
     # performance issue
     # inspiration_frags = MoleculeSerializer(read_only=True, many=True)

diff --git a/viewer/views.py b/viewer/views.py
@@ -3,7 +3,9 @@
 import os
 import shlex
 import shutil
+import zipfile
 from datetime import datetime
+from io import BytesIO
 from pathlib import Path
 from typing import Any, Dict, List, Optional
 from wsgiref.util import FileWrapper
@@ -2594,4 +2596,83 @@ def create(self, request, *args, **kwargs):
         if errors:
             return Response({"errors": errors}, status=status.HTTP_400_BAD_REQUEST)
         else:
-            return Response({'sucess': True}, status=status.HTTP_200_OK)
+            return Response({'success': True}, status=status.HTTP_200_OK)
+
+
+class DownloadComputedSetView(ISPyBSafeQuerySet):
+    queryset = models.ComputedSet.objects.all()
+    filter_permissions = "target__project"
+    serializer_class = serializers.ComputedSetDownloadSerializer
+    permission_class = [permissions.IsAuthenticated]
+
+    def get_view_name(self):
+        return "Computed set download"
+
+    def post(self, request, *args, **kwargs):
+        logger.info("+ DownloadComputedSetView.create called")
+        del args, kwargs
+
+        logger.debug('data: %s', request.data)
+
+        # If done like this, it's bypassing the validation..
+        computed_set_name = request.data['name']
+        try:
+            computed_set = models.ComputedSet.objects.get(name=computed_set_name)
+        except models.ComputedSet.DoesNotExist:
+            return Response(
+                {'error': f"ComputedSet '{computed_set_name}' not found"},
+                status=status.HTTP_404_NOT_FOUND,
+            )
+
+        if (
+            computed_set.target.project.title
+            not in _ISPYB_SAFE_QUERY_SET.get_proposals_for_user(
+                request.user, restrict_public_to_membership=False
+            )
+        ):
+            return Response(
+                {'error': "You have no access to the Project"},
+                status=status.HTTP_403_FORBIDDEN,
+            )
+
+        # so now, get the file, and get the pdbs
+        sdf_file = Path(computed_set.written_sdf_filename)
+        if not sdf_file.exists():
+            return Response(
+                {'error': f"Uploaded file '{str(sdf_file.name)}' not found"},
+                status=status.HTTP_404_NOT_FOUND,
+            )
+
+        pdbs = computed_set.computed_molecules.filter(pdb__isnull=True)
+        if pdbs.exists():
+            # custom pdbs exist, zip all together and return an archive
+            zip_buffer = BytesIO()
+            with zipfile.ZipFile(zip_buffer, 'a', zipfile.ZIP_DEFLATED) as ziparchive:
+                with open(sdf_file, 'rb') as contents:
+                    ziparchive.writestr(f'{sdf_file.name}.sdf', contents.read())
+                for f in pdbs:
+                    fpath = Path(settings.MEDIA_ROOT).joinpath(f.pdb_info.name)
+                    if fpath.is_file():
+                        with open(fpath, 'rb') as contents:
+                            ziparchive.writestr(f.get_filename(), contents.read())
+                    else:
+                        ziparchive.writestr(f'{f.get_filename()}_MISSING', r'')
+
+            response = HttpResponse(
+                zip_buffer.getvalue(), content_type='application/zip'
+            )
+            response['Content-Disposition'] = (
+                'attachment; filename="%s"' % f'{computed_set.name}.zip'
+            )
+            response['Content-Length'] = zip_buffer.getbuffer().nbytes
+            return response
+
+        else:
+            # no custom pdbs, return sdf
+            wrapper = FileWrapper(open(sdf_file, 'rb'))
+            response = FileResponse(wrapper, content_type='text/plain')
+            response['Content-Disposition'] = (
+                'attachment; filename="%s"' % sdf_file.name
+            )
+            response['Content-Length'] = os.path.getsize(sdf_file)
+            return response