Skip to content

Commit

Permalink
Merge pull request #682 from xchem/m2ms-1527-RHS-download
Browse files Browse the repository at this point in the history
RHS data download (issue 1527, partial)
  • Loading branch information
kaliif authored Oct 16, 2024
2 parents ca913cf + fb568e4 commit 72826d1
Show file tree
Hide file tree
Showing 5 changed files with 120 additions and 5 deletions.
5 changes: 5 additions & 0 deletions api/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,11 @@
router.register(
"metadata_upload", viewer_views.UploadMetadataView, basename='metadata_upload'
)
# Route "computedset_download" to DownloadComputedSetView
router.register(
"computedset_download",
viewer_views.DownloadComputedSetView,
basename='computedset_download',
)

# Squonk Jobs
router.register(
Expand Down
16 changes: 12 additions & 4 deletions viewer/cset_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,13 @@ def process_pdb(self, pdb_code, zfile, zfile_hashvals) -> str | None:

new_filename = Path(settings.MEDIA_ROOT).joinpath(pdb_field)
old_filename = Path(settings.MEDIA_ROOT).joinpath(pdb_fp)
old_filename.rename(new_filename)
os.chmod(new_filename, 0o755)

# there may be a case where 2 or more molfiles reference the
# same pdb. in this case, the old pdb is already renamed to
# new.
if old_filename.exists() and not new_filename.exists():
old_filename.rename(new_filename)
os.chmod(new_filename, 0o755)

return str(pdb_field)

Expand Down Expand Up @@ -419,8 +424,8 @@ def set_mol(
if groups is None or len(groups.groups()) != 3:
# just a quick sanity check
raise ValueError(f'Non-standard ComputedMolecule.name: {latest.name}')
number = groups.groups[1] # type: ignore [index]
suffix = next(alphanumerator(start_from=groups.groups[2])) # type: ignore [index]
number = groups.groups()[1] # type: ignore [index]
suffix = next(alphanumerator(start_from=groups.groups()[2])) # type: ignore [index]
else:
suffix = 'a'
number = 1
Expand Down Expand Up @@ -675,6 +680,9 @@ def task(self) -> ComputedSet:
# Process the molecules
logger.info('%s mols_to_process=%s', computed_set, len(mols_to_process))
for i in range(len(mols_to_process)):
logger.debug(
'processing mol %s: %s', i, mols_to_process[i].GetProp('_Name')
)
_ = self.process_mol(
mols_to_process[i],
self.target_id,
Expand Down
14 changes: 14 additions & 0 deletions viewer/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1078,6 +1078,20 @@ def __repr__(self) -> str:
self.site_observation_code,
)

def get_filename(self):
    """Return a presentable file name derived from ``pdb_info``.

    The stored name carries auto-assigned decoration, for example
    ``computed_set_data/A0486a#c2b8d13c94bb40bb9bf9d244b05516d3.pdb_2e2245e16cca4961919c7f5fdd1d0ece``.

    * When ``self.pdb`` is truthy, the base name of ``pdb_info.name`` is
      returned untouched.
    * Otherwise, if a ``#`` occurs after at least one leading character,
      the text before the first ``#`` plus ``.pdb`` is returned.
    * In every other case the base name is returned as is.
    """
    has_linked_pdb = bool(self.pdb)
    basename = Path(self.pdb_info.name).name
    if has_linked_pdb:
        return basename

    # partition() splits on the first '#'; an empty stem means the '#'
    # is the very first character, in which case nothing is stripped.
    stem, marker, _ = basename.partition('#')
    if marker and stem:
        return f'{stem}.pdb'
    return basename


class ComputedSetComputedMolecule(models.Model):
computed_set = models.ForeignKey(ComputedSet, null=False, on_delete=models.CASCADE)
Expand Down
7 changes: 7 additions & 0 deletions viewer/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,13 @@ class Meta:
fields = '__all__'


class ComputedSetDownloadSerializer(serializers.ModelSerializer):
    """Serializer for ``ComputedSet`` that exposes only the ``name`` field."""

    # validation is not called, so no reason to use it
    class Meta:
        model = models.ComputedSet
        fields = ('name',)


class ComputedMoleculeSerializer(serializers.ModelSerializer):
# performance issue
# inspiration_frags = MoleculeSerializer(read_only=True, many=True)
Expand Down
83 changes: 82 additions & 1 deletion viewer/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
import os
import shlex
import shutil
import zipfile
from datetime import datetime
from io import BytesIO
from pathlib import Path
from typing import Any, Dict, List, Optional
from wsgiref.util import FileWrapper
Expand Down Expand Up @@ -2594,4 +2596,83 @@ def create(self, request, *args, **kwargs):
if errors:
return Response({"errors": errors}, status=status.HTTP_400_BAD_REQUEST)
else:
return Response({'sucess': True}, status=status.HTTP_200_OK)
return Response({'success': True}, status=status.HTTP_200_OK)


class DownloadComputedSetView(ISPyBSafeQuerySet):
    """Download a ComputedSet's uploaded SDF, zipped with custom PDBs if any.

    POST data: ``{"name": <ComputedSet name>}``.

    Responses:

    * 400 - the request data carries no ``name``
    * 404 - no such ComputedSet, or its SDF file is not on disk
    * 403 - the user has no access to the set's project
    * 200 - the SDF itself when no molecule has a custom PDB, otherwise
      a zip archive holding the SDF and the custom PDB files
    """

    queryset = models.ComputedSet.objects.all()
    filter_permissions = "target__project"
    serializer_class = serializers.ComputedSetDownloadSerializer
    # DRF only consults ``permission_classes`` (plural); the previous
    # ``permission_class`` spelling was silently ignored.
    permission_classes = [permissions.IsAuthenticated]

    def get_view_name(self):
        return "Computed set download"

    def post(self, request, *args, **kwargs):
        """Return the named ComputedSet's SDF, or a zip of SDF plus PDBs."""
        logger.info("+ DownloadComputedSetView.create called")
        del args, kwargs

        logger.debug('data: %s', request.data)

        # The name is read straight from the payload, bypassing serializer
        # validation, so a missing or malformed payload is handled here
        # instead of surfacing as a server error.
        try:
            computed_set_name = request.data['name']
        except (KeyError, TypeError):
            return Response(
                {'error': "Missing 'name' of the ComputedSet"},
                status=status.HTTP_400_BAD_REQUEST,
            )

        try:
            computed_set = models.ComputedSet.objects.get(name=computed_set_name)
        except models.ComputedSet.DoesNotExist:
            return Response(
                {'error': f"ComputedSet '{computed_set_name}' not found"},
                status=status.HTTP_404_NOT_FOUND,
            )

        permitted_proposals = _ISPYB_SAFE_QUERY_SET.get_proposals_for_user(
            request.user, restrict_public_to_membership=False
        )
        if computed_set.target.project.title not in permitted_proposals:
            return Response(
                {'error': "You have no access to the Project"},
                status=status.HTTP_403_FORBIDDEN,
            )

        # is_file() rather than exists(): an empty stored path resolves to
        # the current directory, which exists but cannot be opened as a file.
        sdf_file = Path(computed_set.written_sdf_filename or '')
        if not sdf_file.is_file():
            return Response(
                {'error': f"Uploaded file '{sdf_file.name}' not found"},
                status=status.HTTP_404_NOT_FOUND,
            )

        # Molecules without a linked pdb are the ones selected as carrying
        # a custom PDB file.
        custom_pdb_mols = computed_set.computed_molecules.filter(pdb__isnull=True)
        if custom_pdb_mols.exists():
            # custom pdbs exist, zip all together and return an archive
            payload = self._zip_sdf_and_pdbs(sdf_file, custom_pdb_mols)
            response = HttpResponse(payload, content_type='application/zip')
            response['Content-Disposition'] = (
                'attachment; filename="%s"' % f'{computed_set.name}.zip'
            )
            response['Content-Length'] = len(payload)
            return response

        # no custom pdbs, return sdf
        # The handle is intentionally not closed here: the response streams
        # from it after this method has returned.
        wrapper = FileWrapper(open(sdf_file, 'rb'))
        response = FileResponse(wrapper, content_type='text/plain')
        response['Content-Disposition'] = 'attachment; filename="%s"' % sdf_file.name
        response['Content-Length'] = os.path.getsize(sdf_file)
        return response

    @staticmethod
    def _zip_sdf_and_pdbs(sdf_file, molecules):
        """Return the bytes of a zip holding the SDF and the molecules' PDBs.

        A PDB whose file is absent under ``MEDIA_ROOT`` is represented by an
        empty ``<name>_MISSING`` member. Each archive name is written once,
        because several molecules may reference the same PDB file.
        """
        # Only append '.sdf' when the stored name lacks it, so the member
        # is not called 'x.sdf.sdf'.
        sdf_arcname = sdf_file.name
        if sdf_file.suffix.lower() != '.sdf':
            sdf_arcname = f'{sdf_arcname}.sdf'

        written = {sdf_arcname}
        buffer = BytesIO()
        with zipfile.ZipFile(buffer, 'w', zipfile.ZIP_DEFLATED) as archive:
            archive.writestr(sdf_arcname, sdf_file.read_bytes())
            for mol in molecules:
                if not mol.pdb_info:
                    # no file recorded for this molecule, nothing to add
                    continue

                pdb_path = Path(settings.MEDIA_ROOT).joinpath(mol.pdb_info.name)
                if pdb_path.is_file():
                    arcname = mol.get_filename()
                    content = pdb_path.read_bytes()
                else:
                    arcname = f'{mol.get_filename()}_MISSING'
                    content = b''

                if arcname in written:
                    continue
                written.add(arcname)
                archive.writestr(arcname, content)

        return buffer.getvalue()

0 comments on commit 72826d1

Please sign in to comment.