import hashlib
import tempfile
import zipfile
from django.conf import settings
from django.utils.module_loading import import_string
import explorer.exporters
import pandas as pd
from uploader.models import SpectralData
[docs]class ZipSpectralDataMixin:
""" A custom mixin for explorer.exporters.BaseExporter used to collect SpectralData.data files and zip them with
query output data for download.
"""
@property
def content_type(self):
if self.is_zip:
return "application/zip"
else:
return self.__class__.content_type
[docs] def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.is_zip = False # Used to dynamically correct filename and content_type. Mutated only by get_file_output.
def get_filename(self, *args, **kwargs):
filename = super().get_filename(*args, **kwargs)
if self.is_zip:
filename += ".zip"
return filename
def get_output(self, **kwargs):
value = self.get_file_output(**kwargs)
if hasattr(value, "getvalue"):
value = value.getvalue()
return value
def get_file_output(self,
include_data_files=None,
return_info=False,
always_zip=False,
compression_type=None,
compression_level=None,
**kwargs):
self.is_zip = False # NOTE: This doesn't need resetting anywhere else.
if include_data_files is None:
include_data_files = settings.EXPLORER_DATA_EXPORTERS_INCLUDE_DATA_FILES
if compression_type is None:
compression_type = import_string(settings.ZIP_COMPRESSION)
if compression_level is None:
compression_level = settings.ZIP_COMPRESSION_LEVEL
# NOTE: The following two lines are the entire contents of explorer.exporters.BaseExporter.get_file_output.
res = self.query.execute_query_only()
output = self._get_output(res, **kwargs)
n_rows = len(res.data)
spectral_data_filenames = None
# Compute data checksum
output.seek(0)
# For .xlsx output is already a bytes object so doesn't need encoding.
_output = output.read()
if hasattr(_output, "encode"):
_output = _output.encode()
data_sha256 = hashlib.sha256(_output).hexdigest()
data_files = []
if include_data_files:
storage = SpectralData.data.field.storage
# Collect SpectralData files and zip along with query data from self._get_output().
if settings.EXPLORER_DATA_EXPORTERS_ALLOW_DATA_FILE_ALIAS:
# Spectral data files are modeled by the Spectraldata.data field, however, the sql query could have
# aliased these so it wouldn't be safe to search by column name. Instead, we can only exhaustively
# search all entries for some marker indicating that they are spectral data files, where this "marker"
# is the upload directory - SpectralData.data.field.upload_to.
upload_dir = SpectralData.data.field.upload_to # NOTE: We don't need to inc. the MEDIA_ROOT for this.
for row in res.data:
for item in row:
if isinstance(item, str) and item.startswith(upload_dir):
data_files.append(item)
else:
if (col_name := SpectralData.data.field.name) in res.header_strings:
df = pd.DataFrame(res.data, columns=res.header_strings)
df = df[col_name]
# There could be multiple "col_name" (aka "data") columns so flatten first.
data_files = df.to_numpy().flatten().tolist()
if data_files or always_zip:
# Dedupe and sort.
data_files = sorted(set(data_files))
spectral_data_filenames = data_files
# Zip everything together.
temp = tempfile.TemporaryFile()
with zipfile.ZipFile(temp,
mode="w",
compression=compression_type,
compresslevel=compression_level) as archive:
# Add query results to zipfile.
archive.writestr(self.get_filename(), output.getvalue())
# Add all data files to zipfile.
for filename in data_files:
try:
archive.write(storage.path(filename), arcname=filename)
except NotImplementedError:
# storage.path() will raise NotImplementedError for remote storages like S3. In this
# scenario, open and read all the file contents to zip.
with storage.open(filename) as fp:
data = fp.read()
archive.writestr(filename, data)
temp.seek(0)
output = temp
self.is_zip = True
return (output, (n_rows, data_sha256, spectral_data_filenames)) if return_info else output
[docs]class CSVExporter(ZipSpectralDataMixin, explorer.exporters.CSVExporter):
...
[docs]class ExcelExporter(ZipSpectralDataMixin, explorer.exporters.ExcelExporter):
...
[docs]class JSONExporter(ZipSpectralDataMixin, explorer.exporters.JSONExporter):
...