From 5e85be77fc4aea8615773174189dcf0c5f2fc69f Mon Sep 17 00:00:00 2001 From: Andy Shapiro Date: Tue, 2 Aug 2022 23:36:07 -0400 Subject: [PATCH 01/12] being work --- hawc/apps/animal/admin.py | 9 +++---- hawc/apps/assessment/actions/__init__.py | 0 hawc/apps/assessment/actions/audit.py | 24 +++++++++++++++++++ hawc/apps/assessment/api.py | 11 ++++++++- .../templates/assessment/log_object_list.html | 2 ++ hawc/apps/assessment/views.py | 10 +++++--- hawc/apps/common/templatetags/bs4.py | 9 +++++++ .../study/templates/study/study_detail.html | 2 ++ 8 files changed, 59 insertions(+), 8 deletions(-) create mode 100644 hawc/apps/assessment/actions/__init__.py create mode 100644 hawc/apps/assessment/actions/audit.py diff --git a/hawc/apps/animal/admin.py b/hawc/apps/animal/admin.py index 33fcf4163f..dad10cce98 100644 --- a/hawc/apps/animal/admin.py +++ b/hawc/apps/animal/admin.py @@ -1,10 +1,11 @@ from django.contrib import admin +from reversion.admin import VersionAdmin from . import models @admin.register(models.Experiment) -class ExperimentAdmin(admin.ModelAdmin): +class ExperimentAdmin(VersionAdmin, admin.ModelAdmin): raw_id_fields = ("study", "dtxsid") list_display = ( "id", @@ -24,7 +25,7 @@ class ExperimentAdmin(admin.ModelAdmin): @admin.register(models.AnimalGroup) -class AnimalGroupAdmin(admin.ModelAdmin): +class AnimalGroupAdmin(VersionAdmin, admin.ModelAdmin): list_display = ( "id", "experiment", @@ -46,7 +47,7 @@ class DoseGroupInline(admin.TabularInline): @admin.register(models.DosingRegime) -class DosingRegimeAdmin(admin.ModelAdmin): +class DosingRegimeAdmin(VersionAdmin, admin.ModelAdmin): list_display = ( "id", "dosed_animals", @@ -72,7 +73,7 @@ class EndpointGroupInline(admin.TabularInline): @admin.register(models.Endpoint) -class EndpointAdmin(admin.ModelAdmin): +class EndpointAdmin(VersionAdmin, admin.ModelAdmin): list_display = ( "id", "assessment_id", diff --git a/hawc/apps/assessment/actions/__init__.py b/hawc/apps/assessment/actions/__init__.py new 
file mode 100644 index 0000000000..e69de29bb2 diff --git a/hawc/apps/assessment/actions/audit.py b/hawc/apps/assessment/actions/audit.py new file mode 100644 index 0000000000..569e8dfa91 --- /dev/null +++ b/hawc/apps/assessment/actions/audit.py @@ -0,0 +1,24 @@ +from enum import Enum + +import pandas as pd + +from ..models import Assessment +from ...common.helper import FlatExport +from ...common.serializers import PydanticDrfSerializer + + +class AuditType(str, Enum): + ASSESSMENT = "assessment" + + +class AssessmentAuditSerializer(PydanticDrfSerializer): + assessment: Assessment + type: AuditType + + class Config: + arbitrary_types_allowed = True + + def export(self) -> FlatExport: + df = pd.DataFrame(data=[[1, 2, 3], [4, 5, 6]], columns="a b c".split()) + export = FlatExport(df=df, filename="merp") + return export diff --git a/hawc/apps/assessment/api.py b/hawc/apps/assessment/api.py index 2b489ddb9f..08e115269a 100644 --- a/hawc/apps/assessment/api.py +++ b/hawc/apps/assessment/api.py @@ -13,6 +13,7 @@ from rest_framework.decorators import action from rest_framework.exceptions import APIException, PermissionDenied from rest_framework.pagination import PageNumberPagination +from rest_framework.request import Request from rest_framework.response import Response from hawc.services.epa import dsstox @@ -21,6 +22,7 @@ from ..common.renderers import PandasRenderers from ..common.views import create_object_log from . 
import models, serializers +from .actions.audit import AssessmentAuditSerializer class DisabledPagination(PageNumberPagination): @@ -297,7 +299,7 @@ def public(self, request): return Response(serializer.data) @action(detail=True) - def endpoints(self, request, pk: int = None): + def endpoints(self, request, pk: int): """ Optimized for queryset speed; some counts in get_queryset and others in the list here; depends on if a "select distinct" is @@ -454,6 +456,13 @@ def endpoints(self, request, pk: int = None): return Response({"name": instance.name, "id": instance.id, "items": items}) + @action(detail=True, url_path=r"audit/(?P[\w]+)", renderer_classes=PandasRenderers) + def audit(self, request: Request, pk: int, type: str): + instance = self.get_object() + serializer = AssessmentAuditSerializer.from_drf(data=dict(assessment=instance, type=type)) + export = serializer.export() + return Response(export) + class DatasetViewset(AssessmentViewset): model = models.Dataset diff --git a/hawc/apps/assessment/templates/assessment/log_object_list.html b/hawc/apps/assessment/templates/assessment/log_object_list.html index f4d3d103d2..b2d7ee112d 100644 --- a/hawc/apps/assessment/templates/assessment/log_object_list.html +++ b/hawc/apps/assessment/templates/assessment/log_object_list.html @@ -64,4 +64,6 @@

Logs

{% include 'assessment/_logs_note.html' %} +

Audit Logs

+{% for log in audit_logs %}{{log.id}}, {% empty %}-{% endfor %} {% endblock content %} diff --git a/hawc/apps/assessment/views.py b/hawc/apps/assessment/views.py index d738b737a9..8386fc8139 100644 --- a/hawc/apps/assessment/views.py +++ b/hawc/apps/assessment/views.py @@ -27,6 +27,7 @@ from django.views.decorators.cache import cache_page from django.views.generic import DetailView, FormView, ListView, TemplateView, View from django.views.generic.edit import CreateView +from reversion.models import Version from ...services.utils.rasterize import get_styles_svg_definition from ..common.crumbs import Breadcrumb @@ -790,9 +791,12 @@ def get_breadcrumbs(self) -> List[Breadcrumb]: def get_context_data(self, **kwargs): context = super().get_context_data(**kwargs) - context["first_log"] = self.first_log - context["assessment"] = self.assessment - context["breadcrumbs"] = self.get_breadcrumbs() + context.update( + first_log=self.first_log, + assessment=self.assessment, + breadcrumbs=self.get_breadcrumbs(), + audit_logs=Version.objects.get_for_object(self.first_log.content_object), + ) return context diff --git a/hawc/apps/common/templatetags/bs4.py b/hawc/apps/common/templatetags/bs4.py index 7197ae6103..3ebb2ada8f 100644 --- a/hawc/apps/common/templatetags/bs4.py +++ b/hawc/apps/common/templatetags/bs4.py @@ -2,6 +2,8 @@ Twitter Bootstrap 4 - helper methods """ from django import template +from django.contrib.contenttypes.models import ContentType +from django.urls import reverse from django.utils.safestring import mark_safe register = template.Library() @@ -40,3 +42,10 @@ def bs4_fullrow(text: str, tr_attrs: str = "") -> str: return mark_safe( f'

{text}

' ) + + +@register.simple_tag() +def audit_url(object): + # todo move somewhere else + ct = ContentType.objects.get_for_model(object.__class__) + return reverse("assessment:log_object_list", args=(ct.pk, object.pk)) diff --git a/hawc/apps/study/templates/study/study_detail.html b/hawc/apps/study/templates/study/study_detail.html index 7bb6436d37..abceb5b485 100644 --- a/hawc/apps/study/templates/study/study_detail.html +++ b/hawc/apps/study/templates/study/study_detail.html @@ -1,6 +1,7 @@ {% extends 'assessment-rooted.html' %} {% load hastext %} +{% load bs4 %} {% block content %}

{{object}}

@@ -18,6 +19,7 @@

{{object}}

{% if obj_perms.edit_assessment %} {{ object.editable|yesno:"Lock study,Unlock study" }} + Audit log {% endif %} {% if object.editable %} From a362b0105f83a944d4cf0e96133a591000f09b42 Mon Sep 17 00:00:00 2001 From: Daniel Rabstejnek Date: Thu, 18 Aug 2022 10:00:34 -0400 Subject: [PATCH 02/12] Preliminary animal --- hawc/apps/assessment/actions/audit.py | 60 +++++++++++++++++- hawc/apps/lit/models.py | 88 +++++++-------------------- 2 files changed, 82 insertions(+), 66 deletions(-) diff --git a/hawc/apps/assessment/actions/audit.py b/hawc/apps/assessment/actions/audit.py index 569e8dfa91..b168a33df7 100644 --- a/hawc/apps/assessment/actions/audit.py +++ b/hawc/apps/assessment/actions/audit.py @@ -1,14 +1,30 @@ from enum import Enum import pandas as pd +from reversion.models import Version +from django.contrib.contenttypes.models import ContentType from ..models import Assessment from ...common.helper import FlatExport from ...common.serializers import PydanticDrfSerializer +def versions_by_content_type(app_label: str, model: str, qs=None): + qs = Version.objects.all() if qs is None else qs + ct = ContentType.objects.get(app_label=app_label, model=model) + return qs.filter(content_type=ct) + + +def versions_by_related_field(related_field: str, related_values: list, qs=None): + qs = Version.objects.all() if qs is None else qs + ored_values = "|".join([str(id) for id in related_values]) + data_regex = rf"[\"\']fields[\"\']\s*:\s*{{[^}}]*?[\"\']{related_field}[\"\']\s*:\s*({ored_values})\s*," + return qs.filter(serialized_data__iregex=data_regex) + + class AuditType(str, Enum): ASSESSMENT = "assessment" + ANIMAL = "animal" class AssessmentAuditSerializer(PydanticDrfSerializer): @@ -18,7 +34,49 @@ class AssessmentAuditSerializer(PydanticDrfSerializer): class Config: arbitrary_types_allowed = True + def get_assessment_queryset(self): + return Version.objects.get_for_model(Assessment).filter(object_id=self.assessment.pk) + + def get_animal_queryset(self): + reference_qs = 
versions_by_content_type("lit", "reference") + reference_qs = versions_by_related_field("assessment", [self.assessment.pk], reference_qs) + # get bioassay studies associated with references + study_qs = versions_by_content_type("study", "study") + study_qs = study_qs.filter(object_id__in=set(reference_qs.values_list("object_id", flat=True))) + study_qs = versions_by_related_field("bioassay", ["true"], study_qs) + # get study experiments + experiment_qs = versions_by_content_type("animal", "experiment") + experiment_qs = versions_by_related_field( + "study", set(study_qs.values_list("object_id", flat=True)), experiment_qs + ) + # get experiment animal groups + animal_group_qs = versions_by_content_type("animal", "animalgroup") + animal_group_qs = versions_by_related_field( + "experiment", set(experiment_qs.values_list("object_id", flat=True)), animal_group_qs + ) + # get animal group endpoints + endpoint_qs = versions_by_content_type("animal", "endpoint") + endpoint_qs = versions_by_related_field( + "animal_group", set(animal_group_qs.values_list("object_id", flat=True)), endpoint_qs + ) + return experiment_qs | animal_group_qs | endpoint_qs + + def get_queryset(self): + qs = getattr(self, f"get_{self.type}_queryset")() + return qs.select_related("content_type", "revision") + def export(self) -> FlatExport: - df = pd.DataFrame(data=[[1, 2, 3], [4, 5, 6]], columns="a b c".split()) + qs = self.get_queryset() + df = pd.DataFrame( + qs.values_list( + "content_type__app_label", + "content_type__model", + "object_id", + "serialized_data", + "revision__user", + "revision__date_created", + ), + columns=["app", "model", "pk", "serialized_data", "user", "date_revised"], + ) export = FlatExport(df=df, filename="merp") return export diff --git a/hawc/apps/lit/models.py b/hawc/apps/lit/models.py index a8910b5851..c3ceca251c 100644 --- a/hawc/apps/lit/models.py +++ b/hawc/apps/lit/models.py @@ -22,6 +22,7 @@ from django.utils.html import strip_tags from taggit.models import 
ItemBase from treebeard.mp_tree import MP_Node +from reversion import revisions as reversion from ...refml import topics from ...services.nih import pubmed @@ -196,8 +197,7 @@ class Search(models.Model): ) slug = models.SlugField( verbose_name="URL Name", - help_text="The URL (web address) used to describe this object " - "(no spaces or special-characters).", + help_text="The URL (web address) used to describe this object " "(no spaces or special-characters).", ) description = models.TextField( blank=True, @@ -226,9 +226,7 @@ def __str__(self): @property def is_manual_import(self): # special case- created when assessment is created - return ( - self.search_type == constants.SearchType.IMPORT and self.slug == self.MANUAL_IMPORT_SLUG - ) + return self.search_type == constants.SearchType.IMPORT and self.slug == self.MANUAL_IMPORT_SLUG def clean(self): # unique_together constraint checked above; @@ -237,19 +235,9 @@ def clean(self): errors = {} if self.pk: pk_exclusion["pk"] = self.pk - if ( - Search.objects.filter(assessment=self.assessment, title=self.title) - .exclude(**pk_exclusion) - .count() - > 0 - ): + if Search.objects.filter(assessment=self.assessment, title=self.title).exclude(**pk_exclusion).count() > 0: errors["title"] = ASSESSMENT_UNIQUE_MESSAGE - if ( - Search.objects.filter(assessment=self.assessment, slug=self.slug) - .exclude(**pk_exclusion) - .count() - > 0 - ): + if Search.objects.filter(assessment=self.assessment, slug=self.slug).exclude(**pk_exclusion).count() > 0: errors["slug"] = ASSESSMENT_UNIQUE_MESSAGE if errors: raise ValidationError(errors) @@ -264,9 +252,7 @@ def delete(self, **kwargs): # cascade delete references which no longer relate to any searches orphans = self.sole_references() if orphans.count() > 0: - logger.info( - f"Removed {orphans.count()} orphan references from assessment {self.assessment_id}" - ) + logger.info(f"Removed {orphans.count()} orphan references from assessment {self.assessment_id}") orphans.delete() 
super().delete(**kwargs) @@ -348,9 +334,7 @@ def create_new_references(self, results): # it, just associate the current reference with this search. added_str = [str(id) for id in results["added"]] ref_ids = ( - Reference.objects.filter( - assessment=self.assessment, identifiers__unique_id__in=added_str - ) + Reference.objects.filter(assessment=self.assessment, identifiers__unique_id__in=added_str) .exclude(searches=self) .values_list("pk", flat=True) ) @@ -384,9 +368,7 @@ def create_new_references(self, results): logger.debug(f"Completed bulk creation of {len(refs)} references") # re-query to get the objects back with PKs - refs = Reference.objects.filter(assessment=self.assessment, block_id=block_id).order_by( - "pk" - ) + refs = Reference.objects.filter(assessment=self.assessment, block_id=block_id).order_by("pk") # associate identifiers with each ref_searches = [] @@ -460,10 +442,7 @@ def sole_references(self) -> models.QuerySet: @property def date_last_run(self): - if ( - self.source == constants.ReferenceDatabase.PUBMED - and self.search_type == constants.SearchType.SEARCH - ): + if self.source == constants.ReferenceDatabase.PUBMED and self.search_type == constants.SearchType.SEARCH: try: return PubMedQuery.objects.filter(search=self).latest().query_date except Exception: @@ -505,12 +484,7 @@ def references_count(self): @property def references_tagged_count(self): - return ( - self.references.all() - .annotate(tag_count=models.Count("tags")) - .filter(tag_count__gt=0) - .count() - ) + return self.references.all().annotate(tag_count=models.Count("tags")).filter(tag_count__gt=0).count() @property def fraction_tagged(self): @@ -588,9 +562,7 @@ def create_identifiers(self): start_index = int(i * block_size) end_index = min(int(i * block_size + block_size), ids_to_add_len) logger.debug(f"Building from {start_index} to {end_index}") - fetch = pubmed.PubMedFetch( - id_list=ids_to_add[start_index:end_index], retmax=int(block_size) - ) + fetch = 
pubmed.PubMedFetch(id_list=ids_to_add[start_index:end_index], retmax=int(block_size)) identifiers = [] for item in fetch.get_content(): identifiers.append( @@ -622,9 +594,7 @@ def get_len(obj): class Identifiers(models.Model): objects = managers.IdentifiersManager() - unique_id = models.CharField( - max_length=256, db_index=True - ) # DOI has no limit; we make this relatively large + unique_id = models.CharField(max_length=256, db_index=True) # DOI has no limit; we make this relatively large database = models.IntegerField(choices=constants.ReferenceDatabase.choices) content = models.TextField() url = models.URLField(blank=True) @@ -778,9 +748,7 @@ def appendChildren(obj, parents): class ReferenceTags(ItemBase): objects = managers.ReferenceTagsManager() - tag = models.ForeignKey( - ReferenceFilterTag, on_delete=models.CASCADE, related_name="%(app_label)s_%(class)s_items" - ) + tag = models.ForeignKey(ReferenceFilterTag, on_delete=models.CASCADE, related_name="%(app_label)s_%(class)s_items") content_object = models.ForeignKey("Reference", on_delete=models.CASCADE) @@ -789,15 +757,11 @@ class Reference(models.Model): objects = managers.ReferenceManager() - assessment = models.ForeignKey( - "assessment.Assessment", on_delete=models.CASCADE, related_name="references" - ) + assessment = models.ForeignKey("assessment.Assessment", on_delete=models.CASCADE, related_name="references") searches = models.ManyToManyField(Search, blank=False, related_name="references") identifiers = models.ManyToManyField(Identifiers, blank=True, related_name="references") title = models.TextField(blank=True) - authors_short = models.TextField( - blank=True, help_text='Short-text for to display (eg., "Smith et al.")' - ) + authors_short = models.TextField(blank=True, help_text='Short-text for to display (eg., "Smith et al.")') authors = models.TextField( blank=True, help_text='The complete, comma separated authors list, (eg., "Smith JD, Tom JF, McFarlen PD")', @@ -808,8 +772,7 @@ class 
Reference(models.Model): tags = managers.ReferenceFilterTagManager(through=ReferenceTags, blank=True) full_text_url = CustomURLField( blank=True, - help_text="Link to full-text URL from journal site (may require increased " - "access privileges to view)", + help_text="Link to full-text URL from journal site (may require increased " "access privileges to view)", ) created = models.DateTimeField(auto_now_add=True) last_updated = models.DateTimeField(auto_now=True) @@ -868,9 +831,7 @@ def delete_cache(cls, assessment_id: int, delete_study_cache: bool = True): ids = list(cls.objects.filter(assessment_id=assessment_id).values_list("id", flat=True)) SerializerHelper.delete_caches(cls, ids) if delete_study_cache: - apps.get_model("study", "Study").delete_cache( - assessment_id, delete_reference_cache=False - ) + apps.get_model("study", "Study").delete_cache(assessment_id, delete_reference_cache=False) @classmethod def update_hero_metadata(cls, assessment_id: int) -> ResultBase: @@ -992,9 +953,7 @@ def extract_dois(cls, qs, logger=None, full_text: bool = False): n_doi_initial = qs_dois.count() qs_no_doi = ( - qs.only("id") - .exclude(identifiers__database=constants.ReferenceDatabase.DOI) - .prefetch_related("identifiers") + qs.only("id").exclude(identifiers__database=constants.ReferenceDatabase.DOI).prefetch_related("identifiers") ) new_doi_relations = [] @@ -1016,9 +975,7 @@ def extract_dois(cls, qs, logger=None, full_text: bool = False): doi_creates = [] for doi, _ in new_doi_relations: if doi not in existing_dois: - doi_creates.append( - Identifiers(database=constants.ReferenceDatabase.DOI, unique_id=doi) - ) + doi_creates.append(Identifiers(database=constants.ReferenceDatabase.DOI, unique_id=doi)) existing_dois[doi] = -1 # set temporary value until after bulk_create created = Identifiers.objects.bulk_create(doi_creates) @@ -1038,6 +995,7 @@ def extract_dois(cls, qs, logger=None, full_text: bool = False): logger.write( f"{n_doi_initial:8} -> {n_doi:8} references with a 
DOI (+{n_doi-n_doi_initial}; {n_doi/n:.0%} have DOI)" ) - logger.write( - f"{n-n_doi:8} references remaining without a DOI ({(n-n_doi)/n:.0%} missing DOI)" - ) + logger.write(f"{n-n_doi:8} references remaining without a DOI ({(n-n_doi)/n:.0%} missing DOI)") + + +reversion.register(Reference) From 8c406b59948cb5c40da52dbde20133f6877bfb68 Mon Sep 17 00:00:00 2001 From: Daniel Rabstejnek Date: Thu, 18 Aug 2022 16:21:33 -0400 Subject: [PATCH 03/12] Fleshed out audit logs, made improvements --- hawc/apps/assessment/actions/audit.py | 178 ++++++++++++++++++++++++-- hawc/apps/lit/models.py | 86 ++++++++++--- hawc/apps/summary/models.py | 1 + 3 files changed, 236 insertions(+), 29 deletions(-) diff --git a/hawc/apps/assessment/actions/audit.py b/hawc/apps/assessment/actions/audit.py index b168a33df7..123b195737 100644 --- a/hawc/apps/assessment/actions/audit.py +++ b/hawc/apps/assessment/actions/audit.py @@ -1,12 +1,13 @@ from enum import Enum import pandas as pd -from reversion.models import Version from django.contrib.contenttypes.models import ContentType +from reversion.models import Version -from ..models import Assessment from ...common.helper import FlatExport from ...common.serializers import PydanticDrfSerializer +from ..constants import EpiVersion +from ..models import Assessment def versions_by_content_type(app_label: str, model: str, qs=None): @@ -18,13 +19,18 @@ def versions_by_content_type(app_label: str, model: str, qs=None): def versions_by_related_field(related_field: str, related_values: list, qs=None): qs = Version.objects.all() if qs is None else qs ored_values = "|".join([str(id) for id in related_values]) - data_regex = rf"[\"\']fields[\"\']\s*:\s*{{[^}}]*?[\"\']{related_field}[\"\']\s*:\s*({ored_values})\s*," + data_regex = ( + rf"[\"\']fields[\"\']\s*:\s*{{[^}}]*?[\"\']{related_field}[\"\']\s*:\s*({ored_values})\s*," + ) return qs.filter(serialized_data__iregex=data_regex) class AuditType(str, Enum): ASSESSMENT = "assessment" ANIMAL = "animal" + EPI 
= "epi" + ROB = "riskofbias" + SUMMARY = "summary" class AssessmentAuditSerializer(PydanticDrfSerializer): @@ -38,12 +44,14 @@ def get_assessment_queryset(self): return Version.objects.get_for_model(Assessment).filter(object_id=self.assessment.pk) def get_animal_queryset(self): + # get assessment references reference_qs = versions_by_content_type("lit", "reference") reference_qs = versions_by_related_field("assessment", [self.assessment.pk], reference_qs) - # get bioassay studies associated with references + # get reference studies study_qs = versions_by_content_type("study", "study") - study_qs = study_qs.filter(object_id__in=set(reference_qs.values_list("object_id", flat=True))) - study_qs = versions_by_related_field("bioassay", ["true"], study_qs) + study_qs = study_qs.filter( + object_id__in=set(reference_qs.values_list("object_id", flat=True)) + ) # get study experiments experiment_qs = versions_by_content_type("animal", "experiment") experiment_qs = versions_by_related_field( @@ -59,10 +67,162 @@ def get_animal_queryset(self): endpoint_qs = versions_by_related_field( "animal_group", set(animal_group_qs.values_list("object_id", flat=True)), endpoint_qs ) - return experiment_qs | animal_group_qs | endpoint_qs + # get base endpoints + base_endpoint_qs = versions_by_content_type("assessment", "baseendpoint") + base_endpoint_qs = base_endpoint_qs.filter( + object_id__in=set(endpoint_qs.values_list("object_id", flat=True)) + ) + + return experiment_qs | animal_group_qs | endpoint_qs | base_endpoint_qs + + def get_epiv1_queryset(self): + # get assessment references + reference_qs = versions_by_content_type("lit", "reference") + reference_qs = versions_by_related_field("assessment", [self.assessment.pk], reference_qs) + # get reference studies + study_qs = versions_by_content_type("study", "study") + study_qs = study_qs.filter( + object_id__in=set(reference_qs.values_list("object_id", flat=True)) + ) + # get study populations + study_population_qs = 
versions_by_content_type("epi", "studypopulation") + study_population_qs = versions_by_related_field( + "study", set(study_qs.values_list("object_id", flat=True)), study_population_qs + ) + # get study population outcomes + outcome_qs = versions_by_content_type("epi", "outcome") + outcome_qs = versions_by_related_field( + "study_population", + set(study_population_qs.values_list("object_id", flat=True)), + outcome_qs, + ) + # get study population exposures + exposure_qs = versions_by_content_type("epi", "exposure") + exposure_qs = versions_by_related_field( + "study_population", + set(study_population_qs.values_list("object_id", flat=True)), + exposure_qs, + ) + # get outcome results + result_qs = versions_by_content_type("epi", "result") + result_qs = versions_by_related_field( + "outcome", set(outcome_qs.values_list("object_id", flat=True)), result_qs + ) + + return study_population_qs | outcome_qs | exposure_qs | result_qs + + def get_epiv2_queryset(self): + # get assessment references + reference_qs = versions_by_content_type("lit", "reference") + reference_qs = versions_by_related_field("assessment", [self.assessment.pk], reference_qs) + # get reference studies + study_qs = versions_by_content_type("study", "study") + study_qs = study_qs.filter( + object_id__in=set(reference_qs.values_list("object_id", flat=True)) + ) + # get study designs + design_qs = versions_by_content_type("epiv2", "design") + design_qs = versions_by_related_field( + "study", set(study_qs.values_list("object_id", flat=True)), design_qs + ) + # get design chemicals + chemical_qs = versions_by_content_type("epiv2", "chemical") + chemical_qs = versions_by_related_field( + "study", set(design_qs.values_list("object_id", flat=True)), chemical_qs + ) + # get design exposures + exposure_qs = versions_by_content_type("epiv2", "exposure") + exposure_qs = versions_by_related_field( + "design", set(design_qs.values_list("object_id", flat=True)), exposure_qs + ) + # get design exposure levels + 
exposure_level_qs = versions_by_content_type("epiv2", "exposurelevel") + exposure_level_qs = versions_by_related_field( + "design", set(design_qs.values_list("object_id", flat=True)), exposure_level_qs + ) + # get design outcomes + outcome_qs = versions_by_content_type("epiv2", "outcome") + outcome_qs = versions_by_related_field( + "design", set(design_qs.values_list("object_id", flat=True)), outcome_qs + ) + # get design adjustment factors + adjustment_factor_qs = versions_by_content_type("epiv2", "adjustmentfactor") + adjustment_factor_qs = versions_by_related_field( + "design", set(design_qs.values_list("object_id", flat=True)), adjustment_factor_qs + ) + # get design data extractions + data_extraction_qs = versions_by_content_type("epiv2", "dataextraction") + data_extraction_qs = versions_by_related_field( + "design", set(design_qs.values_list("object_id", flat=True)), data_extraction_qs + ) + + return ( + design_qs + | chemical_qs + | exposure_qs + | exposure_level_qs + | outcome_qs + | adjustment_factor_qs + | data_extraction_qs + ) + + def get_riskofbias_queryset(self): + # get assessment domains + domain_qs = versions_by_content_type("riskofbias", "riskofbiasdomain") + domain_qs = versions_by_related_field("assessment", [self.assessment.pk], domain_qs) + # get domain metrics + metric_qs = versions_by_content_type("riskofbias", "riskofbiasmetric") + metric_qs = versions_by_related_field( + "domain", set(domain_qs.values_list("object_id", flat=True)), metric_qs + ) + # get metric scores + score_qs = versions_by_content_type("riskofbias", "riskofbiasscore") + score_qs = versions_by_related_field( + "metric", set(metric_qs.values_list("object_id", flat=True)), score_qs + ) + + return domain_qs | metric_qs | score_qs + + def get_summary_queryset(self): + # get assessment summary tables + summary_table_qs = versions_by_content_type("summary", "summarytable") + summary_table_qs = versions_by_related_field( + "assessment", [self.assessment.pk], summary_table_qs + ) 
+ # get assessment visuals + visual_qs = versions_by_content_type("summary", "visual") + visual_qs = versions_by_related_field("assessment", [self.assessment.pk], visual_qs) + # get assessment data pivots + data_pivot_qs = versions_by_content_type("summary", "datapivot") + data_pivot_qs = versions_by_related_field("assessment", [self.assessment.pk], data_pivot_qs) + # get data pivot uploads + data_pivot_upload_qs = versions_by_content_type("summary", "datapivotupload") + data_pivot_upload_qs = data_pivot_upload_qs.filter( + object_id__in=set(data_pivot_qs.values_list("object_id", flat=True)) + ) + # get data pivot queries + data_pivot_query_qs = versions_by_content_type("summary", "datapivotquery") + data_pivot_query_qs = data_pivot_query_qs.filter( + object_id__in=set(data_pivot_qs.values_list("object_id", flat=True)) + ) + + return ( + summary_table_qs + | visual_qs + | data_pivot_qs + | data_pivot_upload_qs + | data_pivot_query_qs + ) def get_queryset(self): - qs = getattr(self, f"get_{self.type}_queryset")() + audit_type = self.type + if audit_type == AuditType.EPI: + audit_type = ( + audit_type + "v1" + if self.assessment.epi_version == EpiVersion.V1 + else audit_type + "v2" + ) + qs = getattr(self, f"get_{audit_type}_queryset")() return qs.select_related("content_type", "revision") def export(self) -> FlatExport: @@ -78,5 +238,5 @@ def export(self) -> FlatExport: ), columns=["app", "model", "pk", "serialized_data", "user", "date_revised"], ) - export = FlatExport(df=df, filename="merp") + export = FlatExport(df=df, filename=f"{self.assessment}-{self.type}-audit-logs") return export diff --git a/hawc/apps/lit/models.py b/hawc/apps/lit/models.py index c3ceca251c..0bd493e2f9 100644 --- a/hawc/apps/lit/models.py +++ b/hawc/apps/lit/models.py @@ -20,9 +20,9 @@ from django.urls import reverse from django.utils import timezone from django.utils.html import strip_tags +from reversion import revisions as reversion from taggit.models import ItemBase from 
treebeard.mp_tree import MP_Node -from reversion import revisions as reversion from ...refml import topics from ...services.nih import pubmed @@ -197,7 +197,8 @@ class Search(models.Model): ) slug = models.SlugField( verbose_name="URL Name", - help_text="The URL (web address) used to describe this object " "(no spaces or special-characters).", + help_text="The URL (web address) used to describe this object " + "(no spaces or special-characters).", ) description = models.TextField( blank=True, @@ -226,7 +227,9 @@ def __str__(self): @property def is_manual_import(self): # special case- created when assessment is created - return self.search_type == constants.SearchType.IMPORT and self.slug == self.MANUAL_IMPORT_SLUG + return ( + self.search_type == constants.SearchType.IMPORT and self.slug == self.MANUAL_IMPORT_SLUG + ) def clean(self): # unique_together constraint checked above; @@ -235,9 +238,19 @@ def clean(self): errors = {} if self.pk: pk_exclusion["pk"] = self.pk - if Search.objects.filter(assessment=self.assessment, title=self.title).exclude(**pk_exclusion).count() > 0: + if ( + Search.objects.filter(assessment=self.assessment, title=self.title) + .exclude(**pk_exclusion) + .count() + > 0 + ): errors["title"] = ASSESSMENT_UNIQUE_MESSAGE - if Search.objects.filter(assessment=self.assessment, slug=self.slug).exclude(**pk_exclusion).count() > 0: + if ( + Search.objects.filter(assessment=self.assessment, slug=self.slug) + .exclude(**pk_exclusion) + .count() + > 0 + ): errors["slug"] = ASSESSMENT_UNIQUE_MESSAGE if errors: raise ValidationError(errors) @@ -252,7 +265,9 @@ def delete(self, **kwargs): # cascade delete references which no longer relate to any searches orphans = self.sole_references() if orphans.count() > 0: - logger.info(f"Removed {orphans.count()} orphan references from assessment {self.assessment_id}") + logger.info( + f"Removed {orphans.count()} orphan references from assessment {self.assessment_id}" + ) orphans.delete() super().delete(**kwargs) @@ 
-334,7 +349,9 @@ def create_new_references(self, results): # it, just associate the current reference with this search. added_str = [str(id) for id in results["added"]] ref_ids = ( - Reference.objects.filter(assessment=self.assessment, identifiers__unique_id__in=added_str) + Reference.objects.filter( + assessment=self.assessment, identifiers__unique_id__in=added_str + ) .exclude(searches=self) .values_list("pk", flat=True) ) @@ -368,7 +385,9 @@ def create_new_references(self, results): logger.debug(f"Completed bulk creation of {len(refs)} references") # re-query to get the objects back with PKs - refs = Reference.objects.filter(assessment=self.assessment, block_id=block_id).order_by("pk") + refs = Reference.objects.filter(assessment=self.assessment, block_id=block_id).order_by( + "pk" + ) # associate identifiers with each ref_searches = [] @@ -442,7 +461,10 @@ def sole_references(self) -> models.QuerySet: @property def date_last_run(self): - if self.source == constants.ReferenceDatabase.PUBMED and self.search_type == constants.SearchType.SEARCH: + if ( + self.source == constants.ReferenceDatabase.PUBMED + and self.search_type == constants.SearchType.SEARCH + ): try: return PubMedQuery.objects.filter(search=self).latest().query_date except Exception: @@ -484,7 +506,12 @@ def references_count(self): @property def references_tagged_count(self): - return self.references.all().annotate(tag_count=models.Count("tags")).filter(tag_count__gt=0).count() + return ( + self.references.all() + .annotate(tag_count=models.Count("tags")) + .filter(tag_count__gt=0) + .count() + ) @property def fraction_tagged(self): @@ -562,7 +589,9 @@ def create_identifiers(self): start_index = int(i * block_size) end_index = min(int(i * block_size + block_size), ids_to_add_len) logger.debug(f"Building from {start_index} to {end_index}") - fetch = pubmed.PubMedFetch(id_list=ids_to_add[start_index:end_index], retmax=int(block_size)) + fetch = pubmed.PubMedFetch( + 
id_list=ids_to_add[start_index:end_index], retmax=int(block_size) + ) identifiers = [] for item in fetch.get_content(): identifiers.append( @@ -594,7 +623,9 @@ def get_len(obj): class Identifiers(models.Model): objects = managers.IdentifiersManager() - unique_id = models.CharField(max_length=256, db_index=True) # DOI has no limit; we make this relatively large + unique_id = models.CharField( + max_length=256, db_index=True + ) # DOI has no limit; we make this relatively large database = models.IntegerField(choices=constants.ReferenceDatabase.choices) content = models.TextField() url = models.URLField(blank=True) @@ -748,7 +779,9 @@ def appendChildren(obj, parents): class ReferenceTags(ItemBase): objects = managers.ReferenceTagsManager() - tag = models.ForeignKey(ReferenceFilterTag, on_delete=models.CASCADE, related_name="%(app_label)s_%(class)s_items") + tag = models.ForeignKey( + ReferenceFilterTag, on_delete=models.CASCADE, related_name="%(app_label)s_%(class)s_items" + ) content_object = models.ForeignKey("Reference", on_delete=models.CASCADE) @@ -757,11 +790,15 @@ class Reference(models.Model): objects = managers.ReferenceManager() - assessment = models.ForeignKey("assessment.Assessment", on_delete=models.CASCADE, related_name="references") + assessment = models.ForeignKey( + "assessment.Assessment", on_delete=models.CASCADE, related_name="references" + ) searches = models.ManyToManyField(Search, blank=False, related_name="references") identifiers = models.ManyToManyField(Identifiers, blank=True, related_name="references") title = models.TextField(blank=True) - authors_short = models.TextField(blank=True, help_text='Short-text for to display (eg., "Smith et al.")') + authors_short = models.TextField( + blank=True, help_text='Short-text for to display (eg., "Smith et al.")' + ) authors = models.TextField( blank=True, help_text='The complete, comma separated authors list, (eg., "Smith JD, Tom JF, McFarlen PD")', @@ -772,7 +809,8 @@ class Reference(models.Model): 
tags = managers.ReferenceFilterTagManager(through=ReferenceTags, blank=True) full_text_url = CustomURLField( blank=True, - help_text="Link to full-text URL from journal site (may require increased " "access privileges to view)", + help_text="Link to full-text URL from journal site (may require increased " + "access privileges to view)", ) created = models.DateTimeField(auto_now_add=True) last_updated = models.DateTimeField(auto_now=True) @@ -831,7 +869,9 @@ def delete_cache(cls, assessment_id: int, delete_study_cache: bool = True): ids = list(cls.objects.filter(assessment_id=assessment_id).values_list("id", flat=True)) SerializerHelper.delete_caches(cls, ids) if delete_study_cache: - apps.get_model("study", "Study").delete_cache(assessment_id, delete_reference_cache=False) + apps.get_model("study", "Study").delete_cache( + assessment_id, delete_reference_cache=False + ) @classmethod def update_hero_metadata(cls, assessment_id: int) -> ResultBase: @@ -953,7 +993,9 @@ def extract_dois(cls, qs, logger=None, full_text: bool = False): n_doi_initial = qs_dois.count() qs_no_doi = ( - qs.only("id").exclude(identifiers__database=constants.ReferenceDatabase.DOI).prefetch_related("identifiers") + qs.only("id") + .exclude(identifiers__database=constants.ReferenceDatabase.DOI) + .prefetch_related("identifiers") ) new_doi_relations = [] @@ -975,7 +1017,9 @@ def extract_dois(cls, qs, logger=None, full_text: bool = False): doi_creates = [] for doi, _ in new_doi_relations: if doi not in existing_dois: - doi_creates.append(Identifiers(database=constants.ReferenceDatabase.DOI, unique_id=doi)) + doi_creates.append( + Identifiers(database=constants.ReferenceDatabase.DOI, unique_id=doi) + ) existing_dois[doi] = -1 # set temporary value until after bulk_create created = Identifiers.objects.bulk_create(doi_creates) @@ -995,7 +1039,9 @@ def extract_dois(cls, qs, logger=None, full_text: bool = False): logger.write( f"{n_doi_initial:8} -> {n_doi:8} references with a DOI 
(+{n_doi-n_doi_initial}; {n_doi/n:.0%} have DOI)" ) - logger.write(f"{n-n_doi:8} references remaining without a DOI ({(n-n_doi)/n:.0%} missing DOI)") + logger.write( + f"{n-n_doi:8} references remaining without a DOI ({(n-n_doi)/n:.0%} missing DOI)" + ) reversion.register(Reference) diff --git a/hawc/apps/summary/models.py b/hawc/apps/summary/models.py index 1b590ac7b6..549effbbe8 100644 --- a/hawc/apps/summary/models.py +++ b/hawc/apps/summary/models.py @@ -1027,6 +1027,7 @@ def copy_across_assessments(prefilters: str, cw: Dict) -> str: reversion.register(SummaryText) reversion.register(SummaryTable) +reversion.register(DataPivot) reversion.register(DataPivotUpload) reversion.register(DataPivotQuery) reversion.register(Visual) From c7178e29182a644880a27500e31f29b4f2430913 Mon Sep 17 00:00:00 2001 From: Daniel Rabstejnek Date: Wed, 24 Aug 2022 08:42:19 -0400 Subject: [PATCH 04/12] Fix tests --- .../templates/assessment/log_object_list.html | 22 ++++++++++++++++++- hawc/apps/assessment/views.py | 4 +++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/hawc/apps/assessment/templates/assessment/log_object_list.html b/hawc/apps/assessment/templates/assessment/log_object_list.html index b2d7ee112d..d85ce4ba1e 100644 --- a/hawc/apps/assessment/templates/assessment/log_object_list.html +++ b/hawc/apps/assessment/templates/assessment/log_object_list.html @@ -65,5 +65,25 @@

Logs

{% include 'assessment/_logs_note.html' %}

Audit Logs

-{% for log in audit_logs %}{{log.id}}, {% empty %}-{% endfor %} + + + + + + + + + + + + {% for log in audit_logs %} + + + + + + {% endfor %} + + +
SnapshotUserTimestamp
{{log.serialized_data}}{{log.revision.user}}{{log.revision.date_created}}
{% endblock content %} diff --git a/hawc/apps/assessment/views.py b/hawc/apps/assessment/views.py index 8386fc8139..a09a53ece7 100644 --- a/hawc/apps/assessment/views.py +++ b/hawc/apps/assessment/views.py @@ -795,7 +795,9 @@ def get_context_data(self, **kwargs): first_log=self.first_log, assessment=self.assessment, breadcrumbs=self.get_breadcrumbs(), - audit_logs=Version.objects.get_for_object(self.first_log.content_object), + audit_logs=Version.objects.filter( + content_type=self.first_log.content_type, object_id=self.first_log.object_id + ).select_related("revision"), ) return context From 3b44ee960ec76cd0e5dfd7146c7ab6132464e298 Mon Sep 17 00:00:00 2001 From: Daniel Rabstejnek Date: Mon, 29 Aug 2022 09:34:18 -0400 Subject: [PATCH 05/12] Add downloads, combine reversion logs with object log page --- hawc/apps/assessment/managers.py | 96 +++++++++++++++++++ hawc/apps/assessment/models.py | 2 + .../assessment/assessment_log_list.html | 13 ++- .../templates/assessment/log_object_list.html | 39 ++------ hawc/apps/assessment/views.py | 54 +++++++---- 5 files changed, 151 insertions(+), 53 deletions(-) diff --git a/hawc/apps/assessment/managers.py b/hawc/apps/assessment/managers.py index e01ed5212f..26298eadc7 100644 --- a/hawc/apps/assessment/managers.py +++ b/hawc/apps/assessment/managers.py @@ -1,7 +1,10 @@ import json +from datetime import timedelta +from typing import Union from django.contrib.contenttypes.models import ContentType from django.db.models import Q, QuerySet +from reversion.models import Version from ..common.helper import HAWCDjangoJSONEncoder from ..common.models import BaseManager @@ -140,3 +143,96 @@ class TimeSpentEditingManager(BaseManager): class DatasetManager(BaseManager): assessment_relation = "assessment" + + +class LogManager(BaseManager): + assessment_relation = "assessment" + + def get_object_audit(self, content_type: Union[ContentType, int], object_id: int) -> list[dict]: + """ + Combines information from HAWC's internal logs and 
reversion logs for a more complete audit. + Matching is attempted between these two log types to account for same operations. + + Args: + content_type (Union[ContentType, int]): Content type of interested object. + object_id (int): ID of interested object. + + Returns: + list[dict]: Serialized logs with message, snapshot, user, and date created. + """ + logs = list(self.filter(content_type=content_type, object_id=object_id)) + versions = list( + Version.objects.filter(content_type=content_type, object_id=object_id).select_related( + "revision" + ) + ) + + audit = [] + + while logs and versions: + # if there are only versions left, append them + if not logs: + audit.extend( + [ + { + "message": "", + "snapshot": version.serialized_data, + "user": version.revision.user, + "created": version.revision.created_date, + } + for version in versions + ] + ) + break + # if there are only logs left, append them + if not versions: + audit.extend( + [ + { + "message": log.message, + "snapshot": "", + "user": log.user, + "created": log.created, + } + for log in logs + ] + ) + break + # if log and version are close enough in time, + # assume they are from the same operation + diff = abs(logs[0].created - versions[0].revision.date_created) + if diff < timedelta(minutes=1): + log = logs.pop(0) + version = versions.pop(0) + audit.append( + { + "message": log.message, + "snapshot": version.serialized_data, + "user": log.user or version.revision.user, + "created": log.created, + } + ) + # if log occurs earlier than version, append log + elif logs[0].created <= versions[0].revision.date_created: + log = logs.pop(0) + audit.append( + { + "message": log.message, + "snapshot": "", + "user": log.user, + "created": log.created, + } + ) + # if version occurs earlier than log, append version + else: + version = versions.pop(0) + audit.append( + { + "message": "", + "snapshot": version.serialized_data, + "user": version.revision.user, + "created": version.revision.created_date, + } + ) + + 
return audit diff --git a/hawc/apps/assessment/models.py b/hawc/apps/assessment/models.py index bf6ee7d331..14a0e4dad1 100644 --- a/hawc/apps/assessment/models.py +++ b/hawc/apps/assessment/models.py @@ -914,6 +914,8 @@ def set_message(cls, model, text: str) -> "Communication": class Log(models.Model): + objects = managers.LogManager() + assessment = models.ForeignKey( Assessment, blank=True, null=True, related_name="logs", on_delete=models.CASCADE ) diff --git a/hawc/apps/assessment/templates/assessment/assessment_log_list.html b/hawc/apps/assessment/templates/assessment/assessment_log_list.html index 78645154a6..773daab34f 100644 --- a/hawc/apps/assessment/templates/assessment/assessment_log_list.html +++ b/hawc/apps/assessment/templates/assessment/assessment_log_list.html @@ -5,7 +5,18 @@ {% block content %} -

{{assessment}} Logs

+

{{assessment}} Logs

+
diff --git a/hawc/apps/assessment/templates/assessment/log_object_list.html b/hawc/apps/assessment/templates/assessment/log_object_list.html index d85ce4ba1e..f9ac027c4f 100644 --- a/hawc/apps/assessment/templates/assessment/log_object_list.html +++ b/hawc/apps/assessment/templates/assessment/log_object_list.html @@ -2,7 +2,7 @@ {% block content %} -

{{first_log}}s

+

{{object_name}} Logs

@@ -14,11 +14,11 @@

{{first_log}}s

@@ -26,7 +26,7 @@

{{first_log}}s

- + @@ -43,15 +43,17 @@

Logs

+ - {% for obj in page_obj %} + {% for obj in object_list %} + @@ -60,30 +62,5 @@

Logs

Item - {% with first_log.content_object.get_absolute_url as absolute_url %} + {% with object.get_absolute_url as absolute_url %} {% if absolute_url %} - {{first_log.get_object_name}} + {{object_name}} {% else %} - {{first_log.get_object_name}} + {{object_name}} {% endif %} {% endwith %}
Data type{{first_log.content_type}}{{content_type}}
MessageSnapshot User Timestamp
{{obj.message}}{{obj.snapshot}} {{obj.user}} {{obj.created}}
-{% include "includes/paginator.html" %} - {% include 'assessment/_logs_note.html' %} - -

Audit Logs

- - - - - - - - - - - - {% for log in audit_logs %} - - - - - - {% endfor %} - - -
SnapshotUserTimestamp
{{log.serialized_data}}{{log.revision.user}}{{log.revision.date_created}}
{% endblock content %} diff --git a/hawc/apps/assessment/views.py b/hawc/apps/assessment/views.py index a09a53ece7..0b8c9e1552 100644 --- a/hawc/apps/assessment/views.py +++ b/hawc/apps/assessment/views.py @@ -8,7 +8,7 @@ from django.contrib.auth.mixins import UserPassesTestMixin from django.contrib.contenttypes.models import ContentType from django.core.cache import cache -from django.core.exceptions import PermissionDenied +from django.core.exceptions import ObjectDoesNotExist, PermissionDenied from django.db import transaction from django.db.models import Count from django.http import ( @@ -27,7 +27,6 @@ from django.views.decorators.cache import cache_page from django.views.generic import DetailView, FormView, ListView, TemplateView, View from django.views.generic.edit import CreateView -from reversion.models import Version from ...services.utils.rasterize import get_styles_svg_definition from ..common.crumbs import Breadcrumb @@ -766,20 +765,33 @@ def get_breadcrumbs(self) -> List[Breadcrumb]: class LogObjectList(ListView): template_name = "assessment/log_object_list.html" model = models.Log - paginate_by = 25 - def get_queryset(self): - qs = self.model.objects.filter(**self.kwargs) - if qs.count() == 0: + def dispatch(self, request, *args, **kwargs): + try: + self.ct = ContentType.objects.get_for_id(self.kwargs["content_type"]) + except ObjectDoesNotExist: raise Http404() - self.first_log = qs[0] - self.assessment = qs[0].assessment - if not qs[0].user_can_view(self.request.user): - raise PermissionDenied() - return qs + try: + self.object = self.ct.get_object_for_this_type(pk=self.kwargs["object_id"]) + except ObjectDoesNotExist: + self.object = None + + if not hasattr(self.object, "get_assessment"): + self.assessment = None + if not request.user.is_staff: + raise PermissionDenied() + else: + self.assessment = self.object.get_assessment() + if not self.assessment.user_is_team_member_or_higher(request.user): + raise PermissionDenied() + + return 
super().dispatch(request, *args, **kwargs) + + def get_queryset(self): + return self.model.objects.get_object_audit(**self.kwargs) def get_breadcrumbs(self) -> List[Breadcrumb]: - crumbs = Breadcrumb.build_crumbs( + return Breadcrumb.build_crumbs( self.request.user, "Logs", [ @@ -787,18 +799,18 @@ def get_breadcrumbs(self) -> List[Breadcrumb]: Breadcrumb(name="Logs", url=self.assessment.get_assessment_logs_url()), ], ) - return crumbs def get_context_data(self, **kwargs): context = super().get_context_data(**kwargs) - context.update( - first_log=self.first_log, - assessment=self.assessment, - breadcrumbs=self.get_breadcrumbs(), - audit_logs=Version.objects.filter( - content_type=self.first_log.content_type, object_id=self.first_log.object_id - ).select_related("revision"), - ) + context.update(assessment=self.assessment, object=self.object, content_type=self.ct) + if self.object: + context["object_name"] = str(self.object) + else: + context[ + "object_name" + ] = f"{self.ct.app_label}.{self.ct.model} #{self.kwargs['object_id']}" + if self.assessment: + context["breadcrumbs"] = self.get_breadcrumbs() return context From c577d3f2cee0ebb6d54155394236a2c23cb082fe Mon Sep 17 00:00:00 2001 From: Daniel Rabstejnek Date: Mon, 29 Aug 2022 13:14:11 -0400 Subject: [PATCH 06/12] Fix test, simplify view --- .../templates/assessment/log_object_list.html | 14 ++++----- hawc/apps/assessment/views.py | 31 +++++++------------ 2 files changed, 18 insertions(+), 27 deletions(-) diff --git a/hawc/apps/assessment/templates/assessment/log_object_list.html b/hawc/apps/assessment/templates/assessment/log_object_list.html index f9ac027c4f..b0b4f154d6 100644 --- a/hawc/apps/assessment/templates/assessment/log_object_list.html +++ b/hawc/apps/assessment/templates/assessment/log_object_list.html @@ -1,8 +1,8 @@ -{% extends assessment|yesno:"assessment-rooted.html,crumbless.html" %} +{% extends first_log.assessment|yesno:"assessment-rooted.html,crumbless.html" %} {% block content %} -

{{object_name}} Logs

+

{{first_log}}s

@@ -14,11 +14,11 @@

{{object_name}} Logs

@@ -26,12 +26,12 @@

{{object_name}} Logs

- + - + diff --git a/hawc/apps/assessment/views.py b/hawc/apps/assessment/views.py index 0b8c9e1552..cb09465fbb 100644 --- a/hawc/apps/assessment/views.py +++ b/hawc/apps/assessment/views.py @@ -768,22 +768,19 @@ class LogObjectList(ListView): def dispatch(self, request, *args, **kwargs): try: - self.ct = ContentType.objects.get_for_id(self.kwargs["content_type"]) + content_type = ContentType.objects.get_for_id(kwargs["content_type"]) except ObjectDoesNotExist: raise Http404() - try: - self.object = self.ct.get_object_for_this_type(pk=self.kwargs["object_id"]) - except ObjectDoesNotExist: - self.object = None + first_log = self.model.objects.filter(**self.kwargs).first() + if not first_log: + first_log = self.model(content_type=content_type, object_id=kwargs["object_id"]) + if hasattr(first_log.content_object, "get_assessment"): + first_log.assessment = first_log.content_object.get_assessment() + if not first_log.user_can_view(request.user): + raise PermissionDenied() - if not hasattr(self.object, "get_assessment"): - self.assessment = None - if not request.user.is_staff: - raise PermissionDenied() - else: - self.assessment = self.object.get_assessment() - if not self.assessment.user_is_team_member_or_higher(request.user): - raise PermissionDenied() + self.first_log = first_log + self.assessment = first_log.assessment return super().dispatch(request, *args, **kwargs) @@ -802,13 +799,7 @@ def get_breadcrumbs(self) -> List[Breadcrumb]: def get_context_data(self, **kwargs): context = super().get_context_data(**kwargs) - context.update(assessment=self.assessment, object=self.object, content_type=self.ct) - if self.object: - context["object_name"] = str(self.object) - else: - context[ - "object_name" - ] = f"{self.ct.app_label}.{self.ct.model} #{self.kwargs['object_id']}" + context.update(assessment=self.assessment, first_log=self.first_log) if self.assessment: context["breadcrumbs"] = self.get_breadcrumbs() return context From 04e88a687470f5c30681b57b6b596c9b0befc5c2 
Mon Sep 17 00:00:00 2001 From: Andy Shapiro Date: Tue, 13 Sep 2022 22:15:35 -0400 Subject: [PATCH 07/12] add fallback --- hawc/apps/common/tasks.py | 12 ++++++++++++ hawc/main/celery.py | 5 +++++ 2 files changed, 17 insertions(+) diff --git a/hawc/apps/common/tasks.py b/hawc/apps/common/tasks.py index b4bc2b8831..0ef660fdf5 100644 --- a/hawc/apps/common/tasks.py +++ b/hawc/apps/common/tasks.py @@ -4,6 +4,7 @@ from celery.utils.log import get_task_logger from django.conf import settings from django.contrib.auth import get_user_model +from django.core.management import call_command from django.utils.timezone import now from rest_framework.authtoken.models import Token @@ -30,3 +31,14 @@ def destroy_old_api_tokens(): qs = Token.objects.filter(created__lt=deletion_date) logger.info(f"Destroying {qs.count()} old tokens") qs.delete() + + +@shared_task +def create_initial_revisions(): + """ + Most apis/views should create initial revisions; however if we're importing data from + other non-standard sources and we may have missed the initial revision, this task will ensure + that the revision has been created. We would lose the user who created it, but this is our + best effort as a fallback mechanism. 
+ """ + call_command("createinitialrevisions", "reversion") diff --git a/hawc/main/celery.py b/hawc/main/celery.py index 019c630b4c..850efa4093 100644 --- a/hawc/main/celery.py +++ b/hawc/main/celery.py @@ -31,6 +31,11 @@ def debug_task(self): "schedule": timedelta(minutes=10), "options": {"expires": timedelta(minutes=10).total_seconds()}, }, + "create-initial-revisions": { + "task": "hawc.apps.common.tasks.create_initial_revisions", + "schedule": timedelta(days=1), + "options": {"expires": timedelta(days=1).total_seconds()}, + }, "lit-schedule_topic_model_reruns-10-min": { "task": "hawc.apps.lit.tasks.schedule_topic_model_reruns", "schedule": timedelta(minutes=10), From d349edc957cf242f72f7289d0ebdde089262dfbc Mon Sep 17 00:00:00 2001 From: Andy Shapiro Date: Thu, 22 Sep 2022 14:47:35 -0400 Subject: [PATCH 08/12] update excel exports --- hawc/apps/assessment/actions/audit.py | 103 +++++++++++------- hawc/apps/assessment/models.py | 1 + .../assessment/assessment_log_list.html | 5 +- 3 files changed, 65 insertions(+), 44 deletions(-) diff --git a/hawc/apps/assessment/actions/audit.py b/hawc/apps/assessment/actions/audit.py index 123b195737..9067f4bce8 100644 --- a/hawc/apps/assessment/actions/audit.py +++ b/hawc/apps/assessment/actions/audit.py @@ -1,7 +1,9 @@ from enum import Enum +from typing import Optional import pandas as pd from django.contrib.contenttypes.models import ContentType +from django.db.models import QuerySet from reversion.models import Version from ...common.helper import FlatExport @@ -10,13 +12,16 @@ from ..models import Assessment -def versions_by_content_type(app_label: str, model: str, qs=None): - qs = Version.objects.all() if qs is None else qs +def versions_by_content_type(app_label: str, model: str, qs: Optional[QuerySet] = None) -> QuerySet: + if qs is None: + qs = Version.objects.all() ct = ContentType.objects.get(app_label=app_label, model=model) return qs.filter(content_type=ct) -def versions_by_related_field(related_field: str, 
related_values: list, qs=None): +def versions_by_related_field( + related_field: str, related_values: list, qs: Optional[QuerySet] = None +) -> QuerySet: qs = Version.objects.all() if qs is None else qs ored_values = "|".join([str(id) for id in related_values]) data_regex = ( @@ -30,7 +35,6 @@ class AuditType(str, Enum): ANIMAL = "animal" EPI = "epi" ROB = "riskofbias" - SUMMARY = "summary" class AssessmentAuditSerializer(PydanticDrfSerializer): @@ -41,7 +45,59 @@ class Config: arbitrary_types_allowed = True def get_assessment_queryset(self): - return Version.objects.get_for_model(Assessment).filter(object_id=self.assessment.pk) + # assessments + assess_qs = Version.objects.get_for_model(Assessment).filter(object_id=self.assessment.pk) + + # get assessment attachments + attach_qs = versions_by_content_type("assessment", "attachment") + attach_qs = versions_by_related_field( + "content_type", [ContentType.objects.get_for_model(Assessment).id], attach_qs + ) + attach_qs = versions_by_related_field("object_id", [self.assessment.pk], attach_qs) + + # get assessment datasets + dataset_qs = versions_by_content_type("assessment", "dataset") + dataset_qs = versions_by_related_field("assessment", [self.assessment.pk], dataset_qs) + + # get assessment dataset revisions + dataset_revision_qs = versions_by_content_type("assessment", "datasetrevision") + dataset_revision_qs = versions_by_related_field( + "dataset", set(dataset_qs.values_list("object_id", flat=True)), dataset_revision_qs + ) + + # get assessment summary tables + summary_table_qs = versions_by_content_type("summary", "summarytable") + summary_table_qs = versions_by_related_field( + "assessment", [self.assessment.pk], summary_table_qs + ) + # get assessment visuals + visual_qs = versions_by_content_type("summary", "visual") + visual_qs = versions_by_related_field("assessment", [self.assessment.pk], visual_qs) + # get assessment data pivots + data_pivot_qs = versions_by_content_type("summary", "datapivot") + 
data_pivot_qs = versions_by_related_field("assessment", [self.assessment.pk], data_pivot_qs) + # get data pivot uploads + data_pivot_upload_qs = versions_by_content_type("summary", "datapivotupload") + data_pivot_upload_qs = data_pivot_upload_qs.filter( + object_id__in=set(data_pivot_qs.values_list("object_id", flat=True)) + ) + # get data pivot queries + data_pivot_query_qs = versions_by_content_type("summary", "datapivotquery") + data_pivot_query_qs = data_pivot_query_qs.filter( + object_id__in=set(data_pivot_qs.values_list("object_id", flat=True)) + ) + + return ( + assess_qs + | attach_qs + | dataset_qs + | dataset_revision_qs + | summary_table_qs + | visual_qs + | data_pivot_qs + | data_pivot_upload_qs + | data_pivot_query_qs + ) def get_animal_queryset(self): # get assessment references @@ -183,45 +239,10 @@ def get_riskofbias_queryset(self): return domain_qs | metric_qs | score_qs - def get_summary_queryset(self): - # get assessment summary tables - summary_table_qs = versions_by_content_type("summary", "summarytable") - summary_table_qs = versions_by_related_field( - "assessment", [self.assessment.pk], summary_table_qs - ) - # get assessment visuals - visual_qs = versions_by_content_type("summary", "visual") - visual_qs = versions_by_related_field("assessment", [self.assessment.pk], visual_qs) - # get assessment data pivots - data_pivot_qs = versions_by_content_type("summary", "datapivot") - data_pivot_qs = versions_by_related_field("assessment", [self.assessment.pk], data_pivot_qs) - # get data pivot uploads - data_pivot_upload_qs = versions_by_content_type("summary", "datapivotupload") - data_pivot_upload_qs = data_pivot_upload_qs.filter( - object_id__in=set(data_pivot_qs.values_list("object_id", flat=True)) - ) - # get data pivot queries - data_pivot_query_qs = versions_by_content_type("summary", "datapivotquery") - data_pivot_query_qs = data_pivot_query_qs.filter( - object_id__in=set(data_pivot_qs.values_list("object_id", flat=True)) - ) - - return ( - 
summary_table_qs - | visual_qs - | data_pivot_qs - | data_pivot_upload_qs - | data_pivot_query_qs - ) - def get_queryset(self): audit_type = self.type if audit_type == AuditType.EPI: - audit_type = ( - audit_type + "v1" - if self.assessment.epi_version == EpiVersion.V1 - else audit_type + "v2" - ) + audit_type = "epiv1" if self.assessment.epi_version == EpiVersion.V1 else "epiv2" qs = getattr(self, f"get_{audit_type}_queryset")() return qs.select_related("content_type", "revision") diff --git a/hawc/apps/assessment/models.py b/hawc/apps/assessment/models.py index 14a0e4dad1..effabb9021 100644 --- a/hawc/apps/assessment/models.py +++ b/hawc/apps/assessment/models.py @@ -1041,6 +1041,7 @@ def rendered_page( reversion.register(DSSTox) reversion.register(Assessment) +reversion.register(Attachment) reversion.register(EffectTag) reversion.register(Species) reversion.register(Strain) diff --git a/hawc/apps/assessment/templates/assessment/assessment_log_list.html b/hawc/apps/assessment/templates/assessment/assessment_log_list.html index 4cb31ef6b9..0fe1a6b0da 100644 --- a/hawc/apps/assessment/templates/assessment/assessment_log_list.html +++ b/hawc/apps/assessment/templates/assessment/assessment_log_list.html @@ -9,12 +9,11 @@

{{assessment}} Logs

From 492375540bc0a2f8426bff2a828291386d0c7c5c Mon Sep 17 00:00:00 2001 From: Andy Shapiro Date: Thu, 22 Sep 2022 15:03:40 -0400 Subject: [PATCH 09/12] update permissions --- hawc/apps/assessment/api.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hawc/apps/assessment/api.py b/hawc/apps/assessment/api.py index 7510b2608e..523fef8760 100644 --- a/hawc/apps/assessment/api.py +++ b/hawc/apps/assessment/api.py @@ -460,9 +460,11 @@ def endpoints(self, request, pk: int): return Response({"name": instance.name, "id": instance.id, "items": items}) - @action(detail=True, url_path=r"audit/(?P[\w]+)", renderer_classes=PandasRenderers) - def audit(self, request: Request, pk: int, type: str): + @action(detail=True, url_path=r"logs/(?P[\w]+)", renderer_classes=PandasRenderers) + def logs(self, request: Request, pk: int, type: str): instance = self.get_object() + if not instance.user_is_team_member_or_higher(self.request.user): + raise PermissionDenied() serializer = AssessmentAuditSerializer.from_drf(data=dict(assessment=instance, type=type)) export = serializer.export() return Response(export) From 1eb65fe79c0fb89dbd7260c918e38216b300630b Mon Sep 17 00:00:00 2001 From: Andy Shapiro Date: Thu, 22 Sep 2022 22:47:04 -0400 Subject: [PATCH 10/12] move audit_logs to animal app --- .../templates/animal/animalgroup_detail.html | 4 ++++ .../animal/templates/animal/endpoint_detail.html | 3 +++ .../templates/animal/experiment_detail.html | 3 +++ hawc/apps/common/templatetags/bs4.py | 9 --------- hawc/apps/common/templatetags/hawc.py | 15 +++++++++++++++ hawc/apps/study/templates/study/study_detail.html | 2 -- 6 files changed, 25 insertions(+), 11 deletions(-) create mode 100644 hawc/apps/common/templatetags/hawc.py diff --git a/hawc/apps/animal/templates/animal/animalgroup_detail.html b/hawc/apps/animal/templates/animal/animalgroup_detail.html index 2ca095d8dc..dac1b24a1f 100644 --- a/hawc/apps/animal/templates/animal/animalgroup_detail.html +++ 
b/hawc/apps/animal/templates/animal/animalgroup_detail.html @@ -1,5 +1,7 @@ {% extends 'assessment-rooted.html' %} +{% load hawc %} + {% block content %} {% if obj_perms.edit %}
' ) - - -@register.simple_tag() -def audit_url(object): - # todo move somewhere else - ct = ContentType.objects.get_for_model(object.__class__) - return reverse("assessment:log_object_list", args=(ct.pk, object.pk)) diff --git a/hawc/apps/common/templatetags/hawc.py b/hawc/apps/common/templatetags/hawc.py new file mode 100644 index 0000000000..196d54977e --- /dev/null +++ b/hawc/apps/common/templatetags/hawc.py @@ -0,0 +1,15 @@ +""" +HAWC helper methods +""" +from django import template +from django.contrib.contenttypes.models import ContentType +from django.urls import reverse +from django.utils.safestring import mark_safe + +register = template.Library() + + +@register.simple_tag() +def audit_url(object): + ct = ContentType.objects.get_for_model(object.__class__) + return mark_safe(reverse("assessment:log_object_list", args=(ct.pk, object.pk))) diff --git a/hawc/apps/study/templates/study/study_detail.html b/hawc/apps/study/templates/study/study_detail.html index abceb5b485..7bb6436d37 100644 --- a/hawc/apps/study/templates/study/study_detail.html +++ b/hawc/apps/study/templates/study/study_detail.html @@ -1,7 +1,6 @@ {% extends 'assessment-rooted.html' %} {% load hastext %} -{% load bs4 %} {% block content %}

{{object}}

@@ -19,7 +18,6 @@

{{object}}

{% if obj_perms.edit_assessment %} {{ object.editable|yesno:"Lock study,Unlock study" }} - Audit log {% endif %} {% if object.editable %} From 563c2062c6aae71e9f3aa837b08862e00cc47e9d Mon Sep 17 00:00:00 2001 From: Andy Shapiro Date: Thu, 22 Sep 2022 22:47:22 -0400 Subject: [PATCH 11/12] restyle log list --- .../assessment/assessment_log_list.html | 8 +++--- .../templates/assessment/log_object_list.html | 27 ++++++++++++------- hawc/apps/assessment/urls.py | 2 +- 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/hawc/apps/assessment/templates/assessment/assessment_log_list.html b/hawc/apps/assessment/templates/assessment/assessment_log_list.html index 0fe1a6b0da..2f8d87e8ba 100644 --- a/hawc/apps/assessment/templates/assessment/assessment_log_list.html +++ b/hawc/apps/assessment/templates/assessment/assessment_log_list.html @@ -10,10 +10,10 @@

{{assessment}} Logs

Actions diff --git a/hawc/apps/assessment/templates/assessment/log_object_list.html b/hawc/apps/assessment/templates/assessment/log_object_list.html index b0b4f154d6..f3029a0706 100644 --- a/hawc/apps/assessment/templates/assessment/log_object_list.html +++ b/hawc/apps/assessment/templates/assessment/log_object_list.html @@ -40,25 +40,32 @@

{{first_log}}s

Logs

Item - {% with object.get_absolute_url as absolute_url %} + {% with first_log.content_object.get_absolute_url as absolute_url %} {% if absolute_url %} - {{object_name}} + {{first_log.get_object_name}} {% else %} - {{object_name}} + {{first_log.get_object_name}} {% endif %} {% endwith %}
Data type{{content_type}}{{first_log.content_type}}
Assessment{{assessment}}{{first_log.assessment}}

{text}

+ + + + + - - - + + - - {% for obj in object_list %} + {% for event in object_list %} - - - - + + + + + {% empty %} + + {% endfor %} -
MessageSnapshotUser TimestampUserMessage
{{obj.message}}{{obj.snapshot}}{{obj.user}}{{obj.created}}{{event.created}}{{event.user}}{% if event.message %}

{{event.message}}

{% endif %}{{event.snapshot}}
+ No logs available; see details below. +
diff --git a/hawc/apps/assessment/urls.py b/hawc/apps/assessment/urls.py index d82dd73c72..b78f5b9ef5 100644 --- a/hawc/apps/assessment/urls.py +++ b/hawc/apps/assessment/urls.py @@ -38,7 +38,7 @@ path("/clear-cache/", views.AssessmentClearCache.as_view(), name="clear_cache"), # log object path( - "//log/", + "log///", views.LogObjectList.as_view(), name="log_object_list", ), From e92a8462b2b61799069904110b4583fdd354ee21 Mon Sep 17 00:00:00 2001 From: Andy Shapiro Date: Thu, 22 Sep 2022 22:55:07 -0400 Subject: [PATCH 12/12] update event comparisons --- hawc/apps/assessment/managers.py | 168 +++++++++++++++++-------------- 1 file changed, 92 insertions(+), 76 deletions(-) diff --git a/hawc/apps/assessment/managers.py b/hawc/apps/assessment/managers.py index 26298eadc7..3741102f60 100644 --- a/hawc/apps/assessment/managers.py +++ b/hawc/apps/assessment/managers.py @@ -1,6 +1,6 @@ import json -from datetime import timedelta -from typing import Union +from datetime import datetime, timedelta +from typing import Any, NamedTuple, Union from django.contrib.contenttypes.models import ContentType from django.db.models import Q, QuerySet @@ -145,10 +145,59 @@ class DatasetManager(BaseManager): assessment_relation = "assessment" +class Event(NamedTuple): + """A potentially collapsed changed event between Logs and Reversions""" + + message: str + snapshot: str + user: Any + created: datetime + + +class EventPair: + """An Event Pair Comparison between a Log and Reversion""" + + def __init__(self, item_1, item_2=None): + """Build an event pair, or at least one event. 
+ + Args: + item_1 (Union[Log, Version]): The first item in the pair + item_2 (Union[Log, Version], optional): The optional second item in the pair + """ + self.log = None + self.version = None + if isinstance(item_1, Version): + self.version = item_1 + else: + self.log = item_1 + if item_2: + if isinstance(item_2, Version): + self.version = item_2 + else: + self.log = item_2 + + def collapsable(self) -> bool: + # should the two items be collapsed? + if self.log is None or self.version is None: + return False + return abs(self.log.created - self.version.revision.date_created) < timedelta(seconds=10) + + def output(self) -> Event: + # Return a collapsed event + return Event( + message=self.log.message if self.log else "", + snapshot=self.version.serialized_data if self.version else "", + user=self.log.user if self.log else self.version.revision.user, + created=self.log.created if self.log else self.version.revision.date_created, + ) + + class LogManager(BaseManager): assessment_relation = "assessment" - def get_object_audit(self, content_type: Union[ContentType, int], object_id: int) -> list[dict]: + def get_object_audit( + self, content_type: Union[ContentType, int], object_id: int + ) -> list[Event]: """ Combines information from HAWC's internal logs and reversion logs for a more complete audit. Matching is attempted between these two log types to account for same operations. @@ -158,81 +207,48 @@ def get_object_audit(self, content_type: Union[ContentType, int], object_id: int object_id (int): ID of interested object. Returns: - list[dict]: Serialized logs with message, snapshot, user, and date created. + list[Event]: Serialized logs with message, snapshot, user, and date created.
""" - logs = list(self.filter(content_type=content_type, object_id=object_id)) - versions = list( - Version.objects.filter(content_type=content_type, object_id=object_id).select_related( - "revision" - ) + # sort all events in descending order + logs = ( + self.filter(content_type=content_type, object_id=object_id) + .select_related("user") + .order_by("id") + ) + versions = ( + Version.objects.filter(content_type=content_type, object_id=object_id) + .select_related("revision__user") + .order_by("id") + ) + events = list(logs) + list(versions) + events.sort( + key=lambda el: el.created if isinstance(el, self.model) else el.revision.date_created, + reverse=True, ) - audit = [] - - while logs and versions: - # if there are only versions left, append them - if not logs: - audit.extend( - [ - { - "message": "", - "snapshot": version.serialized_data, - "user": version.revision.user, - "created": version.revision.created_date, - } - for version in versions - ] - ) - break - # if there are only logs left, append them - if not versions: - audit.extend( - [ - { - "message": log.message, - "snapshot": "", - "user": log.user, - "created": log.created, - } - for log in logs - ] - ) + # build event aggregations + aggregations = [] + used_next_event = None + for i, this_event in enumerate(events): + # skip current item if we've already used it + if this_event is used_next_event: + continue + + # try to get next event to compare; if we don't have one, add the current + try: + next_event = events[i + 1] + except IndexError: + aggregations.append(EventPair(this_event).output()) + break - # if log and version are close enough in time, - # assume they are from the same operation - diff = abs(logs[0].created - versions[0].revision.date_created) - if diff < timedelta(minutes=1): - log = logs.pop(0) - version = versions.pop(0) - audit.append( - { - "message": log.message, - "snapshot": version.serialized_data, - "user": log.user or version.revision.user, - "created": log.created, - } - ) -
# if log occurs earlier than version, append log - elif logs[0].created <= versions[0].revision.date_created: - log = logs.pop(0) - audit.append( - { - "message": log.message, - "snapshot": "", - "user": log.user, - "created": log.created, - } - ) - # if version occurs earlier than log, append version + + # run pair comparisons + pair = EventPair(this_event, next_event) + if pair.collapsable(): + # add both; mark second as consumed + aggregations.append(pair.output()) + used_next_event = next_event else: - version = versions.pop(0) - audit.append( - { - "message": "", - "snapshot": version.serialized_data, - "user": version.revision.user, - "created": version.revision.created_date, - } - ) - - return audit + # just add one + aggregations.append(EventPair(this_event).output()) + + return aggregations