diff --git a/eatsmart/locations/wake/__init__.py b/eatsmart/locations/wake/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/eatsmart/locations/wake/api.py b/eatsmart/locations/wake/api.py new file mode 100644 index 0000000..80c21b9 --- /dev/null +++ b/eatsmart/locations/wake/api.py @@ -0,0 +1,159 @@ +import csv +import os +import io +import logging +import requests +import zipfile +import tempfile +import datetime +import time +import pprint + + +from eatsmart.locations.base import Importer +from eatsmart.locations.wake import forms +from inspections.models import Establishment, Inspection, Violation + + +logger = logging.getLogger(__name__) + + +class WakeCounty(object): + + url = "http://www.wakegov.com/data/Documents/WCRestaurantInspections.zip" + + def download_and_unzip_data(self, destination): + logger.debug("Requesting {}".format(self.url)) + response = requests.get(self.url) + archive = zipfile.ZipFile(io.BytesIO(response.content)) + logger.debug("Extracting archive into {}".format(str(destination))) + archive.extractall(path=destination) + + def import_lives(self): + with tempfile.TemporaryDirectory(prefix='wake') as destination: + logger.debug("Created temp directory {}".format(destination)) + self.download_and_unzip_data(destination) + businesses = os.path.join(destination, 'businesses.csv') + # if os.path.exists(businesses): + # BusinessImporter().run(path=businesses) + inspections = os.path.join(destination, 'inspections.csv') + # Inspection.objects.filter(establishment__county='Wake').delete() + # if os.path.exists(inspections): + # InspectionImporter().run(path=inspections) + violations = os.path.join(destination, 'violations.csv') + if os.path.exists(violations): + ViolationImporter().run(path=violations) + + +class WakeCSVImporter(Importer): + "Special importer to open CSV files using the Windows encoding" + + def run(self, path): + logger.debug("Importing {}".format(path)) + with open(path, 'r', encoding='ISO-8859-1') as csv_file: + reader = csv.DictReader(csv_file) + self.fetch(reader) + + def fetch(self, data, **kwargs): + "Primay import workflow with error handling" + objects = [] + start_time = time.time() + for index, api in enumerate(data): + row = self.map_fields(api=api, **kwargs) + form = self.Form(dict(row)) + if not form.is_valid(): + errors = {'model': self.Model._meta.object_name, + 'errors': dict(form.errors.items()), + 'cleaned_data': form.cleaned_data, + 'api': api, + 'row': row} + logger.error(pprint.pformat(errors, indent=4)) + continue + try: + instance = self.get_instance(data=form.cleaned_data, **kwargs) + except self.Model.DoesNotExist: + # Instance doesn't exist, must be new + instance = None + if instance: + form.instance = instance + objects.append(form.save()) + if index % 20 == 0: + elapsed_time = time.time() - start_time + values = {'model': self.Model._meta.object_name, + 'id': row.get('external_id', 'n/a'), + 's': len(objects)/elapsed_time} + msg = "{model} ID: {id} ({s:.2f} records/sec)".format(**values) + logger.debug(msg) + start_time = time.time() + objects = [] + + +class BusinessImporter(WakeCSVImporter): + "Import Wake County, NC restaurants" + + Model = Establishment + Form = forms.BusinessForm + + def get_instance(self, data): + "Instance exists if we have external_id and it's within Wake County" + return self.Model.objects.get(external_id=data['external_id'], + county=data['county']) + + def map_fields(self, api): + "Map CSV field names from Wake's data to our database schema" + return {'external_id': api['business_id'], + 'name': api['name'], + 'type': 1, # Restaurant + 'address': api['address'], + 'city': api['city'], + 'county': 'Wake', + 'state': 'NC', + 'postal_code': api['postal_code'], + 'phone_number': api['phone_number'], + 'lat': api['latitude'], + 'lon': api['longitude'], + 'status': 'active'} + + +class InspectionImporter(WakeCSVImporter): + "Import Wake inspections" + + Model = Inspection + Form = forms.InspectionForm + + def get_instance(self, data): + "Inspections with same establishment, date, and type is existing" + query = { + 'date': data['date'], + 'type': data['type'], + 'establishment': data['establishment'], + } + return self.Model.objects.get(**query) + + def map_fields(self, api): + "Map CSV field names from Wake's data to our database schema" + return {'establishment': api['business_id'], + 'date': api['date'], + 'type': api['type'], + 'score': api['score'], + 'description': api['description']} + + +class ViolationImporter(WakeCSVImporter): + "Import Wake violations" + + Model = Violation + Form = forms.ViolationForm + + def get_instance(self, data): + "Instance exists if we have external_id for the given inspection" + return self.Model.objects.get(date=data['date'], + code=data['code'], + inspection=data['inspection']) + + def map_fields(self, api): + "Map CSV field names from Wake's data to our database schema" + return {'establishment': api['business_id'], + 'date': api['date'], + 'code': api['code'], + 'description': api['description']} diff --git a/eatsmart/locations/wake/forms.py b/eatsmart/locations/wake/forms.py new file mode 100644 index 0000000..58d0c38 --- /dev/null +++ b/eatsmart/locations/wake/forms.py @@ -0,0 +1,102 @@ +import logging + +from django.contrib.gis import forms +from django.contrib.gis.geos import Point + +from inspections.models import Establishment, Inspection, Violation + + +DATE_FORMATS = ['%Y%m%d'] +INSPECTION_TYPE_MAP = { + 'initial': 5, # Permit + 'routine': 1, # Routine Inspection + 'followup': 9, # Verification + 'complaint': 31, # Critical Violation Followup +} +LIVES_INSPECTION_TYPES = [(x, x) for x in INSPECTION_TYPE_MAP.keys()] + +logger = logging.getLogger(__name__) + + +class BusinessForm(forms.ModelForm): + "Validate and clean Wake's bussiness data" + + lat = forms.FloatField(required=False) + lon = forms.FloatField(required=False) + + class Meta: + model = Establishment + exclude = ('location',) + + def clean_city(self): + city = self.cleaned_data['city'] + return city.title() + + def clean(self): + lat = self.cleaned_data.get('lat', None) + lon = self.cleaned_data.get('lon', None) + if lat and lon: + self.cleaned_data['location'] = Point(lon, lat) + return self.cleaned_data + + def save(self, commit=True): + instance = super().save(commit=False) + if 'location' in self.cleaned_data: + instance.location = self.cleaned_data['location'] + instance.save() + return instance + + +class InspectionForm(forms.ModelForm): + "Validate and clean Wake's inspection data" + + establishment = forms.CharField() + score = forms.FloatField(required=False) + date = forms.DateTimeField(input_formats=DATE_FORMATS) + type = forms.ChoiceField(choices=LIVES_INSPECTION_TYPES) + + class Meta: + model = Inspection + + def clean_type(self): + type_ = self.cleaned_data['type'] + return INSPECTION_TYPE_MAP[type_] + + def clean_establishment(self): + query = {'county': 'Wake', + 'external_id': self.cleaned_data['establishment']} + try: + return Establishment.objects.get(**query) + except Establishment.DoesNotExist: + raise forms.ValidationError("Establishment doesn't exist") + + +class ViolationForm(forms.ModelForm): + "Validate and clean Wake's violation data" + + establishment = forms.CharField() + inspection = forms.CharField(required=False) + date = forms.DateTimeField(input_formats=DATE_FORMATS) + + class Meta: + model = Violation + + def clean(self): + cleaned_data = self.cleaned_data + query = {'county': 'Wake', + 'external_id': cleaned_data['establishment']} + try: + establishment = Establishment.objects.get(**query) + except Establishment.DoesNotExist: + raise forms.ValidationError("Establishment doesn't exist") + query = {'date': cleaned_data['date'], + 'establishment': establishment} + try: + inspection = Inspection.objects.get(**query) + except Inspection.DoesNotExist: + raise forms.ValidationError("Inspection doesn't exist") + except Inspection.MultipleObjectsReturned: + raise forms.ValidationError("Multiple inspections found: {}".format(str(query))) + cleaned_data['inspection'] = inspection + cleaned_data['establishment'] = establishment + return cleaned_data diff --git a/eatsmart/locations/wake/management/__init__.py b/eatsmart/locations/wake/management/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/eatsmart/locations/wake/management/commands/__init__.py b/eatsmart/locations/wake/management/commands/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/eatsmart/locations/wake/management/commands/import_wake.py b/eatsmart/locations/wake/management/commands/import_wake.py new file mode 100644 index 0000000..8fac004 --- /dev/null +++ b/eatsmart/locations/wake/management/commands/import_wake.py @@ -0,0 +1,10 @@ +from django.core.management.base import BaseCommand + +from eatsmart.locations.wake import api + + +class Command(BaseCommand): + """Import saniation data from Durham County API""" + + def handle(self, *args, **options): + api.WakeCounty().import_lives() diff --git a/eatsmart/locations/wake/models.py b/eatsmart/locations/wake/models.py new file mode 100644 index 0000000..e69de29 diff --git a/eatsmart/settings/base.py b/eatsmart/settings/base.py index 50e2499..e7981e1 100644 --- a/eatsmart/settings/base.py +++ b/eatsmart/settings/base.py @@ -151,6 +151,7 @@ 'inspections', 'users', 'eatsmart.locations.durham', + 'eatsmart.locations.wake', ) # A sample logging configuration. The only tangible logging diff --git a/inspections/admin.py b/inspections/admin.py index a6aa1bb..ca4f42e 100644 --- a/inspections/admin.py +++ b/inspections/admin.py @@ -5,9 +5,8 @@ class EstablishmentAdmin(LeafletGeoAdmin): search_fields = ('name', 'address') - list_display = ('id', 'name', 'type', - 'county', 'state_id', 'point', 'update_date') - list_filter = ('county', 'postal_code') + list_display = ('id', 'name', 'type', 'county', 'point', 'update_date') + list_filter = ('county', 'update_date') ordering = ('-update_date',) def point(self, obj): @@ -16,23 +15,53 @@ def point(self, obj): return None +class InspectionCountyFilter(admin.SimpleListFilter): + title = 'County' + parameter_name = 'county' + + def lookups(self, request, model_admin): + counties = Establishment.objects.values_list('county', flat=True) + return [(name, name) for name in counties.distinct()] + + def queryset(self, request, queryset): + if self.value(): + return queryset.filter(establishment__county=self.value()) + else: + return queryset + + class InspectionAdmin(admin.ModelAdmin): search_fields = ('id', 'establishment__external_id', 'external_id', 'establishment__name') - list_display = ('id', 'external_id', 'establishment', 'type', - 'date', 'update_date') - list_filter = ('update_date', 'type') + list_display = ('id', 'establishment', 'type', + 'date', 'external_id', 'update_date') + list_filter = (InspectionCountyFilter, 'type', 'update_date') ordering = ('-date',) raw_id_fields = ('establishment',) date_hierarchy = 'date' +class ViolationCountyFilter(admin.SimpleListFilter): + title = 'County' + parameter_name = 'county' + + def lookups(self, request, model_admin): + counties = Establishment.objects.values_list('county', flat=True) + return [(name, name) for name in counties.distinct()] + + def queryset(self, request, queryset): + if self.value(): + return queryset.filter(inspection__establishment__county=self.value()) + else: + return queryset + + class ViolationAdmin(admin.ModelAdmin): search_fields = ('id', 'external_id', 'code', 'description', 'establishment__name') - list_display = ('id', 'external_id', 'establishment', 'code', - 'date', 'comments') - list_filter = ('code',) + list_display = ('id', 'establishment', 'code', + 'date', 'comments', 'external_id') + list_filter = (ViolationCountyFilter, 'date') raw_id_fields = ('establishment', 'inspection') ordering = ('-date',) date_hierarchy = 'date' diff --git a/inspections/migrations/0003_auto__chg_field_establishment_opening_date__chg_field_establishment_st.py b/inspections/migrations/0003_auto__chg_field_establishment_opening_date__chg_field_establishment_st.py new file mode 100644 index 0000000..4dca0a6 --- /dev/null +++ b/inspections/migrations/0003_auto__chg_field_establishment_opening_date__chg_field_establishment_st.py @@ -0,0 +1,70 @@ +# -*- coding: utf-8 -*- +from south.utils import datetime_utils as datetime +from south.db import db +from south.v2 import SchemaMigration +from django.db import models + + +class Migration(SchemaMigration): + + def forwards(self, orm): + + # Changing field 'Establishment.opening_date' + db.alter_column('inspections_establishment', 'opening_date', self.gf('django.db.models.fields.DateTimeField')(null=True)) + + # Changing field 'Establishment.state_id' + db.alter_column('inspections_establishment', 'state_id', self.gf('django.db.models.fields.BigIntegerField')(null=True)) + + def backwards(self, orm): + + # Changing field 'Establishment.opening_date' + db.alter_column('inspections_establishment', 'opening_date', self.gf('django.db.models.fields.DateTimeField')(default=datetime.datetime(1970, 1, 1, 0, 0))) + + # Changing field 'Establishment.state_id' + db.alter_column('inspections_establishment', 'state_id', self.gf('django.db.models.fields.BigIntegerField')(default=0)) + + models = { + 'inspections.establishment': { + 'Meta': {'unique_together': "(('external_id', 'county'),)", 'object_name': 'Establishment'}, + 'address': ('django.db.models.fields.CharField', [], {'max_length': '255'}), + 'city': ('django.db.models.fields.CharField', [], {'max_length': '64'}), + 'county': ('django.db.models.fields.CharField', [], {'db_index': 'True', 'max_length': '64'}), + 'external_id': ('django.db.models.fields.CharField', [], {'max_length': '128'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'location': ('django.contrib.gis.db.models.fields.PointField', [], {'blank': 'True', 'null': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'max_length': '255'}), + 'opening_date': ('django.db.models.fields.DateTimeField', [], {'blank': 'True', 'null': 'True'}), + 'phone_number': ('django.db.models.fields.CharField', [], {'blank': 'True', 'max_length': '64'}), + 'postal_code': ('django.db.models.fields.CharField', [], {'max_length': '16'}), + 'property_id': ('django.db.models.fields.CharField', [], {'blank': 'True', 'max_length': '128'}), + 'state': ('django.db.models.fields.CharField', [], {'max_length': '64'}), + 'state_id': ('django.db.models.fields.BigIntegerField', [], {'blank': 'True', 'null': 'True'}), + 'status': ('django.db.models.fields.CharField', [], {'default': "'active'", 'max_length': '32'}), + 'type': ('django.db.models.fields.PositiveIntegerField', [], {'default': '0'}), + 'update_date': ('django.db.models.fields.DateTimeField', [], {'blank': 'True', 'db_index': 'True', 'null': 'True'}) + }, + 'inspections.inspection': { + 'Meta': {'object_name': 'Inspection'}, + 'date': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}), + 'description': ('django.db.models.fields.TextField', [], {'blank': 'True'}), + 'establishment': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'inspections'", 'to': "orm['inspections.Establishment']"}), + 'external_id': ('django.db.models.fields.CharField', [], {'max_length': '128'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'score': ('django.db.models.fields.FloatField', [], {'blank': 'True', 'null': 'True'}), + 'type': ('django.db.models.fields.PositiveIntegerField', [], {'default': '0'}), + 'update_date': ('django.db.models.fields.DateTimeField', [], {'blank': 'True', 'db_index': 'True', 'null': 'True'}) + }, + 'inspections.violation': { + 'Meta': {'object_name': 'Violation'}, + 'code': ('django.db.models.fields.CharField', [], {'max_length': '32'}), + 'date': ('django.db.models.fields.DateTimeField', [], {'db_index': 'True'}), + 'description': ('django.db.models.fields.TextField', [], {'blank': 'True'}), + 'establishment': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'violations'", 'to': "orm['inspections.Establishment']"}), + 'external_id': ('django.db.models.fields.CharField', [], {'max_length': '128'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'inspection': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'violations'", 'blank': 'True', 'to': "orm['inspections.Inspection']", 'null': 'True'}), + 'update_date': ('django.db.models.fields.DateTimeField', [], {'blank': 'True', 'db_index': 'True', 'null': 'True'}) + } + } + + complete_apps = ['inspections'] diff --git a/inspections/models.py b/inspections/models.py index 025f693..e1516d6 100644 --- a/inspections/models.py +++ b/inspections/models.py @@ -50,7 +50,8 @@ class Establishment(models.Model): (73, ugettext_lazy('Temporary Food Establishment')), ) external_id = models.CharField(ugettext_lazy("External ID"), max_length=128) - state_id = models.BigIntegerField(ugettext_lazy("State ID")) + state_id = models.BigIntegerField(ugettext_lazy("State ID"), null=True, + blank=True) property_id = models.CharField(ugettext_lazy("Property ID"), max_length=128, blank=True) name = models.CharField(ugettext_lazy("Name"), max_length=255) type = models.PositiveIntegerField(ugettext_lazy("Type"), default=0, choices=TYPE_CHOICES) @@ -60,7 +61,8 @@ class Establishment(models.Model): state = models.CharField(ugettext_lazy("State"), max_length=64) postal_code = models.CharField(ugettext_lazy("Postal Code"), max_length=16) phone_number = models.CharField(ugettext_lazy("Phone Number"), max_length=64, blank=True) - opening_date = models.DateTimeField(ugettext_lazy("Opening Date")) + opening_date = models.DateTimeField(ugettext_lazy("Opening Date"), + null=True, blank=True) update_date = models.DateTimeField(ugettext_lazy("Update Date"), null=True, blank=True, db_index=True) status = models.CharField(ugettext_lazy("Status"), choices=STATUS_CHOICES, max_length=32, default='active') @@ -96,7 +98,8 @@ class Inspection(models.Model): establishment = models.ForeignKey(Establishment, verbose_name=ugettext_lazy("Establishment"), related_name='inspections') - external_id = models.CharField(ugettext_lazy("External ID"), max_length=128) + external_id = models.CharField(ugettext_lazy("External ID"), + max_length=128, blank=True) date = models.DateTimeField(ugettext_lazy("Date"), db_index=True) score = models.FloatField(ugettext_lazy("Score"), null=True, blank=True) description = models.TextField(ugettext_lazy("Description"), blank=True) @@ -118,7 +121,8 @@ class Violation(models.Model): inspection = models.ForeignKey(Inspection, related_name='violations', verbose_name=ugettext_lazy("Inspection"), null=True, blank=True) - external_id = models.CharField(ugettext_lazy("External ID"), max_length=128) + external_id = models.CharField(ugettext_lazy("External ID"), + max_length=128, blank=True) date = models.DateTimeField(ugettext_lazy("Date"), db_index=True) code = models.CharField(ugettext_lazy("Code"), max_length=32) description = models.TextField(ugettext_lazy("Description"), blank=True)