Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix duplicate SIP and Transfer Manifest issue #38

Merged
merged 1 commit into from
Apr 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/integration.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -106,14 +106,14 @@ jobs:
run: |
sudo apt-get install -y libxml2-dev libxslt1-dev
-
name: 💽 Building distribution
name: 💽 Building & testing distribution
run: |
rm -rf dist proddist testdist
python3 bootstrap.py
bin/buildout
bin/buildout setup . egg_info --tag-build .dev --tag-date sdist --dist-dir testdist
bin/test
bin/buildout setup . egg_info --tag-build .$(date --utc '+%Y%m%d%H%M%S') sdist --dist-dir testdist
bin/buildout setup . sdist --dist-dir proddist
# TODO: Put in unit+functional+integration testing here
-
name: 📇 Publishing to Test PyPI
uses: pypa/gh-action-pypi-publish@master
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
.*.swp
.DS_Store
__pycache__
.eggs
typescript
/*.tab
/*.xml
Expand Down
2 changes: 1 addition & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@

graft src/pds
graft docs
include *.rst
include *.rst *.tab *.TAB *.xml *.xsd *.pdf
global-exclude *.pyc *.pyo
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
'pds-deep-archive=pds.aipgen.main:main'
]
},
test_suite='pds.aipgen.tests.test_suite',
namespace_packages=['pds'],
packages=find_packages('src', exclude=['docs', 'tests', 'bootstrap', 'ez_setup']),
package_dir={'': 'src'},
Expand Down
8 changes: 4 additions & 4 deletions src/pds/aipgen/aip.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def _writeLabel(
• ``xferNum`` — count of records in the transfer manifest file
'''

_logger.debug('🏷 Writing AIP label to %s\n', labelOutputFile)
_logger.debug('🏷 Writing AIP label to %s', labelOutputFile)
ts = datetime.utcnow()
ts = datetime(ts.year, ts.month, ts.day, ts.hour, ts.minute, ts.second, microsecond=0, tzinfo=None)

Expand Down Expand Up @@ -295,7 +295,7 @@ def process(bundle):
``bundle``, which is an open file stream (with a ``name`` attribute) on the local
filesystem. Return the name of the generated checksum manifest file.
'''
_logger.info('🏃‍♀️ Starting AIP generation for %s\n', bundle.name)
_logger.info('🏃‍♀️ Starting AIP generation for %s', bundle.name)
d = os.path.dirname(os.path.abspath(bundle.name))

# Get the bundle's primary collections and other useful info
Expand Down Expand Up @@ -330,7 +330,7 @@ def process(bundle):
_logger.info('🎉 Success! AIP done, files generated:')
_logger.info('• Checksum manifest: %s', chksumFN)
_logger.info('• Transfer manifest: %s', xferFN)
_logger.info('• XML label for them both: %s\n', labelFN)
_logger.info('• XML label for them both: %s', labelFN)
return chksumFN


Expand All @@ -347,7 +347,7 @@ def main():
logging.basicConfig(level=args.loglevel, format='%(levelname)s %(message)s')
_logger.debug('⚙️ command line args = %r', args)
process(args.bundle)
_logger.info('👋 Thanks for using this program! Bye!\n\n')
_logger.info('👋 Thanks for using this program! Bye!')
sys.exit(0)


Expand Down
2 changes: 1 addition & 1 deletion src/pds/aipgen/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def main():
args.bundle_base_url,
chksumStream
)
_logger.info("👋 That's it! Thanks for making an AIP and SIP with us today. Bye!\n\n")
_logger.info("👋 That's it! Thanks for making an AIP and SIP with us today. Bye!")
sys.exit(0)


Expand Down
74 changes: 46 additions & 28 deletions src/pds/aipgen/sip.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@

# Other constants and defaults:
_registryServiceURL = 'https://pds.nasa.gov/services/registry/pds' # Default registry service
_bufsiz = 512 # Buffer size for reading from URL con
_pLineMatcher = re.compile(r'^P,\s*(.+)') # Match P-lines in a tab file
_bufsiz = 512 # Buffer size for reading from URL con
_pLineMatcher = re.compile(r'^P,\s*([^\s]+)') # Match P-lines in a tab file

# TODO: Auto-generate from PDS4 IM
_providerSiteIDs = ['PDS_' + i for i in ('ATM', 'ENG', 'GEO', 'IMG', 'JPL', 'NAI', 'PPI', 'PSI', 'RNG', 'SBN')]
Expand All @@ -89,6 +89,7 @@

# Logging
# -------

_logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -165,10 +166,27 @@ def _getAssociatedProducts(root, filepath):
if not matches: return products
for m in matches:
products.add('file:' + os.path.join(filepath, m.text))

return products


def _createLidVidtoXMLFileTable(xmlFiles, con):
    '''Fill out a table for later (future multiprocessing-enabled) use to rapidly look up lidvids
    in XML files. We get all of this XPath out of the way!

    • ``xmlFiles`` — iterable of paths of the XML label files to parse
    • ``con`` — database connection offering ``execute``; a ``lidvids`` table with ``lidvid``
      and ``xmlFile`` columns must already exist in it

    For each file, the text of ``Identification_Area/logical_identifier`` and of
    ``Identification_Area/version_id`` (looked up relative to the root element) is joined as
    ``lid::vid`` and recorded together with the file's path. A file in which either element is
    missing, empty, or blank is skipped. This function only calls ``con.execute``; transaction
    handling is left to the caller.
    '''
    # These paths are the same for every file, so build them just once
    idArea = f'./{{{PDS_NS_URI}}}Identification_Area'
    lidPath = f'{idArea}/{{{PDS_NS_URI}}}logical_identifier'
    vidPath = f'{idArea}/{{{PDS_NS_URI}}}version_id'

    for xmlFile in xmlFiles:
        root = etree.parse(xmlFile).getroot()

        # ``findtext`` yields ``None`` for a missing element and an empty string for an element
        # without text, so a single falsy check handles both and never calls ``strip`` on ``None``
        lid = (root.findtext(lidPath) or '').strip()
        if not lid: continue
        vid = (root.findtext(vidPath) or '').strip()
        if not vid: continue

        # ``OR IGNORE`` lets a repeated row pass silently instead of raising on a constraint
        lidvid = lid + '::' + vid
        con.execute('''INSERT OR IGNORE INTO lidvids (lidvid, xmlFile) VALUES (?,?)''', (lidvid, xmlFile))


def _getLocalFileInfo(bundle, primaries, bundleLidvid, con):
'''Search all XML files (except for the ``bundle`` file) in the same directory as ``bundle``
and look for all XPath ``Product_Collection/Identification_Area/logical_identifier`` values
Expand All @@ -182,8 +200,6 @@ def _getLocalFileInfo(bundle, primaries, bundleLidvid, con):
have that "lidvid" and then return a mapping of lidvids to sets of matching files, as ``file:``
URLs.
'''
# First get a set of all XML files under the same directory as ``bundle``

# I'll take a six-pack of tabs
lidvids = set()

Expand All @@ -198,6 +214,7 @@ def _getLocalFileInfo(bundle, primaries, bundleLidvid, con):
xmlFile text NOT NULL
)''')
cursor.execute('''CREATE INDEX IF NOT EXISTS lidvidIndex ON lidvids (lidvid)''')
cursor.execute('''CREATE UNIQUE INDEX lidvidPairing ON lidvids (lidvid, xmlFile)''')

# Add bundle to manifest
lidvidsToFiles[bundleLidvid] = {'file:' + bundle}
Expand All @@ -209,6 +226,8 @@ def _getLocalFileInfo(bundle, primaries, bundleLidvid, con):
# Locate all the XML files
for dirpath, dirnames, filenames in os.walk(root):
xmlFiles |= set([os.path.join(dirpath, i) for i in filenames if i.lower().endswith(PDS_LABEL_FILENAME_EXTENSION.lower())])
with con:
_createLidVidtoXMLFileTable(xmlFiles, con)

# Get the lidvids and inventory of files mentioned in each xml file
with con:
Expand All @@ -222,8 +241,6 @@ def _getLocalFileInfo(bundle, primaries, bundleLidvid, con):
for tab in tabs:
lidvids |= _getPLines(tab)
lidvidsToFiles[lidvid].add('file:' + tab)
for lidvid in lidvids:
con.execute('INSERT INTO lidvids (lidvid, xmlFile) VALUES (?,?)', (lidvid, xmlFile))

# Now go through each lidvid mentioned by the PLines in each inventory tab and find their xml files
for lidvid in lidvids:
Expand Down Expand Up @@ -265,7 +282,7 @@ def _writeTable(hashedFiles, hashName, manifest, offline, baseURL, basePathToRep
If ``offline`` mode, we transform all URLs written to the table by stripping off
everything except the last component (the file) and prepending the given ``baseURL``.
'''
hashish, size = hashlib.new('md5'), 0
hashish, size, hashName = hashlib.new('md5'), 0, hashName.upper()
for url, digest, lidvid in sorted(hashedFiles):
if offline:
if baseURL.endswith('/'):
Expand Down Expand Up @@ -397,9 +414,8 @@ def produce(bundle, hashName, registryServiceURL, insecureConnectionFlag, site,
# the future for sharing this DB amongst many processes for some fancy multiprocessing
with tempfile.NamedTemporaryFile() as dbfile:
con = sqlite3.connect(dbfile.name)
_logger.debug('→ Database file (deleted) is %sf', dbfile.name)

_logger.info('🏃‍♀️ Starting SIP generation for %s\n', bundle.name)
_logger.info('🏃‍♀️ Starting SIP generation for %s', bundle.name)

# Get the bundle path
bundle = os.path.abspath(bundle.name)
Expand All @@ -423,7 +439,7 @@ def produce(bundle, hashName, registryServiceURL, insecureConnectionFlag, site,
_writeLabel(bundleLID, bundleVID, title, md5, size, len(hashedFiles), hashName, manifestFileName, site, label, aipFile)
_logger.info('🎉 Success! From %s, generated these output files:', bundle)
_logger.info('• SIP Manifest: %s', manifestFileName)
_logger.info('• XML label for the SIP: %s\n', labelFileName)
_logger.info('• XML label for the SIP: %s', labelFileName)
return manifestFileName, labelFileName


Expand All @@ -448,7 +464,7 @@ def addSIParguments(parser):
# TODO: Temporarily setting offline to True by default until online mode is available
group.add_argument(
'-n', '--offline', default=True, action='store_true',
help='Run offline, scanning bundle directory for matching files instead of querying registry service.'+
help='Run offline, scanning bundle directory for matching files instead of querying registry service.'
' NOTE: By default, set to True until online mode is available.'
)

Expand All @@ -461,7 +477,7 @@ def addSIParguments(parser):
# TODO: Temporarily setting to be required by default until online mode is available
parser.add_argument(
'-b', '--bundle-base-url', required=True,
help='Base URL for Node data archive. This URL will be prepended to' +
help='Base URL for Node data archive. This URL will be prepended to'
' the bundle directory to form URLs to the products. For example,'
' if we are generating a SIP for mission_bundle/LADEE_Bundle_1101.xml,'
' and bundle-base-url is https://atmos.nmsu.edu/PDS/data/PDS4/LADEE/,'
Expand All @@ -471,8 +487,10 @@ def addSIParguments(parser):

def main():
'''Check the command-line for options and create a SIP from the given bundle XML'''
parser = argparse.ArgumentParser(description=_description,
formatter_class=argparse.RawDescriptionHelpFormatter)
parser = argparse.ArgumentParser(
description=_description,
formatter_class=argparse.RawDescriptionHelpFormatter
)
parser.add_argument('--version', action='version', version=f'%(prog)s {_version}')
addSIParguments(parser)
addLoggingArguments(parser)
Expand All @@ -488,21 +506,21 @@ def main():
_logger.debug('⚙️ command line args = %r', args)
if args.offline and not args.bundle_base_url:
parser.error('--bundle-base-url is required when in offline mode (--offline).')
manifest, label = _produce(
args.bundle,
manifest, label = produce(
bundle=args.bundle,
# TODO: Temporarily hardcoding these values until other modes are available
# HASH_ALGORITHMS[args.algorithm],
# args.url,
# args.insecure,
HASH_ALGORITHMS['MD5'],
'',
'',
args.site,
args.offline,
args.bundle_base_url,
args.aip
# hashName=HASH_ALGORITHMS[args.algorithm],
# registryServiceURL=args.url,
# insecureConnectionFlag=args.insecure,
hashName=HASH_ALGORITHMS['MD5'],
registryServiceURL=None,
insecureConnectionFlag=False,
site=args.site,
offline=args.offline,
baseURL=args.bundle_base_url,
aipFile=args.aip
)
_logger.info('INFO 👋 All done. Thanks for making a SIP. Bye!\n\n')
_logger.info('👋 All done. Thanks for making a SIP. Bye!')
sys.exit(0)


Expand Down
12 changes: 12 additions & 0 deletions src/pds/aipgen/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,15 @@


'''PDS AIP-GEN Tests'''


import unittest
import pds.aipgen.tests.test_utils
import pds.aipgen.tests.test_functional


def test_suite():
    '''Return one suite that combines the suites of this package's test modules.'''
    modules = (pds.aipgen.tests.test_utils, pds.aipgen.tests.test_functional)
    # Ask every module for its own suite first, then wrap them all up together
    suites = [module.test_suite() for module in modules]
    return unittest.TestSuite(suites)
1 change: 1 addition & 0 deletions src/pds/aipgen/tests/data
71 changes: 71 additions & 0 deletions src/pds/aipgen/tests/test_functional.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# encoding: utf-8
#
# Copyright © 2020 California Institute of Technology ("Caltech").
# ALL RIGHTS RESERVED. U.S. Government sponsorship acknowledged.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# • Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# • Redistributions must reproduce the above copyright notice, this list of
# conditions and the following disclaimer in the documentation and/or other
# materials provided with the distribution.
# • Neither the name of Caltech nor its operating division, the Jet Propulsion
# Laboratory, nor the names of its contributors may be used to endorse or
# promote products derived from this software without specific prior written
# permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.


'''PDS AIP-GEN functional tests'''


import unittest, tempfile, shutil, os, pkg_resources, filecmp
from pds.aipgen.sip import produce


class SIPFunctionalTestCase(unittest.TestCase):
    '''Functional test case for SIP generation: run ``produce`` on a packaged sample bundle and
    compare the manifest it reports with a stored reference copy.

    TODO: factor this out so we can generically do AIP and other file-based functional tests too.
    '''

    def setUp(self):
        '''Open the sample bundle, locate the reference manifest, and switch into a freshly made
        temporary directory.
        '''
        super().setUp()
        package = __name__
        self.input = pkg_resources.resource_stream(package, 'data/ladee_test/mission_bundle/LADEE_Bundle_1101.xml')
        self.valid = pkg_resources.resource_filename(package, 'data/ladee_test/valid/ladee_mission_bundle_sip_v1.0.tab')
        # Remember where we started so that ``tearDown`` can put us back
        self.cwd = os.getcwd()
        self.testdir = tempfile.mkdtemp()
        os.chdir(self.testdir)

    def test_sip_of_a_ladee(self):
        '''Test if the SIP manifest of LADEE bundle works as expected'''
        settings = dict(
            bundle=self.input,
            hashName='md5',
            registryServiceURL=None,
            insecureConnectionFlag=True,
            site='PDS_ATM',
            offline=True,
            baseURL='https://atmos.nmsu.edu/PDS/data/PDS4/LADEE/',
            aipFile=None
        )
        # Only the manifest gets compared; the label's name is not checked here
        manifest, _label = produce(**settings)
        identical = filecmp.cmp(manifest, self.valid)
        self.assertTrue(identical, "SIP manifest doesn't match the valid version")

    def tearDown(self):
        '''Close the bundle stream, go back to the original directory, and remove the temporary one.'''
        self.input.close()
        os.chdir(self.cwd)
        # Cleanup is best effort: errors while deleting the temporary directory are ignored
        shutil.rmtree(self.testdir, ignore_errors=True)
        super().tearDown()


def test_suite():
    '''Return a suite of the tests that the default loader finds under this module's name.'''
    loader = unittest.defaultTestLoader
    return loader.loadTestsFromName(__name__)
Loading