diff --git a/helpers/helperslib/Packages.py b/helpers/helperslib/Packages.py
index c5977aa..c263cb8 100644
--- a/helpers/helperslib/Packages.py
+++ b/helpers/helperslib/Packages.py
@@ -1,297 +1,296 @@
import copy
import os
import re
import stat
import yaml
import shutil
import tempfile
-import paramiko
import urllib, urllib.request

from helperslib import CommonUtils


def nameForProject( product, project, branchGroup ):
    return "{0}-{1}-{2}".format( product, project, branchGroup )


class Archive(object):
    # Sets up an archive for use, to allow for retrieving and uploading new resources to the archive
    # A local cache of the archive will be managed to improve performance
    # contentsSuffix is the filename suffix used for the contents files
    def __init__( self, name, platform, usingCache = True, contentsSuffix = ".tar" ):
        # Save the name of this archive, the platform we're on and whether a local cache should be used for later use
        self.name = name
        self.platform = platform
        self.usingCache = usingCache
        self.contentsSuffix = contentsSuffix

        # Set an empty manifest up for safety later on
        self.serverManifest = {}

        # Load our configuration
        configFileLocation = os.path.join( CommonUtils.scriptsBaseDirectory(), 'archive-configs', name + '.yaml' )
        with open(configFileLocation, 'r') as configFile:
            # Parse the YAML file
            self.config = yaml.safe_load( configFile )

        # Are we using a local cache?
        # If not, then retrieval of packages won't be possible, so nothing further needs to be done
        if not self.usingCache:
            return

        # Does our local cache exist?
        if not os.path.isdir( self.cacheLocation() ):
            # Create it
            os.makedirs( self.cacheLocation() )

        # Retrieve the archive manifest from the remote server
        # We'll use this to determine if we need to update a file from the remote server
        response = urllib.request.urlopen( self.downloadBaseUrl() + '/manifest.yaml' )
        self.serverManifest = yaml.safe_load( response.read() )

        # Make sure the Manifest is valid...
        if self.serverManifest is None:
            # Set a blank manifest to ensure we don't fail later on
            self.serverManifest = {}
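    # For reference, an archive config is a small YAML file; the layout below is
    # an illustrative sketch inferred from the keys this class reads (paths,
    # hostnames and the platform key are made-up examples, not repository values):
    #
    #   cacheLocation:
    #     SUSEQt5.10: /home/jenkins/caches/dependencies
    #   client:
    #     downloadBaseUrl: https://example.org/dependencies
    #     uploadHostname: upload.example.org
    #     uploadUsername: packagepublisher
    #     uploadDirectory: /srv/archives/staging
    #     uploadTools: /srv/sysadmin/ci-tooling
    #   server:
    #     archiveDirectory: /srv/archives/production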
    # Returns the location of our local cache of the remote archive
    # The cache is used to minimize unnecessary downloads from the remote archive
    def cacheLocation( self ):
        return self.config['cacheLocation'][self.platform]

    # Returns the directory which should be used for storing temporary files prior to moving them to the cache
    # We can't use the system temporary directory, as Linux builds have it on a separate file system (as far as the kernel is concerned anyway),
    # which makes the resulting moves from the temporary location to the final location non-atomic
    def temporaryFileLocation( self ):
        # If we aren't using a cache we don't care where files are created
        # We communicate this to Python by supplying None as the value of dir= to NamedTemporaryFile
        if not self.usingCache:
            return None

        # If we are using one though, then we want the cache location to be our temporary file directory
        return self.cacheLocation()

    # Returns the base url used to retrieve the metadata and archives from the archive
    # An archive is expected to have a section for each platform
    def downloadBaseUrl( self ):
        return self.config['client']['downloadBaseUrl'] + "/" + self.platform

    # Retrieve the package for the given project and branch group combination from the archive
    # If a cached copy is available and is the most recent version, it will be used instead of fetching the package again
    # Where the remote archive has a newer version, the package will be retrieved from the remote archive
    # All lookups will be restricted to our current platform, as specified when creating this archive.
    def retrievePackage( self, package ):
        # Does the archive contain what we are after?
        # It will never contain it if the use of a local cache has been disabled
        # In that case there is nothing we can do and we should bail
        if package not in self.serverManifest:
            # There is nothing for us to fetch - the server will just yield a 404
            # So let's bow out gracefully here
            return ( None, None )

        # Determine the suffix for the content in the archive
        # Should the metadata not specify one, use the archive default suffix
        serverMetadata = self.serverManifest[ package ]
        contentsSuffix = serverMetadata.get('contentsSuffix', self.contentsSuffix)

        # Determine the names the metadata and archive files would have respectively
        metadataFilename = package + ".yaml"
        contentsFilename = package + contentsSuffix

        # Begin determining if we need to download or not
        # We start from the assumption that we will need to download an archive
        needToDownload = True

        # Do we have a local copy of this?
        localMetadataPath = os.path.join(self.cacheLocation(), metadataFilename)
        if os.path.exists( localMetadataPath ):
            # Load the local metadata
            with open(localMetadataPath, 'r', encoding='utf-8') as localMetadataFile:
                localMetadata = yaml.safe_load( localMetadataFile )

            # Look it up in the server manifest...
            serverMetadata = self.serverManifest[ package ]

            # If the server timestamp is lower or the same, there's no need to fetch
            needToDownload = ( serverMetadata['timestamp'] > localMetadata['timestamp'] )

        # Does the local contents file exist?
        localContentsPath = os.path.join(self.cacheLocation(), contentsFilename)
        if not os.path.exists( localContentsPath ):
            # If it doesn't, we always need to download
            needToDownload = True

        # Let's retrieve the file if we need to now...
        if needToDownload:
            # Download the archive first...
            response = urllib.request.urlopen( self.downloadBaseUrl() + '/' + contentsFilename )
            latestContent = tempfile.NamedTemporaryFile(delete=False, mode='wb', dir=self.temporaryFileLocation())
            latestContent.write( response.read() )
            latestContent.close()

            # Now the metadata file...
            response = urllib.request.urlopen( self.downloadBaseUrl() + '/' + metadataFilename )
            latestMetadata = tempfile.NamedTemporaryFile(delete=False, mode='wb', dir=self.temporaryFileLocation())
            latestMetadata.write( response.read() )
            latestMetadata.close()

            # Move both to their final resting places
            shutil.move( latestContent.name, localContentsPath )
            shutil.move( latestMetadata.name, localMetadataPath )

        # All done, we can return a tuple of the archive and metadata now
        return ( localContentsPath, serverMetadata )
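    # A minimal client-side usage sketch; the archive name, platform and project
    # names below are illustrative assumptions, not values defined by this module:
    #
    #   archive = Archive( 'dependencies', 'SUSEQt5.10' )
    #   package = nameForProject( 'kde', 'kcoreaddons', 'kf5-qt5' )
    #   contentsPath, metadata = archive.retrievePackage( package )
    #   if contentsPath is not None:
    #       # contentsPath points at the cached tar archive, ready to be extracted
    #       ...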
    # Generates the metadata which is stored in the .yaml file that accompanies each package stored in the archive
    # Any extra metadata supplied will be written to the YAML file as well, and needs to be a dict-like object
    def generateMetadataForFile( self, contentsNeedingMetadata, scmRevision, extraMetadata=None ):
        # First, determine the timestamp the file was last modified
        packageTimestamp = os.path.getmtime( contentsNeedingMetadata )

        # Now the checksum
        packageChecksum = CommonUtils.generateFileChecksum( contentsNeedingMetadata )

        # Start preparing the metadata we're going to save alongside the package
        metadataForPackage = {}

        # If we have extraMetadata for this Package, then we need to pre-seed the metadata dictionary
        if extraMetadata:
            metadataForPackage = copy.copy( extraMetadata )

        # Update/add the necessary keys that we want to exist.
        metadataForPackage.update({
            'timestamp': packageTimestamp,
            'checksum': packageChecksum,
            'scmRevision': scmRevision,
            'contentsSuffix': self.contentsSuffix,
        })

        # Write the YAML out to a temporary file
        latestMetadata = tempfile.NamedTemporaryFile(delete=False, mode='w', dir=self.temporaryFileLocation())
        yaml.dump( metadataForPackage, latestMetadata, default_flow_style=False, allow_unicode=True )
        latestMetadata.close()

        # Return the name of that temporary file
        return latestMetadata.name
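    # The resulting metadata file is plain YAML; an illustrative example with
    # made-up values (the keys are the ones set in the update() call above):
    #
    #   checksum: 9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08
    #   contentsSuffix: .tar
    #   scmRevision: 1a2b3c4d
    #   timestamp: 1514764800.0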
    # Stores a package in the archive, either by creating a new package or updating an existing one
    # As part of this process metadata will be generated for the package we are about to add to the archive, to assist in caching later on
    # The package and its metadata will then be uploaded to the remote archive and published, then transferred to our local cache
    # Any extra metadata supplied will be written to the YAML file as well, and needs to be a dict-like object
    def storePackage( self, package, archiveFileToInclude, scmRevision = '', extraMetadata=None ):
        # Determine the names the metadata and archive files would have respectively
        metadataFilename = package + ".yaml"
        contentsFilename = package + self.contentsSuffix

        # Generate metadata for the package we are about to store
        archiveMetadata = self.generateMetadataForFile( archiveFileToInclude, scmRevision, extraMetadata )

        # Connect to the upload host
        privateKeyFile = os.path.join( os.path.expanduser('~'), 'Keys', self.name + '.key')
        uploadConnection = CommonUtils.establishSSHConnection( self.config['client']['uploadHostname'], self.config['client']['uploadUsername'], privateKeyFile )

        # Initiate SFTP uploads
        sftp = uploadConnection.open_sftp()

        # Upload our archive and its metadata file
        sftp.put( archiveFileToInclude, '/'.join( [self.config['client']['uploadDirectory'], self.platform, contentsFilename] ) )
        sftp.put( archiveMetadata, '/'.join( [self.config['client']['uploadDirectory'], self.platform, metadataFilename] ) )

        # We are finished with file transfers now
        sftp.close()

        # Ask the upload server to publish the metadata and archive
        publishCommand = "{0}/helpers/publish-package.py --package {1} --platform {2} --environment {3}"
        publishCommand = publishCommand.format( self.config['client']['uploadTools'], package, self.platform, self.name )
        cmdStdin, cmdStdout, cmdStderr = uploadConnection.exec_command( publishCommand )
        cmdStdout.channel.recv_exit_status()

        # We're all done with the remote server now, so disconnect
        uploadConnection.close()

        # If we have no local cache, then we have nothing further to do
        if not self.usingCache:
            # Because we aren't going to move the metadata and archive files into a proper home we need to clean up after ourselves
            os.remove( archiveFileToInclude )
            os.remove( archiveMetadata )

            # All done now - nothing more for us to do
            return

        # Finally, we publish the archive and metadata into our local cache
        localContentsPath = os.path.join(self.cacheLocation(), contentsFilename)
        shutil.move( archiveFileToInclude, localContentsPath )

        localMetadataPath = os.path.join(self.cacheLocation(), metadataFilename)
        shutil.move( archiveMetadata, localMetadataPath )

        # And update our internal copy of the server side metadata
        with open(localMetadataPath, 'r', encoding='utf-8') as localMetadataFile:
            self.serverManifest[ package ] = yaml.safe_load( localMetadataFile )

    # Performs the package publishing process
    # This function should only be called on the archive server and will not function correctly on clients.
    def publishPackage( self, package ):
        # Determine where we can find the metadata file and what its final home will be
        metadataFilename = package + ".yaml"
        stagedMetadataPath = os.path.join( self.config['client']['uploadDirectory'], self.platform, metadataFilename )
        finalMetadataPath = os.path.join( self.config['server']['archiveDirectory'], self.platform, metadataFilename )

        # Because we need to know the contentsSuffix of the file we're publishing, load the metadata for the file...
        with open(stagedMetadataPath, 'r', encoding='utf-8') as metadataFile:
            metadata = yaml.safe_load( metadataFile )
            contentsSuffix = metadata.get('contentsSuffix', self.contentsSuffix)

        # Now that we know the contentsSuffix, we can go ahead and determine where our package's content can be found and where it needs to be moved to
        contentsFilename = package + contentsSuffix
        stagedContentsPath = os.path.join( self.config['client']['uploadDirectory'], self.platform, contentsFilename )
        finalContentsPath = os.path.join( self.config['server']['archiveDirectory'], self.platform, contentsFilename )

        # Move the contents file first
        # Assuming we're on the same file system this should be an atomic operation and thus instant
        # We move the metadata second in case uploadDirectory and archiveDirectory are on different file systems,
        # as the contents file could be several hundred megabytes, while the metadata file should be a matter of a few kilobytes and thus copy across instantly
        # Also, as the metadata file governs when files should be expired, it is better to over-expire than risk an outdated cached copy being used
        shutil.move( stagedContentsPath, finalContentsPath )

        # Now the metadata goes over as well
        shutil.move( stagedMetadataPath, finalMetadataPath )

        # Now we update the global manifest file for this platform
        self.refreshManifest()
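    # Server-side sketch (hypothetical; publish-package.py is not part of this
    # module): the helper invoked over SSH in storePackage presumably republishes
    # a staged upload along these lines, with no client-side cache in play:
    #
    #   archive = Archive( environment, platform, usingCache = False )
    #   archive.publishPackage( package )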
    # Refreshes the manifest.yaml file for this archive
    # This function should only be called on the archive server and will not function correctly on clients.
    def refreshManifest( self ):
        # Initialize
        manifestContents = {}

        # First we list all the files in the archive for this platform
        archiveFolder = os.path.join( self.config['server']['archiveDirectory'], self.platform )
        presentFiles = os.listdir( archiveFolder )

        # Now we go over each one in turn, to see if it is a metadata file we need to include in the manifest
        for fileToCheck in presentFiles:
            # Is this the manifest file itself? If so, skip it
            if fileToCheck == "manifest.yaml":
                continue

            # Is this a metadata file, and if it is, what's the package name?
            manifestPackageName = re.match( r"(.*)\.yaml$", fileToCheck )

            # If we didn't have a match, skip to the next one
            if manifestPackageName is None:
                continue

            # Grab the package name...
            packageName = manifestPackageName.group(1)

            # Determine the full path...
            metadataPath = os.path.join(archiveFolder, fileToCheck)

            # Load the YAML file
            with open(metadataPath, 'r', encoding='utf-8') as metadataFile:
                metadata = yaml.safe_load( metadataFile )

            # Add it to our Manifest
            manifestContents[ packageName ] = metadata

        # Now we write it out to disk
        # We write it to a temporary file to ensure we don't get caught out mid-write...
        latestManifest = tempfile.NamedTemporaryFile(delete=False, mode='w', dir=self.temporaryFileLocation())
        yaml.dump( manifestContents, latestManifest, default_flow_style=False, allow_unicode=True )
        latestManifest.close()

        # Make the Manifest file readable by everyone so Apache can serve it
        os.chmod( latestManifest.name, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH )

        # Move it to its final home
        manifestPath = os.path.join( archiveFolder, 'manifest.yaml' )
        shutil.move( latestManifest.name, manifestPath )
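    # The generated manifest.yaml maps package names to their metadata; an
    # illustrative excerpt (the package name and values are made up, each entry
    # has the same shape as the per-package metadata file shown earlier):
    #
    #   kde-kcoreaddons-kf5-qt5:
    #     checksum: 9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08
    #     contentsSuffix: .tar
    #     scmRevision: 1a2b3c4d
    #     timestamp: 1514764800.0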