Package CedarBackup2 :: Package extend :: Module split
[hide private]
[frames | no frames]

Source Code for Module CedarBackup2.extend.split

  1  # -*- coding: iso-8859-1 -*- 
  2  # vim: set ft=python ts=3 sw=3 expandtab: 
  3  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  4  # 
  5  #              C E D A R 
  6  #          S O L U T I O N S       "Software done right." 
  7  #           S O F T W A R E 
  8  # 
  9  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 10  # 
 11  # Copyright (c) 2007 Kenneth J. Pronovici. 
 12  # All rights reserved. 
 13  # 
 14  # This program is free software; you can redistribute it and/or 
 15  # modify it under the terms of the GNU General Public License, 
 16  # Version 2, as published by the Free Software Foundation. 
 17  # 
 18  # This program is distributed in the hope that it will be useful, 
 19  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 20  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
 21  # 
 22  # Copies of the GNU General Public License are available from 
 23  # the Free Software Foundation website, http://www.gnu.org/. 
 24  # 
 25  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 26  # 
 27  # Author   : Kenneth J. Pronovici <pronovic@ieee.org> 
 28  # Language : Python (>= 2.3) 
 29  # Project  : Official Cedar Backup Extensions 
 30  # Revision : $Id: split.py 861 2008-03-18 03:45:42Z pronovic $ 
 31  # Purpose  : Provides an extension to split up large files in staging directories. 
 32  # 
 33  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 34   
 35  ######################################################################## 
 36  # Module documentation 
 37  ######################################################################## 
 38   
 39  """ 
 40  Provides an extension to split up large files in staging directories. 
 41   
 42  When this extension is executed, it will look through the configured Cedar 
 43  Backup staging directory for files exceeding a specified size limit, and split 
 44  them down into smaller files using the 'split' utility.  Any directory which 
 45  has already been split (as indicated by the C{cback.split} file) will be 
 46  ignored. 
 47   
 48  This extension requires a new configuration section <split> and is intended 
 49  to be run immediately after the standard stage action or immediately before the 
 50  standard store action.  Aside from its own configuration, it requires the 
 51  options and staging configuration sections in the standard Cedar Backup 
 52  configuration file. 
 53   
 54  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
 55  """ 
 56   
 57  ######################################################################## 
 58  # Imported modules 
 59  ######################################################################## 
 60   
 61  # System modules 
 62  import os 
 63  import re 
 64  import logging 
 65   
 66  # Cedar Backup modules 
 67  from CedarBackup2.filesystem import FilesystemList 
 68  from CedarBackup2.util import resolveCommand, executeCommand 
 69  from CedarBackup2.util import changeOwnership, buildNormalizedPath 
 70  from CedarBackup2.util import UNIT_BYTES, UNIT_KBYTES, UNIT_MBYTES, UNIT_GBYTES 
 71  from CedarBackup2.xmlutil import createInputDom, addContainerNode, addStringNode 
 72  from CedarBackup2.xmlutil import readFirstChild, readString 
 73  from CedarBackup2.actions.util import findDailyDirs, writeIndicatorFile, getBackupFiles 
 74  from CedarBackup2.config import ByteQuantity, readByteQuantity, addByteQuantityNode 
 75   
 76   
 77  ######################################################################## 
 78  # Module-wide constants and variables 
 79  ######################################################################## 
 80   
 81  logger = logging.getLogger("CedarBackup2.log.extend.split") 
 82   
 83  SPLIT_COMMAND = [ "split", ] 
 84  SPLIT_INDICATOR = "cback.split" 
 85   
 86   
 87  ######################################################################## 
 88  # SplitConfig class definition 
 89  ######################################################################## 
 90   
class SplitConfig(object):

   """
   Holds the settings that control how staged files are split.

   Two values are tracked, and either may be left as C{None}:

      - C{sizeLimit}: files larger than this are candidates for splitting
      - C{splitSize}: size of each chunk produced by the split

   Each value must be either C{None} or a C{ByteQuantity}; anything else is
   rejected with a C{ValueError} at assignment time.

   @sort: __init__, __repr__, __str__, __cmp__, sizeLimit, splitSize
   """

   def __init__(self, sizeLimit=None, splitSize=None):
      """
      Constructor for the C{SplitConfig} class.

      Both arguments are routed through the validating properties, so an
      invalid value fails here rather than later.

      @param sizeLimit: Size limit of the files, as a C{ByteQuantity} (or C{None})
      @param splitSize: Size that files exceeding the limit will be split into, as a C{ByteQuantity} (or C{None})

      @raise ValueError: If one of the values is invalid.
      """
      # Backing fields exist before the property setters run.
      self._sizeLimit = None
      self._splitSize = None
      self.sizeLimit = sizeLimit
      self.splitSize = splitSize

   def __repr__(self):
      """
      Official string representation for class instance.
      """
      fields = (self.sizeLimit, self.splitSize)
      return "SplitConfig(%s, %s)" % fields

   def __str__(self):
      """
      Informal string representation; identical to the official one.
      """
      return self.__repr__()

   def __cmp__(self, other):
      """
      Three-way comparison against another configuration object.

      The size limit is compared first and the split size second; the first
      field that differs decides the result.  Any instance sorts after C{None}.

      @param other: Other object to compare to.
      @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other.
      """
      if other is None:
         return 1
      # Attributes are fetched lazily, one field at a time, in priority order.
      for fieldName in ("_sizeLimit", "_splitSize"):
         mine = getattr(self, fieldName)
         theirs = getattr(other, fieldName)
         if mine != theirs:
            if mine < theirs:
               return -1
            return 1
      return 0

   def _setSizeLimit(self, value):
      """
      Property target used to set the size limit.
      C{None} is accepted as-is; any other value must be a C{ByteQuantity}.
      @raise ValueError: If the value is not a C{ByteQuantity}
      """
      if value is not None and not isinstance(value, ByteQuantity):
         raise ValueError("Value must be a C{ByteQuantity} object.")
      self._sizeLimit = value

   def _getSizeLimit(self):
      """
      Property target used to get the size limit.
      """
      return self._sizeLimit

   def _setSplitSize(self, value):
      """
      Property target used to set the split size.
      C{None} is accepted as-is; any other value must be a C{ByteQuantity}.
      @raise ValueError: If the value is not a C{ByteQuantity}
      """
      if value is not None and not isinstance(value, ByteQuantity):
         raise ValueError("Value must be a C{ByteQuantity} object.")
      self._splitSize = value

   def _getSplitSize(self):
      """
      Property target used to get the split size.
      """
      return self._splitSize

   sizeLimit = property(fget=_getSizeLimit, fset=_setSizeLimit, fdel=None, doc="Size limit, as a ByteQuantity")
   splitSize = property(fget=_getSplitSize, fset=_setSplitSize, fdel=None, doc="Split size, as a ByteQuantity")


########################################################################
# LocalConfig class definition
########################################################################

class LocalConfig(object):

   """
   Class representing this extension's configuration document.

   This is not a general-purpose configuration object like the main Cedar
   Backup configuration object.  Instead, it just knows how to parse and emit
   split-specific configuration values.  Third parties who need to read and
   write configuration related to this extension should access it through the
   constructor, C{validate} and C{addConfig} methods.

   @sort: __init__, __repr__, __str__, __cmp__, split, validate, addConfig
   """

   def __init__(self, xmlData=None, xmlPath=None, validate=True):
      """
      Initializes a configuration object.

      If you initialize the object without passing either C{xmlData} or
      C{xmlPath} then configuration will be empty and will be invalid until it
      is filled in properly.

      No reference to the original XML data or original path is saved off by
      this class.  Once the data has been parsed (successfully or not) this
      original information is discarded.

      Unless the C{validate} argument is C{False}, the L{LocalConfig.validate}
      method will be called (with its default arguments) against configuration
      after successfully parsing any passed-in XML.  Keep in mind that even if
      C{validate} is C{False}, it might not be possible to parse the passed-in
      XML document if lower-level validations fail.

      @note: It is strongly suggested that the C{validate} option always be set
      to C{True} (the default) unless there is a specific need to read in
      invalid configuration from disk.

      @param xmlData: XML data representing configuration.
      @type xmlData: String data.

      @param xmlPath: Path to an XML file on disk.
      @type xmlPath: Absolute path to a file on disk.

      @param validate: Validate the document after parsing it.
      @type validate: Boolean true/false.

      @raise ValueError: If both C{xmlData} and C{xmlPath} are passed-in.
      @raise ValueError: If the XML data in C{xmlData} or C{xmlPath} cannot be parsed.
      @raise ValueError: If the parsed configuration document is not valid.
      @raise IOError: If the file at C{xmlPath} cannot be opened or read.
      """
      self._split = None
      self.split = None
      if xmlData is not None and xmlPath is not None:
         raise ValueError("Use either xmlData or xmlPath, but not both.")
      if xmlPath is not None:
         # Close the handle explicitly rather than relying on garbage
         # collection; try/finally keeps this usable on old interpreters.
         xmlFile = open(xmlPath)
         try:
            xmlData = xmlFile.read()
         finally:
            xmlFile.close()
      if xmlData is not None:
         self._parseXmlData(xmlData)
         if validate:
            self.validate()

   def __repr__(self):
      """
      Official string representation for class instance.
      """
      return "LocalConfig(%s)" % (self.split)

   def __str__(self):
      """
      Informal string representation for class instance.
      """
      return self.__repr__()

   def __cmp__(self, other):
      """
      Three-way comparison against another configuration object.

      Only the split section is compared.  Any instance sorts after C{None}.

      @param other: Other object to compare to.
      @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other.
      """
      if other is None:
         return 1
      if self._split != other._split:
         if self._split < other._split:
            return -1
         else:
            return 1
      return 0

   def _setSplit(self, value):
      """
      Property target used to set the split configuration value.
      If not C{None}, the value must be a C{SplitConfig} object.
      @raise ValueError: If the value is not a C{SplitConfig}
      """
      if value is None:
         self._split = None
      else:
         if not isinstance(value, SplitConfig):
            raise ValueError("Value must be a C{SplitConfig} object.")
         self._split = value

   def _getSplit(self):
      """
      Property target used to get the split configuration value.
      """
      return self._split

   split = property(_getSplit, _setSplit, None, "Split configuration in terms of a C{SplitConfig} object.")

   def validate(self):
      """
      Validates configuration represented by the object.

      Split configuration must be filled in.  Within that, both the size limit
      and split size must be filled in.

      @raise ValueError: If one of the validations fails.
      """
      if self.split is None:
         raise ValueError("Split section is required.")
      if self.split.sizeLimit is None:
         raise ValueError("Size limit must be set.")
      if self.split.splitSize is None:
         raise ValueError("Split size must be set.")

   def addConfig(self, xmlDom, parentNode):
      """
      Adds a <split> configuration section as the next child of a parent.

      Third parties should use this function to write configuration related to
      this extension.  Nothing is written when no split section is set.

      We add the following fields to the document::

         sizeLimit      //cb_config/split/size_limit
         splitSize      //cb_config/split/split_size

      @param xmlDom: DOM tree as from C{impl.createDocument()}.
      @param parentNode: Parent that the section should be appended to.
      """
      if self.split is not None:
         sectionNode = addContainerNode(xmlDom, parentNode, "split")
         addByteQuantityNode(xmlDom, sectionNode, "size_limit", self.split.sizeLimit)
         addByteQuantityNode(xmlDom, sectionNode, "split_size", self.split.splitSize)

   def _parseXmlData(self, xmlData):
      """
      Internal method to parse an XML string into the object.

      This method parses the XML document into a DOM tree (C{xmlDom}) and then
      calls a static method to parse the split configuration section.

      @param xmlData: XML data to be parsed
      @type xmlData: String data

      @raise ValueError: If the XML cannot be successfully parsed.
      """
      (xmlDom, parentNode) = createInputDom(xmlData)
      self._split = LocalConfig._parseSplit(parentNode)

   def _parseSplit(parent):
      """
      Parses a split configuration section.

      We read the following individual fields::

         sizeLimit      //cb_config/split/size_limit
         splitSize      //cb_config/split/split_size

      @param parent: Parent node to search beneath.

      @return: C{SplitConfig} object or C{None} if the section does not exist.
      @raise ValueError: If some filled-in value is invalid.
      """
      split = None
      section = readFirstChild(parent, "split")
      if section is not None:
         split = SplitConfig()
         split.sizeLimit = readByteQuantity(section, "size_limit")
         split.splitSize = readByteQuantity(section, "split_size")
      return split
   _parseSplit = staticmethod(_parseSplit)


########################################################################
# Public functions
########################################################################

###########################
# executeAction() function
###########################

def executeAction(configPath, options, config):
   """
   Entry point for the split extended action.

   Loads this extension's own configuration from C{configPath}, then walks
   every daily staging directory returned by C{findDailyDirs} for the
   configured staging target, splitting its large files and finally writing
   the C{cback.split} indicator file into it.

   @param configPath: Path to configuration file on disk.
   @type configPath: String representing a path on disk.

   @param options: Program command-line options (not consulted by this action).
   @type options: Options object.

   @param config: Program configuration.
   @type config: Config object.

   @raise ValueError: Under many generic error conditions
   @raise IOError: If there are I/O problems reading or writing files
   """
   logger.debug("Executing split extended action.")
   if config.stage is None or config.options is None:
      raise ValueError("Cedar Backup configuration is not properly filled in.")
   localConfig = LocalConfig(xmlPath=configPath)
   for stagingDir in findDailyDirs(config.stage.targetDir, SPLIT_INDICATOR):
      splitSettings = localConfig.split
      owner = config.options.backupUser
      group = config.options.backupGroup
      _splitDailyDir(stagingDir, splitSettings.sizeLimit, splitSettings.splitSize, owner, group)
      # Mark the directory as processed so that a later run skips it.
      writeIndicatorFile(stagingDir, SPLIT_INDICATOR, owner, group)
   logger.info("Executed the split extended action successfully.")


##############################
# _splitDailyDir() function
##############################

def _splitDailyDir(dailyDir, sizeLimit, splitSize, backupUser, backupGroup):
   """
   Splits every oversized file found in one daily staging directory.

   The candidate list comes from C{getBackupFiles}, which is relied upon to
   leave out indicator files (C{"cback.store"}, C{"cback.stage"}, etc.).
   Each remaining file whose on-disk size is strictly greater than
   C{sizeLimit.bytes} is split, and the original is removed afterwards.

   @param dailyDir: Daily directory whose contents should be split
   @param sizeLimit: Size limit, as an object exposing a C{bytes} attribute
   @param splitSize: Split size, as an object exposing a C{bytes} attribute
   @param backupUser: User that target files should be owned by
   @param backupGroup: Group that target files should be owned by

   @raise IOError: If a file cannot be split (propagated from C{_splitFile}).
   """
   logger.debug("Begin splitting contents of [%s]." % dailyDir)
   for candidate in getBackupFiles(dailyDir):  # ignores indicator files
      fileSize = float(os.stat(candidate).st_size)
      if not (fileSize > sizeLimit.bytes):
         continue  # at or under the limit, leave it alone
      _splitFile(candidate, splitSize, backupUser, backupGroup, removeSource=True)
   logger.debug("Completed splitting contents of [%s]." % dailyDir)


########################
# _splitFile() function
########################

458 -def _splitFile(sourcePath, splitSize, backupUser, backupGroup, removeSource=False):
459 """ 460 Splits the source file into chunks of the indicated size. 461 462 The split files will be owned by the indicated backup user and group. If 463 C{removeSource} is C{True}, then the source file will be removed after it is 464 successfully split. 465 466 @param sourcePath: Absolute path of the source file to split 467 @param splitSize: Encryption mode (only "gpg" is allowed) 468 @param backupUser: User that target files should be owned by 469 @param backupGroup: Group that target files should be owned by 470 @param removeSource: Indicates whether to remove the source file 471 472 @raise IOError: If there is a problem accessing, splitting or removing the source file. 473 """ 474 cwd = os.getcwd() 475 try: 476 if not os.path.exists(sourcePath): 477 raise ValueError("Source path [%s] does not exist." % sourcePath); 478 dirname = os.path.dirname(sourcePath) 479 filename = os.path.basename(sourcePath) 480 prefix = "%s_" % filename 481 bytes = int(splitSize.bytes) 482 os.chdir(dirname) # need to operate from directory that we want files written to 483 command = resolveCommand(SPLIT_COMMAND) 484 args = [ "--verbose", "--numeric-suffixes", "--suffix-length=5", "--bytes=%d" % bytes, filename, prefix, ] 485 (result, output) = executeCommand(command, args, returnOutput=True, ignoreStderr=False) 486 if result != 0: 487 raise IOError("Error [%d] calling split for [%s]." % (result, sourcePath)) 488 pattern = re.compile(r"(creating file `)(%s)(.*)(')" % prefix) 489 match = pattern.search(output[-1:][0]) 490 if match is None: 491 raise IOError("Unable to parse output from split command.") 492 value = int(match.group(3).strip()) 493 for index in range(0, value): 494 path = "%s%05d" % (prefix, index) 495 if not os.path.exists(path): 496 raise IOError("After call to split, expected file [%s] does not exist." 
% path) 497 changeOwnership(path, backupUser, backupGroup) 498 if removeSource: 499 if os.path.exists(sourcePath): 500 try: 501 os.remove(sourcePath) 502 logger.debug("Completed removing old file [%s]." % sourcePath) 503 except: 504 raise IOError("Failed to remove file [%s] after splitting it." % (sourcePath)) 505 finally: 506 os.chdir(cwd)
507