# -*- coding: iso-8859-1 -*-
# vim: set ft=python ts=3 sw=3 expandtab:
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
# C E D A R
# S O L U T I O N S "Software done right."
# S O F T W A R E
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
# Copyright (c) 2006-2007,2010,2015 Kenneth J. Pronovici.
# All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License,
# Version 2, as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Copies of the GNU General Public License are available from
# the Free Software Foundation website, http://www.gnu.org/.
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
# Author : Kenneth J. Pronovici <pronovic@ieee.org>
# Language : Python 3 (>= 3.4)
# Project : Official Cedar Backup Extensions
# Purpose : Provides an extension to back up mbox email files.
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
########################################################################
# Module documentation
########################################################################
"""
Provides an extension to back up mbox email files.
Backing up email
================
Email folders (often stored as mbox flatfiles) are not well-suited to being backed
up with an incremental backup like the one offered by Cedar Backup. This is
because mbox files often change on a daily basis, forcing the incremental
backup process to back them up every day in order to avoid losing data. This
can result in quite a bit of wasted space when backing up large folders. (Note
that the alternative maildir format does not share this problem, since it
typically uses one file per message.)
One solution to this problem is to design a smarter incremental backup process,
which backs up baseline content on the first day of the week, and then backs up
only new messages added to that folder on every other day of the week. This way,
the backup for any single day is only as large as the messages placed into the
folder on that day. The backup isn't as "perfect" as the incremental backup
process, because it doesn't preserve information about messages deleted from
the backed-up folder. However, it should be much more space-efficient, and
in a recovery situation, it seems better to restore too much data rather
than too little.
What is this extension?
=======================
This is a Cedar Backup extension used to back up mbox email files via the Cedar
Backup command line. Individual mbox files or directories containing mbox
files can be backed up using the same collect modes allowed for filesystems in
the standard Cedar Backup collect action: weekly, daily, incremental. It
implements the "smart" incremental backup process discussed above, using
functionality provided by the ``grepmail`` utility.
This extension requires a new configuration section <mbox> and is intended to
be run either immediately before or immediately after the standard collect
action. Aside from its own configuration, it requires the options and collect
configuration sections in the standard Cedar Backup configuration file.
The mbox action is conceptually similar to the standard collect action,
except that mbox directories are not collected recursively. This implies
some configuration changes (i.e. there's no need for global exclusions or an
ignore file). If you back up a directory, all of the mbox files in that
directory are backed up into a single tar file using the indicated
compression method.
:author: Kenneth J. Pronovici <pronovic@ieee.org>
"""
########################################################################
# Imported modules
########################################################################
# System modules
import os
import logging
import datetime
import pickle
import tempfile
from bz2 import BZ2File
from gzip import GzipFile
from functools import total_ordering
# Cedar Backup modules
from CedarBackup3.filesystem import FilesystemList, BackupFileList
from CedarBackup3.xmlutil import createInputDom, addContainerNode, addStringNode
from CedarBackup3.xmlutil import isElement, readChildren, readFirstChild, readString, readStringList
from CedarBackup3.config import VALID_COLLECT_MODES, VALID_COMPRESS_MODES
from CedarBackup3.util import isStartOfWeek, buildNormalizedPath
from CedarBackup3.util import resolveCommand, executeCommand
from CedarBackup3.util import ObjectTypeList, UnorderedList, RegexList, encodePath, changeOwnership
########################################################################
# Module-wide constants and variables
########################################################################
# Module-level logger, registered under the extension's own logger name
logger = logging.getLogger("CedarBackup3.log.extend.mbox")

# The grepmail command, expressed as an argument list
GREPMAIL_COMMAND = ["grepmail"]

# NOTE(review): presumably the extension given to the per-mbox "last revision"
# files; the code that uses it is outside this view -- confirm
REVISION_PATH_EXTENSION = "mboxlast"
########################################################################
# MboxFile class definition
########################################################################
@total_ordering
class MboxFile(object):

   """
   Class representing mbox file configuration.

   The following restrictions exist on data in this class:

      - The absolute path must be absolute.
      - The collect mode must be one of the values in :any:`VALID_COLLECT_MODES`.
      - The compress mode must be one of the values in :any:`VALID_COMPRESS_MODES`.

   Instances are ordered field by field: absolute path first, then collect
   mode, then compress mode.  Comparing against an object that is neither
   ``None`` nor an ``MboxFile`` yields ``NotImplemented``, so Python can fall
   back to its default handling instead of failing with ``AttributeError``.
   """

   def __init__(self, absolutePath=None, collectMode=None, compressMode=None):
      """
      Constructor for the ``MboxFile`` class.

      You should never directly instantiate this class.

      Args:
         absolutePath: Absolute path to an mbox file on disk
         collectMode: Overridden collect mode for this mbox file
         compressMode: Overridden compression mode for this mbox file
      Raises:
         ValueError: If one of the values is invalid
      """
      self._absolutePath = None
      self._collectMode = None
      self._compressMode = None
      # Assign through the properties so that each value gets validated
      self.absolutePath = absolutePath
      self.collectMode = collectMode
      self.compressMode = compressMode

   def __repr__(self):
      """
      Official string representation for class instance.
      """
      return "MboxFile(%s, %s, %s)" % (self.absolutePath, self.collectMode, self.compressMode)

   def __str__(self):
      """
      Informal string representation for class instance.
      """
      return self.__repr__()

   def __eq__(self, other):
      """Equals operator, implemented in terms of original Python 2 compare operator."""
      if other is not None and not isinstance(other, MboxFile):
         return NotImplemented  # foreign type: let Python apply its default handling
      return self.__cmp__(other) == 0

   def __lt__(self, other):
      """Less-than operator, implemented in terms of original Python 2 compare operator."""
      if other is not None and not isinstance(other, MboxFile):
         return NotImplemented
      return self.__cmp__(other) < 0

   def __gt__(self, other):
      """Greater-than operator, implemented in terms of original Python 2 compare operator."""
      if other is not None and not isinstance(other, MboxFile):
         return NotImplemented
      return self.__cmp__(other) > 0

   def __cmp__(self, other):
      """
      Original Python 2 comparison operator.

      Fields are compared in order (absolute path, collect mode, compress
      mode) and the first difference decides the result.  Differing values are
      ordered by their string form, with ``None`` treated as the empty string.

      Args:
         other: Other object to compare to
      Returns:
         -1/0/1 depending on whether self is ``<``, ``=`` or ``>`` other
      """
      if other is None:
         return 1  # any instance sorts after None
      for field in ("absolutePath", "collectMode", "compressMode"):
         mine = getattr(self, field)
         theirs = getattr(other, field)
         if mine != theirs:
            return -1 if str(mine or "") < str(theirs or "") else 1
      return 0

   def _setAbsolutePath(self, value):
      """
      Property target used to set the absolute path.

      The value must be an absolute path if it is not ``None``.
      It does not have to exist on disk at the time of assignment.

      Raises:
         ValueError: If the value is not an absolute path
         ValueError: If the value cannot be encoded properly
      """
      if value is not None:
         if not os.path.isabs(value):
            raise ValueError("Absolute path must be, er, an absolute path.")
      self._absolutePath = encodePath(value)

   def _getAbsolutePath(self):
      """
      Property target used to get the absolute path.
      """
      return self._absolutePath

   def _setCollectMode(self, value):
      """
      Property target used to set the collect mode.

      If not ``None``, the mode must be one of the values in :any:`VALID_COLLECT_MODES`.

      Raises:
         ValueError: If the value is not valid
      """
      if value is not None:
         if value not in VALID_COLLECT_MODES:
            raise ValueError("Collect mode must be one of %s." % VALID_COLLECT_MODES)
      self._collectMode = value

   def _getCollectMode(self):
      """
      Property target used to get the collect mode.
      """
      return self._collectMode

   def _setCompressMode(self, value):
      """
      Property target used to set the compress mode.

      If not ``None``, the mode must be one of the values in :any:`VALID_COMPRESS_MODES`.

      Raises:
         ValueError: If the value is not valid
      """
      if value is not None:
         if value not in VALID_COMPRESS_MODES:
            raise ValueError("Compress mode must be one of %s." % VALID_COMPRESS_MODES)
      self._compressMode = value

   def _getCompressMode(self):
      """
      Property target used to get the compress mode.
      """
      return self._compressMode

   absolutePath = property(_getAbsolutePath, _setAbsolutePath, None, doc="Absolute path to the mbox file.")
   collectMode = property(_getCollectMode, _setCollectMode, None, doc="Overridden collect mode for this mbox file.")
   compressMode = property(_getCompressMode, _setCompressMode, None, doc="Overridden compress mode for this mbox file.")
########################################################################
# MboxDir class definition
########################################################################
@total_ordering
class MboxDir(object):

   """
   Class representing mbox directory configuration.

   The following restrictions exist on data in this class:

      - The absolute path must be absolute.
      - The collect mode must be one of the values in :any:`VALID_COLLECT_MODES`.
      - The compress mode must be one of the values in :any:`VALID_COMPRESS_MODES`.

   Unlike collect directory configuration, this is the only place exclusions
   are allowed (no global exclusions at the <mbox> configuration level).  Also,
   we only allow relative exclusions and there is no configured ignore file.
   This is because mbox directory backups are not recursive.

   Comparing against an object that is neither ``None`` nor an ``MboxDir``
   yields ``NotImplemented``, so Python can fall back to its default handling
   instead of failing with ``AttributeError``.
   """

   def __init__(self, absolutePath=None, collectMode=None, compressMode=None,
                relativeExcludePaths=None, excludePatterns=None):
      """
      Constructor for the ``MboxDir`` class.

      You should never directly instantiate this class.

      Args:
         absolutePath: Absolute path to an mbox directory on disk
         collectMode: Overridden collect mode for this directory
         compressMode: Overridden compression mode for this directory
         relativeExcludePaths: List of relative paths to exclude
         excludePatterns: List of regular expression patterns to exclude
      Raises:
         ValueError: If one of the values is invalid
      """
      self._absolutePath = None
      self._collectMode = None
      self._compressMode = None
      self._relativeExcludePaths = None
      self._excludePatterns = None
      # Assign through the properties so that each value gets validated
      self.absolutePath = absolutePath
      self.collectMode = collectMode
      self.compressMode = compressMode
      self.relativeExcludePaths = relativeExcludePaths
      self.excludePatterns = excludePatterns

   def __repr__(self):
      """
      Official string representation for class instance.
      """
      return "MboxDir(%s, %s, %s, %s, %s)" % (self.absolutePath, self.collectMode, self.compressMode,
                                              self.relativeExcludePaths, self.excludePatterns)

   def __str__(self):
      """
      Informal string representation for class instance.
      """
      return self.__repr__()

   def __eq__(self, other):
      """Equals operator, implemented in terms of original Python 2 compare operator."""
      if other is not None and not isinstance(other, MboxDir):
         return NotImplemented  # foreign type: let Python apply its default handling
      return self.__cmp__(other) == 0

   def __lt__(self, other):
      """Less-than operator, implemented in terms of original Python 2 compare operator."""
      if other is not None and not isinstance(other, MboxDir):
         return NotImplemented
      return self.__cmp__(other) < 0

   def __gt__(self, other):
      """Greater-than operator, implemented in terms of original Python 2 compare operator."""
      if other is not None and not isinstance(other, MboxDir):
         return NotImplemented
      return self.__cmp__(other) > 0

   def __cmp__(self, other):
      """
      Original Python 2 comparison operator.

      The scalar fields (absolute path, collect mode, compress mode) are
      compared first, ordered by their string form with ``None`` treated as
      the empty string.  The two exclusion lists are compared afterwards using
      their own ``!=`` and ``<`` operators.

      Args:
         other: Other object to compare to
      Returns:
         -1/0/1 depending on whether self is ``<``, ``=`` or ``>`` other
      """
      if other is None:
         return 1  # any instance sorts after None
      for field in ("absolutePath", "collectMode", "compressMode"):
         mine = getattr(self, field)
         theirs = getattr(other, field)
         if mine != theirs:
            return -1 if str(mine or "") < str(theirs or "") else 1
      # NOTE(review): when exactly one side is None this relies on the list
      # type supporting ordering against None -- confirm in CedarBackup3.util
      for field in ("relativeExcludePaths", "excludePatterns"):
         mine = getattr(self, field)
         theirs = getattr(other, field)
         if mine != theirs:
            return -1 if mine < theirs else 1
      return 0

   def _setAbsolutePath(self, value):
      """
      Property target used to set the absolute path.

      The value must be an absolute path if it is not ``None``.
      It does not have to exist on disk at the time of assignment.

      Raises:
         ValueError: If the value is not an absolute path
         ValueError: If the value cannot be encoded properly
      """
      if value is not None:
         if not os.path.isabs(value):
            raise ValueError("Absolute path must be, er, an absolute path.")
      self._absolutePath = encodePath(value)

   def _getAbsolutePath(self):
      """
      Property target used to get the absolute path.
      """
      return self._absolutePath

   def _setCollectMode(self, value):
      """
      Property target used to set the collect mode.

      If not ``None``, the mode must be one of the values in :any:`VALID_COLLECT_MODES`.

      Raises:
         ValueError: If the value is not valid
      """
      if value is not None:
         if value not in VALID_COLLECT_MODES:
            raise ValueError("Collect mode must be one of %s." % VALID_COLLECT_MODES)
      self._collectMode = value

   def _getCollectMode(self):
      """
      Property target used to get the collect mode.
      """
      return self._collectMode

   def _setCompressMode(self, value):
      """
      Property target used to set the compress mode.

      If not ``None``, the mode must be one of the values in :any:`VALID_COMPRESS_MODES`.

      Raises:
         ValueError: If the value is not valid
      """
      if value is not None:
         if value not in VALID_COMPRESS_MODES:
            raise ValueError("Compress mode must be one of %s." % VALID_COMPRESS_MODES)
      self._compressMode = value

   def _getCompressMode(self):
      """
      Property target used to get the compress mode.
      """
      return self._compressMode

   def _setRelativeExcludePaths(self, value):
      """
      Property target used to set the relative exclude paths list.

      Elements do not have to exist on disk at the time of assignment.
      The values are copied into a fresh ``UnorderedList``; if that fails, the
      previous list is restored and the error is re-raised.
      """
      if value is None:
         self._relativeExcludePaths = None
      else:
         saved = self._relativeExcludePaths  # captured up front so the rollback always has a value
         try:
            self._relativeExcludePaths = UnorderedList()
            self._relativeExcludePaths.extend(value)
         except Exception:
            self._relativeExcludePaths = saved
            raise

   def _getRelativeExcludePaths(self):
      """
      Property target used to get the relative exclude paths list.
      """
      return self._relativeExcludePaths

   def _setExcludePatterns(self, value):
      """
      Property target used to set the exclude patterns list.

      The values are copied into a fresh ``RegexList``; if that fails, the
      previous list is restored and the error is re-raised.
      """
      if value is None:
         self._excludePatterns = None
      else:
         saved = self._excludePatterns  # captured up front so the rollback always has a value
         try:
            self._excludePatterns = RegexList()
            self._excludePatterns.extend(value)
         except Exception:
            self._excludePatterns = saved
            raise

   def _getExcludePatterns(self):
      """
      Property target used to get the exclude patterns list.
      """
      return self._excludePatterns

   absolutePath = property(_getAbsolutePath, _setAbsolutePath, None, doc="Absolute path to the mbox directory.")
   collectMode = property(_getCollectMode, _setCollectMode, None, doc="Overridden collect mode for this mbox directory.")
   compressMode = property(_getCompressMode, _setCompressMode, None, doc="Overridden compress mode for this mbox directory.")
   relativeExcludePaths = property(_getRelativeExcludePaths, _setRelativeExcludePaths, None, "List of relative paths to exclude.")
   excludePatterns = property(_getExcludePatterns, _setExcludePatterns, None, "List of regular expression patterns to exclude.")
########################################################################
# MboxConfig class definition
########################################################################
@total_ordering
class MboxConfig(object):

   """
   Class representing mbox configuration.

   Mbox configuration is used for backing up mbox email files.

   The following restrictions exist on data in this class:

      - The collect mode must be one of the values in :any:`VALID_COLLECT_MODES`.
      - The compress mode must be one of the values in :any:`VALID_COMPRESS_MODES`.
      - The ``mboxFiles`` list must be a list of ``MboxFile`` objects
      - The ``mboxDirs`` list must be a list of ``MboxDir`` objects

   For the ``mboxFiles`` and ``mboxDirs`` lists, validation is accomplished
   through the :any:`util.ObjectTypeList` list implementation that overrides common
   list methods and transparently ensures that each element is of the proper
   type.

   Unlike collect configuration, no global exclusions are allowed on this
   level.  We only allow relative exclusions at the mbox directory level.
   Also, there is no configured ignore file.  This is because mbox directory
   backups are not recursive.

   Comparing against an object that is neither ``None`` nor an ``MboxConfig``
   yields ``NotImplemented``, so Python can fall back to its default handling
   instead of failing with ``AttributeError``.

   *Note:* Lists within this class are "unordered" for equality comparisons.
   """

   def __init__(self, collectMode=None, compressMode=None, mboxFiles=None, mboxDirs=None):
      """
      Constructor for the ``MboxConfig`` class.

      Args:
         collectMode: Default collect mode
         compressMode: Default compress mode
         mboxFiles: List of mbox files to back up
         mboxDirs: List of mbox directories to back up
      Raises:
         ValueError: If one of the values is invalid
      """
      self._collectMode = None
      self._compressMode = None
      self._mboxFiles = None
      self._mboxDirs = None
      # Assign through the properties so that each value gets validated
      self.collectMode = collectMode
      self.compressMode = compressMode
      self.mboxFiles = mboxFiles
      self.mboxDirs = mboxDirs

   def __repr__(self):
      """
      Official string representation for class instance.
      """
      return "MboxConfig(%s, %s, %s, %s)" % (self.collectMode, self.compressMode, self.mboxFiles, self.mboxDirs)

   def __str__(self):
      """
      Informal string representation for class instance.
      """
      return self.__repr__()

   def __eq__(self, other):
      """Equals operator, implemented in terms of original Python 2 compare operator."""
      if other is not None and not isinstance(other, MboxConfig):
         return NotImplemented  # foreign type: let Python apply its default handling
      return self.__cmp__(other) == 0

   def __lt__(self, other):
      """Less-than operator, implemented in terms of original Python 2 compare operator."""
      if other is not None and not isinstance(other, MboxConfig):
         return NotImplemented
      return self.__cmp__(other) < 0

   def __gt__(self, other):
      """Greater-than operator, implemented in terms of original Python 2 compare operator."""
      if other is not None and not isinstance(other, MboxConfig):
         return NotImplemented
      return self.__cmp__(other) > 0

   def __cmp__(self, other):
      """
      Original Python 2 comparison operator.

      Lists within this class are "unordered" for equality comparisons.
      The two modes are compared first, ordered by their string form with
      ``None`` treated as the empty string.  The ``mboxFiles`` and
      ``mboxDirs`` lists are compared afterwards using their own ``!=`` and
      ``<`` operators.

      Args:
         other: Other object to compare to
      Returns:
         -1/0/1 depending on whether self is ``<``, ``=`` or ``>`` other
      """
      if other is None:
         return 1  # any instance sorts after None
      for field in ("collectMode", "compressMode"):
         mine = getattr(self, field)
         theirs = getattr(other, field)
         if mine != theirs:
            return -1 if str(mine or "") < str(theirs or "") else 1
      # NOTE(review): when exactly one side is None this relies on the list
      # type supporting ordering against None -- confirm in CedarBackup3.util
      for field in ("mboxFiles", "mboxDirs"):
         mine = getattr(self, field)
         theirs = getattr(other, field)
         if mine != theirs:
            return -1 if mine < theirs else 1
      return 0

   def _setCollectMode(self, value):
      """
      Property target used to set the collect mode.

      If not ``None``, the mode must be one of the values in :any:`VALID_COLLECT_MODES`.

      Raises:
         ValueError: If the value is not valid
      """
      if value is not None:
         if value not in VALID_COLLECT_MODES:
            raise ValueError("Collect mode must be one of %s." % VALID_COLLECT_MODES)
      self._collectMode = value

   def _getCollectMode(self):
      """
      Property target used to get the collect mode.
      """
      return self._collectMode

   def _setCompressMode(self, value):
      """
      Property target used to set the compress mode.

      If not ``None``, the mode must be one of the values in :any:`VALID_COMPRESS_MODES`.

      Raises:
         ValueError: If the value is not valid
      """
      if value is not None:
         if value not in VALID_COMPRESS_MODES:
            raise ValueError("Compress mode must be one of %s." % VALID_COMPRESS_MODES)
      self._compressMode = value

   def _getCompressMode(self):
      """
      Property target used to get the compress mode.
      """
      return self._compressMode

   def _setMboxFiles(self, value):
      """
      Property target used to set the mboxFiles list.

      Either the value must be ``None`` or each element must be an ``MboxFile``.
      The values are copied into a fresh ``ObjectTypeList``; if that fails, the
      previous list is restored and the error is re-raised.

      Raises:
         ValueError: If the value is not an ``MboxFile``
      """
      if value is None:
         self._mboxFiles = None
      else:
         saved = self._mboxFiles  # captured up front so the rollback always has a value
         try:
            self._mboxFiles = ObjectTypeList(MboxFile, "MboxFile")
            self._mboxFiles.extend(value)
         except Exception:
            self._mboxFiles = saved
            raise

   def _getMboxFiles(self):
      """
      Property target used to get the mboxFiles list.
      """
      return self._mboxFiles

   def _setMboxDirs(self, value):
      """
      Property target used to set the mboxDirs list.

      Either the value must be ``None`` or each element must be an ``MboxDir``.
      The values are copied into a fresh ``ObjectTypeList``; if that fails, the
      previous list is restored and the error is re-raised.

      Raises:
         ValueError: If the value is not an ``MboxDir``
      """
      if value is None:
         self._mboxDirs = None
      else:
         saved = self._mboxDirs  # captured up front so the rollback always has a value
         try:
            self._mboxDirs = ObjectTypeList(MboxDir, "MboxDir")
            self._mboxDirs.extend(value)
         except Exception:
            self._mboxDirs = saved
            raise

   def _getMboxDirs(self):
      """
      Property target used to get the mboxDirs list.
      """
      return self._mboxDirs

   collectMode = property(_getCollectMode, _setCollectMode, None, doc="Default collect mode.")
   compressMode = property(_getCompressMode, _setCompressMode, None, doc="Default compress mode.")
   mboxFiles = property(_getMboxFiles, _setMboxFiles, None, doc="List of mbox files to back up.")
   mboxDirs = property(_getMboxDirs, _setMboxDirs, None, doc="List of mbox directories to back up.")
########################################################################
# LocalConfig class definition
########################################################################
@total_ordering
[docs]class LocalConfig(object):
"""
Class representing this extension's configuration document.
This is not a general-purpose configuration object like the main Cedar
Backup configuration object. Instead, it just knows how to parse and emit
Mbox-specific configuration values. Third parties who need to read and
write configuration related to this extension should access it through the
constructor, ``validate`` and ``addConfig`` methods.
*Note:* Lists within this class are "unordered" for equality comparisons.
"""
[docs] def __init__(self, xmlData=None, xmlPath=None, validate=True):
"""
Initializes a configuration object.
If you initialize the object without passing either ``xmlData`` or
``xmlPath`` then configuration will be empty and will be invalid until it
is filled in properly.
No reference to the original XML data or original path is saved off by
this class. Once the data has been parsed (successfully or not) this
original information is discarded.
Unless the ``validate`` argument is ``False``, the :any:`LocalConfig.validate`
method will be called (with its default arguments) against configuration
after successfully parsing any passed-in XML. Keep in mind that even if
``validate`` is ``False``, it might not be possible to parse the passed-in
XML document if lower-level validations fail.
*Note:* It is strongly suggested that the ``validate`` option always be set
to ``True`` (the default) unless there is a specific need to read in
invalid configuration from disk.
Args:
xmlData (String data): XML data representing configuration
xmlPath (Absolute path to a file on disk): Path to an XML file on disk
validate (Boolean true/false): Validate the document after parsing it
Raises:
ValueError: If both ``xmlData`` and ``xmlPath`` are passed-in
ValueError: If the XML data in ``xmlData`` or ``xmlPath`` cannot be parsed
ValueError: If the parsed configuration document is not valid
"""
self._mbox = None
self.mbox = None
if xmlData is not None and xmlPath is not None:
raise ValueError("Use either xmlData or xmlPath, but not both.")
if xmlData is not None:
self._parseXmlData(xmlData)
if validate:
self.validate()
elif xmlPath is not None:
with open(xmlPath) as f:
xmlData = f.read()
self._parseXmlData(xmlData)
if validate:
self.validate()
def __repr__(self):
"""
Official string representation for class instance.
"""
return "LocalConfig(%s)" % (self.mbox)
def __str__(self):
"""
Informal string representation for class instance.
"""
return self.__repr__()
def __eq__(self, other):
"""Equals operator, iplemented in terms of original Python 2 compare operator."""
return self.__cmp__(other) == 0
def __lt__(self, other):
"""Less-than operator, iplemented in terms of original Python 2 compare operator."""
return self.__cmp__(other) < 0
def __gt__(self, other):
"""Greater-than operator, iplemented in terms of original Python 2 compare operator."""
return self.__cmp__(other) > 0
def __cmp__(self, other):
"""
Original Python 2 comparison operator.
Lists within this class are "unordered" for equality comparisons.
Args:
other: Other object to compare to
Returns:
-1/0/1 depending on whether self is ``<``, ``=`` or ``>`` other
"""
if other is None:
return 1
if self.mbox != other.mbox:
if self.mbox < other.mbox:
return -1
else:
return 1
return 0
def _setMbox(self, value):
   """
   Property target used to set the mbox configuration value.

   ``None`` is accepted as-is; any other value must be a ``MboxConfig`` object.

   Raises:
      ValueError: If the value is not a ``MboxConfig``
   """
   if value is not None and not isinstance(value, MboxConfig):
      raise ValueError("Value must be a ``MboxConfig`` object.")
   self._mbox = value

def _getMbox(self):
   """
   Property target used to get the mbox configuration value.
   """
   return self._mbox

mbox = property(_getMbox, _setMbox, None, "Mbox configuration in terms of a ``MboxConfig`` object.")
def validate(self):
   """
   Validates configuration represented by the object.

   Mbox configuration must be filled in.  Within that, the collect mode and
   compress mode are both optional, but at least one mbox file or mbox
   directory must be configured.

   Each configured file or directory must contain an absolute path, and then
   must be either able to take collect mode and compress mode configuration
   from the parent ``MboxConfig`` object, or must set each value on its own.

   Raises:
      ValueError: If one of the validations fails
   """
   if self.mbox is None:
      raise ValueError("Mbox section is required.")
   if (self.mbox.mboxFiles is None or len(self.mbox.mboxFiles) < 1) and \
      (self.mbox.mboxDirs is None or len(self.mbox.mboxDirs) < 1):
      raise ValueError("At least one mbox file or directory must be configured.")
   # Files and directories obey the same rules; only the noun in the message differs
   for entries, noun in ((self.mbox.mboxFiles, "file"), (self.mbox.mboxDirs, "directory")):
      if entries is None:
         continue
      for entry in entries:
         if entry.absolutePath is None:
            raise ValueError("Each mbox %s must set an absolute path." % noun)
         if self.mbox.collectMode is None and entry.collectMode is None:
            raise ValueError("Collect mode must either be set in parent mbox section or individual mbox %s." % noun)
         if self.mbox.compressMode is None and entry.compressMode is None:
            raise ValueError("Compress mode must either be set in parent mbox section or individual mbox %s." % noun)
def addConfig(self, xmlDom, parentNode):
   """
   Adds an <mbox> configuration section as the next child of a parent.

   Third parties should use this function to write configuration related to
   this extension.

   We add the following fields to the document::

      collectMode    //cb_config/mbox/collect_mode
      compressMode   //cb_config/mbox/compress_mode

   We also add groups of the following items, one list element per
   item::

      mboxFiles      //cb_config/mbox/file
      mboxDirs       //cb_config/mbox/dir

   The mbox files and mbox directories are added by ``_addMboxFile`` and
   ``_addMboxDir``.

   If no mbox configuration is set, nothing is added to the document.

   Args:
      xmlDom: DOM tree as from ``impl.createDocument()``
      parentNode: Parent that the section should be appended to
   """
   if self.mbox is None:
      return
   sectionNode = addContainerNode(xmlDom, parentNode, "mbox")
   addStringNode(xmlDom, sectionNode, "collect_mode", self.mbox.collectMode)
   addStringNode(xmlDom, sectionNode, "compress_mode", self.mbox.compressMode)
   # All files are written before any directory
   if self.mbox.mboxFiles is not None:
      for mboxFile in self.mbox.mboxFiles:
         LocalConfig._addMboxFile(xmlDom, sectionNode, mboxFile)
   if self.mbox.mboxDirs is not None:
      for mboxDir in self.mbox.mboxDirs:
         LocalConfig._addMboxDir(xmlDom, sectionNode, mboxDir)
def _parseXmlData(self, xmlData):
   """
   Internal method to parse an XML string into the object.

   The XML document is turned into a DOM tree by ``createInputDom``, and the
   resulting parent node is handed to the static ``_parseMbox`` method, whose
   result is stored as the mbox configuration.

   Args:
      xmlData (String data): XML data to be parsed
   Raises:
      ValueError: If the XML cannot be successfully parsed
   """
   domAndParent = createInputDom(xmlData)
   (_, parentNode) = domAndParent  # the DOM object itself is not needed here
   self._mbox = LocalConfig._parseMbox(parentNode)
@staticmethod
def _parseMbox(parent):
   """
   Parses an mbox configuration section.

   We read the following individual fields::

      collectMode    //cb_config/mbox/collect_mode
      compressMode   //cb_config/mbox/compress_mode

   We also read groups of the following item, one list element per
   item::

      mboxFiles      //cb_config/mbox/file
      mboxDirs       //cb_config/mbox/dir

   The mbox files are parsed by :any:`_parseMboxFiles` and the mbox
   directories are parsed by :any:`_parseMboxDirs`.

   Args:
      parent: Parent node to search beneath
   Returns:
      ``MboxConfig`` object or ``None`` if the section does not exist
   Raises:
      ValueError: If some filled-in value is invalid
   """
   sectionNode = readFirstChild(parent, "mbox")
   if sectionNode is None:
      return None
   # Fields are assigned one at a time so each is validated as soon as it is read
   config = MboxConfig()
   config.collectMode = readString(sectionNode, "collect_mode")
   config.compressMode = readString(sectionNode, "compress_mode")
   config.mboxFiles = LocalConfig._parseMboxFiles(sectionNode)
   config.mboxDirs = LocalConfig._parseMboxDirs(sectionNode)
   return config
@staticmethod
def _parseMboxFiles(parent):
   """
   Reads a list of ``MboxFile`` objects from immediately beneath the parent.

   We read the following individual fields::

      absolutePath            abs_path
      collectMode             collect_mode
      compressMode            compress_mode

   Args:
      parent: Parent node to search beneath
   Returns:
      List of ``MboxFile`` objects or ``None`` if none are found
   Raises:
      ValueError: If some filled-in value is invalid
   """
   found = []
   for node in readChildren(parent, "file"):
      if not isElement(node):
         continue
      item = MboxFile()
      item.absolutePath = readString(node, "abs_path")
      item.collectMode = readString(node, "collect_mode")
      item.compressMode = readString(node, "compress_mode")
      found.append(item)
   if not found:
      return None
   return found
@staticmethod
def _parseMboxDirs(parent):
   """
   Reads a list of ``MboxDir`` objects from immediately beneath the parent.

   We read the following individual fields::

      absolutePath            abs_path
      collectMode             collect_mode
      compressMode            compress_mode

   We also read groups of the following items, one list element per
   item::

      relativeExcludePaths    exclude/rel_path
      excludePatterns         exclude/pattern

   The exclusions are parsed by :any:`_parseExclusions`.

   Args:
      parent: Parent node to search beneath
   Returns:
      List of ``MboxDir`` objects or ``None`` if none are found
   Raises:
      ValueError: If some filled-in value is invalid
   """
   found = []
   for node in readChildren(parent, "dir"):
      if not isElement(node):
         continue
      item = MboxDir()
      item.absolutePath = readString(node, "abs_path")
      item.collectMode = readString(node, "collect_mode")
      item.compressMode = readString(node, "compress_mode")
      relative, patterns = LocalConfig._parseExclusions(node)
      item.relativeExcludePaths = relative
      item.excludePatterns = patterns
      found.append(item)
   if not found:
      return None
   return found
@staticmethod
def _parseExclusions(parentNode):
   """
   Reads exclusions data from immediately beneath the parent.

   We read groups of the following items, one list element per item::

      relative    exclude/rel_path
      patterns    exclude/pattern

   When there is no ``exclude`` section at all, both items in the tuple are
   ``None``.  Otherwise each item is whatever ``readStringList`` returns for
   its tag (documented upstream as ``None`` when no such items exist).

   Args:
      parentNode: Parent node to search beneath
   Returns:
      Tuple of (relative, patterns) exclusions
   """
   excludeNode = readFirstChild(parentNode, "exclude")
   if excludeNode is None:
      return (None, None)
   relativePaths = readStringList(excludeNode, "rel_path")
   regexPatterns = readStringList(excludeNode, "pattern")
   return (relativePaths, regexPatterns)
@staticmethod
def _addMboxFile(xmlDom, parentNode, mboxFile):
   """
   Appends one ``<file>`` container describing an mbox file to a parent node.

   The following fields are written to the document::

      absolutePath      file/abs_path
      collectMode       file/collect_mode
      compressMode      file/compress_mode

   Only a single ``<file>`` node is created per call, as the next child of
   the parent; the caller is expected to loop over the mbox files in the
   ``MboxConfig`` object.  Passing ``None`` for ``mboxFile`` is a no-op.

   Args:
      xmlDom: DOM tree as from ``impl.createDocument()``
      parentNode: Parent that the section should be appended to
      mboxFile: MboxFile to be added to the document
   """
   if mboxFile is None:
      return
   fileNode = addContainerNode(xmlDom, parentNode, "file")
   # (XML element name, MboxFile attribute) pairs, in document order
   fields = (
      ("abs_path", "absolutePath"),
      ("collect_mode", "collectMode"),
      ("compress_mode", "compressMode"),
   )
   for (tag, attribute) in fields:
      addStringNode(xmlDom, fileNode, tag, getattr(mboxFile, attribute))
@staticmethod
def _addMboxDir(xmlDom, parentNode, mboxDir):
   """
   Appends one ``<dir>`` container describing an mbox directory to a parent node.

   The following fields are written to the document::

      absolutePath      dir/abs_path
      collectMode       dir/collect_mode
      compressMode      dir/compress_mode

   Groups of the following items are written too, one element per list item::

      relativeExcludePaths    dir/exclude/rel_path
      excludePatterns         dir/exclude/pattern

   The ``<exclude>`` container is only created when at least one of the two
   lists is non-empty.  Only a single ``<dir>`` node is created per call, as
   the next child of the parent; the caller is expected to loop over the
   mbox directories in the ``MboxConfig`` object.  Passing ``None`` for
   ``mboxDir`` is a no-op.

   Args:
      xmlDom: DOM tree as from ``impl.createDocument()``
      parentNode: Parent that the section should be appended to
      mboxDir: MboxDir to be added to the document
   """
   if mboxDir is None:
      return
   dirNode = addContainerNode(xmlDom, parentNode, "dir")
   # (XML element name, MboxDir attribute) pairs, in document order
   fields = (
      ("abs_path", "absolutePath"),
      ("collect_mode", "collectMode"),
      ("compress_mode", "compressMode"),
   )
   for (tag, attribute) in fields:
      addStringNode(xmlDom, dirNode, tag, getattr(mboxDir, attribute))
   relativePaths = mboxDir.relativeExcludePaths
   patterns = mboxDir.excludePatterns
   hasExclusions = ((relativePaths is not None and relativePaths != []) or
                    (patterns is not None and patterns != []))
   if not hasExclusions:
      return  # nothing to exclude, so no <exclude> container is written
   excludeNode = addContainerNode(xmlDom, dirNode, "exclude")
   if relativePaths is not None:
      for relativePath in relativePaths:
         addStringNode(xmlDom, excludeNode, "rel_path", relativePath)
   if patterns is not None:
      for pattern in patterns:
         addStringNode(xmlDom, excludeNode, "pattern", pattern)
########################################################################
# Public functions
########################################################################
###########################
# executeAction() function
###########################
def executeAction(configPath, options, config):
   """
   Executes the mbox backup action.

   The extension's own configuration is loaded from ``configPath``.  Each
   configured mbox file and mbox directory is then examined in turn: its
   effective collect and compress modes are resolved, its last revision is
   loaded, and it is backed up if it is due today.  For items collected in
   ``incr`` mode the new revision is written to disk afterwards.

   Args:
      configPath (String representing a path on disk): Path to configuration file on disk
      options (Options object): Program command-line options
      config (Config object): Program configuration

   Raises:
      ValueError: Under many generic error conditions
      IOError: If a backup could not be written for some reason
   """
   logger.debug("Executing mbox extended action.")
   newRevision = datetime.datetime.today()  # mark here so all actions are after this date/time
   if config.options is None or config.collect is None:
      raise ValueError("Cedar Backup configuration is not properly filled in.")
   local = LocalConfig(xmlPath=configPath)
   todayIsStart = isStartOfWeek(config.options.startingDay)
   fullBackup = options.full or todayIsStart
   logger.debug("Full backup flag is [%s]", fullBackup)
   if local.mbox.mboxFiles is not None:
      for mboxFile in local.mbox.mboxFiles:
         logger.debug("Working with mbox file [%s]", mboxFile.absolutePath)
         collectMode = _getCollectMode(local, mboxFile)
         compressMode = _getCompressMode(local, mboxFile)
         lastRevision = _loadLastRevision(config, mboxFile, fullBackup, collectMode)
         if _isBackupDue(fullBackup, collectMode, todayIsStart):
            logger.debug("Mbox file meets criteria to be backed up today.")
            _backupMboxFile(config, mboxFile.absolutePath, fullBackup,
                            collectMode, compressMode, lastRevision, newRevision)
         else:
            logger.debug("Mbox file will not be backed up, per collect mode.")
         # Only incremental items track a revision; it is recorded after the backup attempt
         if collectMode == 'incr':
            _writeNewRevision(config, mboxFile, newRevision)
   if local.mbox.mboxDirs is not None:
      for mboxDir in local.mbox.mboxDirs:
         logger.debug("Working with mbox directory [%s]", mboxDir.absolutePath)
         collectMode = _getCollectMode(local, mboxDir)
         compressMode = _getCompressMode(local, mboxDir)
         lastRevision = _loadLastRevision(config, mboxDir, fullBackup, collectMode)
         (excludePaths, excludePatterns) = _getExclusions(mboxDir)
         if _isBackupDue(fullBackup, collectMode, todayIsStart):
            logger.debug("Mbox directory meets criteria to be backed up today.")
            _backupMboxDir(config, mboxDir.absolutePath,
                           fullBackup, collectMode, compressMode,
                           lastRevision, newRevision,
                           excludePaths, excludePatterns)
         else:
            logger.debug("Mbox directory will not be backed up, per collect mode.")
         # Only incremental items track a revision; it is recorded after the backup attempt
         if collectMode == 'incr':
            _writeNewRevision(config, mboxDir, newRevision)
   logger.info("Executed the mbox extended action successfully.")


def _isBackupDue(fullBackup, collectMode, todayIsStart):
   """
   Indicates whether an mbox item should be backed up during this run.

   An item is due when this is a full backup, when its collect mode is
   ``daily`` or ``incr``, or when its collect mode is ``weekly`` and today
   is the start of the week.

   Args:
      fullBackup: Indicates whether this is a full backup
      collectMode: Effective collect mode for the item
      todayIsStart: Indicates whether today is the configured start of the week

   Returns:
      Truthy value if the item should be backed up, falsy otherwise
   """
   if fullBackup:
      return True
   if collectMode in ('daily', 'incr'):
      return True
   return collectMode == 'weekly' and todayIsStart
def _getCollectMode(local, item):
   """
   Resolves the collect mode that applies to an mbox file or directory.

   The item's own collect mode wins when it is set; otherwise the value
   from the enclosing mbox section is used.

   Args:
      local: LocalConfig object
      item: Mbox file or directory

   Returns:
      Collect mode to use
   """
   collectMode = local.mbox.collectMode if item.collectMode is None else item.collectMode
   logger.debug("Collect mode is [%s]", collectMode)
   return collectMode
def _getCompressMode(local, item):
   """
   Resolves the compress mode that applies to an mbox file or directory.

   The item's own compress mode wins when it is set; otherwise the value
   from the enclosing mbox section is used.

   Args:
      local: LocalConfig object
      item: Mbox file or directory

   Returns:
      Compress mode to use
   """
   compressMode = local.mbox.compressMode if item.compressMode is None else item.compressMode
   logger.debug("Compress mode is [%s]", compressMode)
   return compressMode
def _getRevisionPath(config, item):
   """
   Builds the path of the revision file that belongs to an mbox item.

   The file name is the normalized form of the item's absolute path plus
   ``REVISION_PATH_EXTENSION``, and the file lives in the configured
   working directory.

   Args:
      config: Cedar Backup configuration
      item: Mbox file or directory

   Returns:
      Absolute path to the revision file associated with the item
   """
   filename = "%s.%s" % (buildNormalizedPath(item.absolutePath), REVISION_PATH_EXTENSION)
   revisionPath = os.path.join(config.options.workingDir, filename)
   logger.debug("Revision file path is [%s]", revisionPath)
   return revisionPath
def _loadLastRevision(config, item, fullBackup, collectMode):
   """
   Loads the last revision date for this item from disk and returns it.

   ``None`` is returned for a full backup, for the ``weekly`` and ``daily``
   collect modes, when the revision file does not exist, and when the file
   cannot be loaded for any reason.  ``None`` indicates that there is no
   previous revision, so the entire mail file or directory should be backed
   up.

   *Note:* The revision object itself is stored on disk via pickle, so the
   datetime precision or format is never dealt with here.  Whatever was in
   the object is what gets loaded.

   Args:
      config: Cedar Backup configuration
      item: Mbox file or directory
      fullBackup: Indicates whether this is a full backup
      collectMode: Indicates the collect mode for this item

   Returns:
      Revision date as a datetime.datetime object or ``None``
   """
   revisionPath = _getRevisionPath(config, item)
   if fullBackup:
      logger.debug("Revision file ignored because this is a full backup.")
      return None
   if collectMode in ('weekly', 'daily'):
      logger.debug("No revision file based on collect mode [%s].", collectMode)
      return None
   logger.debug("Revision file will be used for non-full incremental backup.")
   if not os.path.isfile(revisionPath):
      logger.debug("Revision file [%s] does not exist on disk.", revisionPath)
      return None
   try:
      # NOTE(review): unpickling trusts the file's content; the working
      # directory should only be writable by the backup user.
      with open(revisionPath, "rb") as handle:
         revisionDate = pickle.load(handle, fix_imports=True)  # be compatible with Python 2
      logger.debug("Loaded revision file [%s] from disk: [%s]", revisionPath, revisionDate)
   except Exception as e:
      # Best effort: an unreadable revision file just means "no previous revision"
      logger.error("Failed loading revision file [%s] from disk: %s", revisionPath, e)
      revisionDate = None
   return revisionDate
def _writeNewRevision(config, item, newRevision):
   """
   Writes new revision information to disk.

   Failing to write the revision file, for whatever reason, is logged as an
   error but never raised to the caller.

   *Note:* The revision object itself is stored on disk via pickle, so the
   datetime precision or format is never dealt with here.  Whatever is in
   the object is what gets written.

   Args:
      config: Cedar Backup configuration
      item: Mbox file or directory
      newRevision: Revision date as a datetime.datetime object
   """
   revisionPath = _getRevisionPath(config, item)
   try:
      with open(revisionPath, "wb") as handle:
         pickle.dump(newRevision, handle, protocol=0, fix_imports=True)  # be compatible with Python 2
      owner = config.options.backupUser
      group = config.options.backupGroup
      changeOwnership(revisionPath, owner, group)
      logger.debug("Wrote new revision file [%s] to disk: [%s]", revisionPath, newRevision)
   except Exception as e:
      # Best effort: a missing revision only causes a larger backup next time
      logger.error("Failed to write revision file [%s] to disk: %s", revisionPath, e)
def _getExclusions(mboxDir):
   """
   Gets exclusions (files and patterns) associated with an mbox directory.

   The files value is a list of paths to exclude, built by joining each of
   the directory's relative exclude paths onto its absolute path.  The
   patterns value is a fresh list holding the directory's exclude patterns.
   Either list is empty when the corresponding attribute is ``None``.

   Args:
      mboxDir: Mbox directory object

   Returns:
      Tuple (files, patterns) indicating what to exclude
   """
   if mboxDir.relativeExcludePaths is None:
      paths = []
   else:
      paths = [os.path.join(mboxDir.absolutePath, relativePath)
               for relativePath in mboxDir.relativeExcludePaths]
   if mboxDir.excludePatterns is None:
      patterns = []
   else:
      patterns = list(mboxDir.excludePatterns)  # copy, so the configuration is never aliased
   logger.debug("Exclude paths: %s", paths)
   logger.debug("Exclude patterns: %s", patterns)
   return (paths, patterns)
def _getBackupPath(config, mboxPath, compressMode, newRevision, targetDir=None):
   """
   Gets the backup file path (including correct extension) associated with an mbox path.

   A target directory being passed in is taken to mean that a directory is
   being backed up.  In that case the output file is simply named after the
   basename of the individual path and placed in the target directory.
   Otherwise the file is named ``mbox-<YYYYMMDD>-<normalized path>`` and
   placed in the configured collect target directory.  A ``.gz`` or ``.bz2``
   suffix is appended for the ``gzip`` and ``bzip2`` compress modes.

   *Note:* The backup path only contains the current date in YYYYMMDD format,
   but that's OK because the index information (stored elsewhere) is the actual
   date object.

   Args:
      config: Cedar Backup configuration
      mboxPath: Path to the indicated mbox file or directory
      compressMode: Compress mode to use for this mbox path
      newRevision: Revision this backup path represents
      targetDir: Target directory in which the path should exist

   Returns:
      Absolute path to the backup file associated with the repository
   """
   standalone = targetDir is None
   if standalone:
      normalizedPath = buildNormalizedPath(mboxPath)
      revisionDate = newRevision.strftime("%Y%m%d")
      filename = "mbox-%s-%s" % (revisionDate, normalizedPath)
   else:
      filename = os.path.basename(mboxPath)
   # Compress modes without an entry here leave the file name untouched
   template = {'gzip': "%s.gz", 'bzip2': "%s.bz2"}.get(compressMode)
   if template is not None:
      filename = template % filename
   directory = config.collect.targetDir if standalone else targetDir
   backupPath = os.path.join(directory, filename)
   logger.debug("Backup file path is [%s]", backupPath)
   return backupPath
def _getTarfilePath(config, mboxPath, compressMode, newRevision):
   """
   Gets the tarfile backup file path (including correct extension) associated
   with an mbox path.

   The tar archive mode is returned alongside the path, in a form that can
   be used with :any:`BackupFileList.generateTarfile`: ``targz`` for the
   ``gzip`` compress mode, ``tarbz2`` for ``bzip2`` and ``tar`` for anything
   else.

   *Note:* The tarfile path only contains the current date in YYYYMMDD format,
   but that's OK because the index information (stored elsewhere) is the actual
   date object.

   Args:
      config: Cedar Backup configuration
      mboxPath: Path to the indicated mbox file or directory
      compressMode: Compress mode to use for this mbox path
      newRevision: Revision this backup path represents

   Returns:
      Tuple of (absolute path to tarfile, tar archive mode)
   """
   normalizedPath = buildNormalizedPath(mboxPath)
   revisionDate = newRevision.strftime("%Y%m%d")
   filename = "mbox-%s-%s.tar" % (revisionDate, normalizedPath)
   # compress mode -> (file name template, archive mode)
   compressed = {
      'gzip': ("%s.gz", "targz"),
      'bzip2': ("%s.bz2", "tarbz2"),
   }.get(compressMode)
   if compressed is None:
      archiveMode = "tar"
   else:
      (template, archiveMode) = compressed
      filename = template % filename
   tarfilePath = os.path.join(config.collect.targetDir, filename)
   logger.debug("Tarfile path is [%s]", tarfilePath)
   return (tarfilePath, archiveMode)
def _getOutputFile(backupPath, compressMode):
   """
   Opens the output file used for saving backup information.

   A ``GzipFile`` is opened for the "gzip" compress mode and a ``BZ2File``
   for the "bzip2" compress mode.  Any other compress mode yields the object
   returned by the normal ``open()`` function.

   Args:
      backupPath: Path to file to open
      compressMode: Compress mode of file ("none", "gzip", "bzip2")

   Returns:
      Output file object, opened in binary mode for use with executeCommand()
   """
   openers = {"gzip": GzipFile, "bzip2": BZ2File}
   opener = openers.get(compressMode, open)  # plain open() for every other mode
   return opener(backupPath, "wb")
def _backupMboxFile(config, absolutePath,
                    fullBackup, collectMode, compressMode,
                    lastRevision, newRevision, targetDir=None):
   """
   Backs up an individual mbox file.

   The mailbox is run through grepmail and the output is written to the
   backup path derived for it.  The whole mailbox is fetched unless this is
   a non-full backup in ``incr`` collect mode with a known last revision, in
   which case only mail since that revision is fetched.

   Args:
      config: Cedar Backup configuration
      absolutePath: Path to mbox file to back up
      fullBackup: Indicates whether this should be a full backup
      collectMode: Indicates the collect mode for this item
      compressMode: Compress mode of file ("none", "gzip", "bzip2")
      lastRevision: Date of last backup as datetime.datetime
      newRevision: Date of new (current) backup as datetime.datetime
      targetDir: Target directory to write the backed-up file into

   Returns:
      Path of the backup file that was written

   Raises:
      ValueError: If some value is missing or invalid
      IOError: If there is a problem backing up the mbox file
   """
   incremental = not (fullBackup or collectMode != "incr" or lastRevision is None)
   args = [ "-a", "-u", ]  # remove duplicates but fetch entire mailbox
   if incremental:
      # ISO-8601 format; grepmail calls Date::Parse::str2time()
      revisionDate = lastRevision.strftime("%Y-%m-%dT%H:%M:%S")
      args.extend([ "-d", "since %s" % revisionDate, ])
   args.append(absolutePath)
   command = resolveCommand(GREPMAIL_COMMAND)
   backupPath = _getBackupPath(config, absolutePath, compressMode, newRevision, targetDir=targetDir)
   with _getOutputFile(backupPath, compressMode) as outputFile:
      outcome = executeCommand(command, args, returnOutput=False, ignoreStderr=True,
                               doNotLog=True, outputFile=outputFile)
      result = outcome[0]
      if result != 0:
         raise IOError("Error [%d] executing grepmail on [%s]." % (result, absolutePath))
   logger.debug("Completed backing up mailbox [%s].", absolutePath)
   return backupPath
def _backupMboxDir(config, absolutePath,
                   fullBackup, collectMode, compressMode,
                   lastRevision, newRevision,
                   excludePaths, excludePatterns):
   """
   Backs up a directory containing mbox files.

   The files found directly inside the directory (no recursion, directories
   excluded, exclusions applied) are each backed up uncompressed into a
   temporary directory created under the configured working directory.  The
   resulting files are then bundled into a single tarfile in the collect
   target directory, and ownership of the tarfile is changed to the backup
   user and group.

   The temporary directory and everything in it is always removed on the
   way out, whether or not the backup succeeded.  This includes a partial
   file left behind by a failed mailbox backup.

   Args:
      config: Cedar Backup configuration
      absolutePath: Path to mbox directory to back up
      fullBackup: Indicates whether this should be a full backup
      collectMode: Indicates the collect mode for this item
      compressMode: Compress mode of file ("none", "gzip", "bzip2")
      lastRevision: Date of last backup as datetime.datetime
      newRevision: Date of new (current) backup as datetime.datetime
      excludePaths: List of absolute paths to exclude
      excludePatterns: List of patterns to exclude

   Raises:
      ValueError: If some value is missing or invalid
      IOError: If there is a problem backing up the mbox file
   """
   import shutil  # local import: the module's import block is not touched by this change

   # Create the scratch directory before entering the try block, so that the
   # cleanup below never has to deal with a name that was not bound yet.
   tmpdir = tempfile.mkdtemp(dir=config.options.workingDir)
   try:
      mboxList = FilesystemList()
      mboxList.excludeDirs = True
      mboxList.excludePaths = excludePaths
      mboxList.excludePatterns = excludePatterns
      mboxList.addDirContents(absolutePath, recursive=False)
      tarList = BackupFileList()
      for item in mboxList:
         backupPath = _backupMboxFile(config, item, fullBackup,
                                      collectMode, "none",  # no need to compress inside compressed tar
                                      lastRevision, newRevision,
                                      targetDir=tmpdir)
         tarList.addFile(backupPath)
      (tarfilePath, archiveMode) = _getTarfilePath(config, absolutePath, compressMode, newRevision)
      tarList.generateTarfile(tarfilePath, archiveMode, ignore=True, flat=True)
      changeOwnership(tarfilePath, config.options.backupUser, config.options.backupGroup)
      logger.debug("Completed backing up directory [%s].", absolutePath)
   finally:
      # Best-effort cleanup that cannot mask the original exception.  Removing
      # the whole tree also gets rid of a partially written file from a failed
      # mailbox backup, which would otherwise keep the directory from being
      # removed.
      shutil.rmtree(tmpdir, ignore_errors=True)