Viewing file: __init__.py (14.24 KB) -rw-r--r-- Select action/file-type: (+) | (+) | (+) | Code (+) | Session (+) | (+) | SDB (+) | (+) | (+) | (+) | (+) | (+) |
""" magic is a wrapper around the libmagic file identification library.
See README for more information.
Usage:
    >>> import magic
    >>> magic.from_file("testdata/test.pdf")
    'PDF document, version 1.2'
    >>> magic.from_file("testdata/test.pdf", mime=True)
    'application/pdf'
    >>> magic.from_buffer(open("testdata/test.pdf").read(1024))
    'PDF document, version 1.2'
    >>>
"""
import sys import glob import ctypes import ctypes.util import threading import logging
from ctypes import c_char_p, c_int, c_size_t, c_void_p, byref, POINTER
# avoid shadowing the real open with the version from compat.py _real_open = open
class MagicException(Exception):
    """
    Error reported by libmagic.

    message - the error text obtained from libmagic; it may be None when
              libmagic fails without providing an error string.
    """

    def __init__(self, message):
        # Name this class (not Exception) in super() so the whole MRO,
        # including Exception itself, is initialised.
        super(MagicException, self).__init__(message)
        # Kept as an attribute because callers inspect `e.message` directly.
        self.message = message
class Magic:
    """
    Magic is a wrapper around the libmagic C library.

    Each instance owns one libmagic cookie created with the flags chosen
    in the constructor. The identification methods (from_buffer,
    from_file, from_descriptor) hold a per-instance lock while they call
    into libmagic.
    """

    def __init__(self, mime=False, magic_file=None, mime_encoding=False,
                 keep_going=False, uncompress=False, raw=False, extension=False):
        """
        Create a new libmagic wrapper.

        mime - if True, mimetypes are returned instead of textual descriptions
        mime_encoding - if True, codec is returned
        magic_file - use a mime database other than the system default
        keep_going - don't stop at the first match, keep going
        uncompress - Try to look inside compressed files.
        raw - Do not try to decode "non-printable" chars.
        extension - Print a slash-separated list of valid extensions for the file type found.

        Raises MagicException if loading the magic database fails, and
        NotImplementedError if `extension` is requested but the loaded
        libmagic does not report a version of at least 524.
        """
        self.flags = MAGIC_NONE
        if mime:
            self.flags |= MAGIC_MIME_TYPE
        if mime_encoding:
            self.flags |= MAGIC_MIME_ENCODING
        if keep_going:
            self.flags |= MAGIC_CONTINUE
        if uncompress:
            self.flags |= MAGIC_COMPRESS
        if raw:
            self.flags |= MAGIC_RAW
        if extension:
            self.flags |= MAGIC_EXTENSION

        self.cookie = magic_open(self.flags)
        self.lock = threading.Lock()

        magic_load(self.cookie, magic_file)

        # MAGIC_EXTENSION was added in 523 or 524, so bail if
        # it doesn't appear to be available
        if extension and (not _has_version or version() < 524):
            raise NotImplementedError('MAGIC_EXTENSION is not supported in this version of libmagic')

        # For https://github.com/ahupp/python-magic/issues/190
        # libmagic has fixed internal limits that some files exceed, causing
        # an error.  We can avoid this (at least for the sample file given)
        # by bumping the limit up.  It's not clear if this is a general solution
        # or whether other internal limits should be increased, but given
        # the lack of other reports I'll assume this is rare.
        if _has_param:
            try:
                self.setparam(MAGIC_PARAM_NAME_MAX, 64)
            except MagicException:
                # some versions of libmagic fail this call,
                # so rather than fail hard just use default behavior
                pass

    def from_buffer(self, buf):
        """
        Identify the contents of `buf`

        Returns the description produced by libmagic, passed through
        maybe_decode. Raises MagicException on failure, except for the
        libmagic 5.09 case handled by _handle509Bug.
        """
        with self.lock:
            try:
                # if we're on python3, convert buf to bytes
                # otherwise this string is passed as wchar*
                # which is not what libmagic expects.
                # isinstance (rather than an exact type comparison) so
                # that str subclasses are converted as well.
                if isinstance(buf, str) and str != bytes:
                    buf = buf.encode('utf-8', errors='replace')
                return maybe_decode(magic_buffer(self.cookie, buf))
            except MagicException as e:
                return self._handle509Bug(e)

    def from_file(self, filename):
        """
        Identify the contents of the file named `filename`.

        The file is opened (and immediately closed) first so that a
        missing or unreadable file raises the usual open() error instead
        of a libmagic error.
        """
        # raise FileNotFoundException or IOError if the file does not exist
        with _real_open(filename):
            pass

        with self.lock:
            try:
                return maybe_decode(magic_file(self.cookie, filename))
            except MagicException as e:
                return self._handle509Bug(e)

    def from_descriptor(self, fd):
        """
        Identify the contents of the file referred to by descriptor `fd`.
        """
        with self.lock:
            try:
                return maybe_decode(magic_descriptor(self.cookie, fd))
            except MagicException as e:
                return self._handle509Bug(e)

    def _handle509Bug(self, e):
        """
        Translate the libmagic 5.09 "NULL result, no error message" failure.

        Returns "application/octet-stream" when `e` carries no message and
        this instance was created with the MAGIC_MIME_TYPE flag; otherwise
        re-raises `e`.
        """
        # libmagic 5.09 has a bug where it might fail to identify the
        # mimetype of a file and returns null from magic_file (and
        # likely _buffer), but also does not return an error message.
        if e.message is None and (self.flags & MAGIC_MIME_TYPE):
            return "application/octet-stream"
        else:
            raise e

    def setparam(self, param, val):
        """Set libmagic parameter `param` (a MAGIC_PARAM_* value) to `val`."""
        return magic_setparam(self.cookie, param, val)

    def getparam(self, param):
        """Return the current value of libmagic parameter `param`."""
        return magic_getparam(self.cookie, param)

    def __del__(self):
        # no _thread_check here because there can be no other
        # references to this object at this point.

        # during shutdown magic_close may have been cleared already so
        # make sure it exists before using it.

        # the self.cookie check should be unnecessary and was an
        # incorrect fix for a threading problem, however I'm leaving
        # it in because it's harmless and I'm slightly afraid to
        # remove it.
        if hasattr(self, 'cookie') and self.cookie and magic_close:
            magic_close(self.cookie)
            self.cookie = None
# Cache of shared Magic instances used by the module-level helpers,
# keyed by the `mime` argument they were created with.
_instances = {}


def _get_magic_type(mime):
    """Return the shared Magic instance for `mime`, creating it on first use."""
    cached = _instances.get(mime)
    if cached is not None:
        return cached
    created = Magic(mime=mime)
    _instances[mime] = created
    return created
def from_file(filename, mime=False):
    """
    Accepts a filename and returns the detected filetype.  Return
    value is the mimetype if mime=True, otherwise a human readable
    name.

    >>> magic.from_file("testdata/test.pdf", mime=True)
    'application/pdf'
    """
    m = _get_magic_type(mime)
    return m.from_file(filename)
def from_buffer(buffer, mime=False):
    """
    Identify the type of the data held in `buffer` (a binary string).

    With mime=True the mimetype is returned; otherwise the result is a
    human readable name.

    >>> magic.from_buffer(open("testdata/test.pdf").read(1024))
    'PDF document, version 1.2'
    """
    return _get_magic_type(mime).from_buffer(buffer)
def from_descriptor(fd, mime=False):
    """
    Identify the type of the file behind the file descriptor `fd`.

    With mime=True the mimetype is returned; otherwise the result is a
    human readable name.

    >>> f = open("testdata/test.pdf")
    >>> magic.from_descriptor(f.fileno())
    'PDF document, version 1.2'
    """
    return _get_magic_type(mime).from_descriptor(fd)
# The libmagic shared library is obtained from the sibling `loader` module.
from . import loader
libmagic = loader.load_lib()

# ctypes type used for the libmagic cookie: the value returned by
# magic_open and passed as the first argument of every other binding below.
magic_t = ctypes.c_void_p
def errorcheck_null(result, func, args):
    """
    ctypes errcheck hook for bindings that signal failure with NULL.

    Returns `result` unchanged unless it is None, in which case a
    MagicException carrying libmagic's error text is raised.
    """
    if result is not None:
        return result
    # args[0] is the cookie the failing call was made with
    raise MagicException(magic_error(args[0]))
def errorcheck_negative_one(result, func, args):
    """
    ctypes errcheck hook for bindings that signal failure with -1.

    Returns `result` unchanged unless it equals -1, in which case a
    MagicException carrying libmagic's error text is raised.
    """
    if result != -1:
        return result
    # args[0] is the cookie the failing call was made with
    raise MagicException(magic_error(args[0]))
# return str on python3.  Don't want to unconditionally
# decode because that results in unicode on python2
def maybe_decode(s):
    """Return `s` as the native str type of the running interpreter."""
    if str != bytes:
        # backslashreplace here because sometimes libmagic will return metadata in the charset
        # of the file, which is unknown to us (e.g the title of a Word doc)
        return s.decode('utf-8', 'backslashreplace')
    return s
def coerce_filename(filename):
    """
    Convert `filename` into the value handed to libmagic's char* arguments.

    None is returned unchanged. os.PathLike objects (e.g. pathlib.Path)
    are first converted with os.fspath. Text strings are encoded as UTF-8
    with the surrogateescape error handler. Anything else (e.g. bytes) is
    returned as-is.
    """
    if filename is None:
        return None

    # Accept path objects where the interpreter supports the os.PathLike
    # protocol; older interpreters without it simply skip this step.
    import os
    path_like = getattr(os, 'PathLike', None)
    if path_like is not None and isinstance(filename, path_like):
        filename = os.fspath(filename)

    # ctypes will implicitly convert unicode strings to bytes with
    # .encode('ascii').  If you use the filesystem encoding
    # then you'll get inconsistent behavior (crashes) depending on the user's
    # LANG environment variable
    is_unicode = (sys.version_info[0] <= 2 and
                  isinstance(filename, unicode)) or \
                 (sys.version_info[0] >= 3 and
                  isinstance(filename, str))
    if is_unicode:
        return filename.encode('utf-8', 'surrogateescape')
    return filename
# ctypes signatures for the basic cookie lifecycle and error functions.
# None of these has an errcheck hook, so their results are returned raw.

# magic_open(flags) -> cookie
magic_open = libmagic.magic_open
magic_open.restype = magic_t
magic_open.argtypes = [c_int]

# magic_close(cookie); no return value
magic_close = libmagic.magic_close
magic_close.restype = None
magic_close.argtypes = [magic_t]

# magic_error(cookie) -> error text as a C string (used by the errcheck
# hooks in this module to build MagicException)
magic_error = libmagic.magic_error
magic_error.restype = c_char_p
magic_error.argtypes = [magic_t]

# magic_errno(cookie) -> int
magic_errno = libmagic.magic_errno
magic_errno.restype = c_int
magic_errno.argtypes = [magic_t]
# Raw binding for magic_file(cookie, filename). A NULL result is turned
# into a MagicException by errorcheck_null.
_magic_file = libmagic.magic_file
_magic_file.restype = c_char_p
_magic_file.argtypes = [magic_t, c_char_p]
_magic_file.errcheck = errorcheck_null


def magic_file(cookie, filename):
    """Call libmagic's magic_file after passing `filename` through coerce_filename."""
    return _magic_file(cookie, coerce_filename(filename))
# Raw binding for magic_buffer(cookie, buffer, length). A NULL result is
# turned into a MagicException by errorcheck_null.
_magic_buffer = libmagic.magic_buffer
_magic_buffer.restype = c_char_p
_magic_buffer.argtypes = [magic_t, c_void_p, c_size_t]
_magic_buffer.errcheck = errorcheck_null


def magic_buffer(cookie, buf):
    """Call libmagic's magic_buffer on `buf`, supplying len(buf) as the length."""
    return _magic_buffer(cookie, buf, len(buf))
# Raw binding for magic_descriptor(cookie, fd). A NULL result is turned
# into a MagicException by errorcheck_null. The public name
# `magic_descriptor` is the Python wrapper defined just below, so the raw
# function is configured once, under the private name only.
_magic_descriptor = libmagic.magic_descriptor
_magic_descriptor.restype = c_char_p
_magic_descriptor.argtypes = [magic_t, c_int]
_magic_descriptor.errcheck = errorcheck_null


def magic_descriptor(cookie, fd):
    """Call libmagic's magic_descriptor for file descriptor `fd`."""
    return _magic_descriptor(cookie, fd)
# Raw binding for magic_load(cookie, filename). A -1 result is turned into
# a MagicException by errorcheck_negative_one.
_magic_load = libmagic.magic_load
_magic_load.restype = c_int
_magic_load.argtypes = [magic_t, c_char_p]
_magic_load.errcheck = errorcheck_negative_one


def magic_load(cookie, filename):
    """
    Call libmagic's magic_load after passing `filename` through
    coerce_filename (which leaves None unchanged).
    """
    return _magic_load(cookie, coerce_filename(filename))
# The following bindings are exposed as-is with no errcheck hook, so
# callers must inspect the int return value themselves.

# magic_setflags(cookie, flags) -> int
magic_setflags = libmagic.magic_setflags
magic_setflags.restype = c_int
magic_setflags.argtypes = [magic_t, c_int]

# magic_check(cookie, filename) -> int
magic_check = libmagic.magic_check
magic_check.restype = c_int
magic_check.argtypes = [magic_t, c_char_p]

# magic_compile(cookie, filename) -> int
magic_compile = libmagic.magic_compile
magic_compile.restype = c_int
magic_compile.argtypes = [magic_t, c_char_p]
# magic_setparam / magic_getparam are treated as optional: they are bound
# only when the loaded library exports both symbols, and _has_param records
# whether that happened.
_has_param = False
if hasattr(libmagic, 'magic_setparam') and hasattr(libmagic, 'magic_getparam'):
    _has_param = True
    # Both take (cookie, param, pointer-to-size_t); -1 raises MagicException.
    _magic_setparam = libmagic.magic_setparam
    _magic_setparam.restype = c_int
    _magic_setparam.argtypes = [magic_t, c_int, POINTER(c_size_t)]
    _magic_setparam.errcheck = errorcheck_negative_one

    _magic_getparam = libmagic.magic_getparam
    _magic_getparam.restype = c_int
    _magic_getparam.argtypes = [magic_t, c_int, POINTER(c_size_t)]
    _magic_getparam.errcheck = errorcheck_negative_one
def magic_setparam(cookie, param, val):
    """
    Set libmagic parameter `param` on `cookie` to `val`.

    Raises NotImplementedError when the loaded library does not provide
    the parameter functions.
    """
    if _has_param:
        # libmagic receives the value through a pointer to a size_t
        boxed = c_size_t(val)
        return _magic_setparam(cookie, param, byref(boxed))
    raise NotImplementedError("magic_setparam not implemented")
def magic_getparam(cookie, param):
    """
    Return the value of libmagic parameter `param` for `cookie`.

    Raises NotImplementedError when the loaded library does not provide
    the parameter functions.
    """
    if _has_param:
        # libmagic writes the value into a caller-supplied size_t
        out = c_size_t()
        _magic_getparam(cookie, param, byref(out))
        return out.value
    raise NotImplementedError("magic_getparam not implemented")
# magic_version is treated as optional: it is bound only when the loaded
# library exports it, and _has_version records whether that happened.
_has_version = False
if hasattr(libmagic, "magic_version"):
    _has_version = True
    # magic_version() -> int, takes no arguments
    magic_version = libmagic.magic_version
    magic_version.restype = c_int
    magic_version.argtypes = []
def version():
    """
    Return the integer reported by libmagic's magic_version().

    Raises NotImplementedError when the loaded library does not export
    magic_version.
    """
    if _has_version:
        return magic_version()
    raise NotImplementedError("magic_version not implemented")
MAGIC_NONE = 0x000000 # No flags MAGIC_DEBUG = 0x000001 # Turn on debugging MAGIC_SYMLINK = 0x000002 # Follow symlinks MAGIC_COMPRESS = 0x000004 # Check inside compressed files MAGIC_DEVICES = 0x000008 # Look at the contents of devices MAGIC_MIME_TYPE = 0x000010 # Return a mime string MAGIC_MIME_ENCODING = 0x000400 # Return the MIME encoding # TODO: should be # MAGIC_MIME = MAGIC_MIME_TYPE | MAGIC_MIME_ENCODING MAGIC_MIME = 0x000010 # Return a mime string MAGIC_EXTENSION = 0x1000000 # Return a /-separated list of extensions
MAGIC_CONTINUE = 0x000020 # Return all matches MAGIC_CHECK = 0x000040 # Print warnings to stderr MAGIC_PRESERVE_ATIME = 0x000080 # Restore access time on exit MAGIC_RAW = 0x000100 # Don't translate unprintable chars MAGIC_ERROR = 0x000200 # Handle ENOENT etc as real errors
MAGIC_NO_CHECK_COMPRESS = 0x001000 # Don't check for compressed files MAGIC_NO_CHECK_TAR = 0x002000 # Don't check for tar files MAGIC_NO_CHECK_SOFT = 0x004000 # Don't check magic entries MAGIC_NO_CHECK_APPTYPE = 0x008000 # Don't check application type MAGIC_NO_CHECK_ELF = 0x010000 # Don't check for elf details MAGIC_NO_CHECK_ASCII = 0x020000 # Don't check for ascii files MAGIC_NO_CHECK_TROFF = 0x040000 # Don't check ascii/troff MAGIC_NO_CHECK_FORTRAN = 0x080000 # Don't check ascii/fortran MAGIC_NO_CHECK_TOKENS = 0x100000 # Don't check ascii/tokens
MAGIC_PARAM_INDIR_MAX = 0 # Recursion limit for indirect magic MAGIC_PARAM_NAME_MAX = 1 # Use count limit for name/use magic MAGIC_PARAM_ELF_PHNUM_MAX = 2 # Max ELF notes processed MAGIC_PARAM_ELF_SHNUM_MAX = 3 # Max ELF program sections processed MAGIC_PARAM_ELF_NOTES_MAX = 4 # # Max ELF sections processed MAGIC_PARAM_REGEX_MAX = 5 # Length limit for regex searches MAGIC_PARAM_BYTES_MAX = 6 # Max number of bytes to read from file
# This package name conflicts with the one provided by upstream
# libmagic.  This is a common source of confusion for users.  To
# resolve, we ship a copy of that module, and expose its functions
# wrapped in deprecation warnings.
def _add_compat(to_module):
    """
    Install the libmagic-compatible API from `magic.compat` into `to_module`.

    to_module - a namespace dict (this module passes globals()) that
                receives the wrapped compat functions and the compat
                constants.

    Raises Exception if an upper-case constant already present in
    `to_module` has a different value in `magic.compat`, unless its name
    is in the allow-list of known inconsistencies.
    """
    import functools
    import re
    import warnings

    from magic import compat

    def deprecation_wrapper(fn):
        # functools.wraps keeps the wrapped function's name and docstring
        # visible on the wrapper.
        @functools.wraps(fn)
        def _(*args, **kwargs):
            # stacklevel=2 attributes the warning to the caller of the
            # compat function rather than to this wrapper.
            warnings.warn(
                "Using compatibility mode with libmagic's python binding. "
                "See https://github.com/ahupp/python-magic/blob/master/COMPAT.md for details.",
                PendingDeprecationWarning,
                stacklevel=2)

            return fn(*args, **kwargs)

        return _

    fn = ['detect_from_filename',
          'detect_from_content',
          'detect_from_fobj',
          'open']
    for fname in fn:
        to_module[fname] = deprecation_wrapper(compat.__dict__[fname])

    # copy constants over, ensuring there's no conflicts
    is_const_re = re.compile("^[A-Z_]+$")
    allowed_inconsistent = set(['MAGIC_MIME'])
    for name, value in compat.__dict__.items():
        if not is_const_re.match(name):
            continue
        if name not in to_module:
            to_module[name] = value
        elif name not in allowed_inconsistent and to_module[name] != value:
            raise Exception("inconsistent value for " + name)


_add_compat(globals())
|