Как разобрать Манифест.mbdb-файл в резервной копии iTunes iOS 4.0
в iOS 4.0 Apple переработала процесс резервного копирования.
iTunes используется для хранения списка имен файлов, связанных с файлами резервных копий в Манифесте.plist-файл, но в iOS 4.0 он переместил эту информацию в манифест.мбдб
вы можете увидеть пример этого файла, сделав резервную копию с вашего устройства iOS 4.0 и глядя в папке ~/Library/Application Support/MobileSync/Backup (смотрите внутри вложенных папок с самой последней датой)
здесь скриншот того, как выглядит файл в текстовом редакторе:
alt текст http://supercrazyawesome.com/images/mbdb.png
Как мне разобрать это в приложение Cocoa, чтобы я мог обновить свое (бесплатное) приложение iPhone Backup Extractor (http://supercrazyawesome.com) для iOS 4.0?
8 ответов:
спасибо, user374559 и уточняли-что код и описание очень помогает.
мой удар в какой-то Python, чтобы разобрать и распечатать информацию в формате Unix ls-l like:
#!/usr/bin/env python import sys def getint(data, offset, intsize): """Retrieve an integer (big-endian) and new offset from the current offset""" value = 0 while intsize > 0: value = (value<<8) + ord(data[offset]) offset = offset + 1 intsize = intsize - 1 return value, offset def getstring(data, offset): """Retrieve a string and new offset from the current offset into the data""" if data[offset] == chr(0xFF) and data[offset+1] == chr(0xFF): return '', offset+2 # Blank string length, offset = getint(data, offset, 2) # 2-byte length value = data[offset:offset+length] return value, (offset + length) def process_mbdb_file(filename): mbdb = {} # Map offset of info in this file => file info data = open(filename).read() if data[0:4] != "mbdb": raise Exception("This does not look like an MBDB file") offset = 4 offset = offset + 2 # value x05 x00, not sure what this is while offset < len(data): fileinfo = {} fileinfo['start_offset'] = offset fileinfo['domain'], offset = getstring(data, offset) fileinfo['filename'], offset = getstring(data, offset) fileinfo['linktarget'], offset = getstring(data, offset) fileinfo['datahash'], offset = getstring(data, offset) fileinfo['unknown1'], offset = getstring(data, offset) fileinfo['mode'], offset = getint(data, offset, 2) fileinfo['unknown2'], offset = getint(data, offset, 4) fileinfo['unknown3'], offset = getint(data, offset, 4) fileinfo['userid'], offset = getint(data, offset, 4) fileinfo['groupid'], offset = getint(data, offset, 4) fileinfo['mtime'], offset = getint(data, offset, 4) fileinfo['atime'], offset = getint(data, offset, 4) fileinfo['ctime'], offset = getint(data, offset, 4) fileinfo['filelen'], offset = getint(data, offset, 8) fileinfo['flag'], offset = getint(data, offset, 1) fileinfo['numprops'], offset = getint(data, offset, 1) fileinfo['properties'] = {} for ii in range(fileinfo['numprops']): propname, offset = getstring(data, offset) propval, offset = getstring(data, offset) fileinfo['properties'][propname] = propval mbdb[fileinfo['start_offset']] = fileinfo return mbdb def process_mbdx_file(filename): mbdx = {} # Map offset of info in the MBDB file => fileID string data = open(filename).read() if data[0:4] != "mbdx": raise Exception("This does not look like an MBDX file") offset = 4 offset = offset + 2 # value 0x02 0x00, not sure what this is filecount, offset = getint(data, offset, 4) # 4-byte count of records while offset < len(data): # 26 byte record, made up of ... fileID = data[offset:offset+20] # 20 bytes of fileID fileID_string = ''.join(['%02x' % ord(b) for b in fileID]) offset = offset + 20 mbdb_offset, offset = getint(data, offset, 4) # 4-byte offset field mbdb_offset = mbdb_offset + 6 # Add 6 to get past prolog mode, offset = getint(data, offset, 2) # 2-byte mode field mbdx[mbdb_offset] = fileID_string return mbdx def modestr(val): def mode(val): if (val & 0x4): r = 'r' else: r = '-' if (val & 0x2): w = 'w' else: w = '-' if (val & 0x1): x = 'x' else: x = '-' return r+w+x return mode(val>>6) + mode((val>>3)) + mode(val) def fileinfo_str(f, verbose=False): if not verbose: return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename']) if (f['mode'] & 0xE000) == 0xA000: type = 'l' # symlink elif (f['mode'] & 0xE000) == 0x8000: type = '-' # file elif (f['mode'] & 0xE000) == 0x4000: type = 'd' # dir else: print >> sys.stderr, "Unknown file type %04x for %s" % (f['mode'], fileinfo_str(f, False)) type = '?' # unknown info = ("%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" % (type, modestr(f['mode']&0x0FFF) , f['userid'], f['groupid'], f['filelen'], f['mtime'], f['atime'], f['ctime'], f['fileID'], f['domain'], f['filename'])) if type == 'l': info = info + ' -> ' + f['linktarget'] # symlink destination for name, value in f['properties'].items(): # extra properties info = info + ' ' + name + '=' + repr(value) return info verbose = True if __name__ == '__main__': mbdb = process_mbdb_file("Manifest.mbdb") mbdx = process_mbdx_file("Manifest.mbdx") for offset, fileinfo in mbdb.items(): if offset in mbdx: fileinfo['fileID'] = mbdx[offset] else: fileinfo['fileID'] = "<nofileID>" print >> sys.stderr, "No fileID found for %s" % fileinfo_str(fileinfo) print fileinfo_str(fileinfo, verbose)
в iOS 5, Манифест.файл mbdx был удален. Для целей этой статьи он был избыточным в любом случае, потому что домен и путь находятся в Манифесте.mbdb и хэш идентификатора могут быть сгенерированы с помощью SHA1.
вот мое обновление кода galloglass, поэтому он работает с резервными копиями устройств iOS 5. Единственными изменениями являются исключение process_mbdx_file () и добавление нескольких строк в process_mbdb_file ().
протестировано с резервными копиями iPhone 4S и iPad 1, оба с множество приложений и файлов.
#!/usr/bin/env python import sys import hashlib mbdx = {} def getint(data, offset, intsize): """Retrieve an integer (big-endian) and new offset from the current offset""" value = 0 while intsize > 0: value = (value<<8) + ord(data[offset]) offset = offset + 1 intsize = intsize - 1 return value, offset def getstring(data, offset): """Retrieve a string and new offset from the current offset into the data""" if data[offset] == chr(0xFF) and data[offset+1] == chr(0xFF): return '', offset+2 # Blank string length, offset = getint(data, offset, 2) # 2-byte length value = data[offset:offset+length] return value, (offset + length) def process_mbdb_file(filename): mbdb = {} # Map offset of info in this file => file info data = open(filename).read() if data[0:4] != "mbdb": raise Exception("This does not look like an MBDB file") offset = 4 offset = offset + 2 # value x05 x00, not sure what this is while offset < len(data): fileinfo = {} fileinfo['start_offset'] = offset fileinfo['domain'], offset = getstring(data, offset) fileinfo['filename'], offset = getstring(data, offset) fileinfo['linktarget'], offset = getstring(data, offset) fileinfo['datahash'], offset = getstring(data, offset) fileinfo['unknown1'], offset = getstring(data, offset) fileinfo['mode'], offset = getint(data, offset, 2) fileinfo['unknown2'], offset = getint(data, offset, 4) fileinfo['unknown3'], offset = getint(data, offset, 4) fileinfo['userid'], offset = getint(data, offset, 4) fileinfo['groupid'], offset = getint(data, offset, 4) fileinfo['mtime'], offset = getint(data, offset, 4) fileinfo['atime'], offset = getint(data, offset, 4) fileinfo['ctime'], offset = getint(data, offset, 4) fileinfo['filelen'], offset = getint(data, offset, 8) fileinfo['flag'], offset = getint(data, offset, 1) fileinfo['numprops'], offset = getint(data, offset, 1) fileinfo['properties'] = {} for ii in range(fileinfo['numprops']): propname, offset = getstring(data, offset) propval, offset = getstring(data, offset) fileinfo['properties'][propname] = propval mbdb[fileinfo['start_offset']] = fileinfo fullpath = fileinfo['domain'] + '-' + fileinfo['filename'] id = hashlib.sha1(fullpath) mbdx[fileinfo['start_offset']] = id.hexdigest() return mbdb def modestr(val): def mode(val): if (val & 0x4): r = 'r' else: r = '-' if (val & 0x2): w = 'w' else: w = '-' if (val & 0x1): x = 'x' else: x = '-' return r+w+x return mode(val>>6) + mode((val>>3)) + mode(val) def fileinfo_str(f, verbose=False): if not verbose: return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename']) if (f['mode'] & 0xE000) == 0xA000: type = 'l' # symlink elif (f['mode'] & 0xE000) == 0x8000: type = '-' # file elif (f['mode'] & 0xE000) == 0x4000: type = 'd' # dir else: print >> sys.stderr, "Unknown file type %04x for %s" % (f['mode'], fileinfo_str(f, False)) type = '?' # unknown info = ("%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" % (type, modestr(f['mode']&0x0FFF) , f['userid'], f['groupid'], f['filelen'], f['mtime'], f['atime'], f['ctime'], f['fileID'], f['domain'], f['filename'])) if type == 'l': info = info + ' -> ' + f['linktarget'] # symlink destination for name, value in f['properties'].items(): # extra properties info = info + ' ' + name + '=' + repr(value) return info verbose = True if __name__ == '__main__': mbdb = process_mbdb_file("Manifest.mbdb") for offset, fileinfo in mbdb.items(): if offset in mbdx: fileinfo['fileID'] = mbdx[offset] else: fileinfo['fileID'] = "<nofileID>" print >> sys.stderr, "No fileID found for %s" % fileinfo_str(fileinfo) print fileinfo_str(fileinfo, verbose)
Я закончил свою работу над этим материалом-то есть iOS 4 + iTunes 9.2 обновление моей резервной библиотеки декодера для Python -http://www.iki.fi/fingon/iphonebackupdb.py
Он делает то, что мне нужно, немного документации, но не стесняйтесь копировать идеи оттуда ; -)
(кажется, хорошо работает с моими резервными копиями, по крайней мере.)
вы можете найти информацию и небольшое описание формата MBDB/MBDX здесь:
http://code.google.com/p/iphonebackupbrowser/
Это мое приложение для просмотра файлов резервного копирования. Я попытался документировать формат новых файлов, которые поставляются с iTunes 9.2.
этот скрипт python является удивительным.
вот моя версия Ruby (с небольшим улучшением) и возможностями поиска. (для iOS 5)
# encoding: utf-8 require 'fileutils' require 'digest/sha1' class ManifestParser def initialize(mbdb_filename, verbose = false) @verbose = verbose process_mbdb_file(mbdb_filename) end # Returns the numbers of records in the Manifest files. def record_number @mbdb.size end # Returns a huge string containing the parsing of the Manifest files. def to_s s = '' @mbdb.each do |v| s += "#{fileinfo_str(v)}\n" end s end def to_file(filename) File.open(filename, 'w') do |f| @mbdb.each do |v| f.puts fileinfo_str(v) end end end # Copy the backup files to their real path/name. # * domain_match Can be a regexp to restrict the files to copy. # * filename_match Can be a regexp to restrict the files to copy. def rename_files(domain_match = nil, filename_match = nil) @mbdb.each do |v| if v[:type] == '-' # Only rename files. if (domain_match.nil? or v[:domain] =~ domain_match) and (filename_match.nil? or v[:filename] =~ filename_match) dst = "#{v[:domain]}/#{v[:filename]}" puts "Creating: #{dst}" FileUtils.mkdir_p(File.dirname(dst)) FileUtils.cp(v[:fileID], dst) end end end end # Return the filename that math the given regexp. def search(regexp) result = Array.new @mbdb.each do |v| if "#{v[:domain]}::#{v[:filename]}" =~ regexp result << v end end result end private # Retrieve an integer (big-endian) and new offset from the current offset def getint(data, offset, intsize) value = 0 while intsize > 0 value = (value<<8) + data[offset].ord offset += 1 intsize -= 1 end return value, offset end # Retrieve a string and new offset from the current offset into the data def getstring(data, offset) return '', offset + 2 if data[offset] == 0xFF.chr and data[offset + 1] == 0xFF.chr # Blank string length, offset = getint(data, offset, 2) # 2-byte length value = data[offset...(offset + length)] return value, (offset + length) end def process_mbdb_file(filename) @mbdb = Array.new data = File.open(filename, 'rb') { |f| f.read } puts "MBDB file read. Size: #{data.size}" raise 'This does not look like an MBDB file' if data[0...4] != 'mbdb' offset = 4 offset += 2 # value x05 x00, not sure what this is while offset < data.size fileinfo = Hash.new fileinfo[:start_offset] = offset fileinfo[:domain], offset = getstring(data, offset) fileinfo[:filename], offset = getstring(data, offset) fileinfo[:linktarget], offset = getstring(data, offset) fileinfo[:datahash], offset = getstring(data, offset) fileinfo[:unknown1], offset = getstring(data, offset) fileinfo[:mode], offset = getint(data, offset, 2) if (fileinfo[:mode] & 0xE000) == 0xA000 # Symlink fileinfo[:type] = 'l' elsif (fileinfo[:mode] & 0xE000) == 0x8000 # File fileinfo[:type] = '-' elsif (fileinfo[:mode] & 0xE000) == 0x4000 # Dir fileinfo[:type] = 'd' else # $stderr.puts "Unknown file type %04x for #{fileinfo_str(f, false)}" % f['mode'] fileinfo[:type] = '?' end fileinfo[:unknown2], offset = getint(data, offset, 4) fileinfo[:unknown3], offset = getint(data, offset, 4) fileinfo[:userid], offset = getint(data, offset, 4) fileinfo[:groupid], offset = getint(data, offset, 4) fileinfo[:mtime], offset = getint(data, offset, 4) fileinfo[:atime], offset = getint(data, offset, 4) fileinfo[:ctime], offset = getint(data, offset, 4) fileinfo[:filelen], offset = getint(data, offset, 8) fileinfo[:flag], offset = getint(data, offset, 1) fileinfo[:numprops], offset = getint(data, offset, 1) fileinfo[:properties] = Hash.new (0...(fileinfo[:numprops])).each do |ii| propname, offset = getstring(data, offset) propval, offset = getstring(data, offset) fileinfo[:properties][propname] = propval end # Compute the ID of the file. fullpath = fileinfo[:domain] + '-' + fileinfo[:filename] fileinfo[:fileID] = Digest::SHA1.hexdigest(fullpath) # We add the file to the list of files. @mbdb << fileinfo end @mbdb end def modestr(val) def mode(val) r = (val & 0x4) ? 'r' : '-' w = (val & 0x2) ? 'w' : '-' x = (val & 0x1) ? 'x' : '-' r + w + x end mode(val >> 6) + mode(val >> 3) + mode(val) end def fileinfo_str(f) return "(#{f[:fileID]})#{f[:domain]}::#{f[:filename]}" unless @verbose data = [f[:type], modestr(f[:mode]), f[:userid], f[:groupid], f[:filelen], f[:mtime], f[:atime], f[:ctime], f[:fileID], f[:domain], f[:filename]] info = "%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" % data info += ' -> ' + f[:linktarget] if f[:type] == 'l' # Symlink destination f[:properties].each do |k, v| info += " #{k}=#{v.inspect}" end info end end if __FILE__ == mp = ManifestParser.new 'Manifest.mbdb', true mp.to_file 'filenames.txt' end
Мне понравился код galloglas, и я изменил основную функцию, чтобы она показывала отсортированный список общего размера по приложению:
verbose = True if __name__ == '__main__': mbdb = process_mbdb_file("Manifest.mbdb") mbdx = process_mbdx_file("Manifest.mbdx") sizes = {} for offset, fileinfo in mbdb.items(): if offset in mbdx: fileinfo['fileID'] = mbdx[offset] else: fileinfo['fileID'] = "<nofileID>" print >> sys.stderr, "No fileID found for %s" % fileinfo_str(fileinfo) print fileinfo_str(fileinfo, verbose) if (fileinfo['mode'] & 0xE000) == 0x8000: sizes[fileinfo['domain']]= sizes.get(fileinfo['domain'],0) + fileinfo['filelen'] for domain in sorted(sizes, key=sizes.get): print "%-60s %11d (%dMB)" % (domain, sizes[domain], int(sizes[domain]/1024/1024))
таким образом, вы можете выяснить, какое приложение ест все пространство.
для тех, кто ищет реализацию Java для чтения файлов MBDB, есть несколько там:
проект"анализатор iPhone" (очень чистый код): http://sourceforge.net/p/iphoneanalyzer/code/HEAD/tree/trunk/library/src/main/java/com/crypticbit/ipa/io/parser/manifest/Mbdb.java
" iPhone Сталкер" проект: https://code.google.com/p/iphonestalker/source/browse/trunk/src/iphonestalker/util/io/MBDBReader.java
благодаря ответу galloglass. Код отлично работает с Python 2.7. Есть только одна вещь, которую я хочу знать. Когда читал манифест.mbdb файл, вы должны использовать двоичный режим. В противном случае не все содержимое будет прочитано.
Я также внес некоторые незначительные изменения, чтобы заставить код работать с Python 3.4. Вот этот код.
#!/usr/bin/env python import sys import hashlib mbdx = {} def getint(data, offset, intsize): """Retrieve an integer (big-endian) and new offset from the current offset""" value = 0 while intsize > 0: value = (value << 8) + data[offset] offset = offset + 1 intsize = intsize - 1 return value, offset def getstring(data, offset): """Retrieve a string and new offset from the current offset into the data""" if chr(data[offset]) == chr(0xFF) and chr(data[offset + 1]) == chr(0xFF): return '', offset + 2 # Blank string length, offset = getint(data, offset, 2) # 2-byte length value = data[offset:offset + length] return value.decode(encoding='latin-1'), (offset + length) def process_mbdb_file(filename): mbdb = {} # Map offset of info in this file => file info data = open(filename, 'rb').read() # 'b' is needed to read all content at once if data[0:4].decode() != "mbdb": raise Exception("This does not look like an MBDB file") offset = 4 offset = offset + 2 # value x05 x00, not sure what this is while offset < len(data): fileinfo = {} fileinfo['start_offset'] = offset fileinfo['domain'], offset = getstring(data, offset) fileinfo['filename'], offset = getstring(data, offset) fileinfo['linktarget'], offset = getstring(data, offset) fileinfo['datahash'], offset = getstring(data, offset) fileinfo['unknown1'], offset = getstring(data, offset) fileinfo['mode'], offset = getint(data, offset, 2) fileinfo['unknown2'], offset = getint(data, offset, 4) fileinfo['unknown3'], offset = getint(data, offset, 4) fileinfo['userid'], offset = getint(data, offset, 4) fileinfo['groupid'], offset = getint(data, offset, 4) fileinfo['mtime'], offset = getint(data, offset, 4) fileinfo['atime'], offset = getint(data, offset, 4) fileinfo['ctime'], offset = getint(data, offset, 4) fileinfo['filelen'], offset = getint(data, offset, 8) fileinfo['flag'], offset = getint(data, offset, 1) fileinfo['numprops'], offset = getint(data, offset, 1) fileinfo['properties'] = {} for ii in range(fileinfo['numprops']): propname, offset = getstring(data, offset) propval, offset = getstring(data, offset) fileinfo['properties'][propname] = propval mbdb[fileinfo['start_offset']] = fileinfo fullpath = fileinfo['domain'] + '-' + fileinfo['filename'] id = hashlib.sha1(fullpath.encode()) mbdx[fileinfo['start_offset']] = id.hexdigest() return mbdb def modestr(val): def mode(val): if (val & 0x4): r = 'r' else: r = '-' if (val & 0x2): w = 'w' else: w = '-' if (val & 0x1): x = 'x' else: x = '-' return r + w + x return mode(val >> 6) + mode((val >> 3)) + mode(val) def fileinfo_str(f, verbose=False): if not verbose: return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename']) if (f['mode'] & 0xE000) == 0xA000: type = 'l' # symlink elif (f['mode'] & 0xE000) == 0x8000: type = '-' # file elif (f['mode'] & 0xE000) == 0x4000: type = 'd' # dir else: print >> sys.stderr, "Unknown file type %04x for %s" % (f['mode'], fileinfo_str(f, False)) type = '?' # unknown info = ("%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" % (type, modestr(f['mode'] & 0x0FFF), f['userid'], f['groupid'], f['filelen'], f['mtime'], f['atime'], f['ctime'], f['fileID'], f['domain'], f['filename'])) if type == 'l': info = info + ' -> ' + f['linktarget'] # symlink destination for name, value in f['properties'].items(): # extra properties info = info + ' ' + name + '=' + repr(value) return info verbose = True if __name__ == '__main__': mbdb = process_mbdb_file( r"Manifest.mbdb") for offset, fileinfo in mbdb.items(): if offset in mbdx: fileinfo['fileID'] = mbdx[offset] else: fileinfo['fileID'] = "<nofileID>" print >> sys.stderr, "No fileID found for %s" % fileinfo_str(fileinfo) print(fileinfo_str(fileinfo, verbose))