/* -*- mode: C++; c-basic-offset: 4; tab-width: 4 -*- * * Copyright (c) 2011 Apple Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in * compliance with the License. Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this * file. * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. * * @APPLE_LICENSE_HEADER_END@ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NO_ULEB #include "Architectures.hpp" #include "MachOFileAbstraction.hpp" #include "dsc_iterator.h" #include "dsc_extractor.h" #include #include #include #include #include struct seg_info { seg_info(const char* n, uint64_t o, uint64_t s) : segName(n), offset(o), sizem(s) { } const char* segName; uint64_t offset; uint64_t sizem; }; class CStringEquals { public: bool operator()(const char* left, const char* right) const { return (strcmp(left, right) == 0); } }; typedef __gnu_cxx::hash_map, __gnu_cxx::hash, CStringEquals> NameToSegments; template int optimize_linkedit(macho_header* mh, const void* mapped_cache, uint64_t* newSize) { typedef typename A::P P; typedef typename A::P::E E; typedef typename A::P::uint_t pint_t; // update header flags mh->set_flags(mh->flags() & 0x7FFFFFFF); // remove in-cache bit // update load commands uint64_t cumulativeFileSize = 0; const macho_load_command

* const cmds = (macho_load_command

*)((uint8_t*)mh + sizeof(macho_header

)); const uint32_t cmd_count = mh->ncmds(); const macho_load_command

* cmd = cmds; macho_segment_command

* linkEditSegCmd = NULL; macho_symtab_command

* symtab = NULL; macho_dysymtab_command

* dynamicSymTab = NULL; macho_linkedit_data_command

* functionStarts = NULL; macho_linkedit_data_command

* dataInCode = NULL; for (uint32_t i = 0; i < cmd_count; ++i) { if ( cmd->cmd() == macho_segment_command

::CMD ) { // update segment/section file offsets macho_segment_command

* segCmd = (macho_segment_command

*)cmd; segCmd->set_fileoff(cumulativeFileSize); macho_section

* const sectionsStart = (macho_section

*)((char*)segCmd + sizeof(macho_segment_command

)); macho_section

* const sectionsEnd = §ionsStart[segCmd->nsects()]; for(macho_section

* sect = sectionsStart; sect < sectionsEnd; ++sect) { if ( sect->offset() != 0 ) sect->set_offset(cumulativeFileSize+sect->addr()-segCmd->vmaddr()); } if ( strcmp(segCmd->segname(), "__LINKEDIT") == 0 ) { linkEditSegCmd = segCmd; } cumulativeFileSize += segCmd->filesize(); } else if ( cmd->cmd() == LC_DYLD_INFO_ONLY ) { // zero out all dyld info macho_dyld_info_command

* dyldInfo = (macho_dyld_info_command

*)cmd; dyldInfo->set_rebase_off(0); dyldInfo->set_rebase_size(0); dyldInfo->set_bind_off(0); dyldInfo->set_bind_size(0); dyldInfo->set_weak_bind_off(0); dyldInfo->set_weak_bind_size(0); dyldInfo->set_lazy_bind_off(0); dyldInfo->set_lazy_bind_size(0); dyldInfo->set_export_off(0); dyldInfo->set_export_size(0); } else if ( cmd->cmd() == LC_SYMTAB ) { symtab = (macho_symtab_command

*)cmd; } else if ( cmd->cmd() == LC_DYSYMTAB ) { dynamicSymTab = (macho_dysymtab_command

*)cmd; } else if ( cmd->cmd() == LC_FUNCTION_STARTS ) { functionStarts = (macho_linkedit_data_command

*)cmd; } else if ( cmd->cmd() == LC_DATA_IN_CODE ) { dataInCode = (macho_linkedit_data_command

*)cmd; } cmd = (const macho_load_command

*)(((uint8_t*)cmd)+cmd->cmdsize()); } // rebuild symbol table if ( linkEditSegCmd == NULL ) { fprintf(stderr, "__LINKEDIT not found\n"); return -1; } if ( symtab == NULL ) { fprintf(stderr, "LC_SYMTAB not found\n"); return -1; } if ( dynamicSymTab == NULL ) { fprintf(stderr, "LC_DYSYMTAB not found\n"); return -1; } const uint32_t newFunctionStartsOffset = linkEditSegCmd->fileoff(); uint32_t functionStartsSize = 0; if ( functionStarts != NULL ) { // copy function starts from original cache file to new mapped dylib file functionStartsSize = functionStarts->datasize(); memcpy((char*)mh + newFunctionStartsOffset, (char*)mapped_cache + functionStarts->dataoff(), functionStartsSize); } const uint32_t newDataInCodeOffset = (newFunctionStartsOffset + functionStartsSize + sizeof(pint_t) - 1) & (-sizeof(pint_t)); // pointer align uint32_t dataInCodeSize = 0; if ( dataInCode != NULL ) { // copy data-in-code info from original cache file to new mapped dylib file dataInCodeSize = dataInCode->datasize(); memcpy((char*)mh + newDataInCodeOffset, (char*)mapped_cache + dataInCode->dataoff(), dataInCodeSize); } // copy symbol entries and strings from original cache file to new mapped dylib file const uint32_t newSymTabOffset = (newDataInCodeOffset + dataInCodeSize + sizeof(pint_t) - 1) & (-sizeof(pint_t)); // pointer align const uint32_t newIndSymTabOffset = newSymTabOffset + symtab->nsyms()*sizeof(macho_nlist

); const uint32_t newStringPoolOffset = newIndSymTabOffset + dynamicSymTab->nindirectsyms()*sizeof(uint32_t); macho_nlist

* const newSymTabStart = (macho_nlist

*)(((uint8_t*)mh) + newSymTabOffset); char* const newStringPoolStart = (char*)mh + newStringPoolOffset; uint32_t* newIndSymTab = (uint32_t*)((char*)mh + newIndSymTabOffset); const uint32_t* mergedIndSymTab = (uint32_t*)((char*)mapped_cache + dynamicSymTab->indirectsymoff()); const macho_nlist

* const mergedSymTabStart = (macho_nlist

*)(((uint8_t*)mapped_cache) + symtab->symoff()); const macho_nlist

* const mergedSymTabend = &mergedSymTabStart[symtab->nsyms()]; const char* mergedStringPoolStart = (char*)mapped_cache + symtab->stroff(); macho_nlist

* t = newSymTabStart; int poolOffset = 0; newStringPoolStart[poolOffset++] = '\0'; // first pool entry is always empty string for (const macho_nlist

* s = mergedSymTabStart; s != mergedSymTabend; ++s) { *t = *s; t->set_n_strx(poolOffset); strcpy(&newStringPoolStart[poolOffset], &mergedStringPoolStart[s->n_strx()]); poolOffset += (strlen(&newStringPoolStart[poolOffset]) + 1); ++t; } // pointer align string pool size while ( (poolOffset % sizeof(pint_t)) != 0 ) ++poolOffset; // copy indirect symbol table memcpy(newIndSymTab, mergedIndSymTab, dynamicSymTab->nindirectsyms()*sizeof(uint32_t)); // update load commands if ( functionStarts != NULL ) { functionStarts->set_dataoff(newFunctionStartsOffset); functionStarts->set_datasize(functionStartsSize); } if ( dataInCode != NULL ) { dataInCode->set_dataoff(newDataInCodeOffset); dataInCode->set_datasize(dataInCodeSize); } symtab->set_symoff(newSymTabOffset); symtab->set_stroff(newStringPoolOffset); symtab->set_strsize(poolOffset); dynamicSymTab->set_extreloff(0); dynamicSymTab->set_nextrel(0); dynamicSymTab->set_locreloff(0); dynamicSymTab->set_nlocrel(0); dynamicSymTab->set_indirectsymoff(newIndSymTabOffset); linkEditSegCmd->set_filesize(symtab->stroff()+symtab->strsize() - linkEditSegCmd->fileoff()); linkEditSegCmd->set_vmsize( (linkEditSegCmd->filesize()+4095) & (-4096) ); // return new size *newSize = (symtab->stroff()+symtab->strsize()+4095) & (-4096); return 0; } static void make_dirs(const char* file_path) { //printf("make_dirs(%s)\n", file_path); char dirs[strlen(file_path)+1]; strcpy(dirs, file_path); char* lastSlash = strrchr(dirs, '/'); if ( lastSlash == NULL ) return; lastSlash[1] = '\0'; struct stat stat_buf; if ( stat(dirs, &stat_buf) != 0 ) { const char* afterSlash = &dirs[1]; char* slash; while ( (slash = strchr(afterSlash, '/')) != NULL ) { *slash = '\0'; ::mkdir(dirs, S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); //printf("mkdir(%s)\n", dirs); *slash = '/'; afterSlash = slash+1; } } } template size_t dylib_maker(const void* mapped_cache, std::vector &dylib_data, const std::vector& segments) { typedef typename A::P P; size_t additionalSize = 0; for(std::vector::const_iterator it=segments.begin(); it != segments.end(); ++it) { additionalSize += it->sizem; } dylib_data.reserve(dylib_data.size() + additionalSize); uint32_t nfat_archs = 0; uint32_t offsetInFatFile = 4096; uint8_t *base_ptr = &dylib_data.front(); #define FH reinterpret_cast(base_ptr) #define FA reinterpret_cast(base_ptr + (8 + (nfat_archs - 1) * sizeof(fat_arch))) if(dylib_data.size() >= 4096 && OSSwapBigToHostInt32(FH->magic) == FAT_MAGIC) { // have fat header, append new arch to end nfat_archs = OSSwapBigToHostInt32(FH->nfat_arch); offsetInFatFile = OSSwapBigToHostInt32(FA->offset) + OSSwapBigToHostInt32(FA->size); } dylib_data.resize(offsetInFatFile); base_ptr = &dylib_data.front(); FH->magic = OSSwapHostToBigInt32(FAT_MAGIC); FH->nfat_arch = OSSwapHostToBigInt32(++nfat_archs); FA->cputype = 0; // filled in later FA->cpusubtype = 0; // filled in later FA->offset = OSSwapHostToBigInt32(offsetInFatFile); FA->size = 0; // filled in later FA->align = OSSwapHostToBigInt32(12); // Write regular segments into the buffer uint32_t totalSize = 0; for( std::vector::const_iterator it=segments.begin(); it != segments.end(); ++it) { if(strcmp(it->segName, "__TEXT") == 0 ) { const macho_header

*textMH = reinterpret_cast*>((uint8_t*)mapped_cache+it->offset); FA->cputype = OSSwapHostToBigInt32(textMH->cputype()); FA->cpusubtype = OSSwapHostToBigInt32(textMH->cpusubtype()); // if this cputype/subtype already exist in fat header, then return immediately for(uint32_t i=0; i < nfat_archs-1; ++i) { fat_arch *afa = reinterpret_cast(base_ptr+8)+i; if( afa->cputype == FA->cputype && afa->cpusubtype == FA->cpusubtype) { fprintf(stderr, "arch already exists in fat dylib\n"); dylib_data.resize(offsetInFatFile); return offsetInFatFile; } } } //printf("segName=%s, offset=0x%llX, size=0x%0llX\n", it->segName, it->offset, it->sizem); std::copy(((uint8_t*)mapped_cache)+it->offset, ((uint8_t*)mapped_cache)+it->offset+it->sizem, std::back_inserter(dylib_data)); base_ptr = &dylib_data.front(); totalSize += it->sizem; } FA->size = OSSwapHostToBigInt32(totalSize); // optimize linkedit uint64_t newSize = dylib_data.size(); optimize_linkedit(((macho_header

*)(base_ptr+offsetInFatFile)), mapped_cache, &newSize); // update fat header with new file size dylib_data.resize(offsetInFatFile+newSize); base_ptr = &dylib_data.front(); FA->size = OSSwapHostToBigInt32(newSize); #undef FH #undef FA return offsetInFatFile; } int dyld_shared_cache_extract_dylibs_progress(const char* shared_cache_file_path, const char* extraction_root_path, void (^progress)(unsigned current, unsigned total)) { struct stat statbuf; if (stat(shared_cache_file_path, &statbuf)) { fprintf(stderr, "Error: stat failed for dyld shared cache at %s\n", shared_cache_file_path); return -1; } int cache_fd = open(shared_cache_file_path, O_RDONLY); if (cache_fd < 0) { fprintf(stderr, "Error: failed to open shared cache file at %s\n", shared_cache_file_path); return -1; } void* mapped_cache = mmap(NULL, statbuf.st_size, PROT_READ, MAP_PRIVATE, cache_fd, 0); if (mapped_cache == MAP_FAILED) { fprintf(stderr, "Error: mmap() for shared cache at %s failed, errno=%d\n", shared_cache_file_path, errno); return -1; } close(cache_fd); // instantiate arch specific dylib maker size_t (*dylib_create_func)(const void*, std::vector&, const std::vector&) = NULL; if ( strcmp((char*)mapped_cache, "dyld_v1 i386") == 0 ) dylib_create_func = dylib_maker; else if ( strcmp((char*)mapped_cache, "dyld_v1 x86_64") == 0 ) dylib_create_func = dylib_maker; else if ( strcmp((char*)mapped_cache, "dyld_v1 armv5") == 0 ) dylib_create_func = dylib_maker; else if ( strcmp((char*)mapped_cache, "dyld_v1 armv6") == 0 ) dylib_create_func = dylib_maker; else if ( strcmp((char*)mapped_cache, "dyld_v1 armv7") == 0 ) dylib_create_func = dylib_maker; else if ( strncmp((char*)mapped_cache, "dyld_v1 armv7", 14) == 0 ) dylib_create_func = dylib_maker; else { fprintf(stderr, "Error: unrecognized dyld shared cache magic.\n"); munmap(mapped_cache, statbuf.st_size); return -1; } // iterate through all images in cache and build map of dylibs and segments __block NameToSegments map; dyld_shared_cache_iterate_segments_with_slide(mapped_cache, ^(const char* dylib, const char* segName, uint64_t offset, uint64_t sizem, uint64_t mappedddress, uint64_t slide) { map[dylib].push_back(seg_info(segName, offset, sizem)); }); // for each dylib instantiate a dylib file dispatch_group_t group = dispatch_group_create(); dispatch_semaphore_t sema = dispatch_semaphore_create(2); dispatch_queue_t process_queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_LOW, 0); dispatch_queue_t writer_queue = dispatch_queue_create("dyld writer queue", 0); __block int cumulativeResult = 0; __block unsigned count = 0; for ( NameToSegments::iterator it = map.begin(); it != map.end(); ++it) { dispatch_semaphore_wait(sema, DISPATCH_TIME_FOREVER); dispatch_group_async(group, process_queue, ^{ char dylib_path[PATH_MAX]; strcpy(dylib_path, extraction_root_path); strcat(dylib_path, "/"); strcat(dylib_path, it->first); //printf("%s with %lu segments\n", dylib_path, segments.size()); // make sure all directories in this path exist make_dirs(dylib_path); // open file, create if does not already exist int fd = ::open(dylib_path, O_CREAT | O_EXLOCK | O_RDWR, 0644); if ( fd == -1 ) { fprintf(stderr, "can't open or create dylib file %s, errnor=%d\n", dylib_path, errno); cumulativeResult = -1; return; } struct stat statbuf; if (fstat(fd, &statbuf)) { fprintf(stderr, "Error: stat failed for dyld file %s, errnor=%d\n", dylib_path, errno); close(fd); cumulativeResult = -1; return; } std::vector *vec = new std::vector(statbuf.st_size); if(pread(fd, &vec->front(), vec->size(), 0) != (long)vec->size()) { fprintf(stderr, "can't read dylib file %s, errnor=%d\n", dylib_path, errno); close(fd); cumulativeResult = -1; return; } const size_t offset = dylib_create_func(mapped_cache, *vec, it->second); dispatch_group_async(group, writer_queue, ^{ progress(count++, map.size()); if(offset != vec->size()) { //Write out the first page, and everything after offset if( pwrite(fd, &vec->front(), 4096, 0) == -1 || pwrite(fd, &vec->front() + offset, vec->size() - offset, offset) == -1) { fprintf(stderr, "error writing, errnor=%d\n", errno); cumulativeResult = -1; } } delete vec; close(fd); dispatch_semaphore_signal(sema); }); }); } dispatch_group_wait(group, DISPATCH_TIME_FOREVER); dispatch_release(group); dispatch_release(writer_queue); munmap(mapped_cache, statbuf.st_size); return cumulativeResult; } int dyld_shared_cache_extract_dylibs(const char* shared_cache_file_path, const char* extraction_root_path) { return dyld_shared_cache_extract_dylibs_progress(shared_cache_file_path, extraction_root_path, ^(unsigned , unsigned) {} ); } #if 0 typedef int (*extractor_proc)(const char* shared_cache_file_path, const char* extraction_root_path, void (^progress)(unsigned current, unsigned total)); int main(int argc, const char* argv[]) { if ( argc != 3 ) { fprintf(stderr, "usage: dsc_extractor \n"); return 1; } void* handle = dlopen("/Developer/Platforms/iPhoneOS.platform/usr/lib/dsc_extractor.bundle", RTLD_LAZY); if ( handle == NULL ) { fprintf(stderr, "dsc_extractor.bundle could not be loaded\n"); return 1; } extractor_proc proc = (extractor_proc)dlsym(handle, "dyld_shared_cache_extract_dylibs_progress"); if ( proc == NULL ) { fprintf(stderr, "dsc_extractor.bundle did not have dyld_shared_cache_extract_dylibs_progress symbol\n"); return 1; } int result = (*proc)(argv[1], argv[2], ^(unsigned c, unsigned total) { printf("%d/%d\n", c, total); } ); fprintf(stderr, "dyld_shared_cache_extract_dylibs_progress() => %d\n", result); return 0; } #endif