| author | Dimitry Andric <dim@FreeBSD.org> | 2018-07-28 11:09:23 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2018-07-28 11:09:23 +0000 |
| commit | f73363f1dd94996356cefbf24388f561891acf0b (patch) | |
| tree | e3c31248bdb36eaec5fd833490d4278162dba2a0 /tools/debugserver/source/MacOSX | |
| parent | 160ee69dd7ae18978f4068116777639ea98dc951 (diff) | |
Diffstat (limited to 'tools/debugserver/source/MacOSX')
12 files changed, 1246 insertions, 446 deletions
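Note: the MachProcess.mm hunk below factors the LC_VERSION_MIN_* / LC_BUILD_VERSION parsing into MachProcess::GetDeploymentInfo, which unpacks the load command's 32-bit `sdk` field as major (bits 31-16), minor (bits 15-8), and patch (bits 7-0). A minimal standalone sketch of just that decoding, with the ReadMemory plumbing omitted and a made-up DecodeSDKVersion helper for illustration:

```cpp
#include <cstdint>
#include <cstdio>
#include <string>

// Decode the packed SDK field used by LC_VERSION_MIN_* / LC_BUILD_VERSION:
// bits 31..16 = major, 15..8 = minor, 7..0 = patch (e.g. 0x000A0D02 -> "10.13.2").
static std::string DecodeSDKVersion(uint32_t sdk) {
  uint32_t major = sdk >> 16;
  uint32_t minor = (sdk >> 8) & 0xffu;
  uint32_t patch = sdk & 0xffu;
  std::string version = std::to_string(major) + "." + std::to_string(minor);
  if (patch != 0) // the diff only appends the patch component when non-zero
    version += "." + std::to_string(patch);
  return version;
}

int main() {
  std::printf("%s\n", DecodeSDKVersion(0x000A0D02u).c_str()); // prints 10.13.2
  return 0;
}
```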
diff --git a/tools/debugserver/source/MacOSX/MachException.cpp b/tools/debugserver/source/MacOSX/MachException.cpp index cc309e47d86bf..da2b2fe92980b 100644 --- a/tools/debugserver/source/MacOSX/MachException.cpp +++ b/tools/debugserver/source/MacOSX/MachException.cpp @@ -86,8 +86,6 @@ extern "C" kern_return_t catch_mach_exception_raise_state_identity( (uint64_t)(exc_data_count > 0 ? exc_data[0] : 0xBADDBADD), (uint64_t)(exc_data_count > 1 ? exc_data[1] : 0xBADDBADD)); } - mach_port_deallocate(mach_task_self(), task_port); - mach_port_deallocate(mach_task_self(), thread_port); return KERN_FAILURE; } diff --git a/tools/debugserver/source/MacOSX/MachProcess.h b/tools/debugserver/source/MacOSX/MachProcess.h index 9ab06bcda9c64..2fb4dc5dbb6e1 100644 --- a/tools/debugserver/source/MacOSX/MachProcess.h +++ b/tools/debugserver/source/MacOSX/MachProcess.h @@ -246,7 +246,10 @@ public: uint64_t plo_pthread_tsd_base_address_offset, uint64_t plo_pthread_tsd_base_offset, uint64_t plo_pthread_tsd_entry_size); - + const char * + GetDeploymentInfo(const struct load_command&, uint64_t load_command_address, + uint32_t& major_version, uint32_t& minor_version, + uint32_t& patch_version); bool GetMachOInformationFromMemory(nub_addr_t mach_o_header_addr, int wordsize, struct mach_o_information &inf); diff --git a/tools/debugserver/source/MacOSX/MachProcess.mm b/tools/debugserver/source/MacOSX/MachProcess.mm index d4dff223bde0a..4ddc5f8b10dcd 100644 --- a/tools/debugserver/source/MacOSX/MachProcess.mm +++ b/tools/debugserver/source/MacOSX/MachProcess.mm @@ -572,6 +572,76 @@ nub_addr_t MachProcess::GetTSDAddressForThread( plo_pthread_tsd_entry_size); } + +const char *MachProcess::GetDeploymentInfo(const struct load_command& lc, + uint64_t load_command_address, + uint32_t& major_version, + uint32_t& minor_version, + uint32_t& patch_version) { + uint32_t cmd = lc.cmd & ~LC_REQ_DYLD; + bool lc_cmd_known = + cmd == LC_VERSION_MIN_IPHONEOS || cmd == LC_VERSION_MIN_MACOSX || + cmd == LC_VERSION_MIN_TVOS || cmd == LC_VERSION_MIN_WATCHOS; + + if (lc_cmd_known) { + struct version_min_command vers_cmd; + if (ReadMemory(load_command_address, sizeof(struct version_min_command), + &vers_cmd) != sizeof(struct version_min_command)) { + return nullptr; + } + major_version = vers_cmd.sdk >> 16; + minor_version = (vers_cmd.sdk >> 8) & 0xffu; + patch_version = vers_cmd.sdk & 0xffu; + + switch (cmd) { + case LC_VERSION_MIN_IPHONEOS: + return "ios"; + case LC_VERSION_MIN_MACOSX: + return "macosx"; + case LC_VERSION_MIN_TVOS: + return "tvos"; + case LC_VERSION_MIN_WATCHOS: + return "watchos"; + default: + return nullptr; + } + } +#if defined (LC_BUILD_VERSION) +#ifndef PLATFORM_IOSSIMULATOR +#define PLATFORM_IOSSIMULATOR 7 +#define PLATFORM_TVOSSIMULATOR 8 +#define PLATFORM_WATCHOSSIMULATOR 9 +#endif + if (cmd == LC_BUILD_VERSION) { + struct build_version_command build_vers; + if (ReadMemory(load_command_address, sizeof(struct build_version_command), + &build_vers) != sizeof(struct build_version_command)) { + return nullptr; + } + major_version = build_vers.sdk >> 16;; + minor_version = (build_vers.sdk >> 8) & 0xffu; + patch_version = build_vers.sdk & 0xffu; + + switch (build_vers.platform) { + case PLATFORM_MACOS: + return "macosx"; + case PLATFORM_IOS: + case PLATFORM_IOSSIMULATOR: + return "ios"; + case PLATFORM_TVOS: + case PLATFORM_TVOSSIMULATOR: + return "tvos"; + case PLATFORM_WATCHOS: + case PLATFORM_WATCHOSSIMULATOR: + return "watchos"; + case PLATFORM_BRIDGEOS: + return "bridgeos"; + } + } +#endif + return nullptr; +} + // 
Given an address, read the mach-o header and load commands out of memory to // fill in // the mach_o_information "inf" object. @@ -670,52 +740,22 @@ bool MachProcess::GetMachOInformationFromMemory( sizeof(struct uuid_command)) uuid_copy(inf.uuid, uuidcmd.uuid); } - bool lc_cmd_known = - lc.cmd == LC_VERSION_MIN_IPHONEOS || lc.cmd == LC_VERSION_MIN_MACOSX; -#if defined(LC_VERSION_MIN_TVOS) - lc_cmd_known |= lc.cmd == LC_VERSION_MIN_TVOS; -#endif -#if defined(LC_VERSION_MIN_WATCHOS) - lc_cmd_known |= lc.cmd == LC_VERSION_MIN_WATCHOS; -#endif - if (lc_cmd_known) { - struct version_min_command vers_cmd; - if (ReadMemory(load_cmds_p, sizeof(struct version_min_command), - &vers_cmd) != sizeof(struct version_min_command)) { - return false; - } - switch (lc.cmd) { - case LC_VERSION_MIN_IPHONEOS: - inf.min_version_os_name = "iphoneos"; - break; - case LC_VERSION_MIN_MACOSX: - inf.min_version_os_name = "macosx"; - break; -#if defined(LC_VERSION_MIN_TVOS) - case LC_VERSION_MIN_TVOS: - inf.min_version_os_name = "tvos"; - break; -#endif -#if defined(LC_VERSION_MIN_WATCHOS) - case LC_VERSION_MIN_WATCHOS: - inf.min_version_os_name = "watchos"; - break; -#endif - default: - return false; - } - uint32_t xxxx = vers_cmd.sdk >> 16; - uint32_t yy = (vers_cmd.sdk >> 8) & 0xffu; - uint32_t zz = vers_cmd.sdk & 0xffu; + + uint32_t major_version, minor_version, patch_version; + if (const char *platform = GetDeploymentInfo(lc, load_cmds_p, + major_version, minor_version, + patch_version)) { + inf.min_version_os_name = platform; inf.min_version_os_version = ""; - inf.min_version_os_version += std::to_string(xxxx); + inf.min_version_os_version += std::to_string(major_version); inf.min_version_os_version += "."; - inf.min_version_os_version += std::to_string(yy); - if (zz != 0) { + inf.min_version_os_version += std::to_string(minor_version); + if (patch_version != 0) { inf.min_version_os_version += "."; - inf.min_version_os_version += std::to_string(zz); + inf.min_version_os_version += std::to_string(patch_version); } } + load_cmds_p += lc.cmdsize; } return true; @@ -1454,7 +1494,7 @@ bool MachProcess::Detach() { // Resume our task m_task.Resume(); - // NULL our task out as we have already retored all exception ports + // NULL our task out as we have already restored all exception ports m_task.Clear(); // Clear out any notion of the process we once were @@ -1765,7 +1805,7 @@ bool MachProcess::DisableBreakpoint(nub_addr_t addr, bool remove) { break_op_size) { bool verify = false; if (bp->IsEnabled()) { - // Make sure we have the a breakpoint opcode exists at this address + // Make sure a breakpoint opcode exists at this address if (memcmp(curr_break_op, break_op, break_op_size) == 0) { break_op_found = true; // We found a valid breakpoint opcode at this address, now restore diff --git a/tools/debugserver/source/MacOSX/MachTask.mm b/tools/debugserver/source/MacOSX/MachTask.mm index 1d177bd53cb70..d05f50029f16b 100644 --- a/tools/debugserver/source/MacOSX/MachTask.mm +++ b/tools/debugserver/source/MacOSX/MachTask.mm @@ -348,23 +348,15 @@ std::string MachTask::GetProfileData(DNBProfileDataScanType scanType) { threads_used_usec); } -#if defined(HOST_VM_INFO64_COUNT) vm_statistics64_data_t vminfo; -#else - struct vm_statistics vminfo; -#endif - uint64_t physical_memory; - mach_vm_size_t rprvt = 0; - mach_vm_size_t rsize = 0; - mach_vm_size_t vprvt = 0; - mach_vm_size_t vsize = 0; - mach_vm_size_t dirty_size = 0; - mach_vm_size_t purgeable = 0; - mach_vm_size_t anonymous = 0; + uint64_t physical_memory = 0; + uint64_t 
anonymous = 0; + uint64_t phys_footprint = 0; + uint64_t memory_cap = 0; if (m_vm_memory.GetMemoryProfile(scanType, task, task_info, m_process->GetCPUType(), pid, vminfo, - physical_memory, rprvt, rsize, vprvt, vsize, - dirty_size, purgeable, anonymous)) { + physical_memory, anonymous, + phys_footprint, memory_cap)) { std::ostringstream profile_data_stream; if (scanType & eProfileHostCPU) { @@ -413,57 +405,28 @@ std::string MachTask::GetProfileData(DNBProfileDataScanType scanType) { profile_data_stream << "total:" << physical_memory << ';'; if (scanType & eProfileMemory) { -#if defined(HOST_VM_INFO64_COUNT) && defined(_VM_PAGE_SIZE_H_) static vm_size_t pagesize = vm_kernel_page_size; -#else - static vm_size_t pagesize; - static bool calculated = false; - if (!calculated) { - calculated = true; - pagesize = PageSize(); - } -#endif -/* Unused values. Optimized out for transfer performance. -profile_data_stream << "wired:" << vminfo.wire_count * pagesize << ';'; -profile_data_stream << "active:" << vminfo.active_count * pagesize << ';'; -profile_data_stream << "inactive:" << vminfo.inactive_count * pagesize << ';'; - */ -#if defined(HOST_VM_INFO64_COUNT) // This mimicks Activity Monitor. uint64_t total_used_count = (physical_memory / pagesize) - (vminfo.free_count - vminfo.speculative_count) - vminfo.external_page_count - vminfo.purgeable_count; -#else - uint64_t total_used_count = - vminfo.wire_count + vminfo.inactive_count + vminfo.active_count; -#endif profile_data_stream << "used:" << total_used_count * pagesize << ';'; - /* Unused values. Optimized out for transfer performance. - profile_data_stream << "free:" << vminfo.free_count * pagesize << ';'; - */ - - profile_data_stream << "rprvt:" << rprvt << ';'; - /* Unused values. Optimized out for transfer performance. 
- profile_data_stream << "rsize:" << rsize << ';'; - profile_data_stream << "vprvt:" << vprvt << ';'; - profile_data_stream << "vsize:" << vsize << ';'; - */ - - if (scanType & eProfileMemoryDirtyPage) - profile_data_stream << "dirty:" << dirty_size << ';'; if (scanType & eProfileMemoryAnonymous) { - profile_data_stream << "purgeable:" << purgeable << ';'; profile_data_stream << "anonymous:" << anonymous << ';'; } + + profile_data_stream << "phys_footprint:" << phys_footprint << ';'; } - -// proc_pid_rusage pm_sample_task_and_pid pm_energy_impact needs to be tested -// for weakness in Cab + + if (scanType & eProfileMemoryCap) { + profile_data_stream << "mem_cap:" << memory_cap << ';'; + } + #ifdef LLDB_ENERGY - if ((scanType & eProfileEnergy) && (pm_sample_task_and_pid != NULL)) { + if (scanType & eProfileEnergy) { struct rusage_info_v2 info; int rc = proc_pid_rusage(pid, RUSAGE_INFO_V2, (rusage_info_t *)&info); if (rc == 0) { @@ -993,8 +956,6 @@ nub_bool_t MachTask::DeallocateMemory(nub_addr_t addr) { return false; } -nub_size_t MachTask::PageSize() { return m_vm_memory.PageSize(m_task); } - void MachTask::TaskPortChanged(task_t task) { m_task = task; diff --git a/tools/debugserver/source/MacOSX/MachVMMemory.cpp b/tools/debugserver/source/MacOSX/MachVMMemory.cpp index 12f16ccb9f538..754fb82dba3ec 100644 --- a/tools/debugserver/source/MacOSX/MachVMMemory.cpp +++ b/tools/debugserver/source/MacOSX/MachVMMemory.cpp @@ -19,6 +19,12 @@ #include <mach/shared_region.h> #include <sys/sysctl.h> +#if defined(WITH_FBS) || defined(WITH_BKS) +extern "C" { +#import <sys/kern_memorystatus.h> +} +#endif + static const vm_size_t kInvalidPageSize = ~0; MachVMMemory::MachVMMemory() : m_page_size(kInvalidPageSize), m_err(0) {} @@ -99,99 +105,6 @@ nub_bool_t MachVMMemory::GetMemoryRegionInfo(task_t task, nub_addr_t address, return true; } -// For integrated graphics chip, this makes the accounting info for 'wired' -// memory more like top. -uint64_t MachVMMemory::GetStolenPages(task_t task) { - static uint64_t stolenPages = 0; - static bool calculated = false; - if (calculated) - return stolenPages; - - static int mib_reserved[CTL_MAXNAME]; - static int mib_unusable[CTL_MAXNAME]; - static int mib_other[CTL_MAXNAME]; - static size_t mib_reserved_len = 0; - static size_t mib_unusable_len = 0; - static size_t mib_other_len = 0; - int r; - - /* This can be used for testing: */ - // tsamp->pages_stolen = (256 * 1024 * 1024ULL) / tsamp->pagesize; - - if (0 == mib_reserved_len) { - mib_reserved_len = CTL_MAXNAME; - - r = sysctlnametomib("machdep.memmap.Reserved", mib_reserved, - &mib_reserved_len); - - if (-1 == r) { - mib_reserved_len = 0; - return 0; - } - - mib_unusable_len = CTL_MAXNAME; - - r = sysctlnametomib("machdep.memmap.Unusable", mib_unusable, - &mib_unusable_len); - - if (-1 == r) { - mib_reserved_len = 0; - return 0; - } - - mib_other_len = CTL_MAXNAME; - - r = sysctlnametomib("machdep.memmap.Other", mib_other, &mib_other_len); - - if (-1 == r) { - mib_reserved_len = 0; - return 0; - } - } - - if (mib_reserved_len > 0 && mib_unusable_len > 0 && mib_other_len > 0) { - uint64_t reserved = 0, unusable = 0, other = 0; - size_t reserved_len; - size_t unusable_len; - size_t other_len; - - reserved_len = sizeof(reserved); - unusable_len = sizeof(unusable); - other_len = sizeof(other); - - /* These are all declared as QUAD/uint64_t sysctls in the kernel. 
*/ - - if (sysctl(mib_reserved, static_cast<u_int>(mib_reserved_len), &reserved, - &reserved_len, NULL, 0)) { - return 0; - } - - if (sysctl(mib_unusable, static_cast<u_int>(mib_unusable_len), &unusable, - &unusable_len, NULL, 0)) { - return 0; - } - - if (sysctl(mib_other, static_cast<u_int>(mib_other_len), &other, &other_len, - NULL, 0)) { - return 0; - } - - if (reserved_len == sizeof(reserved) && unusable_len == sizeof(unusable) && - other_len == sizeof(other)) { - uint64_t stolen = reserved + unusable + other; - uint64_t mb128 = 128 * 1024 * 1024ULL; - - if (stolen >= mb128) { - stolen = (stolen & ~((128 * 1024 * 1024ULL) - 1)); // rounding down - stolenPages = stolen / PageSize(task); - } - } - } - - calculated = true; - return stolenPages; -} - static uint64_t GetPhysicalMemory() { // This doesn't change often at all. No need to poll each time. static uint64_t physical_memory = 0; @@ -206,231 +119,45 @@ static uint64_t GetPhysicalMemory() { return physical_memory; } -// rsize and dirty_size is not adjusted for dyld shared cache and multiple -// __LINKEDIT segment, as in vmmap. In practice, dirty_size doesn't differ much -// but rsize may. There is performance penalty for the adjustment. Right now, -// only use the dirty_size. -void MachVMMemory::GetRegionSizes(task_t task, mach_vm_size_t &rsize, - mach_vm_size_t &dirty_size) { -#if defined(TASK_VM_INFO) && TASK_VM_INFO >= 22 - - task_vm_info_data_t vm_info; - mach_msg_type_number_t info_count; - kern_return_t kr; - - info_count = TASK_VM_INFO_COUNT; - kr = task_info(task, TASK_VM_INFO_PURGEABLE, (task_info_t)&vm_info, - &info_count); - if (kr == KERN_SUCCESS) - dirty_size = vm_info.internal; -#endif -} - -// Test whether the virtual address is within the architecture's shared region. -static bool InSharedRegion(mach_vm_address_t addr, cpu_type_t type) { - mach_vm_address_t base = 0, size = 0; - - switch (type) { -#if defined(CPU_TYPE_ARM64) && defined(SHARED_REGION_BASE_ARM64) - case CPU_TYPE_ARM64: - base = SHARED_REGION_BASE_ARM64; - size = SHARED_REGION_SIZE_ARM64; - break; -#endif - - case CPU_TYPE_ARM: - base = SHARED_REGION_BASE_ARM; - size = SHARED_REGION_SIZE_ARM; - break; - - case CPU_TYPE_X86_64: - base = SHARED_REGION_BASE_X86_64; - size = SHARED_REGION_SIZE_X86_64; - break; - - case CPU_TYPE_I386: - base = SHARED_REGION_BASE_I386; - size = SHARED_REGION_SIZE_I386; - break; - - default: { - // Log error abut unknown CPU type - break; - } - } - - return (addr >= base && addr < (base + size)); -} - -void MachVMMemory::GetMemorySizes(task_t task, cpu_type_t cputype, - nub_process_t pid, mach_vm_size_t &rprvt, - mach_vm_size_t &vprvt) { - // Collecting some other info cheaply but not reporting for now. - mach_vm_size_t empty = 0; - mach_vm_size_t fw_private = 0; - - mach_vm_size_t aliased = 0; - bool global_shared_text_data_mapped = false; - vm_size_t pagesize = PageSize(task); - - for (mach_vm_address_t addr = 0, size = 0;; addr += size) { - vm_region_top_info_data_t info; - mach_msg_type_number_t count = VM_REGION_TOP_INFO_COUNT; - mach_port_t object_name; - - kern_return_t kr = - mach_vm_region(task, &addr, &size, VM_REGION_TOP_INFO, - (vm_region_info_t)&info, &count, &object_name); - if (kr != KERN_SUCCESS) - break; - - if (InSharedRegion(addr, cputype)) { - // Private Shared - fw_private += info.private_pages_resident * pagesize; - - // Check if this process has the globally shared text and data regions - // mapped in. If so, set global_shared_text_data_mapped to TRUE and avoid - // checking again. 
- if (global_shared_text_data_mapped == FALSE && - info.share_mode == SM_EMPTY) { - vm_region_basic_info_data_64_t b_info; - mach_vm_address_t b_addr = addr; - mach_vm_size_t b_size = size; - count = VM_REGION_BASIC_INFO_COUNT_64; - - kr = mach_vm_region(task, &b_addr, &b_size, VM_REGION_BASIC_INFO, - (vm_region_info_t)&b_info, &count, &object_name); - if (kr != KERN_SUCCESS) - break; - - if (b_info.reserved) { - global_shared_text_data_mapped = TRUE; - } - } - - // Short circuit the loop if this isn't a shared private region, since - // that's the only region type we care about within the current address - // range. - if (info.share_mode != SM_PRIVATE) { - continue; - } - } - - // Update counters according to the region type. - if (info.share_mode == SM_COW && info.ref_count == 1) { - // Treat single reference SM_COW as SM_PRIVATE - info.share_mode = SM_PRIVATE; - } - - switch (info.share_mode) { - case SM_LARGE_PAGE: - // Treat SM_LARGE_PAGE the same as SM_PRIVATE - // since they are not shareable and are wired. - case SM_PRIVATE: - rprvt += info.private_pages_resident * pagesize; - rprvt += info.shared_pages_resident * pagesize; - vprvt += size; - break; - - case SM_EMPTY: - empty += size; - break; - - case SM_COW: - case SM_SHARED: { - if (pid == 0) { - // Treat kernel_task specially - if (info.share_mode == SM_COW) { - rprvt += info.private_pages_resident * pagesize; - vprvt += size; - } - break; - } - - if (info.share_mode == SM_COW) { - rprvt += info.private_pages_resident * pagesize; - vprvt += info.private_pages_resident * pagesize; - } - break; - } - default: - // log that something is really bad. - break; - } - } - - rprvt += aliased; -} - -static void GetPurgeableAndAnonymous(task_t task, uint64_t &purgeable, - uint64_t &anonymous) { -#if defined(TASK_VM_INFO) && TASK_VM_INFO >= 22 - - kern_return_t kr; - mach_msg_type_number_t info_count; - task_vm_info_data_t vm_info; - - info_count = TASK_VM_INFO_COUNT; - kr = task_info(task, TASK_VM_INFO_PURGEABLE, (task_info_t)&vm_info, - &info_count); - if (kr == KERN_SUCCESS) { - purgeable = vm_info.purgeable_volatile_resident; - anonymous = - vm_info.internal + vm_info.compressed - vm_info.purgeable_volatile_pmap; - } - -#endif -} - -#if defined(HOST_VM_INFO64_COUNT) nub_bool_t MachVMMemory::GetMemoryProfile( DNBProfileDataScanType scanType, task_t task, struct task_basic_info ti, cpu_type_t cputype, nub_process_t pid, vm_statistics64_data_t &vminfo, - uint64_t &physical_memory, mach_vm_size_t &rprvt, mach_vm_size_t &rsize, - mach_vm_size_t &vprvt, mach_vm_size_t &vsize, mach_vm_size_t &dirty_size, - mach_vm_size_t &purgeable, mach_vm_size_t &anonymous) -#else -nub_bool_t MachVMMemory::GetMemoryProfile( - DNBProfileDataScanType scanType, task_t task, struct task_basic_info ti, - cpu_type_t cputype, nub_process_t pid, vm_statistics_data_t &vminfo, - uint64_t &physical_memory, mach_vm_size_t &rprvt, mach_vm_size_t &rsize, - mach_vm_size_t &vprvt, mach_vm_size_t &vsize, mach_vm_size_t &dirty_size, - mach_vm_size_t &purgeable, mach_vm_size_t &anonymous) -#endif + uint64_t &physical_memory, uint64_t &anonymous, + uint64_t &phys_footprint, uint64_t &memory_cap) { if (scanType & eProfileHostMemory) physical_memory = GetPhysicalMemory(); if (scanType & eProfileMemory) { static mach_port_t localHost = mach_host_self(); -#if defined(HOST_VM_INFO64_COUNT) mach_msg_type_number_t count = HOST_VM_INFO64_COUNT; host_statistics64(localHost, HOST_VM_INFO64, (host_info64_t)&vminfo, &count); -#else - mach_msg_type_number_t count = HOST_VM_INFO_COUNT; - 
host_statistics(localHost, HOST_VM_INFO, (host_info_t)&vminfo, &count); - vminfo.wire_count += GetStolenPages(task); -#endif + + kern_return_t kr; + mach_msg_type_number_t info_count; + task_vm_info_data_t vm_info; + + info_count = TASK_VM_INFO_COUNT; + kr = task_info(task, TASK_VM_INFO_PURGEABLE, (task_info_t)&vm_info, &info_count); + if (kr == KERN_SUCCESS) { + if (scanType & eProfileMemoryAnonymous) { + anonymous = vm_info.internal + vm_info.compressed - vm_info.purgeable_volatile_pmap; + } - /* We are no longer reporting these. Let's not waste time. - GetMemorySizes(task, cputype, pid, rprvt, vprvt); - rsize = ti.resident_size; - vsize = ti.virtual_size; - - if (scanType & eProfileMemoryDirtyPage) - { - // This uses vmmap strategy. We don't use the returned rsize for now. We - prefer to match top's version since that's what we do for the rest of the - metrics. - GetRegionSizes(task, rsize, dirty_size); + phys_footprint = vm_info.phys_footprint; } - */ + } - if (scanType & eProfileMemoryAnonymous) { - GetPurgeableAndAnonymous(task, purgeable, anonymous); +#if defined(WITH_FBS) || defined(WITH_BKS) + if (scanType & eProfileMemoryCap) { + memorystatus_memlimit_properties_t memlimit_properties; + memset(&memlimit_properties, 0, sizeof(memlimit_properties)); + if (memorystatus_control(MEMORYSTATUS_CMD_GET_MEMLIMIT_PROPERTIES, pid, 0, &memlimit_properties, sizeof(memlimit_properties)) == 0) { + memory_cap = memlimit_properties.memlimit_active; } } +#endif return true; } diff --git a/tools/debugserver/source/MacOSX/MachVMMemory.h b/tools/debugserver/source/MacOSX/MachVMMemory.h index c4d3f5331374a..513b69ee709fc 100644 --- a/tools/debugserver/source/MacOSX/MachVMMemory.h +++ b/tools/debugserver/source/MacOSX/MachVMMemory.h @@ -29,35 +29,15 @@ public: nub_size_t PageSize(task_t task); nub_bool_t GetMemoryRegionInfo(task_t task, nub_addr_t address, DNBRegionInfo *region_info); -#if defined(HOST_VM_INFO64_COUNT) nub_bool_t GetMemoryProfile(DNBProfileDataScanType scanType, task_t task, struct task_basic_info ti, cpu_type_t cputype, nub_process_t pid, vm_statistics64_data_t &vminfo, - uint64_t &physical_memory, mach_vm_size_t &rprvt, - mach_vm_size_t &rsize, mach_vm_size_t &vprvt, - mach_vm_size_t &vsize, mach_vm_size_t &dirty_size, - mach_vm_size_t &purgeable, - mach_vm_size_t &anonymous); -#else - nub_bool_t GetMemoryProfile(DNBProfileDataScanType scanType, task_t task, - struct task_basic_info ti, cpu_type_t cputype, - nub_process_t pid, vm_statistics_data_t &vminfo, - uint64_t &physical_memory, mach_vm_size_t &rprvt, - mach_vm_size_t &rsize, mach_vm_size_t &vprvt, - mach_vm_size_t &vsize, mach_vm_size_t &dirty_size, - mach_vm_size_t &purgeable, - mach_vm_size_t &anonymous); -#endif + uint64_t &physical_memory, uint64_t &anonymous, + uint64_t &phys_footprint, uint64_t &memory_cap); protected: nub_size_t MaxBytesLeftInPage(task_t task, nub_addr_t addr, nub_size_t count); - uint64_t GetStolenPages(task_t task); - void GetRegionSizes(task_t task, mach_vm_size_t &rsize, - mach_vm_size_t &dirty_size); - void GetMemorySizes(task_t task, cpu_type_t cputype, nub_process_t pid, - mach_vm_size_t &rprvt, mach_vm_size_t &vprvt); - nub_size_t WriteRegion(task_t task, const nub_addr_t address, const void *data, const nub_size_t data_count); diff --git a/tools/debugserver/source/MacOSX/i386/DNBArchImplI386.cpp b/tools/debugserver/source/MacOSX/i386/DNBArchImplI386.cpp index e0e8e27a1c2da..adcd650021919 100644 --- a/tools/debugserver/source/MacOSX/i386/DNBArchImplI386.cpp +++ 
b/tools/debugserver/source/MacOSX/i386/DNBArchImplI386.cpp @@ -21,7 +21,7 @@ #include "MachThread.h" extern "C" bool CPUHasAVX(); // Defined over in DNBArchImplX86_64.cpp - +extern "C" bool CPUHasAVX512f(); // Defined over in DNBArchImplX86_64.cpp #if defined(LLDB_DEBUGSERVER_RELEASE) || defined(LLDB_DEBUGSERVER_DEBUG) enum debugState { debugStateUnknown, debugStateOff, debugStateOn }; @@ -132,6 +132,22 @@ enum { fpu_ymm5, fpu_ymm6, fpu_ymm7, + fpu_k0, + fpu_k1, + fpu_k2, + fpu_k3, + fpu_k4, + fpu_k5, + fpu_k6, + fpu_k7, + fpu_zmm0, + fpu_zmm1, + fpu_zmm2, + fpu_zmm3, + fpu_zmm4, + fpu_zmm5, + fpu_zmm6, + fpu_zmm7, k_num_fpu_regs, // Aliases @@ -205,6 +221,22 @@ enum { dwarf_ymm5 = dwarf_xmm5, dwarf_ymm6 = dwarf_xmm6, dwarf_ymm7 = dwarf_xmm7, + dwarf_zmm0 = dwarf_xmm0, + dwarf_zmm1 = dwarf_xmm1, + dwarf_zmm2 = dwarf_xmm2, + dwarf_zmm3 = dwarf_xmm3, + dwarf_zmm4 = dwarf_xmm4, + dwarf_zmm5 = dwarf_xmm5, + dwarf_zmm6 = dwarf_xmm6, + dwarf_zmm7 = dwarf_xmm7, + dwarf_k0 = 118, + dwarf_k1, + dwarf_k2, + dwarf_k3, + dwarf_k4, + dwarf_k5, + dwarf_k6, + dwarf_k7, }; enum { @@ -271,7 +303,23 @@ enum { debugserver_ymm4 = debugserver_xmm4, debugserver_ymm5 = debugserver_xmm5, debugserver_ymm6 = debugserver_xmm6, - debugserver_ymm7 = debugserver_xmm7 + debugserver_ymm7 = debugserver_xmm7, + debugserver_zmm0 = debugserver_xmm0, + debugserver_zmm1 = debugserver_xmm1, + debugserver_zmm2 = debugserver_xmm2, + debugserver_zmm3 = debugserver_xmm3, + debugserver_zmm4 = debugserver_xmm4, + debugserver_zmm5 = debugserver_xmm5, + debugserver_zmm6 = debugserver_xmm6, + debugserver_zmm7 = debugserver_xmm7, + debugserver_k0 = 118, + debugserver_k1 = 119, + debugserver_k2 = 120, + debugserver_k3 = 121, + debugserver_k4 = 122, + debugserver_k5 = 123, + debugserver_k6 = 124, + debugserver_k7 = 125, }; uint64_t DNBArchImplI386::GetPC(uint64_t failValue) { @@ -390,7 +438,8 @@ kern_return_t DNBArchImplI386::GetFPUState(bool force) { m_state.context.fpu.no_avx.__fpu_reserved1 = -1; if (CPUHasAVX() || FORCE_AVX_REGS) { - for (int i = 0; i < sizeof(m_state.context.fpu.avx.__avx_reserved1); ++i) + for (int i = 0; i < sizeof(m_state.context.fpu.avx.__avx_reserved1); + ++i) m_state.context.fpu.avx.__avx_reserved1[i] = INT8_MIN; for (int i = 0; i < 16; ++i) { @@ -404,12 +453,54 @@ kern_return_t DNBArchImplI386::GetFPUState(bool force) { m_state.context.fpu.avx.__fpu_ymmh7.__xmm_reg[i] = '7'; } } + if (CPUHasAVX512f() || FORCE_AVX_REGS) { + for (int i = 0; i < 8; ++i) { + m_state.context.fpu.avx512f.__fpu_k0.__opmask_reg[i] = '0'; + m_state.context.fpu.avx512f.__fpu_k1.__opmask_reg[i] = '1'; + m_state.context.fpu.avx512f.__fpu_k2.__opmask_reg[i] = '2'; + m_state.context.fpu.avx512f.__fpu_k3.__opmask_reg[i] = '3'; + m_state.context.fpu.avx512f.__fpu_k4.__opmask_reg[i] = '4'; + m_state.context.fpu.avx512f.__fpu_k5.__opmask_reg[i] = '5'; + m_state.context.fpu.avx512f.__fpu_k6.__opmask_reg[i] = '6'; + m_state.context.fpu.avx512f.__fpu_k7.__opmask_reg[i] = '7'; + } + + for (int i = 0; i < 32; ++i) { + m_state.context.fpu.avx512f.__fpu_zmmh0.__ymm_reg[i] = '0'; + m_state.context.fpu.avx512f.__fpu_zmmh1.__ymm_reg[i] = '1'; + m_state.context.fpu.avx512f.__fpu_zmmh2.__ymm_reg[i] = '2'; + m_state.context.fpu.avx512f.__fpu_zmmh3.__ymm_reg[i] = '3'; + m_state.context.fpu.avx512f.__fpu_zmmh4.__ymm_reg[i] = '4'; + m_state.context.fpu.avx512f.__fpu_zmmh5.__ymm_reg[i] = '5'; + m_state.context.fpu.avx512f.__fpu_zmmh6.__ymm_reg[i] = '6'; + m_state.context.fpu.avx512f.__fpu_zmmh7.__ymm_reg[i] = '7'; + } + } m_state.SetError(e_regSetFPU, Read, 0); 
} else { mach_msg_type_number_t count = e_regSetWordSizeFPU; int flavor = __i386_FLOAT_STATE; - if (CPUHasAVX() || FORCE_AVX_REGS) { + // On a machine with the AVX512 register set, a process only gets a + // full AVX512 register context after it uses the AVX512 registers; + // if the process has not yet triggered this change, trying to fetch + // the AVX512 registers will fail. Fall through to fetching the AVX + // registers. + if (CPUHasAVX512f() || FORCE_AVX_REGS) { + count = e_regSetWordSizeAVX512f; + flavor = __i386_AVX512F_STATE; + m_state.SetError(e_regSetFPU, Read, + ::thread_get_state(m_thread->MachPortNumber(), flavor, + (thread_state_t)&m_state.context.fpu, + &count)); + DNBLogThreadedIf(LOG_THREAD, + "::thread_get_state (0x%4.4x, %u, &fpu, %u => 0x%8.8x", + m_thread->MachPortNumber(), flavor, (uint32_t)count, + m_state.GetError(e_regSetFPU, Read)); + if (m_state.GetError(e_regSetFPU, Read) == KERN_SUCCESS) + return m_state.GetError(e_regSetFPU, Read); + } + if (CPUHasAVX()) { count = e_regSetWordSizeAVX; flavor = __i386_AVX_STATE; } @@ -457,18 +548,21 @@ kern_return_t DNBArchImplI386::SetFPUState() { m_state.SetError(e_regSetFPU, Write, 0); return m_state.GetError(e_regSetFPU, Write); } else { - if (CPUHasAVX() || FORCE_AVX_REGS) - m_state.SetError( - e_regSetFPU, Write, - ::thread_set_state(m_thread->MachPortNumber(), __i386_AVX_STATE, - (thread_state_t)&m_state.context.fpu.avx, - e_regSetWordSizeAVX)); - else - m_state.SetError( - e_regSetFPU, Write, - ::thread_set_state(m_thread->MachPortNumber(), __i386_FLOAT_STATE, - (thread_state_t)&m_state.context.fpu.no_avx, - e_regSetWordSizeFPU)); + int flavor = __i386_FLOAT_STATE; + mach_msg_type_number_t count = e_regSetWordSizeFPU; + if (CPUHasAVX512f() || FORCE_AVX_REGS) { + flavor = __i386_AVX512F_STATE; + count = e_regSetWordSizeAVX512f; + } else + if (CPUHasAVX()) { + flavor = __i386_AVX_STATE; + count = e_regSetWordSizeAVX; + } + + m_state.SetError(e_regSetFPU, Write, + ::thread_set_state(m_thread->MachPortNumber(), flavor, + (thread_state_t)&m_state.context.fpu, + count)); return m_state.GetError(e_regSetFPU, Write); } } @@ -965,6 +1059,9 @@ kern_return_t DNBArchImplI386::EnableHardwareSingleStep(bool enable) { #define AVX_OFFSET(reg) \ (offsetof(DNBArchImplI386::AVX, __fpu_##reg) + \ offsetof(DNBArchImplI386::Context, fpu.avx)) +#define AVX512F_OFFSET(reg) \ + (offsetof(DNBArchImplI386::AVX512F, __fpu_##reg) + \ + offsetof(DNBArchImplI386::Context, fpu.avx512f)) #define EXC_OFFSET(reg) \ (offsetof(DNBArchImplI386::EXC, __##reg) + \ offsetof(DNBArchImplI386::Context, exc)) @@ -976,6 +1073,7 @@ kern_return_t DNBArchImplI386::EnableHardwareSingleStep(bool enable) { #define FPU_SIZE_XMM(reg) \ (sizeof(((DNBArchImplI386::FPU *)NULL)->__fpu_##reg.__xmm_reg)) #define FPU_SIZE_YMM(reg) (32) +#define FPU_SIZE_ZMM(reg) (64) #define EXC_SIZE(reg) (sizeof(((DNBArchImplI386::EXC *)NULL)->__##reg)) // This does not accurately identify the location of ymm0...7 in @@ -985,6 +1083,9 @@ kern_return_t DNBArchImplI386::EnableHardwareSingleStep(bool enable) { // -- not to interpret the thread_get_state info. #define AVX_OFFSET_YMM(n) (AVX_OFFSET(xmm7) + FPU_SIZE_XMM(xmm7) + (32 * n)) +// TODO: Test this and come back. +#define AVX512F_OFFSET_ZMM(n) (AVX_OFFSET_YMM(7) + FPU_SIZE_XMM(xmm7) + (64 * n)) + // These macros will auto define the register name, alt name, register size, // register offset, encoding, format and native register. 
This ensures that // the register state structures are defined correctly and have the correct @@ -1279,6 +1380,141 @@ const DNBRegisterInfo DNBArchImplI386::g_fpu_registers_avx[] = { }; + +#define STR(s) #s + +#define ZMM_REG_DEF(reg) \ + { \ + e_regSetFPU, fpu_zmm##reg, STR(zmm##reg), NULL, Vector, VectorOfUInt8, \ + FPU_SIZE_ZMM(zmm##reg), AVX512F_OFFSET_ZMM(reg), INVALID_NUB_REGNUM, \ + dwarf_zmm##reg, INVALID_NUB_REGNUM, debugserver_zmm##reg, NULL, NULL \ + } + +#define YMM_REG_ALIAS(reg) \ + { \ + e_regSetFPU, fpu_ymm##reg, STR(ymm##reg), NULL, Vector, VectorOfUInt8, \ + FPU_SIZE_YMM(ymm##reg), 0, INVALID_NUB_REGNUM, dwarf_ymm##reg, \ + INVALID_NUB_REGNUM, debugserver_ymm##reg, g_contained_zmm##reg, NULL \ + } + +#define XMM_REG_ALIAS(reg) \ + { \ + e_regSetFPU, fpu_xmm##reg, STR(xmm##reg), NULL, Vector, VectorOfUInt8, \ + FPU_SIZE_XMM(xmm##reg), 0, INVALID_NUB_REGNUM, dwarf_xmm##reg, \ + INVALID_NUB_REGNUM, debugserver_xmm##reg, g_contained_zmm##reg, NULL \ + } + +#define AVX512_K_REG_DEF(reg) \ + { \ + e_regSetFPU, fpu_k##reg, STR(k##reg), NULL, Vector, VectorOfUInt8, 8, \ + AVX512F_OFFSET(k##reg), dwarf_k##reg, dwarf_k##reg, -1U, \ + debugserver_k##reg, NULL, NULL \ + } + +static const char *g_contained_zmm0[] = {"zmm0", NULL}; +static const char *g_contained_zmm1[] = {"zmm1", NULL}; +static const char *g_contained_zmm2[] = {"zmm2", NULL}; +static const char *g_contained_zmm3[] = {"zmm3", NULL}; +static const char *g_contained_zmm4[] = {"zmm4", NULL}; +static const char *g_contained_zmm5[] = {"zmm5", NULL}; +static const char *g_contained_zmm6[] = {"zmm6", NULL}; +static const char *g_contained_zmm7[] = {"zmm7", NULL}; + +const DNBRegisterInfo DNBArchImplI386::g_fpu_registers_avx512f[] = { + {e_regSetFPU, fpu_fcw, "fctrl", NULL, Uint, Hex, FPU_SIZE_UINT(fcw), + AVX_OFFSET(fcw), INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, + INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, NULL, NULL}, + {e_regSetFPU, fpu_fsw, "fstat", NULL, Uint, Hex, FPU_SIZE_UINT(fsw), + AVX_OFFSET(fsw), INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, + INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, NULL, NULL}, + {e_regSetFPU, fpu_ftw, "ftag", NULL, Uint, Hex, 2 /* sizeof __fpu_ftw + sizeof __fpu_rsrv1 */, + FPU_OFFSET(ftw), INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, + INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, NULL, NULL}, + {e_regSetFPU, fpu_fop, "fop", NULL, Uint, Hex, FPU_SIZE_UINT(fop), + AVX_OFFSET(fop), INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, + INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, NULL, NULL}, + {e_regSetFPU, fpu_ip, "fioff", NULL, Uint, Hex, FPU_SIZE_UINT(ip), + AVX_OFFSET(ip), INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, + INVALID_NUB_REGNUM, NULL, NULL}, + {e_regSetFPU, fpu_cs, "fiseg", NULL, Uint, Hex, FPU_SIZE_UINT(cs), + AVX_OFFSET(cs), INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, + INVALID_NUB_REGNUM, NULL, NULL}, + {e_regSetFPU, fpu_dp, "fooff", NULL, Uint, Hex, FPU_SIZE_UINT(dp), + AVX_OFFSET(dp), INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, + INVALID_NUB_REGNUM, NULL, NULL}, + {e_regSetFPU, fpu_ds, "foseg", NULL, Uint, Hex, FPU_SIZE_UINT(ds), + AVX_OFFSET(ds), INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, + INVALID_NUB_REGNUM, NULL, NULL}, + {e_regSetFPU, fpu_mxcsr, "mxcsr", NULL, Uint, Hex, FPU_SIZE_UINT(mxcsr), + AVX_OFFSET(mxcsr), INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, + INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, NULL, NULL}, + {e_regSetFPU, fpu_mxcsrmask, "mxcsrmask", NULL, Uint, Hex, + FPU_SIZE_UINT(mxcsrmask), AVX_OFFSET(mxcsrmask), INVALID_NUB_REGNUM, + 
INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, NULL, NULL}, + + {e_regSetFPU, fpu_stmm0, "stmm0", NULL, Vector, VectorOfUInt8, + FPU_SIZE_MMST(stmm0), AVX_OFFSET(stmm0), INVALID_NUB_REGNUM, dwarf_stmm0, + INVALID_NUB_REGNUM, debugserver_stmm0, NULL, NULL}, + {e_regSetFPU, fpu_stmm1, "stmm1", NULL, Vector, VectorOfUInt8, + FPU_SIZE_MMST(stmm1), AVX_OFFSET(stmm1), INVALID_NUB_REGNUM, dwarf_stmm1, + INVALID_NUB_REGNUM, debugserver_stmm1, NULL, NULL}, + {e_regSetFPU, fpu_stmm2, "stmm2", NULL, Vector, VectorOfUInt8, + FPU_SIZE_MMST(stmm2), AVX_OFFSET(stmm2), INVALID_NUB_REGNUM, dwarf_stmm2, + INVALID_NUB_REGNUM, debugserver_stmm2, NULL, NULL}, + {e_regSetFPU, fpu_stmm3, "stmm3", NULL, Vector, VectorOfUInt8, + FPU_SIZE_MMST(stmm3), AVX_OFFSET(stmm3), INVALID_NUB_REGNUM, dwarf_stmm3, + INVALID_NUB_REGNUM, debugserver_stmm3, NULL, NULL}, + {e_regSetFPU, fpu_stmm4, "stmm4", NULL, Vector, VectorOfUInt8, + FPU_SIZE_MMST(stmm4), AVX_OFFSET(stmm4), INVALID_NUB_REGNUM, dwarf_stmm4, + INVALID_NUB_REGNUM, debugserver_stmm4, NULL, NULL}, + {e_regSetFPU, fpu_stmm5, "stmm5", NULL, Vector, VectorOfUInt8, + FPU_SIZE_MMST(stmm5), AVX_OFFSET(stmm5), INVALID_NUB_REGNUM, dwarf_stmm5, + INVALID_NUB_REGNUM, debugserver_stmm5, NULL, NULL}, + {e_regSetFPU, fpu_stmm6, "stmm6", NULL, Vector, VectorOfUInt8, + FPU_SIZE_MMST(stmm6), AVX_OFFSET(stmm6), INVALID_NUB_REGNUM, dwarf_stmm6, + INVALID_NUB_REGNUM, debugserver_stmm6, NULL, NULL}, + {e_regSetFPU, fpu_stmm7, "stmm7", NULL, Vector, VectorOfUInt8, + FPU_SIZE_MMST(stmm7), AVX_OFFSET(stmm7), INVALID_NUB_REGNUM, dwarf_stmm7, + INVALID_NUB_REGNUM, debugserver_stmm7, NULL, NULL}, + + AVX512_K_REG_DEF(0), + AVX512_K_REG_DEF(1), + AVX512_K_REG_DEF(2), + AVX512_K_REG_DEF(3), + AVX512_K_REG_DEF(4), + AVX512_K_REG_DEF(5), + AVX512_K_REG_DEF(6), + AVX512_K_REG_DEF(7), + + ZMM_REG_DEF(0), + ZMM_REG_DEF(1), + ZMM_REG_DEF(2), + ZMM_REG_DEF(3), + ZMM_REG_DEF(4), + ZMM_REG_DEF(5), + ZMM_REG_DEF(6), + ZMM_REG_DEF(7), + + YMM_REG_ALIAS(0), + YMM_REG_ALIAS(1), + YMM_REG_ALIAS(2), + YMM_REG_ALIAS(3), + YMM_REG_ALIAS(4), + YMM_REG_ALIAS(5), + YMM_REG_ALIAS(6), + YMM_REG_ALIAS(7), + + XMM_REG_ALIAS(0), + XMM_REG_ALIAS(1), + XMM_REG_ALIAS(2), + XMM_REG_ALIAS(3), + XMM_REG_ALIAS(4), + XMM_REG_ALIAS(5), + XMM_REG_ALIAS(6), + XMM_REG_ALIAS(7) + +}; + const DNBRegisterInfo DNBArchImplI386::g_exc_registers[] = { {e_regSetEXC, exc_trapno, "trapno", NULL, Uint, Hex, EXC_SIZE(trapno), EXC_OFFSET(trapno), INVALID_NUB_REGNUM, INVALID_NUB_REGNUM, @@ -1297,12 +1533,16 @@ const size_t DNBArchImplI386::k_num_fpu_registers_no_avx = sizeof(g_fpu_registers_no_avx) / sizeof(DNBRegisterInfo); const size_t DNBArchImplI386::k_num_fpu_registers_avx = sizeof(g_fpu_registers_avx) / sizeof(DNBRegisterInfo); +const size_t DNBArchImplI386::k_num_fpu_registers_avx512f = + sizeof(g_fpu_registers_avx512f) / sizeof(DNBRegisterInfo); const size_t DNBArchImplI386::k_num_exc_registers = sizeof(g_exc_registers) / sizeof(DNBRegisterInfo); const size_t DNBArchImplI386::k_num_all_registers_no_avx = k_num_gpr_registers + k_num_fpu_registers_no_avx + k_num_exc_registers; const size_t DNBArchImplI386::k_num_all_registers_avx = k_num_gpr_registers + k_num_fpu_registers_avx + k_num_exc_registers; +const size_t DNBArchImplI386::k_num_all_registers_avx512f = + k_num_gpr_registers + k_num_fpu_registers_avx512f + k_num_exc_registers; //---------------------------------------------------------------------- // Register set definitions. 
The first definitions at register set index @@ -1322,9 +1562,16 @@ const DNBRegisterSetInfo DNBArchImplI386::g_reg_sets_avx[] = { {"Floating Point Registers", g_fpu_registers_avx, k_num_fpu_registers_avx}, {"Exception State Registers", g_exc_registers, k_num_exc_registers}}; +const DNBRegisterSetInfo DNBArchImplI386::g_reg_sets_avx512f[] = { + {"i386 Registers", NULL, k_num_all_registers_avx512f}, + {"General Purpose Registers", g_gpr_registers, k_num_gpr_registers}, + {"Floating Point Registers", g_fpu_registers_avx512f, + k_num_fpu_registers_avx512f}, + {"Exception State Registers", g_exc_registers, k_num_exc_registers}}; + // Total number of register sets for this architecture const size_t DNBArchImplI386::k_num_register_sets = - sizeof(g_reg_sets_no_avx) / sizeof(DNBRegisterSetInfo); + sizeof(g_reg_sets_avx) / sizeof(DNBRegisterSetInfo); DNBArchProtocol *DNBArchImplI386::Create(MachThread *thread) { DNBArchImplI386 *obj = new DNBArchImplI386(thread); @@ -1341,7 +1588,9 @@ const uint8_t *DNBArchImplI386::SoftwareBreakpointOpcode(nub_size_t byte_size) { const DNBRegisterSetInfo * DNBArchImplI386::GetRegisterSetInfo(nub_size_t *num_reg_sets) { *num_reg_sets = k_num_register_sets; - if (CPUHasAVX() || FORCE_AVX_REGS) + if (CPUHasAVX512f() || FORCE_AVX_REGS) + return g_reg_sets_avx512f; + if (CPUHasAVX()) return g_reg_sets_avx; else return g_reg_sets_no_avx; @@ -1404,6 +1653,8 @@ bool DNBArchImplI386::GetRegisterValue(uint32_t set, uint32_t reg, case e_regSetFPU: if (reg > fpu_xmm7 && !(CPUHasAVX() || FORCE_AVX_REGS)) return false; + if (reg > fpu_ymm7 && !(CPUHasAVX512f() || FORCE_AVX_REGS)) + return false; switch (reg) { case fpu_fcw: value->value.uint16 = @@ -1534,6 +1785,33 @@ bool DNBArchImplI386::GetRegisterValue(uint32_t set, uint32_t reg, MEMCPY_YMM(7); return true; #undef MEMCPY_YMM + + case fpu_k0: + case fpu_k1: + case fpu_k2: + case fpu_k3: + case fpu_k4: + case fpu_k5: + case fpu_k6: + case fpu_k7: + memcpy((&value->value.uint8), + &m_state.context.fpu.avx512f.__fpu_k0 + (reg - fpu_k0), 8); + return true; + case fpu_zmm0: + case fpu_zmm1: + case fpu_zmm2: + case fpu_zmm3: + case fpu_zmm4: + case fpu_zmm5: + case fpu_zmm6: + case fpu_zmm7: + memcpy(&value->value.uint8, + &m_state.context.fpu.avx512f.__fpu_xmm0 + (reg - fpu_zmm0), 16); + memcpy(&value->value.uint8 + 16, + &m_state.context.fpu.avx512f.__fpu_ymmh0 + (reg - fpu_zmm0), 16); + memcpy(&value->value.uint8 + 32, + &m_state.context.fpu.avx512f.__fpu_zmmh0 + (reg - fpu_zmm0), 32); + return true; } break; @@ -1595,6 +1873,8 @@ bool DNBArchImplI386::SetRegisterValue(uint32_t set, uint32_t reg, case e_regSetFPU: if (reg > fpu_xmm7 && !(CPUHasAVX() || FORCE_AVX_REGS)) return false; + if (reg > fpu_ymm7 && !(CPUHasAVX512f() || FORCE_AVX_REGS)) + return false; switch (reg) { case fpu_fcw: *((uint16_t *)(&m_state.context.fpu.no_avx.__fpu_fcw)) = @@ -1751,6 +2031,33 @@ bool DNBArchImplI386::SetRegisterValue(uint32_t set, uint32_t reg, MEMCPY_YMM(7); return true; #undef MEMCPY_YMM + + case fpu_k0: + case fpu_k1: + case fpu_k2: + case fpu_k3: + case fpu_k4: + case fpu_k5: + case fpu_k6: + case fpu_k7: + memcpy(&m_state.context.fpu.avx512f.__fpu_k0 + (reg - fpu_k0), + &value->value.uint8, 8); + return true; + case fpu_zmm0: + case fpu_zmm1: + case fpu_zmm2: + case fpu_zmm3: + case fpu_zmm4: + case fpu_zmm5: + case fpu_zmm6: + case fpu_zmm7: + memcpy(&m_state.context.fpu.avx512f.__fpu_xmm0 + (reg - fpu_zmm0), + &value->value.uint8, 16); + memcpy(&m_state.context.fpu.avx512f.__fpu_ymmh0 + (reg - fpu_zmm0), + &value->value.uint8 + 16, 
16); + memcpy(&m_state.context.fpu.avx512f.__fpu_zmmh0 + (reg - fpu_zmm0), + &value->value.uint8 + 32, 32); + return true; } break; @@ -1771,7 +2078,13 @@ bool DNBArchImplI386::SetRegisterValue(uint32_t set, uint32_t reg, uint32_t DNBArchImplI386::GetRegisterContextSize() { static uint32_t g_cached_size = 0; if (g_cached_size == 0) { - if (CPUHasAVX() || FORCE_AVX_REGS) { + if(CPUHasAVX512f() || FORCE_AVX_REGS) { + for (size_t i = 0; i < k_num_fpu_registers_avx512f; ++i) { + if (g_fpu_registers_avx512f[i].value_regs == NULL) + g_cached_size += g_fpu_registers_avx512f[i].size; + } + } else + if (CPUHasAVX()) { for (size_t i = 0; i < k_num_fpu_registers_avx; ++i) { if (g_fpu_registers_avx[i].value_regs == NULL) g_cached_size += g_fpu_registers_avx[i].size; @@ -1844,6 +2157,13 @@ nub_size_t DNBArchImplI386::GetRegisterContext(void *buf, nub_size_t buf_len) { p += 10; } + if (CPUHasAVX512f() || FORCE_AVX_REGS) { + for (size_t i = 0; i < 8; ++i) { + memcpy(p, &m_state.context.fpu.avx512f.__fpu_k0 + i, 8); + p += 8; + } + } + if (CPUHasAVX() || FORCE_AVX_REGS) { // Interleave the XMM and YMMH registers to make the YMM registers for (size_t i = 0; i < 8; ++i) { @@ -1852,6 +2172,12 @@ nub_size_t DNBArchImplI386::GetRegisterContext(void *buf, nub_size_t buf_len) { memcpy(p, &m_state.context.fpu.avx.__fpu_ymmh0 + i, 16); p += 16; } + if(CPUHasAVX512f() || FORCE_AVX_REGS) { + for (size_t i = 0; i < 8; ++i) { + memcpy(p, &m_state.context.fpu.avx512f.__fpu_zmmh0 + i, 32); + p += 32; + } + } } else { // Copy the XMM registers in a single block memcpy(p, &m_state.context.fpu.no_avx.__fpu_xmm0, 8 * 16); @@ -1908,6 +2234,13 @@ nub_size_t DNBArchImplI386::SetRegisterContext(const void *buf, p += 10; } + if(CPUHasAVX512f() || FORCE_AVX_REGS) { + for (size_t i = 0; i < 8; ++i) { + memcpy(&m_state.context.fpu.avx512f.__fpu_k0 + i, p, 8); + p += 8; + } + } + if (CPUHasAVX() || FORCE_AVX_REGS) { // Interleave the XMM and YMMH registers to make the YMM registers for (size_t i = 0; i < 8; ++i) { @@ -1916,6 +2249,13 @@ nub_size_t DNBArchImplI386::SetRegisterContext(const void *buf, memcpy(&m_state.context.fpu.avx.__fpu_ymmh0 + i, p, 16); p += 16; } + + if(CPUHasAVX512f() || FORCE_AVX_REGS) { + for (size_t i = 0; i < 8; ++i) { + memcpy(&m_state.context.fpu.avx512f.__fpu_zmmh0 + i, p, 32); + p += 32; + } + } } else { // Copy the XMM registers in a single block memcpy(&m_state.context.fpu.no_avx.__fpu_xmm0, p, 8 * 16); diff --git a/tools/debugserver/source/MacOSX/i386/DNBArchImplI386.h b/tools/debugserver/source/MacOSX/i386/DNBArchImplI386.h index 2e9542b051c66..ce56a540e0924 100644 --- a/tools/debugserver/source/MacOSX/i386/DNBArchImplI386.h +++ b/tools/debugserver/source/MacOSX/i386/DNBArchImplI386.h @@ -83,6 +83,12 @@ protected: static const size_t k_num_all_registers_avx; static const size_t k_num_register_sets; + typedef __i386_avx512f_state_t AVX512F; + static const DNBRegisterInfo g_fpu_registers_avx512f[]; + static const DNBRegisterSetInfo g_reg_sets_avx512f[]; + static const size_t k_num_fpu_registers_avx512f; + static const size_t k_num_all_registers_avx512f; + typedef enum RegisterSetTag { e_regSetALL = REGISTER_SET_ALL, e_regSetGPR, @@ -97,6 +103,7 @@ protected: e_regSetWordSizeFPU = sizeof(FPU) / sizeof(int), e_regSetWordSizeEXC = sizeof(EXC) / sizeof(int), e_regSetWordSizeAVX = sizeof(AVX) / sizeof(int), + e_regSetWordSizeAVX512f = sizeof(AVX512F) / sizeof(int), e_regSetWordSizeDBG = sizeof(DBG) / sizeof(int) } RegisterSetWordSize; @@ -107,6 +114,7 @@ protected: union { FPU no_avx; AVX avx; + AVX512F 
avx512f; } fpu; EXC exc; DBG dbg; diff --git a/tools/debugserver/source/MacOSX/i386/MachRegisterStatesI386.h b/tools/debugserver/source/MacOSX/i386/MachRegisterStatesI386.h index 900aa15a75d32..e51ecfd24befc 100644 --- a/tools/debugserver/source/MacOSX/i386/MachRegisterStatesI386.h +++ b/tools/debugserver/source/MacOSX/i386/MachRegisterStatesI386.h @@ -21,6 +21,7 @@ #define __i386_EXCEPTION_STATE 3 #define __i386_DEBUG_STATE 10 #define __i386_AVX_STATE 16 +#define __i386_AVX512F_STATE 19 typedef struct { uint32_t __eax; @@ -158,6 +159,69 @@ typedef struct { __i386_xmm_reg __fpu_ymmh7; } __i386_avx_state_t; +typedef struct { uint8_t __ymm_reg[32]; } __i386_ymm_reg; +typedef struct { uint8_t __opmask_reg[8]; } __i386_opmask_reg; + +typedef struct { + uint32_t __fpu_reserved[2]; + __i386_fp_control_t __fpu_fcw; + __i386_fp_status_t __fpu_fsw; + uint8_t __fpu_ftw; + uint8_t __fpu_rsrv1; + uint16_t __fpu_fop; + uint32_t __fpu_ip; + uint16_t __fpu_cs; + uint16_t __fpu_rsrv2; + uint32_t __fpu_dp; + uint16_t __fpu_ds; + uint16_t __fpu_rsrv3; + uint32_t __fpu_mxcsr; + uint32_t __fpu_mxcsrmask; + __i386_mmst_reg __fpu_stmm0; + __i386_mmst_reg __fpu_stmm1; + __i386_mmst_reg __fpu_stmm2; + __i386_mmst_reg __fpu_stmm3; + __i386_mmst_reg __fpu_stmm4; + __i386_mmst_reg __fpu_stmm5; + __i386_mmst_reg __fpu_stmm6; + __i386_mmst_reg __fpu_stmm7; + __i386_xmm_reg __fpu_xmm0; + __i386_xmm_reg __fpu_xmm1; + __i386_xmm_reg __fpu_xmm2; + __i386_xmm_reg __fpu_xmm3; + __i386_xmm_reg __fpu_xmm4; + __i386_xmm_reg __fpu_xmm5; + __i386_xmm_reg __fpu_xmm6; + __i386_xmm_reg __fpu_xmm7; + uint8_t __fpu_rsrv4[14 * 16]; + uint32_t __fpu_reserved1; + uint8_t __avx_reserved1[64]; + __i386_xmm_reg __fpu_ymmh0; + __i386_xmm_reg __fpu_ymmh1; + __i386_xmm_reg __fpu_ymmh2; + __i386_xmm_reg __fpu_ymmh3; + __i386_xmm_reg __fpu_ymmh4; + __i386_xmm_reg __fpu_ymmh5; + __i386_xmm_reg __fpu_ymmh6; + __i386_xmm_reg __fpu_ymmh7; + __i386_opmask_reg __fpu_k0; + __i386_opmask_reg __fpu_k1; + __i386_opmask_reg __fpu_k2; + __i386_opmask_reg __fpu_k3; + __i386_opmask_reg __fpu_k4; + __i386_opmask_reg __fpu_k5; + __i386_opmask_reg __fpu_k6; + __i386_opmask_reg __fpu_k7; + __i386_ymm_reg __fpu_zmmh0; + __i386_ymm_reg __fpu_zmmh1; + __i386_ymm_reg __fpu_zmmh2; + __i386_ymm_reg __fpu_zmmh3; + __i386_ymm_reg __fpu_zmmh4; + __i386_ymm_reg __fpu_zmmh5; + __i386_ymm_reg __fpu_zmmh6; + __i386_ymm_reg __fpu_zmmh7; +} __i386_avx512f_state_t; + typedef struct { uint32_t __trapno; uint32_t __err; diff --git a/tools/debugserver/source/MacOSX/x86_64/DNBArchImplX86_64.cpp b/tools/debugserver/source/MacOSX/x86_64/DNBArchImplX86_64.cpp index 86843fd97c065..f0a3d2b001b2b 100644 --- a/tools/debugserver/source/MacOSX/x86_64/DNBArchImplX86_64.cpp +++ b/tools/debugserver/source/MacOSX/x86_64/DNBArchImplX86_64.cpp @@ -119,6 +119,17 @@ extern "C" bool CPUHasAVX() { return LogAVXAndReturn(g_has_avx, err, buffer); } +extern "C" bool CPUHasAVX512f() { + static AVXPresence g_has_avx512f = eAVXUnknown; + if (g_has_avx512f != eAVXUnknown) + return g_has_avx512f == eAVXPresent; + + g_has_avx512f = DetectHardwareFeature("hw.optional.avx512f") ? 
eAVXPresent + : eAVXNotPresent; + + return (g_has_avx512f == eAVXPresent); +} + uint64_t DNBArchImplX86_64::GetPC(uint64_t failValue) { // Get program counter if (GetGPRState(false) == KERN_SUCCESS) @@ -338,10 +349,82 @@ kern_return_t DNBArchImplX86_64::GetFPUState(bool force) { for (int i = 0; i < sizeof(m_state.context.fpu.avx.__avx_reserved1); ++i) m_state.context.fpu.avx.__avx_reserved1[i] = INT8_MIN; } + if (CPUHasAVX512f() || FORCE_AVX_REGS) { + for (int i = 0; i < 8; ++i) { + m_state.context.fpu.avx512f.__fpu_k0.__opmask_reg[i] = '0'; + m_state.context.fpu.avx512f.__fpu_k1.__opmask_reg[i] = '1'; + m_state.context.fpu.avx512f.__fpu_k2.__opmask_reg[i] = '2'; + m_state.context.fpu.avx512f.__fpu_k3.__opmask_reg[i] = '3'; + m_state.context.fpu.avx512f.__fpu_k4.__opmask_reg[i] = '4'; + m_state.context.fpu.avx512f.__fpu_k5.__opmask_reg[i] = '5'; + m_state.context.fpu.avx512f.__fpu_k6.__opmask_reg[i] = '6'; + m_state.context.fpu.avx512f.__fpu_k7.__opmask_reg[i] = '7'; + } + + for (int i = 0; i < 32; ++i) { + m_state.context.fpu.avx512f.__fpu_zmmh0.__ymm_reg[i] = '0'; + m_state.context.fpu.avx512f.__fpu_zmmh1.__ymm_reg[i] = '1'; + m_state.context.fpu.avx512f.__fpu_zmmh2.__ymm_reg[i] = '2'; + m_state.context.fpu.avx512f.__fpu_zmmh3.__ymm_reg[i] = '3'; + m_state.context.fpu.avx512f.__fpu_zmmh4.__ymm_reg[i] = '4'; + m_state.context.fpu.avx512f.__fpu_zmmh5.__ymm_reg[i] = '5'; + m_state.context.fpu.avx512f.__fpu_zmmh6.__ymm_reg[i] = '6'; + m_state.context.fpu.avx512f.__fpu_zmmh7.__ymm_reg[i] = '7'; + m_state.context.fpu.avx512f.__fpu_zmmh8.__ymm_reg[i] = '8'; + m_state.context.fpu.avx512f.__fpu_zmmh9.__ymm_reg[i] = '9'; + m_state.context.fpu.avx512f.__fpu_zmmh10.__ymm_reg[i] = 'A'; + m_state.context.fpu.avx512f.__fpu_zmmh11.__ymm_reg[i] = 'B'; + m_state.context.fpu.avx512f.__fpu_zmmh12.__ymm_reg[i] = 'C'; + m_state.context.fpu.avx512f.__fpu_zmmh13.__ymm_reg[i] = 'D'; + m_state.context.fpu.avx512f.__fpu_zmmh14.__ymm_reg[i] = 'E'; + m_state.context.fpu.avx512f.__fpu_zmmh15.__ymm_reg[i] = 'F'; + } + for (int i = 0; i < 64; ++i) { + m_state.context.fpu.avx512f.__fpu_zmm16.__zmm_reg[i] = 'G'; + m_state.context.fpu.avx512f.__fpu_zmm17.__zmm_reg[i] = 'H'; + m_state.context.fpu.avx512f.__fpu_zmm18.__zmm_reg[i] = 'I'; + m_state.context.fpu.avx512f.__fpu_zmm19.__zmm_reg[i] = 'J'; + m_state.context.fpu.avx512f.__fpu_zmm20.__zmm_reg[i] = 'K'; + m_state.context.fpu.avx512f.__fpu_zmm21.__zmm_reg[i] = 'L'; + m_state.context.fpu.avx512f.__fpu_zmm22.__zmm_reg[i] = 'M'; + m_state.context.fpu.avx512f.__fpu_zmm23.__zmm_reg[i] = 'N'; + m_state.context.fpu.avx512f.__fpu_zmm24.__zmm_reg[i] = 'O'; + m_state.context.fpu.avx512f.__fpu_zmm25.__zmm_reg[i] = 'P'; + m_state.context.fpu.avx512f.__fpu_zmm26.__zmm_reg[i] = 'Q'; + m_state.context.fpu.avx512f.__fpu_zmm27.__zmm_reg[i] = 'R'; + m_state.context.fpu.avx512f.__fpu_zmm28.__zmm_reg[i] = 'S'; + m_state.context.fpu.avx512f.__fpu_zmm29.__zmm_reg[i] = 'T'; + m_state.context.fpu.avx512f.__fpu_zmm30.__zmm_reg[i] = 'U'; + m_state.context.fpu.avx512f.__fpu_zmm31.__zmm_reg[i] = 'V'; + } + } m_state.SetError(e_regSetFPU, Read, 0); } else { mach_msg_type_number_t count = e_regSetWordSizeFPU; int flavor = __x86_64_FLOAT_STATE; + // On a machine with the AVX512 register set, a process only gets a + // full AVX512 register context after it uses the AVX512 registers; + // if the process has not yet triggered this change, trying to fetch + // the AVX512 registers will fail. Fall through to fetching the AVX + // registers. 
+ if (CPUHasAVX512f() || FORCE_AVX_REGS) { + count = e_regSetWordSizeAVX512f; + flavor = __x86_64_AVX512F_STATE; + m_state.SetError(e_regSetFPU, Read, + ::thread_get_state(m_thread->MachPortNumber(), flavor, + (thread_state_t)&m_state.context.fpu, + &count)); + DNBLogThreadedIf(LOG_THREAD, + "::thread_get_state (0x%4.4x, %u, &fpu, %u => 0x%8.8x", + m_thread->MachPortNumber(), flavor, (uint32_t)count, + m_state.GetError(e_regSetFPU, Read)); + + if (m_state.GetError(e_regSetFPU, Read) == KERN_SUCCESS) + return m_state.GetError(e_regSetFPU, Read); + else + DNBLogThreadedIf(LOG_THREAD, + "::thread_get_state attempted fetch of avx512 fpu regctx failed, will try fetching avx"); + } if (CPUHasAVX() || FORCE_AVX_REGS) { count = e_regSetWordSizeAVX; flavor = __x86_64_AVX_STATE; @@ -413,6 +496,20 @@ kern_return_t DNBArchImplX86_64::SetFPUState() { } else { int flavor = __x86_64_FLOAT_STATE; mach_msg_type_number_t count = e_regSetWordSizeFPU; + if (CPUHasAVX512f() || FORCE_AVX_REGS) { + count = e_regSetWordSizeAVX512f; + flavor = __x86_64_AVX512F_STATE; + m_state.SetError( + e_regSetFPU, Write, + ::thread_set_state(m_thread->MachPortNumber(), flavor, + (thread_state_t)&m_state.context.fpu, count)); + if (m_state.GetError(e_regSetFPU, Write) == KERN_SUCCESS) + return m_state.GetError(e_regSetFPU, Write); + else + DNBLogThreadedIf(LOG_THREAD, + "::thread_get_state attempted save of avx512 fpu regctx failed, will try saving avx regctx"); + } + if (CPUHasAVX() || FORCE_AVX_REGS) { flavor = __x86_64_AVX_STATE; count = e_regSetWordSizeAVX; @@ -1019,6 +1116,46 @@ enum { fpu_ymm13, fpu_ymm14, fpu_ymm15, + fpu_k0, + fpu_k1, + fpu_k2, + fpu_k3, + fpu_k4, + fpu_k5, + fpu_k6, + fpu_k7, + fpu_zmm0, + fpu_zmm1, + fpu_zmm2, + fpu_zmm3, + fpu_zmm4, + fpu_zmm5, + fpu_zmm6, + fpu_zmm7, + fpu_zmm8, + fpu_zmm9, + fpu_zmm10, + fpu_zmm11, + fpu_zmm12, + fpu_zmm13, + fpu_zmm14, + fpu_zmm15, + fpu_zmm16, + fpu_zmm17, + fpu_zmm18, + fpu_zmm19, + fpu_zmm20, + fpu_zmm21, + fpu_zmm22, + fpu_zmm23, + fpu_zmm24, + fpu_zmm25, + fpu_zmm26, + fpu_zmm27, + fpu_zmm28, + fpu_zmm29, + fpu_zmm30, + fpu_zmm31, k_num_fpu_regs, // Aliases @@ -1095,7 +1232,47 @@ enum ehframe_dwarf_regnums { ehframe_dwarf_ymm12 = ehframe_dwarf_xmm12, ehframe_dwarf_ymm13 = ehframe_dwarf_xmm13, ehframe_dwarf_ymm14 = ehframe_dwarf_xmm14, - ehframe_dwarf_ymm15 = ehframe_dwarf_xmm15 + ehframe_dwarf_ymm15 = ehframe_dwarf_xmm15, + ehframe_dwarf_zmm0 = ehframe_dwarf_xmm0, + ehframe_dwarf_zmm1 = ehframe_dwarf_xmm1, + ehframe_dwarf_zmm2 = ehframe_dwarf_xmm2, + ehframe_dwarf_zmm3 = ehframe_dwarf_xmm3, + ehframe_dwarf_zmm4 = ehframe_dwarf_xmm4, + ehframe_dwarf_zmm5 = ehframe_dwarf_xmm5, + ehframe_dwarf_zmm6 = ehframe_dwarf_xmm6, + ehframe_dwarf_zmm7 = ehframe_dwarf_xmm7, + ehframe_dwarf_zmm8 = ehframe_dwarf_xmm8, + ehframe_dwarf_zmm9 = ehframe_dwarf_xmm9, + ehframe_dwarf_zmm10 = ehframe_dwarf_xmm10, + ehframe_dwarf_zmm11 = ehframe_dwarf_xmm11, + ehframe_dwarf_zmm12 = ehframe_dwarf_xmm12, + ehframe_dwarf_zmm13 = ehframe_dwarf_xmm13, + ehframe_dwarf_zmm14 = ehframe_dwarf_xmm14, + ehframe_dwarf_zmm15 = ehframe_dwarf_xmm15, + ehframe_dwarf_zmm16 = 67, + ehframe_dwarf_zmm17, + ehframe_dwarf_zmm18, + ehframe_dwarf_zmm19, + ehframe_dwarf_zmm20, + ehframe_dwarf_zmm21, + ehframe_dwarf_zmm22, + ehframe_dwarf_zmm23, + ehframe_dwarf_zmm24, + ehframe_dwarf_zmm25, + ehframe_dwarf_zmm26, + ehframe_dwarf_zmm27, + ehframe_dwarf_zmm28, + ehframe_dwarf_zmm29, + ehframe_dwarf_zmm30, + ehframe_dwarf_zmm31, + ehframe_dwarf_k0 = 118, + ehframe_dwarf_k1, + ehframe_dwarf_k2, + ehframe_dwarf_k3, 
+ ehframe_dwarf_k4, + ehframe_dwarf_k5, + ehframe_dwarf_k6, + ehframe_dwarf_k7, }; enum debugserver_regnums { @@ -1178,7 +1355,47 @@ enum debugserver_regnums { debugserver_ymm12 = debugserver_xmm12, debugserver_ymm13 = debugserver_xmm13, debugserver_ymm14 = debugserver_xmm14, - debugserver_ymm15 = debugserver_xmm15 + debugserver_ymm15 = debugserver_xmm15, + debugserver_zmm0 = debugserver_xmm0, + debugserver_zmm1 = debugserver_xmm1, + debugserver_zmm2 = debugserver_xmm2, + debugserver_zmm3 = debugserver_xmm3, + debugserver_zmm4 = debugserver_xmm4, + debugserver_zmm5 = debugserver_xmm5, + debugserver_zmm6 = debugserver_xmm6, + debugserver_zmm7 = debugserver_xmm7, + debugserver_zmm8 = debugserver_xmm8, + debugserver_zmm9 = debugserver_xmm9, + debugserver_zmm10 = debugserver_xmm10, + debugserver_zmm11 = debugserver_xmm11, + debugserver_zmm12 = debugserver_xmm12, + debugserver_zmm13 = debugserver_xmm13, + debugserver_zmm14 = debugserver_xmm14, + debugserver_zmm15 = debugserver_xmm15, + debugserver_zmm16 = 67, + debugserver_zmm17 = 68, + debugserver_zmm18 = 69, + debugserver_zmm19 = 70, + debugserver_zmm20 = 71, + debugserver_zmm21 = 72, + debugserver_zmm22 = 73, + debugserver_zmm23 = 74, + debugserver_zmm24 = 75, + debugserver_zmm25 = 76, + debugserver_zmm26 = 77, + debugserver_zmm27 = 78, + debugserver_zmm28 = 79, + debugserver_zmm29 = 80, + debugserver_zmm30 = 81, + debugserver_zmm31 = 82, + debugserver_k0 = 118, + debugserver_k1 = 119, + debugserver_k2 = 120, + debugserver_k3 = 121, + debugserver_k4 = 122, + debugserver_k5 = 123, + debugserver_k6 = 124, + debugserver_k7 = 125, }; #define GPR_OFFSET(reg) (offsetof(DNBArchImplX86_64::GPR, __##reg)) @@ -1188,10 +1405,14 @@ enum debugserver_regnums { #define AVX_OFFSET(reg) \ (offsetof(DNBArchImplX86_64::AVX, __fpu_##reg) + \ offsetof(DNBArchImplX86_64::Context, fpu.avx)) +#define AVX512F_OFFSET(reg) \ + (offsetof(DNBArchImplX86_64::AVX512F, __fpu_##reg) + \ + offsetof(DNBArchImplX86_64::Context, fpu.avx512f)) #define EXC_OFFSET(reg) \ (offsetof(DNBArchImplX86_64::EXC, __##reg) + \ offsetof(DNBArchImplX86_64::Context, exc)) #define AVX_OFFSET_YMM(n) (AVX_OFFSET(ymmh0) + (32 * n)) +#define AVX512F_OFFSET_ZMM(n) (AVX512F_OFFSET(zmmh0) + (64 * n)) #define GPR_SIZE(reg) (sizeof(((DNBArchImplX86_64::GPR *)NULL)->__##reg)) #define FPU_SIZE_UINT(reg) \ @@ -1201,6 +1422,7 @@ enum debugserver_regnums { #define FPU_SIZE_XMM(reg) \ (sizeof(((DNBArchImplX86_64::FPU *)NULL)->__fpu_##reg.__xmm_reg)) #define FPU_SIZE_YMM(reg) (32) +#define FPU_SIZE_ZMM(reg) (64) #define EXC_SIZE(reg) (sizeof(((DNBArchImplX86_64::EXC *)NULL)->__##reg)) // These macros will auto define the register name, alt name, register size, @@ -1638,6 +1860,183 @@ const DNBRegisterInfo DNBArchImplX86_64::g_fpu_registers_avx[] = { }; +static const char *g_contained_zmm0[] = {"zmm0", NULL}; +static const char *g_contained_zmm1[] = {"zmm1", NULL}; +static const char *g_contained_zmm2[] = {"zmm2", NULL}; +static const char *g_contained_zmm3[] = {"zmm3", NULL}; +static const char *g_contained_zmm4[] = {"zmm4", NULL}; +static const char *g_contained_zmm5[] = {"zmm5", NULL}; +static const char *g_contained_zmm6[] = {"zmm6", NULL}; +static const char *g_contained_zmm7[] = {"zmm7", NULL}; +static const char *g_contained_zmm8[] = {"zmm8", NULL}; +static const char *g_contained_zmm9[] = {"zmm9", NULL}; +static const char *g_contained_zmm10[] = {"zmm10", NULL}; +static const char *g_contained_zmm11[] = {"zmm11", NULL}; +static const char *g_contained_zmm12[] = {"zmm12", NULL}; +static const char 
*g_contained_zmm13[] = {"zmm13", NULL}; +static const char *g_contained_zmm14[] = {"zmm14", NULL}; +static const char *g_contained_zmm15[] = {"zmm15", NULL}; + +#define STR(s) #s + +#define ZMM_REG_DEF(reg) \ + { \ + e_regSetFPU, fpu_zmm##reg, STR(zmm##reg), NULL, Vector, VectorOfUInt8, \ + FPU_SIZE_ZMM(zmm##reg), AVX512F_OFFSET_ZMM(reg), \ + ehframe_dwarf_zmm##reg, ehframe_dwarf_zmm##reg, -1U, \ + debugserver_zmm##reg, NULL, NULL \ + } + +#define YMM_REG_ALIAS(reg) \ + { \ + e_regSetFPU, fpu_ymm##reg, STR(ymm##reg), NULL, Vector, VectorOfUInt8, \ + FPU_SIZE_YMM(ymm##reg), 0, ehframe_dwarf_ymm##reg, \ + ehframe_dwarf_ymm##reg, -1U, debugserver_ymm##reg, \ + g_contained_zmm##reg, NULL \ + } + +#define XMM_REG_ALIAS(reg) \ + { \ + e_regSetFPU, fpu_xmm##reg, STR(xmm##reg), NULL, Vector, VectorOfUInt8, \ + FPU_SIZE_XMM(xmm##reg), 0, ehframe_dwarf_xmm##reg, \ + ehframe_dwarf_xmm##reg, -1U, debugserver_xmm##reg, \ + g_contained_zmm##reg, NULL \ + } + +#define AVX512_K_REG_DEF(reg) \ + { \ + e_regSetFPU, fpu_k##reg, STR(k##reg), NULL, Vector, VectorOfUInt8, 8, \ + AVX512F_OFFSET(k##reg), ehframe_dwarf_k##reg, ehframe_dwarf_k##reg, \ + -1U, debugserver_k##reg, NULL, NULL \ + } + +const DNBRegisterInfo DNBArchImplX86_64::g_fpu_registers_avx512f[] = { + {e_regSetFPU, fpu_fcw, "fctrl", NULL, Uint, Hex, FPU_SIZE_UINT(fcw), + AVX_OFFSET(fcw), -1U, -1U, -1U, -1U, NULL, NULL}, + {e_regSetFPU, fpu_fsw, "fstat", NULL, Uint, Hex, FPU_SIZE_UINT(fsw), + AVX_OFFSET(fsw), -1U, -1U, -1U, -1U, NULL, NULL}, + {e_regSetFPU, fpu_ftw, "ftag", NULL, Uint, Hex, 2 /* sizeof __fpu_ftw + sizeof __fpu_rsrv1 */, + AVX_OFFSET(ftw), -1U, -1U, -1U, -1U, NULL, NULL}, + {e_regSetFPU, fpu_fop, "fop", NULL, Uint, Hex, FPU_SIZE_UINT(fop), + AVX_OFFSET(fop), -1U, -1U, -1U, -1U, NULL, NULL}, + {e_regSetFPU, fpu_ip, "fioff", NULL, Uint, Hex, FPU_SIZE_UINT(ip), + AVX_OFFSET(ip), -1U, -1U, -1U, -1U, NULL, NULL}, + {e_regSetFPU, fpu_cs, "fiseg", NULL, Uint, Hex, FPU_SIZE_UINT(cs), + AVX_OFFSET(cs), -1U, -1U, -1U, -1U, NULL, NULL}, + {e_regSetFPU, fpu_dp, "fooff", NULL, Uint, Hex, FPU_SIZE_UINT(dp), + AVX_OFFSET(dp), -1U, -1U, -1U, -1U, NULL, NULL}, + {e_regSetFPU, fpu_ds, "foseg", NULL, Uint, Hex, FPU_SIZE_UINT(ds), + AVX_OFFSET(ds), -1U, -1U, -1U, -1U, NULL, NULL}, + {e_regSetFPU, fpu_mxcsr, "mxcsr", NULL, Uint, Hex, FPU_SIZE_UINT(mxcsr), + AVX_OFFSET(mxcsr), -1U, -1U, -1U, -1U, NULL, NULL}, + {e_regSetFPU, fpu_mxcsrmask, "mxcsrmask", NULL, Uint, Hex, + FPU_SIZE_UINT(mxcsrmask), AVX_OFFSET(mxcsrmask), -1U, -1U, -1U, -1U, NULL, + NULL}, + + {e_regSetFPU, fpu_stmm0, "stmm0", NULL, Vector, VectorOfUInt8, + FPU_SIZE_MMST(stmm0), AVX_OFFSET(stmm0), ehframe_dwarf_stmm0, + ehframe_dwarf_stmm0, -1U, debugserver_stmm0, NULL, NULL}, + {e_regSetFPU, fpu_stmm1, "stmm1", NULL, Vector, VectorOfUInt8, + FPU_SIZE_MMST(stmm1), AVX_OFFSET(stmm1), ehframe_dwarf_stmm1, + ehframe_dwarf_stmm1, -1U, debugserver_stmm1, NULL, NULL}, + {e_regSetFPU, fpu_stmm2, "stmm2", NULL, Vector, VectorOfUInt8, + FPU_SIZE_MMST(stmm2), AVX_OFFSET(stmm2), ehframe_dwarf_stmm2, + ehframe_dwarf_stmm2, -1U, debugserver_stmm2, NULL, NULL}, + {e_regSetFPU, fpu_stmm3, "stmm3", NULL, Vector, VectorOfUInt8, + FPU_SIZE_MMST(stmm3), AVX_OFFSET(stmm3), ehframe_dwarf_stmm3, + ehframe_dwarf_stmm3, -1U, debugserver_stmm3, NULL, NULL}, + {e_regSetFPU, fpu_stmm4, "stmm4", NULL, Vector, VectorOfUInt8, + FPU_SIZE_MMST(stmm4), AVX_OFFSET(stmm4), ehframe_dwarf_stmm4, + ehframe_dwarf_stmm4, -1U, debugserver_stmm4, NULL, NULL}, + {e_regSetFPU, fpu_stmm5, "stmm5", NULL, Vector, VectorOfUInt8, + 
FPU_SIZE_MMST(stmm5), AVX_OFFSET(stmm5), ehframe_dwarf_stmm5, + ehframe_dwarf_stmm5, -1U, debugserver_stmm5, NULL, NULL}, + {e_regSetFPU, fpu_stmm6, "stmm6", NULL, Vector, VectorOfUInt8, + FPU_SIZE_MMST(stmm6), AVX_OFFSET(stmm6), ehframe_dwarf_stmm6, + ehframe_dwarf_stmm6, -1U, debugserver_stmm6, NULL, NULL}, + {e_regSetFPU, fpu_stmm7, "stmm7", NULL, Vector, VectorOfUInt8, + FPU_SIZE_MMST(stmm7), AVX_OFFSET(stmm7), ehframe_dwarf_stmm7, + ehframe_dwarf_stmm7, -1U, debugserver_stmm7, NULL, NULL}, + + AVX512_K_REG_DEF(0), + AVX512_K_REG_DEF(1), + AVX512_K_REG_DEF(2), + AVX512_K_REG_DEF(3), + AVX512_K_REG_DEF(4), + AVX512_K_REG_DEF(5), + AVX512_K_REG_DEF(6), + AVX512_K_REG_DEF(7), + + ZMM_REG_DEF(0), + ZMM_REG_DEF(1), + ZMM_REG_DEF(2), + ZMM_REG_DEF(3), + ZMM_REG_DEF(4), + ZMM_REG_DEF(5), + ZMM_REG_DEF(6), + ZMM_REG_DEF(7), + ZMM_REG_DEF(8), + ZMM_REG_DEF(9), + ZMM_REG_DEF(10), + ZMM_REG_DEF(11), + ZMM_REG_DEF(12), + ZMM_REG_DEF(13), + ZMM_REG_DEF(14), + ZMM_REG_DEF(15), + ZMM_REG_DEF(16), + ZMM_REG_DEF(17), + ZMM_REG_DEF(18), + ZMM_REG_DEF(19), + ZMM_REG_DEF(20), + ZMM_REG_DEF(21), + ZMM_REG_DEF(22), + ZMM_REG_DEF(23), + ZMM_REG_DEF(24), + ZMM_REG_DEF(25), + ZMM_REG_DEF(26), + ZMM_REG_DEF(27), + ZMM_REG_DEF(28), + ZMM_REG_DEF(29), + ZMM_REG_DEF(30), + ZMM_REG_DEF(31), + + YMM_REG_ALIAS(0), + YMM_REG_ALIAS(1), + YMM_REG_ALIAS(2), + YMM_REG_ALIAS(3), + YMM_REG_ALIAS(4), + YMM_REG_ALIAS(5), + YMM_REG_ALIAS(6), + YMM_REG_ALIAS(7), + YMM_REG_ALIAS(8), + YMM_REG_ALIAS(9), + YMM_REG_ALIAS(10), + YMM_REG_ALIAS(11), + YMM_REG_ALIAS(12), + YMM_REG_ALIAS(13), + YMM_REG_ALIAS(14), + YMM_REG_ALIAS(15), + + XMM_REG_ALIAS(0), + XMM_REG_ALIAS(1), + XMM_REG_ALIAS(2), + XMM_REG_ALIAS(3), + XMM_REG_ALIAS(4), + XMM_REG_ALIAS(5), + XMM_REG_ALIAS(6), + XMM_REG_ALIAS(7), + XMM_REG_ALIAS(8), + XMM_REG_ALIAS(9), + XMM_REG_ALIAS(10), + XMM_REG_ALIAS(11), + XMM_REG_ALIAS(12), + XMM_REG_ALIAS(13), + XMM_REG_ALIAS(14), + XMM_REG_ALIAS(15), + +}; + + // Exception registers const DNBRegisterInfo DNBArchImplX86_64::g_exc_registers[] = { @@ -1662,6 +2061,10 @@ const size_t DNBArchImplX86_64::k_num_all_registers_no_avx = k_num_gpr_registers + k_num_fpu_registers_no_avx + k_num_exc_registers; const size_t DNBArchImplX86_64::k_num_all_registers_avx = k_num_gpr_registers + k_num_fpu_registers_avx + k_num_exc_registers; +const size_t DNBArchImplX86_64::k_num_fpu_registers_avx512f = + sizeof(g_fpu_registers_avx512f) / sizeof(DNBRegisterInfo); +const size_t DNBArchImplX86_64::k_num_all_registers_avx512f = + k_num_gpr_registers + k_num_fpu_registers_avx512f + k_num_exc_registers; //---------------------------------------------------------------------- // Register set definitions. 
The first definitions at register set index @@ -1681,6 +2084,13 @@ const DNBRegisterSetInfo DNBArchImplX86_64::g_reg_sets_avx[] = { {"Floating Point Registers", g_fpu_registers_avx, k_num_fpu_registers_avx}, {"Exception State Registers", g_exc_registers, k_num_exc_registers}}; +const DNBRegisterSetInfo DNBArchImplX86_64::g_reg_sets_avx512f[] = { + {"x86_64 Registers", NULL, k_num_all_registers_avx}, + {"General Purpose Registers", g_gpr_registers, k_num_gpr_registers}, + {"Floating Point Registers", g_fpu_registers_avx512f, + k_num_fpu_registers_avx512f}, + {"Exception State Registers", g_exc_registers, k_num_exc_registers}}; + // Total number of register sets for this architecture const size_t DNBArchImplX86_64::k_num_register_sets = sizeof(g_reg_sets_avx) / sizeof(DNBRegisterSetInfo); @@ -1702,6 +2112,8 @@ const DNBRegisterSetInfo * DNBArchImplX86_64::GetRegisterSetInfo(nub_size_t *num_reg_sets) { *num_reg_sets = k_num_register_sets; + if (CPUHasAVX512f() || FORCE_AVX_REGS) + return g_reg_sets_avx512f; if (CPUHasAVX() || FORCE_AVX_REGS) return g_reg_sets_avx; else @@ -1765,6 +2177,8 @@ bool DNBArchImplX86_64::GetRegisterValue(uint32_t set, uint32_t reg, case e_regSetFPU: if (reg > fpu_xmm15 && !(CPUHasAVX() || FORCE_AVX_REGS)) return false; + if (reg > fpu_ymm15 && !(CPUHasAVX512f() || FORCE_AVX_REGS)) + return false; switch (reg) { case fpu_fcw: @@ -1853,6 +2267,59 @@ bool DNBArchImplX86_64::GetRegisterValue(uint32_t set, uint32_t reg, memcpy((&value->value.uint8) + 16, &m_state.context.fpu.avx.__fpu_ymmh0 + (reg - fpu_ymm0), 16); return true; + case fpu_k0: + case fpu_k1: + case fpu_k2: + case fpu_k3: + case fpu_k4: + case fpu_k5: + case fpu_k6: + case fpu_k7: + memcpy((&value->value.uint8), + &m_state.context.fpu.avx512f.__fpu_k0 + (reg - fpu_k0), 8); + return true; + case fpu_zmm0: + case fpu_zmm1: + case fpu_zmm2: + case fpu_zmm3: + case fpu_zmm4: + case fpu_zmm5: + case fpu_zmm6: + case fpu_zmm7: + case fpu_zmm8: + case fpu_zmm9: + case fpu_zmm10: + case fpu_zmm11: + case fpu_zmm12: + case fpu_zmm13: + case fpu_zmm14: + case fpu_zmm15: + memcpy(&value->value.uint8, + &m_state.context.fpu.avx512f.__fpu_xmm0 + (reg - fpu_zmm0), 16); + memcpy((&value->value.uint8) + 16, + &m_state.context.fpu.avx512f.__fpu_ymmh0 + (reg - fpu_zmm0), 16); + memcpy((&value->value.uint8) + 32, + &m_state.context.fpu.avx512f.__fpu_zmmh0 + (reg - fpu_zmm0), 32); + return true; + case fpu_zmm16: + case fpu_zmm17: + case fpu_zmm18: + case fpu_zmm19: + case fpu_zmm20: + case fpu_zmm21: + case fpu_zmm22: + case fpu_zmm23: + case fpu_zmm24: + case fpu_zmm25: + case fpu_zmm26: + case fpu_zmm27: + case fpu_zmm28: + case fpu_zmm29: + case fpu_zmm30: + case fpu_zmm31: + memcpy(&value->value.uint8, + &m_state.context.fpu.avx512f.__fpu_zmm16 + (reg - fpu_zmm16), 64); + return true; } break; @@ -1919,6 +2386,8 @@ bool DNBArchImplX86_64::SetRegisterValue(uint32_t set, uint32_t reg, break; if (reg > fpu_xmm15 && !(CPUHasAVX() || FORCE_AVX_REGS)) return false; + if (reg > fpu_ymm15 && !(CPUHasAVX512f() || FORCE_AVX_REGS)) + return false; case e_regSetFPU: switch (reg) { case fpu_fcw: @@ -2019,6 +2488,59 @@ bool DNBArchImplX86_64::SetRegisterValue(uint32_t set, uint32_t reg, memcpy(&m_state.context.fpu.avx.__fpu_ymmh0 + (reg - fpu_ymm0), (&value->value.uint8) + 16, 16); return true; + case fpu_k0: + case fpu_k1: + case fpu_k2: + case fpu_k3: + case fpu_k4: + case fpu_k5: + case fpu_k6: + case fpu_k7: + memcpy(&m_state.context.fpu.avx512f.__fpu_k0 + (reg - fpu_k0), + &value->value.uint8, 8); + return true; + case fpu_zmm0: 
+ case fpu_zmm1: + case fpu_zmm2: + case fpu_zmm3: + case fpu_zmm4: + case fpu_zmm5: + case fpu_zmm6: + case fpu_zmm7: + case fpu_zmm8: + case fpu_zmm9: + case fpu_zmm10: + case fpu_zmm11: + case fpu_zmm12: + case fpu_zmm13: + case fpu_zmm14: + case fpu_zmm15: + memcpy(&m_state.context.fpu.avx512f.__fpu_xmm0 + (reg - fpu_zmm0), + &value->value.uint8, 16); + memcpy(&m_state.context.fpu.avx512f.__fpu_ymmh0 + (reg - fpu_zmm0), + &value->value.uint8 + 16, 16); + memcpy(&m_state.context.fpu.avx512f.__fpu_zmmh0 + (reg - fpu_zmm0), + &value->value.uint8 + 32, 32); + return true; + case fpu_zmm16: + case fpu_zmm17: + case fpu_zmm18: + case fpu_zmm19: + case fpu_zmm20: + case fpu_zmm21: + case fpu_zmm22: + case fpu_zmm23: + case fpu_zmm24: + case fpu_zmm25: + case fpu_zmm26: + case fpu_zmm27: + case fpu_zmm28: + case fpu_zmm29: + case fpu_zmm30: + case fpu_zmm31: + memcpy(&m_state.context.fpu.avx512f.__fpu_zmm16 + (reg - fpu_zmm16), + &value->value.uint8, 64); + return true; } break; @@ -2049,7 +2571,12 @@ bool DNBArchImplX86_64::SetRegisterValue(uint32_t set, uint32_t reg, uint32_t DNBArchImplX86_64::GetRegisterContextSize() { static uint32_t g_cached_size = 0; if (g_cached_size == 0) { - if (CPUHasAVX() || FORCE_AVX_REGS) { + if (CPUHasAVX512f() || FORCE_AVX_REGS) { + for (size_t i = 0; i < k_num_fpu_registers_avx512f; ++i) { + if (g_fpu_registers_avx512f[i].value_regs == NULL) + g_cached_size += g_fpu_registers_avx512f[i].size; + } + } else if (CPUHasAVX() || FORCE_AVX_REGS) { for (size_t i = 0; i < k_num_fpu_registers_avx; ++i) { if (g_fpu_registers_avx[i].value_regs == NULL) g_cached_size += g_fpu_registers_avx[i].size; @@ -2106,7 +2633,9 @@ nub_size_t DNBArchImplX86_64::GetRegisterContext(void *buf, // Walk around the gaps in the FPU regs memcpy(p, &m_state.context.fpu.no_avx.__fpu_fcw, 5); - p += 5; + // We read 5 bytes, but we skip 6 to account for __fpu_rsrv1 + // to match the g_fpu_registers_* tables. + p += 6; memcpy(p, &m_state.context.fpu.no_avx.__fpu_fop, 8); p += 8; memcpy(p, &m_state.context.fpu.no_avx.__fpu_dp, 6); @@ -2121,6 +2650,13 @@ nub_size_t DNBArchImplX86_64::GetRegisterContext(void *buf, p += 10; } + if(CPUHasAVX512f() || FORCE_AVX_REGS) { + for (size_t i = 0; i < 8; ++i) { + memcpy(p, &m_state.context.fpu.avx512f.__fpu_k0 + i, 8); + p += 8; + } + } + if (CPUHasAVX() || FORCE_AVX_REGS) { // Interleave the XMM and YMMH registers to make the YMM registers for (size_t i = 0; i < 16; ++i) { @@ -2129,6 +2665,16 @@ nub_size_t DNBArchImplX86_64::GetRegisterContext(void *buf, memcpy(p, &m_state.context.fpu.avx.__fpu_ymmh0 + i, 16); p += 16; } + if(CPUHasAVX512f() || FORCE_AVX_REGS) { + for (size_t i = 0; i < 16; ++i) { + memcpy(p, &m_state.context.fpu.avx512f.__fpu_zmmh0 + i, 32); + p += 32; + } + for (size_t i = 0; i < 16; ++i) { + memcpy(p, &m_state.context.fpu.avx512f.__fpu_zmm16 + i, 64); + p += 64; + } + } } else { // Copy the XMM registers in a single block memcpy(p, &m_state.context.fpu.no_avx.__fpu_xmm0, 16 * 16); @@ -2171,7 +2717,9 @@ nub_size_t DNBArchImplX86_64::SetRegisterContext(const void *buf, // Copy fcw through mxcsrmask as there is no padding memcpy(&m_state.context.fpu.no_avx.__fpu_fcw, p, 5); - p += 5; + // We wrote 5 bytes, but we skip 6 to account for __fpu_rsrv1 + // to match the g_fpu_registers_* tables. 
+ p += 6; memcpy(&m_state.context.fpu.no_avx.__fpu_fop, p, 8); p += 8; memcpy(&m_state.context.fpu.no_avx.__fpu_dp, p, 6); @@ -2186,6 +2734,13 @@ nub_size_t DNBArchImplX86_64::SetRegisterContext(const void *buf, p += 10; } + if(CPUHasAVX512f() || FORCE_AVX_REGS) { + for (size_t i = 0; i < 8; ++i) { + memcpy(&m_state.context.fpu.avx512f.__fpu_k0 + i, p, 8); + p += 8; + } + } + if (CPUHasAVX() || FORCE_AVX_REGS) { // Interleave the XMM and YMMH registers to make the YMM registers for (size_t i = 0; i < 16; ++i) { @@ -2194,6 +2749,16 @@ nub_size_t DNBArchImplX86_64::SetRegisterContext(const void *buf, memcpy(&m_state.context.fpu.avx.__fpu_ymmh0 + i, p, 16); p += 16; } + if(CPUHasAVX512f() || FORCE_AVX_REGS) { + for (size_t i = 0; i < 16; ++i) { + memcpy(&m_state.context.fpu.avx512f.__fpu_zmmh0 + i, p, 32); + p += 32; + } + for (size_t i = 0; i < 16; ++i) { + memcpy(&m_state.context.fpu.avx512f.__fpu_zmm16 + i, p, 64); + p += 64; + } + } } else { // Copy the XMM registers in a single block memcpy(&m_state.context.fpu.no_avx.__fpu_xmm0, p, 16 * 16); diff --git a/tools/debugserver/source/MacOSX/x86_64/DNBArchImplX86_64.h b/tools/debugserver/source/MacOSX/x86_64/DNBArchImplX86_64.h index 1b8a3c7da4b5b..ef2ba1ee09a6b 100644 --- a/tools/debugserver/source/MacOSX/x86_64/DNBArchImplX86_64.h +++ b/tools/debugserver/source/MacOSX/x86_64/DNBArchImplX86_64.h @@ -82,6 +82,12 @@ protected: static const size_t k_num_all_registers_avx; static const size_t k_num_register_sets; + typedef __x86_64_avx512f_state_t AVX512F; + static const DNBRegisterInfo g_fpu_registers_avx512f[]; + static const DNBRegisterSetInfo g_reg_sets_avx512f[]; + static const size_t k_num_fpu_registers_avx512f; + static const size_t k_num_all_registers_avx512f; + typedef enum RegisterSetTag { e_regSetALL = REGISTER_SET_ALL, e_regSetGPR, @@ -96,6 +102,7 @@ protected: e_regSetWordSizeFPU = sizeof(FPU) / sizeof(int), e_regSetWordSizeEXC = sizeof(EXC) / sizeof(int), e_regSetWordSizeAVX = sizeof(AVX) / sizeof(int), + e_regSetWordSizeAVX512f = sizeof(AVX512F) / sizeof(int), e_regSetWordSizeDBG = sizeof(DBG) / sizeof(int) } RegisterSetWordSize; @@ -106,6 +113,7 @@ protected: union { FPU no_avx; AVX avx; + AVX512F avx512f; } fpu; EXC exc; DBG dbg; diff --git a/tools/debugserver/source/MacOSX/x86_64/MachRegisterStatesX86_64.h b/tools/debugserver/source/MacOSX/x86_64/MachRegisterStatesX86_64.h index 60e61262ab694..fcb648dac59ef 100644 --- a/tools/debugserver/source/MacOSX/x86_64/MachRegisterStatesX86_64.h +++ b/tools/debugserver/source/MacOSX/x86_64/MachRegisterStatesX86_64.h @@ -22,6 +22,7 @@ #define __x86_64_EXCEPTION_STATE 6 #define __x86_64_DEBUG_STATE 11 #define __x86_64_AVX_STATE 17 +#define __x86_64_AVX512F_STATE 20 typedef struct { uint64_t __rax; @@ -188,6 +189,111 @@ typedef struct { __x86_64_xmm_reg __fpu_ymmh15; } __x86_64_avx_state_t; +typedef struct { uint8_t __ymm_reg[32]; } __x86_64_ymm_reg; +typedef struct { uint8_t __zmm_reg[64]; } __x86_64_zmm_reg; +typedef struct { uint8_t __opmask_reg[8]; } __x86_64_opmask_reg; + +typedef struct { + uint32_t __fpu_reserved[2]; + __x86_64_fp_control_t __fpu_fcw; + __x86_64_fp_status_t __fpu_fsw; + uint8_t __fpu_ftw; + uint8_t __fpu_rsrv1; + uint16_t __fpu_fop; + uint32_t __fpu_ip; + uint16_t __fpu_cs; + uint16_t __fpu_rsrv2; + uint32_t __fpu_dp; + uint16_t __fpu_ds; + uint16_t __fpu_rsrv3; + uint32_t __fpu_mxcsr; + uint32_t __fpu_mxcsrmask; + __x86_64_mmst_reg __fpu_stmm0; + __x86_64_mmst_reg __fpu_stmm1; + __x86_64_mmst_reg __fpu_stmm2; + __x86_64_mmst_reg __fpu_stmm3; + __x86_64_mmst_reg 
__fpu_stmm4; + __x86_64_mmst_reg __fpu_stmm5; + __x86_64_mmst_reg __fpu_stmm6; + __x86_64_mmst_reg __fpu_stmm7; + __x86_64_xmm_reg __fpu_xmm0; + __x86_64_xmm_reg __fpu_xmm1; + __x86_64_xmm_reg __fpu_xmm2; + __x86_64_xmm_reg __fpu_xmm3; + __x86_64_xmm_reg __fpu_xmm4; + __x86_64_xmm_reg __fpu_xmm5; + __x86_64_xmm_reg __fpu_xmm6; + __x86_64_xmm_reg __fpu_xmm7; + __x86_64_xmm_reg __fpu_xmm8; + __x86_64_xmm_reg __fpu_xmm9; + __x86_64_xmm_reg __fpu_xmm10; + __x86_64_xmm_reg __fpu_xmm11; + __x86_64_xmm_reg __fpu_xmm12; + __x86_64_xmm_reg __fpu_xmm13; + __x86_64_xmm_reg __fpu_xmm14; + __x86_64_xmm_reg __fpu_xmm15; + uint8_t __fpu_rsrv4[6 * 16]; + uint32_t __fpu_reserved1; + uint8_t __avx_reserved1[64]; + __x86_64_xmm_reg __fpu_ymmh0; + __x86_64_xmm_reg __fpu_ymmh1; + __x86_64_xmm_reg __fpu_ymmh2; + __x86_64_xmm_reg __fpu_ymmh3; + __x86_64_xmm_reg __fpu_ymmh4; + __x86_64_xmm_reg __fpu_ymmh5; + __x86_64_xmm_reg __fpu_ymmh6; + __x86_64_xmm_reg __fpu_ymmh7; + __x86_64_xmm_reg __fpu_ymmh8; + __x86_64_xmm_reg __fpu_ymmh9; + __x86_64_xmm_reg __fpu_ymmh10; + __x86_64_xmm_reg __fpu_ymmh11; + __x86_64_xmm_reg __fpu_ymmh12; + __x86_64_xmm_reg __fpu_ymmh13; + __x86_64_xmm_reg __fpu_ymmh14; + __x86_64_xmm_reg __fpu_ymmh15; + __x86_64_opmask_reg __fpu_k0; + __x86_64_opmask_reg __fpu_k1; + __x86_64_opmask_reg __fpu_k2; + __x86_64_opmask_reg __fpu_k3; + __x86_64_opmask_reg __fpu_k4; + __x86_64_opmask_reg __fpu_k5; + __x86_64_opmask_reg __fpu_k6; + __x86_64_opmask_reg __fpu_k7; + __x86_64_ymm_reg __fpu_zmmh0; + __x86_64_ymm_reg __fpu_zmmh1; + __x86_64_ymm_reg __fpu_zmmh2; + __x86_64_ymm_reg __fpu_zmmh3; + __x86_64_ymm_reg __fpu_zmmh4; + __x86_64_ymm_reg __fpu_zmmh5; + __x86_64_ymm_reg __fpu_zmmh6; + __x86_64_ymm_reg __fpu_zmmh7; + __x86_64_ymm_reg __fpu_zmmh8; + __x86_64_ymm_reg __fpu_zmmh9; + __x86_64_ymm_reg __fpu_zmmh10; + __x86_64_ymm_reg __fpu_zmmh11; + __x86_64_ymm_reg __fpu_zmmh12; + __x86_64_ymm_reg __fpu_zmmh13; + __x86_64_ymm_reg __fpu_zmmh14; + __x86_64_ymm_reg __fpu_zmmh15; + __x86_64_zmm_reg __fpu_zmm16; + __x86_64_zmm_reg __fpu_zmm17; + __x86_64_zmm_reg __fpu_zmm18; + __x86_64_zmm_reg __fpu_zmm19; + __x86_64_zmm_reg __fpu_zmm20; + __x86_64_zmm_reg __fpu_zmm21; + __x86_64_zmm_reg __fpu_zmm22; + __x86_64_zmm_reg __fpu_zmm23; + __x86_64_zmm_reg __fpu_zmm24; + __x86_64_zmm_reg __fpu_zmm25; + __x86_64_zmm_reg __fpu_zmm26; + __x86_64_zmm_reg __fpu_zmm27; + __x86_64_zmm_reg __fpu_zmm28; + __x86_64_zmm_reg __fpu_zmm29; + __x86_64_zmm_reg __fpu_zmm30; + __x86_64_zmm_reg __fpu_zmm31; + +} __x86_64_avx512f_state_t; + typedef struct { uint32_t __trapno; uint32_t __err; |
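As a side note on the register layout introduced above: the AVX512F thread state extends the existing AVX state, so the low 128 bits of zmm0–zmm15 still live in the old __fpu_xmm* fields, bits 128–255 in __fpu_ymmh*, and only the new upper 256 bits (__fpu_zmmh*) plus the whole zmm16–zmm31 registers are appended. The sketch below (not part of the patch; the struct and buffer names are illustrative placeholders, not debugserver APIs) shows how a full 512-bit value is assembled from those fragments, mirroring the memcpy pattern used for fpu_zmm0..fpu_zmm31 in GetRegisterValue/GetRegisterContext.

// Standalone sketch, assuming the split xmm / ymmh / zmmh storage shown in
// __x86_64_avx512f_state_t above. Not debugserver code.
#include <cstdint>
#include <cstring>
#include <cstdio>

struct Avx512fFragments {
  uint8_t xmm[16][16];    // low 128 bits of zmm0-15   (__fpu_xmm0  ... __fpu_xmm15)
  uint8_t ymmh[16][16];   // bits 128-255              (__fpu_ymmh0 ... __fpu_ymmh15)
  uint8_t zmmh[16][32];   // bits 256-511              (__fpu_zmmh0 ... __fpu_zmmh15)
  uint8_t zmm_hi[16][64]; // zmm16-31 stored whole     (__fpu_zmm16 ... __fpu_zmm31)
};

// Copy one full 512-bit register into out[64], interleaving the fragments for
// zmm0-15 and copying zmm16-31 as single 64-byte blocks.
static void AssembleZmm(const Avx512fFragments &s, unsigned reg, uint8_t out[64]) {
  if (reg < 16) {
    std::memcpy(out,      s.xmm[reg],  16);
    std::memcpy(out + 16, s.ymmh[reg], 16);
    std::memcpy(out + 32, s.zmmh[reg], 32);
  } else {
    std::memcpy(out, s.zmm_hi[reg - 16], 64);
  }
}

int main() {
  Avx512fFragments s = {};
  std::memset(s.xmm[0],  0x11, 16);
  std::memset(s.ymmh[0], 0x22, 16);
  std::memset(s.zmmh[0], 0x33, 32);

  uint8_t zmm0[64];
  AssembleZmm(s, 0, zmm0);
  std::printf("zmm0[0]=%02x zmm0[16]=%02x zmm0[32]=%02x\n",
              zmm0[0], zmm0[16], zmm0[32]); // expect 11 22 33
  return 0;
}

The same split explains the YMM_REG_ALIAS/XMM_REG_ALIAS entries in the register table: ymm0–ymm15 and xmm0–xmm15 are exposed as sub-views of the containing zmm registers rather than as separately stored state.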