binary-analysis-patterns
Compare original and translation side by side
🇺🇸
Original
English🇨🇳
Translation
ChineseBinary Analysis Patterns
二进制分析模式
Comprehensive patterns and techniques for analyzing compiled binaries, understanding assembly code, and reconstructing program logic.
本文涵盖了分析编译二进制文件、理解汇编代码以及重构程序逻辑的各类模式与技术。
Disassembly Fundamentals
反汇编基础
x86-64 Instruction Patterns
x86-64 指令模式
Function Prologue/Epilogue
函数序言/尾声
asm
; Standard prologue
push rbp ; Save base pointer
mov rbp, rsp ; Set up stack frame
sub rsp, 0x20 ; Allocate local variables
; Leaf function (no calls)
; May skip frame pointer setup
sub rsp, 0x18 ; Just allocate locals
; Standard epilogue
mov rsp, rbp ; Restore stack pointer
pop rbp ; Restore base pointer
ret
; Leave instruction (equivalent)
leave ; mov rsp, rbp; pop rbp
retasm
; Standard prologue
push rbp ; Save base pointer
mov rbp, rsp ; Set up stack frame
sub rsp, 0x20 ; Allocate local variables
; Leaf function (no calls)
; May skip frame pointer setup
sub rsp, 0x18 ; Just allocate locals
; Standard epilogue
mov rsp, rbp ; Restore stack pointer
pop rbp ; Restore base pointer
ret
; Leave instruction (equivalent)
leave ; mov rsp, rbp; pop rbp
retCalling Conventions
调用约定
System V AMD64 (Linux, macOS)
asm
; Arguments: RDI, RSI, RDX, RCX, R8, R9, then stack
; Return: RAX (and RDX for 128-bit)
; Caller-saved: RAX, RCX, RDX, RSI, RDI, R8-R11
; Callee-saved: RBX, RBP, R12-R15
; Example: func(a, b, c, d, e, f, g)
mov rdi, [a] ; 1st arg
mov rsi, [b] ; 2nd arg
mov rdx, [c] ; 3rd arg
mov rcx, [d] ; 4th arg
mov r8, [e] ; 5th arg
mov r9, [f] ; 6th arg
push [g] ; 7th arg on stack
call funcMicrosoft x64 (Windows)
asm
; Arguments: RCX, RDX, R8, R9, then stack
; Shadow space: 32 bytes reserved on stack
; Return: RAX
; Example: func(a, b, c, d, e)
sub rsp, 0x28 ; Shadow space + alignment
mov rcx, [a] ; 1st arg
mov rdx, [b] ; 2nd arg
mov r8, [c] ; 3rd arg
mov r9, [d] ; 4th arg
mov [rsp+0x20], [e] ; 5th arg on stack
call func
add rsp, 0x28System V AMD64 (Linux, macOS)
asm
; Arguments: RDI, RSI, RDX, RCX, R8, R9, then stack
; Return: RAX (and RDX for 128-bit)
; Caller-saved: RAX, RCX, RDX, RSI, RDI, R8-R11
; Callee-saved: RBX, RBP, R12-R15
; Example: func(a, b, c, d, e, f, g)
mov rdi, [a] ; 1st arg
mov rsi, [b] ; 2nd arg
mov rdx, [c] ; 3rd arg
mov rcx, [d] ; 4th arg
mov r8, [e] ; 5th arg
mov r9, [f] ; 6th arg
push [g] ; 7th arg on stack
call funcMicrosoft x64 (Windows)
asm
; Arguments: RCX, RDX, R8, R9, then stack
; Shadow space: 32 bytes reserved on stack
; Return: RAX
; Example: func(a, b, c, d, e)
sub rsp, 0x28 ; Shadow space + alignment
mov rcx, [a] ; 1st arg
mov rdx, [b] ; 2nd arg
mov r8, [c] ; 3rd arg
mov r9, [d] ; 4th arg
mov [rsp+0x20], [e] ; 5th arg on stack
call func
add rsp, 0x28ARM Assembly Patterns
ARM 汇编模式
ARM64 (AArch64) Calling Convention
ARM64 (AArch64) 调用约定
asm
; Arguments: X0-X7
; Return: X0 (and X1 for 128-bit)
; Frame pointer: X29
; Link register: X30
; Function prologue
stp x29, x30, [sp, #-16]! ; Save FP and LR
mov x29, sp ; Set frame pointer
; Function epilogue
ldp x29, x30, [sp], #16 ; Restore FP and LR
retasm
; Arguments: X0-X7
; Return: X0 (and X1 for 128-bit)
; Frame pointer: X29
; Link register: X30
; Function prologue
stp x29, x30, [sp, #-16]! ; Save FP and LR
mov x29, sp ; Set frame pointer
; Function epilogue
ldp x29, x30, [sp], #16 ; Restore FP and LR
retARM32 Calling Convention
ARM32 调用约定
asm
; Arguments: R0-R3, then stack
; Return: R0 (and R1 for 64-bit)
; Link register: LR (R14)
; Function prologue
push {fp, lr}
add fp, sp, #4
; Function epilogue
pop {fp, pc} ; Return by popping PCasm
; Arguments: R0-R3, then stack
; Return: R0 (and R1 for 64-bit)
; Link register: LR (R14)
; Function prologue
push {fp, lr}
add fp, sp, #4
; Function epilogue
pop {fp, pc} ; Return by popping PCControl Flow Patterns
控制流模式
Conditional Branches
条件分支
asm
; if (a == b)
cmp eax, ebx
jne skip_block
; ... if body ...
skip_block:
; if (a < b) - signed
cmp eax, ebx
jge skip_block ; Jump if greater or equal
; ... if body ...
skip_block:
; if (a < b) - unsigned
cmp eax, ebx
jae skip_block ; Jump if above or equal
; ... if body ...
skip_block:asm
; if (a == b)
cmp eax, ebx
jne skip_block
; ... if body ...
skip_block:
; if (a < b) - signed
cmp eax, ebx
jge skip_block ; Jump if greater or equal
; ... if body ...
skip_block:
; if (a < b) - unsigned
cmp eax, ebx
jae skip_block ; Jump if above or equal
; ... if body ...
skip_block:Loop Patterns
循环模式
asm
; for (int i = 0; i < n; i++)
xor ecx, ecx ; i = 0
loop_start:
cmp ecx, [n] ; i < n
jge loop_end
; ... loop body ...
inc ecx ; i++
jmp loop_start
loop_end:
; while (condition)
jmp loop_check
loop_body:
; ... body ...
loop_check:
cmp eax, ebx
jl loop_body
; do-while
loop_body:
; ... body ...
cmp eax, ebx
jl loop_bodyasm
; for (int i = 0; i < n; i++)
xor ecx, ecx ; i = 0
loop_start:
cmp ecx, [n] ; i < n
jge loop_end
; ... loop body ...
inc ecx ; i++
jmp loop_start
loop_end:
; while (condition)
jmp loop_check
loop_body:
; ... body ...
loop_check:
cmp eax, ebx
jl loop_body
; do-while
loop_body:
; ... body ...
cmp eax, ebx
jl loop_bodySwitch Statement Patterns
开关语句模式
asm
; Jump table pattern
mov eax, [switch_var]
cmp eax, max_case
ja default_case
jmp [jump_table + eax*8]
; Sequential comparison (small switch)
cmp eax, 1
je case_1
cmp eax, 2
je case_2
cmp eax, 3
je case_3
jmp default_caseasm
; Jump table pattern
mov eax, [switch_var]
cmp eax, max_case
ja default_case
jmp [jump_table + eax*8]
; Sequential comparison (small switch)
cmp eax, 1
je case_1
cmp eax, 2
je case_2
cmp eax, 3
je case_3
jmp default_caseData Structure Patterns
数据结构模式
Array Access
数组访问
asm
; array[i] - 4-byte elements
mov eax, [rbx + rcx*4] ; rbx=base, rcx=index
; array[i] - 8-byte elements
mov rax, [rbx + rcx*8]
; Multi-dimensional array[i][j]
; arr[i][j] = base + (i * cols + j) * element_size
imul eax, [cols]
add eax, [j]
mov edx, [rbx + rax*4]asm
; array[i] - 4-byte elements
mov eax, [rbx + rcx*4] ; rbx=base, rcx=index
; array[i] - 8-byte elements
mov rax, [rbx + rcx*8]
; Multi-dimensional array[i][j]
; arr[i][j] = base + (i * cols + j) * element_size
imul eax, [cols]
add eax, [j]
mov edx, [rbx + rax*4]Structure Access
结构体访问
c
struct Example {
int a; // offset 0
char b; // offset 4
// padding // offset 5-7
long c; // offset 8
short d; // offset 16
};asm
; Accessing struct fields
mov rdi, [struct_ptr]
mov eax, [rdi] ; s->a (offset 0)
movzx eax, byte [rdi+4] ; s->b (offset 4)
mov rax, [rdi+8] ; s->c (offset 8)
movzx eax, word [rdi+16] ; s->d (offset 16)c
struct Example {
int a; // offset 0
char b; // offset 4
// padding // offset 5-7
long c; // offset 8
short d; // offset 16
};asm
; Accessing struct fields
mov rdi, [struct_ptr]
mov eax, [rdi] ; s->a (offset 0)
movzx eax, byte [rdi+4] ; s->b (offset 4)
mov rax, [rdi+8] ; s->c (offset 8)
movzx eax, word [rdi+16] ; s->d (offset 16)Linked List Traversal
链表遍历
asm
; while (node != NULL)
list_loop:
test rdi, rdi ; node == NULL?
jz list_done
; ... process node ...
mov rdi, [rdi+8] ; node = node->next (assuming next at offset 8)
jmp list_loop
list_done:asm
; while (node != NULL)
list_loop:
test rdi, rdi ; node == NULL?
jz list_done
; ... process node ...
mov rdi, [rdi+8] ; node = node->next (assuming next at offset 8)
jmp list_loop
list_done:Common Code Patterns
常见代码模式
String Operations
字符串操作
asm
; strlen pattern
xor ecx, ecx
strlen_loop:
cmp byte [rdi + rcx], 0
je strlen_done
inc ecx
jmp strlen_loop
strlen_done:
; ecx contains length
; strcpy pattern
strcpy_loop:
mov al, [rsi]
mov [rdi], al
test al, al
jz strcpy_done
inc rsi
inc rdi
jmp strcpy_loop
strcpy_done:
; memcpy using rep movsb
mov rdi, dest
mov rsi, src
mov rcx, count
rep movsbasm
; strlen pattern
xor ecx, ecx
strlen_loop:
cmp byte [rdi + rcx], 0
je strlen_done
inc ecx
jmp strlen_loop
strlen_done:
; ecx contains length
; strcpy pattern
strcpy_loop:
mov al, [rsi]
mov [rdi], al
test al, al
jz strcpy_done
inc rsi
inc rdi
jmp strcpy_loop
strcpy_done:
; memcpy using rep movsb
mov rdi, dest
mov rsi, src
mov rcx, count
rep movsbArithmetic Patterns
算术模式
asm
; Multiplication by constant
; x * 3
lea eax, [rax + rax*2]
; x * 5
lea eax, [rax + rax*4]
; x * 10
lea eax, [rax + rax*4] ; x * 5
add eax, eax ; * 2
; Division by power of 2 (signed)
mov eax, [x]
cdq ; Sign extend to EDX:EAX
and edx, 7 ; For divide by 8
add eax, edx ; Adjust for negative
sar eax, 3 ; Arithmetic shift right
; Modulo power of 2
and eax, 7 ; x % 8asm
; Multiplication by constant
; x * 3
lea eax, [rax + rax*2]
; x * 5
lea eax, [rax + rax*4]
; x * 10
lea eax, [rax + rax*4] ; x * 5
add eax, eax ; * 2
; Division by power of 2 (signed)
mov eax, [x]
cdq ; Sign extend to EDX:EAX
and edx, 7 ; For divide by 8
add eax, edx ; Adjust for negative
sar eax, 3 ; Arithmetic shift right
; Modulo power of 2
and eax, 7 ; x % 8Bit Manipulation
位操作
asm
; Test specific bit
test eax, 0x80 ; Test bit 7
jnz bit_set
; Set bit
or eax, 0x10 ; Set bit 4
; Clear bit
and eax, ~0x10 ; Clear bit 4
; Toggle bit
xor eax, 0x10 ; Toggle bit 4
; Count leading zeros
bsr eax, ecx ; Bit scan reverse
xor eax, 31 ; Convert to leading zeros
; Population count (popcnt)
popcnt eax, ecx ; Count set bitsasm
; Test specific bit
test eax, 0x80 ; Test bit 7
jnz bit_set
; Set bit
or eax, 0x10 ; Set bit 4
; Clear bit
and eax, ~0x10 ; Clear bit 4
; Toggle bit
xor eax, 0x10 ; Toggle bit 4
; Count leading zeros
bsr eax, ecx ; Bit scan reverse
xor eax, 31 ; Convert to leading zeros
; Population count (popcnt)
popcnt eax, ecx ; Count set bitsDecompilation Patterns
反编译模式
Variable Recovery
变量恢复
asm
; Local variable at rbp-8
mov qword [rbp-8], rax ; Store to local
mov rax, [rbp-8] ; Load from local
; Stack-allocated array
lea rax, [rbp-0x40] ; Array starts at rbp-0x40
mov [rax], edx ; array[0] = value
mov [rax+4], ecx ; array[1] = valueasm
; Local variable at rbp-8
mov qword [rbp-8], rax ; Store to local
mov rax, [rbp-8] ; Load from local
; Stack-allocated array
lea rax, [rbp-0x40] ; Array starts at rbp-0x40
mov [rax], edx ; array[0] = value
mov [rax+4], ecx ; array[1] = valueFunction Signature Recovery
函数签名恢复
asm
; Identify parameters by register usage
func:
; rdi used as first param (System V)
mov [rbp-8], rdi ; Save param to local
; rsi used as second param
mov [rbp-16], rsi
; Identify return by RAX at end
mov rax, [result]
retasm
; Identify parameters by register usage
func:
; rdi used as first param (System V)
mov [rbp-8], rdi ; Save param to local
; rsi used as second param
mov [rbp-16], rsi
; Identify return by RAX at end
mov rax, [result]
retType Recovery
类型恢复
asm
; 1-byte operations suggest char/bool
movzx eax, byte [rdi] ; Zero-extend byte
movsx eax, byte [rdi] ; Sign-extend byte
; 2-byte operations suggest short
movzx eax, word [rdi]
movsx eax, word [rdi]
; 4-byte operations suggest int/float
mov eax, [rdi]
movss xmm0, [rdi] ; Float
; 8-byte operations suggest long/double/pointer
mov rax, [rdi]
movsd xmm0, [rdi] ; Doubleasm
; 1-byte operations suggest char/bool
movzx eax, byte [rdi] ; Zero-extend byte
movsx eax, byte [rdi] ; Sign-extend byte
; 2-byte operations suggest short
movzx eax, word [rdi]
movsx eax, word [rdi]
; 4-byte operations suggest int/float
mov eax, [rdi]
movss xmm0, [rdi] ; Float
; 8-byte operations suggest long/double/pointer
mov rax, [rdi]
movsd xmm0, [rdi] ; DoubleGhidra Analysis Tips
Ghidra 分析技巧
Improving Decompilation
改进反编译结果
java
// In Ghidra scripting
// Fix function signature
Function func = getFunctionAt(toAddr(0x401000));
func.setReturnType(IntegerDataType.dataType, SourceType.USER_DEFINED);
// Create structure type
StructureDataType struct = new StructureDataType("MyStruct", 0);
struct.add(IntegerDataType.dataType, "field_a", null);
struct.add(PointerDataType.dataType, "next", null);
// Apply to memory
createData(toAddr(0x601000), struct);java
// In Ghidra scripting
// Fix function signature
Function func = getFunctionAt(toAddr(0x401000));
func.setReturnType(IntegerDataType.dataType, SourceType.USER_DEFINED);
// Create structure type
StructureDataType struct = new StructureDataType("MyStruct", 0);
struct.add(IntegerDataType.dataType, "field_a", null);
struct.add(PointerDataType.dataType, "next", null);
// Apply to memory
createData(toAddr(0x601000), struct);Pattern Matching Scripts
模式匹配脚本
python
undefinedpython
undefinedFind all calls to dangerous functions
Find all calls to dangerous functions
for func in currentProgram.getFunctionManager().getFunctions(True):
for ref in getReferencesTo(func.getEntryPoint()):
if func.getName() in ["strcpy", "sprintf", "gets"]:
print(f"Dangerous call at {ref.getFromAddress()}")
undefinedfor func in currentProgram.getFunctionManager().getFunctions(True):
for ref in getReferencesTo(func.getEntryPoint()):
if func.getName() in ["strcpy", "sprintf", "gets"]:
print(f"Dangerous call at {ref.getFromAddress()}")
undefinedIDA Pro Patterns
IDA Pro 模式
IDAPython Analysis
IDAPython 分析
python
import idaapi
import idautils
import idcpython
import idaapi
import idautils
import idcFind all function calls
Find all function calls
def find_calls(func_name):
for func_ea in idautils.Functions():
for head in idautils.Heads(func_ea, idc.find_func_end(func_ea)):
if idc.print_insn_mnem(head) == "call":
target = idc.get_operand_value(head, 0)
if idc.get_func_name(target) == func_name:
print(f"Call to {func_name} at {hex(head)}")
def find_calls(func_name):
for func_ea in idautils.Functions():
for head in idautils.Heads(func_ea, idc.find_func_end(func_ea)):
if idc.print_insn_mnem(head) == "call":
target = idc.get_operand_value(head, 0)
if idc.get_func_name(target) == func_name:
print(f"Call to {func_name} at {hex(head)}")
Rename functions based on strings
Rename functions based on strings
def auto_rename():
for s in idautils.Strings():
for xref in idautils.XrefsTo(s.ea):
func = idaapi.get_func(xref.frm)
if func and "sub_" in idc.get_func_name(func.start_ea):
# Use string as hint for naming
pass
undefineddef auto_rename():
for s in idautils.Strings():
for xref in idautils.XrefsTo(s.ea):
func = idaapi.get_func(xref.frm)
if func and "sub_" in idc.get_func_name(func.start_ea):
# Use string as hint for naming
pass
undefinedBest Practices
最佳实践
Analysis Workflow
分析工作流
- Initial triage: File type, architecture, imports/exports
- String analysis: Identify interesting strings, error messages
- Function identification: Entry points, exports, cross-references
- Control flow mapping: Understand program structure
- Data structure recovery: Identify structs, arrays, globals
- Algorithm identification: Crypto, hashing, compression
- Documentation: Comments, renamed symbols, type definitions
- 初始分类:文件类型、架构、导入/导出表
- 字符串分析:识别有趣的字符串、错误信息
- 函数识别:入口点、导出函数、交叉引用
- 控制流映射:理解程序结构
- 数据结构恢复:识别结构体、数组、全局变量
- 算法识别:加密、哈希、压缩算法
- 文档记录:注释、重命名符号、类型定义
Common Pitfalls
常见陷阱
- Optimizer artifacts: Code may not match source structure
- Inline functions: Functions may be expanded inline
- Tail call optimization: instead of
jmp+callret - Dead code: Unreachable code from optimization
- Position-independent code: RIP-relative addressing
- 优化器产物:代码结构可能与源代码不符
- 内联函数:函数可能被内联展开
- 尾调用优化:使用而非
jmp+callret - 死代码:优化产生的不可达代码
- 位置无关代码:RIP相对寻址