Here are some side-by-side comparisons of disassembly and decompiler output for PowerPC. Please maximize the window to see both columns simultaneously.
The following examples are displayed on this page:
This simple function calculates the sum of the squares of the first N natural numbers. While the function logic is obvious from the decompiler output, the assembly listing contains too much noise and has to be studied carefully. The decompiler saves you time and lets you concentrate on more exciting aspects of reverse engineering.
# f: unoptimized disassembly of the sum-of-squares example.
# The argument arrives in r3 and the result is returned in r3; every
# intermediate value is kept in the stack frame addressed through r31.
f:
.set back_chain, -0x20
.set var_4, -4
# Save r31 below the current stack pointer, then allocate a 0x20-byte frame.
stw r31, var_4(r1)
stwu r1, back_chain(r1)
mr r31, r1
# Spill the argument to frame offset 0x14 and test it against zero.
stw r3, 0x14(r31)
mr r4, r3
cmpwi r3, 0
stw r4, 8(r31)
# Argument > 0: go to the loop setup; otherwise fall into the "return 0" path.
bgt loc_30
b loc_24
loc_24:
# Argument <= 0: store 0 in the result slot (0x18).
li r3, 0
stw r3, 0x18(r31)
b loc_88
loc_30:
# Zero both the accumulator (0x10) and the loop counter (0xC).
li r3, 0
stw r3, 0x10(r31)
stw r3, 0xC(r31)
b loc_40
loc_40:
# Loop test: leave the loop once counter >= argument (signed compare).
lwz r3, 0x14(r31)
lwz r4, 0xC(r31)
cmpw r4, r3
bge loc_7C
b loc_54
loc_54:
# Loop body: accumulator += counter * counter.
lwz r3, 0xC(r31)
mullw r3, r3, r3
lwz r4, 0x10(r31)
add r3, r4, r3
stw r3, 0x10(r31)
b loc_6C
loc_6C:
# Increment the counter and re-test.
lwz r3, 0xC(r31)
addi r3, r3, 1
stw r3, 0xC(r31)
b loc_40
loc_7C:
# Loop finished: copy the accumulator into the result slot.
lwz r3, 0x10(r31)
stw r3, 0x18(r31)
b loc_88
loc_88:
# Common exit: load the result, release the frame, restore r31.
lwz r3, 0x18(r31)
addi r1, r1, 0x20
lwz r31, var_4(r1)
blr
# End of function f
// Returns the sum of i * i for every integer i in [0, a1).
// A zero or negative a1 yields 0, matching the listing's
// `cmpwi r3, 0` / `bgt` guard (only a strictly positive argument
// enters the loop).
int __fastcall f(int a1)
{
  int i; // [sp+Ch] [-14h]@3
  int v3; // [sp+10h] [-10h]@3

  if ( a1 <= 0 )
    return 0;
  v3 = 0;
  for ( i = 0; i < a1; ++i )
    v3 += i * i;
  return v3;
}
The PowerPC processor has a number of instructions that can be used to avoid branches (for example, cntlzw). The decompiler restores the conditional logic and makes the code easier to understand.
# c_eq_s: calls c() and s() and returns 1 in r3 when the two results are
# equal, 0 otherwise, without using a conditional branch.
# _DWORD c_eq_s(void)
.globl _Z6c_eq_sv
_Z6c_eq_sv:
.set back_chain, -0x10
.set var_8, -8
.set var_4, -4
.set sender_lr, 4
# Prologue: allocate a 0x10-byte frame, save LR, r30 and r31.
stwu r1, back_chain(r1)
mflr r0
stw r0, 0x10+sender_lr(r1)
stw r30, 0x10+var_8(r1)
stw r31, 0x10+var_4(r1)
mr r31, r1
bl c
mr r9, r3
# Sign-extend the low halfword of c()'s result and keep it in r30 across
# the next call.
extsh r30, r9
bl s
mr r9, r3
# Branch-free equality: the xor is 0 only when both values match;
# cntlzw of 0 is 32, and 32 >> 5 is 1 (any non-zero xor gives 0).
xor r9, r30, r9
cntlzw r9, r9
srwi r9, r9, 5
# Keep only the low 8 bits of the flag.
clrlwi r9, r9, 24
mr r3, r9
# Epilogue: restore LR, r30, r31 and the caller's stack pointer.
addi r11, r31, 0x10
lwz r0, 4(r11)
mtlr r0
lwz r30, -8(r11)
lwz r31, -4(r11)
mr r1, r11
blr
# End of function c_eq_s(void)
// Returns whether the value returned by c() equals the value returned by s().
// c() is called first; its result is held in v0 across the call to s().
bool c_eq_s(void)
{
int v0; // r30@1
v0 = c();
return v0 == s();
}
64-bit comparisons usually involve several compare and branch instructions, which do not improve code readability.
# i_ge_uh: compares the two-word value stored at uh against the word i
# widened to two words, and returns 1 or 2 in r3.
.globl i_ge_uh
i_ge_uh:
.set back_chain, -0x10
.set var_4, -4
stwu r1, back_chain(r1)
stw r31, 0x10+var_4(r1)
mr r31, r1
# Load i into r8 and build its upper word in r7 by replicating the sign bit.
lis r9, i@ha
lwz r9, i@l(r9)
mr r8, r9
srawi r9, r9, 0x1F
mr r7, r9
# Load the two words of uh: offset 0 into r9, offset 4 into r10.
lis r9, uh@ha
addi r9, r9, uh@l
lwz r10, (uh+4 - uh)(r9)
lwz r9, 0(r9)
# Compare the words at offset 0 first (unsigned): greater -> result 2,
# different (i.e. smaller) -> result 1, equal -> decide on the second words.
cmplw cr7, r9, r7
bgt cr7, loc_7028
cmplw cr7, r9, r7
bne cr7, loc_7020
cmplw cr7, r10, r8
bgt cr7, loc_7028
loc_7020:
# uh is not greater than the widened i.
li r9, 1
b loc_702C
loc_7028:
# uh is greater than the widened i.
li r9, 2
loc_702C:
mr r3, r9
addi r11, r31, 0x10
lwz r31, -4(r11)
mr r1, r11
blr
# End of function i_ge_uh
signed int i_ge_uh()
{
signed int v0; // r9@2 7029 TYPED
if ( uh unsigned __int64)i )
v0 = 1;
else
v0 = 2;
return v0;
}
A system call is always mysterious, but the decompiler helps you by showing its name and arguments.
mr r3, r26 # set
bl .sigfillset
li r0, 0xAE
li r3, 2
mr r4, r26
mr r5, r29
li r6, 8
sc
mfcr r0
lwz r5, (off_F9A704C - dword_F9A7130)(r30) # sub_F9920A4 # start_routine
mr r4, r31 # attr
mr r6, r28 # arg
addi r3, r1, 0x180+var_54 # newthread
bl .pthread_create
li r0, 0xAE
mr r26, r3
mr r4, r29
li r3, 2
li r5, 0
li r6, 8
sc
mfcr r0
mr r3, r31 # attr
bl .pthread_attr_destroy
...
sigset_t v36; // [sp+8h] [-178h]@47 F992C04 TYPED
sigset_t v37; // [sp+88h] [-F8h]@47 F992BEC TYPED
pthread_attr_t v38; // [sp+108h] [-78h]@47 F992BC4 TYPED
__int16 v39; // [sp+12Ch] [-54h]@47 F992C1C
...
_sigfillset(&v37);
v29 = linux_syscall(__NR_rt_sigprocmask, 2, &v37, &v36);
v30 = _pthread_create((pthread_t *)&v39, &v38, (void *(*)(void *))0x93C10018, v11);
v31 = linux_syscall(__NR_rt_sigprocmask, 2, &v36, 0);
_pthread_attr_destroy(&v38);
Compilers sometimes use helper functions; the decompiler knows the meaning of many of these helpers and uses that knowledge to simplify the code.
# lldiv: calls the __divdi3 and __moddi3 helpers on the same operand pairs
# and stores both two-word results through the pointer received in r3,
# which is also the return value.
.globl lldiv # weak
lldiv:
.set back_chain, -0x30
.set var_18, -0x18
.set var_14, -0x14
.set var_10, -0x10
.set var_C, -0xC
.set var_8, -8
.set var_4, -4
.set sender_lr, 4
stwu r1, back_chain(r1)
mflr r0
# Keep the first operand pair (r5:r6) in r28:r29, the second pair (r7:r8)
# in r26:r27 and the destination pointer (r3) in r31 across the calls;
# the register saves are interleaved with the argument setup.
stw r28, 0x30+var_10(r1)
mr r28, r5
stw r29, 0x30+var_C(r1)
mr r29, r6
stw r31, 0x30+var_4(r1)
mr r5, r7
mr r31, r3
mr r6, r8
mr r3, r28
mr r4, r29
stw r0, 0x30+sender_lr(r1)
stw r26, 0x30+var_18(r1)
mr r26, r7
stw r27, 0x30+var_14(r1)
mr r27, r8
stw r30, 0x30+var_8(r1)
# __divdi3(r3:r4, r5:r6); its r3:r4 result goes to offsets 0 and 4.
bl __divdi3
stw r3, 0(r31)
mr r5, r26
stw r4, 4(r31)
mr r6, r27
mr r3, r28
mr r4, r29
# __moddi3 on the same operands; its r3:r4 result goes to offsets 8 and 0xC.
bl __moddi3
lwz r0, 0x30+sender_lr(r1)
stw r3, 8(r31)
# Return the destination pointer.
mr r3, r31
stw r4, 0xC(r31)
mtlr r0
lwz r26, 0x30+var_18(r1)
lwz r27, 0x30+var_14(r1)
lwz r28, 0x30+var_10(r1)
lwz r29, 0x30+var_C(r1)
lwz r30, 0x30+var_8(r1)
lwz r31, 0x30+var_4(r1)
addi r1, r1, 0x30
blr
# End of function lldiv
// Stores the quotient a3 / a4 in result[0] and the remainder a3 % a4 in
// result[1], then returns result. a2 is not referenced by the body.
__int64 *__fastcall lldiv(__int64 *result, int a2, __int64 a3, __int64 a4)
{
*result = a3 / a4;
result[1] = a3 % a4;
return result;
}
The PowerPC processor contains a number of complex floating-point instructions which perform several operations at once. Recovering an expression from the assembly code is not easy for a human, but it is for the decompiler.
# _x2y2m1f: floating-point expression built from the inputs in f1 and f2
# and a single-precision constant loaded from unk_20; the result is left in f1.
.globl _x2y2m1f
_x2y2m1f:
# f0 = constant loaded from unk_20.
lis r9, unk_20@ha
lfs f0, unk_20@l(r9)
# f12 = f1 - constant, f0 = f1 + constant, then f0 = f12 * f0.
fsub f12, f1, f0
fadd f0, f1, f0
fmul f0, f12, f0
# fmadd frD, frA, frC, frB computes frA * frC + frB, so f1 = f1 * f2 + f0.
fmadd f1, f1, f2, f0
blr
# End of function _x2y2m1f
// Returns a1 * a2 + (a1 - 1.0) * (a1 + 1.0).
// The final instruction of the listing is `fmadd f1, f1, f2, f0`, and fmadd
// computes frA * frC + frB, so the product (a1 - 1.0) * (a1 + 1.0) held in
// f0 is the addend and a1 * a2 is the multiplied pair.
// NOTE(review): the symbol name suggests x*x + y*y - 1, which would need
// `fmadd f1, f2, f2, f0`; this expression follows the listing as shown --
// confirm against the original binary.
double __fastcall x2y2m1f(double a1, double a2)
{
  return a1 * a2 + (a1 - 1.0) * (a1 + 1.0);
}
Compilers can decompose a multiplication or division instruction into a sequence of cheaper instructions (additions, shifts, etc.). This example demonstrates how the decompiler recognizes such sequences and folds them back into the original operation.
# int_u_mod_10: calls u() and returns its result modulo 10 as a two-word
# value in r3:r4, using a multiply-high by a constant instead of a divide.
# __int64 __fastcall int_u_mod_10()
.globl int_u_mod_10
int_u_mod_10:
.set back_chain, -0x20
.set var_C, -0xC
.set var_8, -8
.set var_4, -4
.set sender_lr, 4
stwu r1, back_chain(r1)
mflr r0
stw r0, 0x20+sender_lr(r1)
stw r29, 0x20+var_C(r1)
stw r30, 0x20+var_8(r1)
stw r31, 0x20+var_4(r1)
mr r31, r1
bl u
mr r10, r3
# r9 = 0xCCCCCCCD; taking the high word of the unsigned product and
# shifting it right by 3 yields the quotient of r10 divided by 10.
lis r9, -0x3334
ori r9, r9, 0xCCCD # 0xCCCCCCCD
mulhwu r9, r10, r9
srwi r9, r9, 3
# remainder = value - quotient * 10
mulli r9, r9, 0xA
subf r9, r9, r10
mr r30, r9
# The remainder ends up in r4 and a zero word in r3.
li r29, 0
mr r9, r29
mr r10, r30
mr r3, r9
mr r4, r10
addi r11, r31, 0x20
lwz r0, 4(r11)
mtlr r0
lwz r29, -0xC(r11)
lwz r30, -8(r11)
lwz r31, -4(r11)
mr r1, r11
blr
# End of function int_u_mod_10
// Returns the remainder of u() divided by 10 (0xAu).
__int64 __fastcall int_u_mod_10()
{
return u() % 0xAu;
}
This example demonstrates that the decompiler can handle VLE code without problems.
# sub_498E (VLE encoding): adds up the bytes from the address in r3 through
# r3 + r4 - 1, truncating the running sum to 16 bits after every addition,
# and returns the sum in r3.
sub_498E:
# r6 = current pointer, r7 = address of the last byte (r3 + r4 - 1), r5 = sum.
se_mr r6, r3
se_mr r7, r4
se_add r7, r6
se_subi r7, 1
se_li r5, 0
se_b loc_49A2
# ---------------------------------------------------------------------------
loc_499A:
# Loop body: add the next byte, zero-extend the sum from 16 bits, advance.
se_lbz r4, 0(r6)
se_add r5, r4
se_extzh r5
se_addi r6, 1
loc_49A2:
# Continue while the current pointer <= last-byte address (unsigned compare).
se_cmpl r6, r7
se_ble loc_499A
se_mr r7, r5
se_mr r3, r7
se_blr
# End of function sub_498E
// Returns the sum of the a2 bytes starting at a1, truncated to 16 bits
// after every addition. The loop runs while the cursor is at or before the
// last byte (&a1[a2 - 1]), matching the listing's `se_cmpl` / `se_ble` test.
int __fastcall sub_498E(unsigned __int8 *a1, int a2)
{
  unsigned __int8 *v2; // r6@1 498F TYPED
  int v3; // r5@1 4997

  v2 = a1;
  v3 = 0;
  while ( v2 <= &a1[a2 - 1] )
    v3 = (unsigned __int16)(v3 + *v2++);
  return v3;
}
The pseudocode is not static: the decompiler is interactive in the same way IDA is. You can change variable types and names, change function prototypes, add comments, and more. The following example presents the result after such modifications.
Surely the result is not ideal, and there is a lot of room for improvement, but we hope that you got the idea.
And you can compare the result with the original: http://lxr.free-electrons.com/source/fs/fat/namei_msdos.c#L224
# msdos_add_entry: fills a directory-entry structure (`de`) on the stack,
# passes it to fat_add_entries, and on success copies two words from the
# timestamp argument into the inode before calling fat_sync_inode or
# __mark_inode_dirty. Arguments arrive in r3..r9.
# int __fastcall msdos_add_entry(struct inode *_dir, const unsigned __int8 *name, int is_dir, int is_hid,
int cluster, struct timespec *_ts, struct fat_slot_info *_sinfo)
msdos_add_entry:
.set back_chain, -0x50
.set de, -0x48
.set date, -0x28
.set time, -0x26
.set var_14, -0x14
.set sender_lr, 4
mflr r0
stw r0, sender_lr(r1)
bl _mcount
stwu r1, back_chain(r1)
mflr r0
stmw r27, 0x50+var_14(r1)
stw r0, 0x50+sender_lr(r1)
# subfic/subfe pair: r10 becomes 0 when r5 (is_dir) is zero, -1 otherwise.
subfic r5, r5, 0
# Copy r6 (is_hid) to r30 and set cr0 from it for the beq below.
mr. r30, r6
lwz r0, 0(r4)
subfe r10, r10, r10
mr r31, r3
lwz r11, 4(r4)
lwz r3, 0x1C(r3)
# Clearing the low 4 bits and adding 0x20 yields 0x20 or 0x10.
clrrwi r10, r10, 4
mr r29, r7
lhz r5, 8(r4)
addi r10, r10, 0x20
mr r28, r8
lbz r6, 0xA(r4)
mr r27, r9
lwz r3, 0x2B8(r3)
# Copy 11 bytes from the name argument (4 + 4 + 2 + 1) into de.name.
stw r0, 0x50+de(r1)
stw r11, 0x50+de.name+4(r1)
sth r5, 0x50+de.name+8(r1)
stb r6, 0x50+de.name+0xA(r1)
stb r10, 0x50+de.attr(r1)
# is_hid == 0: take the path at loc_728 that leaves the attribute byte as is.
beq loc_728
# is_hid != 0: additionally set bit 0x02 in the attribute byte.
ori r10, r10, 2
li r9, 0
li r7, 0
addi r6, r1, 0x50+date
stb r10, 0x50+de.attr(r1)
addi r5, r1, 0x50+time
mr r4, r8
stb r9, 0x50+de.lcase(r1)
bl fat_time_unix2fat
lhz r9, 0x50+time(r1)
li r10, 0
sth r10, 0x50+de.adate(r1)
sth r9, 0x50+de.time(r1)
lhz r9, 0x50+date(r1)
sth r10, 0x50+de.cdate(r1)
sth r10, 0x50+de.ctime(r1)
stb r10, 0x50+de.ctime_cs(r1)
sth r9, 0x50+de.date(r1)
loc_698:
# Store the low and high halfwords of r29 (cluster) byte-reversed.
addi r10, r1, 0x50+de.start
srawi r9, r29, 0x10
sthbrx r29, r0, r10
addi r10, r1, 0x50+de.starthi
mr r6, r27
sthbrx r9, r0, r10
li r5, 1
li r9, 0
addi r4, r1, 0x50+de
mr r3, r31
stw r9, 0x50+de.size(r1)
bl fat_add_entries
# A non-zero result is returned unchanged through loc_710.
mr. r30, r3
bne loc_710
# Copy the two words at r28 (timestamp argument) into inode offsets
# 0x48/0x50 and 0x4C/0x54.
lwz r10, 0(r28)
lwz r11, 4(r28)
stw r10, 0x48(r31)
stw r10, 0x50(r31)
stw r11, 0x4C(r31)
stw r11, 0x54(r31)
# Two flag tests (masks 0x90 and 0x41): if either has a bit set, call
# fat_sync_inode, otherwise __mark_inode_dirty.
lwz r9, 0x1C(r31)
lwz r9, 0x34(r9)
andi. r10, r9, 0x90
bne loc_704
lwz r9, 0xC(r31)
andi. r10, r9, 0x41
beq loc_768
loc_704:
mr r3, r31 # struct inode *
li r30, 0
bl fat_sync_inode
loc_710:
# Common exit: return r30.
mr r3, r30
lwz r0, 0x50+sender_lr(r1)
lmw r27, 0x50+var_14(r1)
addi r1, r1, 0x50
mtlr r0
blr
# ---------------------------------------------------------------------------
loc_728:
# is_hid == 0 path: r30 is known to be zero here and is stored as the zero value.
li r7, 0
addi r6, r1, 0x50+date
stb r30, 0x50+de.lcase(r1)
addi r5, r1, 0x50+time
mr r4, r8
bl fat_time_unix2fat
li r9, 0
sth r30, 0x50+de.adate(r1)
stb r9, 0x50+de.ctime_cs(r1)
lhz r9, 0x50+time(r1)
sth r30, 0x50+de.cdate(r1)
sth r9, 0x50+de.time(r1)
lhz r9, 0x50+date(r1)
sth r30, 0x50+de.ctime(r1)
sth r9, 0x50+de.date(r1)
b loc_698
# ---------------------------------------------------------------------------
loc_768:
# Neither flag test matched: call __mark_inode_dirty with r4 = 7.
mr r3, r31 # struct inode *
li r4, 7
bl __mark_inode_dirty
mr r3, r3
lwz r0, 0x50+sender_lr(r1)
lmw r27, 0x50+var_14(r1)
addi r1, r1, 0x50
mtlr r0
blr
# End of function msdos_add_entry
// Builds an on-stack msdos_dir_entry for `name` and adds it to `dir` through
// fat_add_entries(). A non-zero fat_add_entries() result is returned as is.
// On success the function copies *ts into dir's mtime and ctime, then calls
// either fat_sync_inode() or _mark_inode_dirty(), and returns 0.
int __fastcall msdos_add_entry(struct inode *dir, const unsigned __int8 *name, int is_dir, int is_hid,
int cluster, struct timespec *ts, struct fat_slot_info *sinfo)
{
__int16 zero; // r30@1 601
bool not_hidden; // cr34@1 601 TYPED
int v10; // r11@1 611
signed int v11; // r10@1 619 TYPED
__int16 v13; // r5@1 621
__u8 node_attrs; // r10@1 625 TYPED
__u8 v16; // r6@1 62D TYPED
struct msdos_sb_info *sbi; // r3@1 635 TYPED
int err; // r30@3 6C9 TYPED
__time_t sec; // r10@4 6D1 TYPED
__syscall_slong_t nsec; // r11@4 6D5 TYPED
struct msdos_dir_entry de; // [sp+8h] [-48h]@1 639 TYPED
__le16 date; // [sp+28h] [-28h]@2 670 TYPED
__le16 time; // [sp+2Ah] [-26h]@2 670 TYPED
// `zero` is only read on the not_hidden path, where is_hid is 0.
zero = is_hid;
not_hidden = is_hid == 0;
v10 = *((_DWORD *)name + 1);
// v11 is 0 when is_dir is 0 and -16 otherwise.
v11 = (unsigned int)is_dir <= 0 ? 0 : -16;
v13 = *((_WORD *)name + 4);
node_attrs = v11 + ATTR_ARCH; // ATTR_ARCH or ATTR_DIR
v16 = name[10];
sbi = (struct msdos_sb_info *)dir->i_sb->s_fs_info;
*(_DWORD *)&de.name[0] = *(_DWORD *)name; // memcpy(&de.name[0], name, 12);
*(_DWORD *)&de.name[4] = v10; // ...
*(_WORD *)&de.name[8] = v13; // ...
de.name[10] = v16;
de.attr = node_attrs;
// Both branches call fat_time_unix2fat() and zero lcase, adate, cdate, ctime
// and ctime_cs; only the else branch adds ATTR_HIDDEN to the attributes.
if ( not_hidden )
{
de.lcase = zero; // = 0
fat_time_unix2fat(sbi, ts, &time, &date, 0);
de.adate = zero;
de.ctime_cs = 0;
de.cdate = zero;
de.time = time;
de.ctime = zero;
de.date = date;
}
else
{
de.attr = node_attrs | ATTR_HIDDEN;
de.lcase = 0;
fat_time_unix2fat(sbi, ts, &time, &date, 0);
de.adate = 0;
de.time = time;
de.cdate = 0;
de.ctime = 0;
de.ctime_cs = 0;
de.date = date;
}
// The cluster number is split into two 16-bit halves, each stored byte-swapped.
de.start = _byteswap_ushort(cluster);
de.starthi = _byteswap_ushort(HIWORD(cluster));
de.size = 0;
err = fat_add_entries(dir, &de, 1, sinfo);
if ( err )
return err;
// Success: set both mtime and ctime of the directory to *ts.
sec = ts->tv_sec;
nsec = ts->tv_nsec;
dir->i_mtime.tv_sec = ts->tv_sec;
dir->i_ctime.tv_sec = sec;
dir->i_mtime.tv_nsec = nsec;
dir->i_ctime.tv_nsec = nsec;
if ( dir->i_sb->s_flags & (MS_DIRSYNC|MS_SYNCHRONOUS) || dir->i_flags & (S_DIRSYNC|S_SYNC) )
{
// The result of fat_sync_inode() is not used; 0 is returned.
err = 0;
fat_sync_inode(dir);
return err;
}
_mark_inode_dirty(dir);
return 0;
}