Comparisons of PowerPC disassembly and decompilation

Here are some side-by-side comparisons of disassembly and decompiler output for PowerPC. Please maximize the window to see both columns simultaneously.

The following examples are displayed on this page:

Simple code

This simple function calculates the sum of the squares of the first N natural numbers. While the function logic is obvious by just looking at the decompiler output, the assembly listing has too much noise and requires studying it. The decompiler saves your time and allows you to concentrate on more exciting aspects of reverse engineering.

f:
 # In:  r3 = n.
 # Out: r3 = 0 when n <= 0 (signed), otherwise the sum of i*i for i = 0 .. n-1.
 # Stack slots, as offsets from r31 (r31 mirrors r1 once the frame exists):
 #   0x08 = copy of n (written but never read back in this listing)
 #   0x0C = loop counter i      0x10 = running sum
 #   0x14 = n (loop limit)      0x18 = return value
 .set back_chain, -0x20
 .set var_4, -4
                stw       r31, var_4(r1)          # save r31 at -4 from the incoming r1
                stwu      r1, back_chain(r1)      # allocate a 0x20-byte frame; old r1 stored as back chain
                mr        r31, r1                 # r31 = frame pointer
                stw       r3, 0x14(r31)           # spill n
                mr        r4, r3
                cmpwi     r3, 0                   # signed compare of n with 0
                stw       r4, 8(r31)              # second copy of n
                bgt       loc_30                  # n > 0: run the loop
                b         loc_24
 loc_24:                                          # n <= 0
                li        r3, 0
                stw       r3, 0x18(r31)           # return value = 0
                b         loc_88
 loc_30:                                          # loop initialisation
                li        r3, 0
                stw       r3, 0x10(r31)           # sum = 0
                stw       r3, 0xC(r31)            # i = 0
                b         loc_40
 loc_40:                                          # loop condition
                lwz       r3, 0x14(r31)           # r3 = n
                lwz       r4, 0xC(r31)            # r4 = i
                cmpw      r4, r3                  # signed compare of i with n
                bge       loc_7C                  # i >= n: leave the loop
                b         loc_54
 loc_54:                                          # loop body
                lwz       r3, 0xC(r31)            # r3 = i
                mullw     r3, r3, r3              # r3 = i * i (low 32 bits)
                lwz       r4, 0x10(r31)           # r4 = sum
                add       r3, r4, r3
                stw       r3, 0x10(r31)           # sum += i * i
                b         loc_6C
 loc_6C:                                          # loop increment
                lwz       r3, 0xC(r31)
                addi      r3, r3, 1
                stw       r3, 0xC(r31)            # i += 1
                b         loc_40
 loc_7C:                                          # loop finished
                lwz       r3, 0x10(r31)
                stw       r3, 0x18(r31)           # return value = sum
                b         loc_88
 loc_88:                                          # common exit
                lwz       r3, 0x18(r31)           # r3 = return value
                addi      r1, r1, 0x20            # release the frame
                lwz       r31, var_4(r1)          # reload r31 from the slot written in the prologue
                blr
 # End of       function f

Linear execution

The PowerPC processor has a number of instructions which can be used to avoid branches (for example cntlzw). The decompiler restores the conditional logic and makes code easier to understand.

# _DWORD c_eq_s(void)
# Calls c() and s() and returns 1 in r3 when the sign-extended low 16 bits of
# c()'s result equal s()'s result, otherwise 0. The comparison is done without
# a branch, using xor / cntlzw / srwi.
.globl _Z6c_eq_sv
_Z6c_eq_sv:

.set back_chain, -0x10
.set var_8, -8
.set var_4, -4
.set sender_lr, 4

                stwu      r1, back_chain(r1)      # allocate a 0x10-byte frame
                mflr      r0
                stw       r0, 0x10+sender_lr(r1)  # save the return address at old r1 + 4
                stw       r30, 0x10+var_8(r1)     # save r30 (holds c()'s result across the call to s)
                stw       r31, 0x10+var_4(r1)     # save r31 (used as frame pointer)
                mr        r31, r1
                bl        c
                mr        r9, r3
                extsh     r30, r9                 # r30 = low 16 bits of c()'s result, sign-extended
                bl        s
                mr        r9, r3
                xor       r9, r30, r9             # r9 == 0 exactly when the two values are equal
                cntlzw    r9, r9                  # leading-zero count: 32 only when r9 was 0
                srwi      r9, r9, 5               # 32 >> 5 = 1; any smaller count gives 0
                clrlwi    r9, r9, 24              # keep only the low 8 bits of the result
                mr        r3, r9                  # return value
                addi      r11, r31, 0x10          # r11 = stack pointer value before the prologue
                lwz       r0, 4(r11)
                mtlr      r0                      # restore the return address
                lwz       r30, -8(r11)            # restore r30
                lwz       r31, -4(r11)            # restore r31
                mr        r1, r11                 # release the frame
                blr
# End of        function c_eq_s(void)

64-bit comparison

64-bit comparison usually involves several compare and branch instructions which do not improve the code readability.

.globl i_ge_uh
i_ge_uh:
# Compares the 32-bit global i, sign-extended to 64 bits, with the two-word
# global uh using unsigned word compares (the word at uh+0 is compared first,
# i.e. treated as the more significant one).
# Returns 2 in r3 when uh is greater than the extended i, otherwise 1.
# NOTE(review): for a predicate named i_ge_uh one would expect 1/0 rather than
# 1/2 - confirm the constants at loc_7020/loc_7028 against the original binary.

.set back_chain, -0x10
.set var_4, -4

                stwu      r1, back_chain(r1)      # allocate a 0x10-byte frame
                stw       r31, 0x10+var_4(r1)     # save r31 (used as frame pointer)
                mr        r31, r1
                lis       r9, i@ha
                lwz       r9, i@l(r9)             # r9 = i
                mr        r8, r9                  # r8 = low word of the extended i
                srawi     r9, r9, 0x1F            # r9 = i >> 31 (arithmetic): all sign bits
                mr        r7, r9                  # r7 = high word of the extended i
                lis       r9, uh@ha
                addi      r9, r9, uh@l            # r9 = &uh
                lwz       r10, (uh+4 - uh)(r9)    # r10 = word at uh+4 (less significant word)
                lwz       r9, 0(r9)               # r9 = word at uh+0 (more significant word)
                cmplw     cr7, r9, r7             # unsigned compare of the high words
                bgt       cr7, loc_7028           # uh.high > i.high: uh is greater
                cmplw     cr7, r9, r7             # same compare repeated
                bne       cr7, loc_7020           # high words differ, so uh.high < i.high
                cmplw     cr7, r10, r8            # high words equal: compare the low words
                bgt       cr7, loc_7028           # uh.low > i.low: uh is greater
loc_7020:                                         # uh <= extended i
                li        r9, 1
                b         loc_702C
loc_7028:                                         # uh > extended i
                li        r9, 2
loc_702C:
                mr        r3, r9                  # return value
                addi      r11, r31, 0x10          # r11 = stack pointer value before the prologue
                lwz       r31, -4(r11)            # restore r31
                mr        r1, r11                 # release the frame
                blr
# End of      function i_ge_uh

System calls

System calls are always mysterious, but the decompiler helps you by recovering their names and arguments.

 mr        r3, r26 # set
                # Fragment of a larger function: fill a signal set, issue a raw
                # system call, create a thread, issue the same system call again
                # with different arguments, then destroy the thread attributes.
                # NOTE(review): system call 0xAE (174) with arguments
                # (2, set, oldset, 8) is presumably rt_sigprocmask on
                # Linux/PowerPC - confirm against the target's syscall table.
                bl .sigfillset
                li        r0, 0xAE                # system call number goes in r0
                li        r3, 2                   # 1st argument = 2
                mr        r4, r26                 # 2nd argument = r26, the set passed to sigfillset above
                mr        r5, r29                 # 3rd argument = r29
                li        r6, 8                   # 4th argument = 8
                sc                                # enter the kernel
                mfcr      r0                      # copy the condition register into r0 after the call
                lwz       r5, (off_F9A704C - dword_F9A7130)(r30) # sub_F9920A4 # start_routine
                mr        r4, r31 # attr
                mr        r6, r28 # arg
                addi      r3, r1, 0x180+var_54 # newthread
                bl .pthread_create
                li        r0, 0xAE                # same system call number as before
                mr        r26, r3                 # keep pthread_create's return value in r26
                mr        r4, r29                 # 2nd argument = r29 (the 3rd argument of the first call)
                li        r3, 2                   # 1st argument = 2
                li        r5, 0                   # 3rd argument = 0
                li        r6, 8                   # 4th argument = 8
                sc                                # enter the kernel
                mfcr      r0                      # copy the condition register into r0 after the call
                mr        r3, r31 # attr
                bl .pthread_attr_destroy

Compiler helpers

Compilers sometimes use helper functions; the decompiler knows the meaning of many of these helpers and uses that knowledge to simplify the code.

.globl lldiv # weak
lldiv:
# In:  r3 = pointer to a 16-byte result area, r5:r6 = first 64-bit operand,
#      r7:r8 = second 64-bit operand.
# Out: r3 = the same pointer; the __divdi3 result is stored at offsets 0/4 and
#      the __moddi3 result at offsets 8/0xC of the result area.
# r26-r29 and r31 hold the operands and the pointer across the helper calls;
# r30 is saved and restored but not otherwise modified in this listing.

.set back_chain, -0x30
.set var_18, -0x18
.set var_14, -0x14
.set var_10, -0x10
.set var_C, -0xC
.set var_8, -8
.set var_4, -4
.set sender_lr, 4

                stwu      r1, back_chain(r1)      # allocate a 0x30-byte frame
                mflr      r0
                stw       r28, 0x30+var_10(r1)
                mr        r28, r5                 # r28:r29 = first operand
                stw       r29, 0x30+var_C(r1)
                mr        r29, r6
                stw       r31, 0x30+var_4(r1)
                mr        r5, r7                  # __divdi3 2nd operand = r7:r8
                mr        r31, r3                 # r31 = result pointer
                mr        r6, r8
                mr        r3, r28                 # __divdi3 1st operand = r28:r29
                mr        r4, r29
                stw       r0, 0x30+sender_lr(r1)  # save the return address at old r1 + 4
                stw       r26, 0x30+var_18(r1)
                mr        r26, r7                 # r26:r27 = second operand, kept for __moddi3
                stw       r27, 0x30+var_14(r1)
                mr        r27, r8
                stw       r30, 0x30+var_8(r1)
                bl        __divdi3
                stw       r3, 0(r31)              # result[0] = first word of the quotient
                mr        r5, r26                 # reload the second operand for __moddi3
                stw       r4, 4(r31)              # result[4] = second word of the quotient
                mr        r6, r27
                mr        r3, r28                 # reload the first operand
                mr        r4, r29
                bl        __moddi3
                lwz       r0, 0x30+sender_lr(r1)
                stw       r3, 8(r31)              # result[8] = first word of the remainder
                mr        r3, r31                 # return the result pointer
                stw       r4, 0xC(r31)            # result[0xC] = second word of the remainder
                mtlr      r0                      # restore the return address
                lwz       r26, 0x30+var_18(r1)
                lwz       r27, 0x30+var_14(r1)
                lwz       r28, 0x30+var_10(r1)
                lwz       r29, 0x30+var_C(r1)
                lwz       r30, 0x30+var_8(r1)
                lwz       r31, 0x30+var_4(r1)
                addi      r1, r1, 0x30            # release the frame
                blr
# End of     function lldiv

Floating point arithmetic

The PowerPC processor contains a number of complex floating point instructions which perform several operations at once. It is not easy to recover an expression from the assembler code by hand, but the decompiler does it for you.

                .globl _x2y2m1f
_x2y2m1f:
                # In:  f1 = x, f2 = y.
                # Out: f1 = (x - c) * (x + c) + y * y, where c is the single-precision
                #      constant loaded from unk_20 (presumably 1.0, giving
                #      x*x + y*y - 1 as the function name suggests - confirm).
                lis	 r9, unk_20@ha            # r9 = high-adjusted half of &unk_20
                lfs	 f0, unk_20@l(r9)         # f0 = c
                fsub	 f12, f1, f0              # f12 = x - c
                fadd	 f0, f1, f0               # f0  = x + c
                fmul	 f0, f12, f0              # f0  = (x - c) * (x + c)
                fmadd	 f1, f2, f2, f0           # f1  = y * y + f0 (fmadd frD, frA, frC, frB = frA*frC + frB)
                blr
# End of	function _x2y2m1f

Magic multiplication/division operations

Compilers can decompose a multiplication/division instruction into a sequence of cheaper instructions (additions, shifts, etc). This example demonstrates how the decompiler recognizes such sequences and folds them back into the original operation.

# __int64 __fastcall int_u_mod_10()
# Calls u() and returns its 32-bit result modulo 10 (unsigned) as a 64-bit
# value: r3 = 0, r4 = remainder. The division by 10 is done by multiplying
# with the constant 0xCCCCCCCD and shifting instead of using a divide.
                .globl int_u_mod_10
int_u_mod_10:

.set back_chain, -0x20
.set var_C, -0xC
.set var_8, -8
.set var_4, -4
.set sender_lr, 4

                stwu      r1, back_chain(r1)      # allocate a 0x20-byte frame
                mflr      r0
                stw       r0, 0x20+sender_lr(r1)  # save the return address at old r1 + 4
                stw       r29, 0x20+var_C(r1)
                stw       r30, 0x20+var_8(r1)
                stw       r31, 0x20+var_4(r1)
                mr        r31, r1                 # r31 = frame pointer
                bl        u
                mr        r10, r3                 # r10 = x = u()
                lis       r9, -0x3334             # r9 = 0xCCCC0000
                ori       r9, r9, 0xCCCD # 0xCCCCCCCD
                mulhwu    r9, r10, r9             # r9 = high 32 bits of x * 0xCCCCCCCD (unsigned)
                srwi      r9, r9, 3               # r9 = (x * 0xCCCCCCCD) >> 35 = x / 10
                mulli     r9, r9, 0xA             # r9 = (x / 10) * 10
                subf      r9, r9, r10             # r9 = x - (x / 10) * 10 = x % 10
                mr        r30, r9                 # r30 = remainder
                li        r29, 0                  # r29 = 0, the other word of the 64-bit result
                mr        r9, r29
                mr        r10, r30
                mr        r3, r9                  # r3 = 0
                mr        r4, r10                 # r4 = remainder
                addi      r11, r31, 0x20          # r11 = stack pointer value before the prologue
                lwz       r0, 4(r11)
                mtlr      r0                      # restore the return address
                lwz       r29, -0xC(r11)
                lwz       r30, -8(r11)
                lwz       r31, -4(r11)
                mr        r1, r11                 # release the frame
                blr
# End of      function int_u_mod_10

VLE code

This example demonstrates that the decompiler can handle VLE code without problems.

sub_498E:
# In:  r3 = start address, r4 = byte count.
# Out: r3 = sum of the bytes in [r3, r3 + r4 - 1], truncated to 16 bits after
#      every addition.
                se_mr     r6, r3                  # r6 = current address
                se_mr     r7, r4
                se_add    r7, r6
                se_subi   r7, 1                   # r7 = start + count - 1 = address of the last byte
                se_li     r5, 0                   # r5 = running sum
                se_b      loc_49A2                # enter the loop at its condition
# ---------------------------------------------------------------------------
loc_499A:                                         # loop body
                se_lbz    r4, 0(r6)               # r4 = byte at the current address (zero-extended)
                se_add    r5, r4                  # sum += byte
                se_extzh  r5                      # keep only the low 16 bits of the sum
                se_addi   r6, 1                   # advance to the next byte
loc_49A2:                                         # loop condition
                se_cmpl   r6, r7                  # unsigned compare: current address vs last address
                se_ble    loc_499A                # continue while current <= last
                se_mr     r7, r5
                se_mr     r3, r7                  # return the sum
                se_blr
# End of function sub_498E

Interactive decompiler

The pseudocode is not something static because the decompiler is interactive the same way as IDA. You can change variable types and names, change function prototypes, add comments and more. The example above presents the result after these modifications.

Surely the result is not ideal, and there is a lot of room for improvement, but we hope that you got the idea.

And you can compare the result with the original: http://lxr.free-electrons.com/source/fs/fat/namei_msdos.c#L224

# int __fastcall msdos_add_entry(struct inode *_dir, const unsigned __int8 *name, int is_dir, int is_hid,
int cluster, struct timespec *_ts, struct fat_slot_info *_sinfo)
msdos_add_entry:

.set back_chain, -0x50
.set de, -0x48
.set date, -0x28
.set time, -0x26
.set var_14, -0x14
.set sender_lr, 4

                mflr      r0
                stw       r0, sender_lr(r1)
                bl        _mcount
                stwu      r1, back_chain(r1)
                mflr      r0
                stmw      r27, 0x50+var_14(r1)
                stw       r0, 0x50+sender_lr(r1)
                subfic    r5, r5, 0
                mr.       r30, r6
                lwz       r0, 0(r4)
                subfe     r10, r10, r10
                mr        r31, r3
                lwz       r11, 4(r4)
                lwz       r3, 0x1C(r3)
                clrrwi    r10, r10, 4
                mr        r29, r7
                lhz       r5, 8(r4)
                addi      r10, r10, 0x20
                mr        r28, r8
                lbz       r6, 0xA(r4)
                mr        r27, r9
                lwz       r3, 0x2B8(r3)
                stw       r0, 0x50+de(r1)
                stw       r11, 0x50+de.name+4(r1)
                sth       r5, 0x50+de.name+8(r1)
                stb       r6, 0x50+de.name+0xA(r1)
                stb       r10, 0x50+de.attr(r1)
                beq       loc_728
                ori       r10, r10, 2
                li        r9, 0
                li        r7, 0
                addi      r6, r1, 0x50+date
                stb       r10, 0x50+de.attr(r1)
                addi      r5, r1, 0x50+time