1 of 7

Comparisons of ARM disassembly and decompilation

Here are some side-by-side comparisons of disassembly and decompiler output for ARM. Please maximize the window to see both columns simultaneously.

The following examples are displayed on this page:

Simple case
64-bit arithmetics
Conditional instructions
Conditional instructions - 2
Complex instructions
Compiler helper functions
Immediate constants
Position independent code

Simple case

Let's start with a very simple function. It accepts a pointer to a structure and zeroes out its first three fields. While the function logic is obvious by just looking at the decompiler output, the assembly listing has too much noise and requires studying it.

The decompiler saves your time and allows you to concentrate on more exciting aspects of reverse engineering.

; struct_result *__fastcall sub_210DC(struct_result *result)                 
                                         
 var_10          = -0x10                                                      
 var_4           = -4                                                         
                                                                              
                 MOV     R12, SP                                              
                 STMFD   SP!, {R0}                                            
                 STMFD   SP!, {R12,LR}                                        
                 SUB     SP, SP, #4                                           
                 LDR     R2, [SP,#0x10+var_4]
                 MOV     R3, #0
                 STR     R3, [R2]
                 LDR     R3, [SP,#0x10+var_4]
                 ADD     R2, R3, #4
                 MOV     R3, #0
                 STR     R3, [R2]
                 LDR     R3, [SP,#0x10+var_4]
                 ADD     R2, R3, #8
                 MOV     R3, #0
                 STR     R3, [R2]
                 LDR     R3, [SP,#0x10+var_4]
                 STR     R3, [SP,#0x10+var_10]
                 LDR     R0, [SP,#0x10+var_10]
                 ADD     SP, SP, #4
                 LDMFD   SP, {SP,LR}
                 BX      LR
 ; End of function sub_210DC

struct_result *__fastcall sub_210DC(struct_result *result)
{
  result->dword0 = 0;
  result->dword4 = 0;
  result->dword8 = 0;
  return result;
}

64-bit arithmetics

Sorry for the long code snippet; ARM code tends to be longer than x86 code. This makes our comparison even more impressive: look at how concise the decompiler output is!

 ; bool __cdecl uh_gt_uc()                                                    
                 EXPORT _uh_gt_uc__YA_NXZ                                     
 _uh_gt_uc__YA_NXZ                       ; DATA XREF: .pdata:$T7452o          
                                                                              
 var_2C          = -0x2C                                                      
 var_28          = -0x28                                                      
 var_24          = -0x24                                                      
 var_20          = -0x20                                                      
 var_1C          = -0x1C                                                      
 var_18          = -0x18                                                      
 var_14          = -0x14                                                      
 var_10          = -0x10                                                      
 var_C           = -0xC                                                       
 var_8           = -8                                                         
 var_4           = -4                                                         
                                                                              
                 STR     LR, [SP,#var_4]! ; $M7441                            
                                         ; $LN8@uh_gt_uc                      
                 SUB     SP, SP, #0x28                                        

 $M7449
                 BL      uh
                 STR     R1, [SP,#0x2C+var_24]
                 STR     R0, [SP,#0x2C+var_28]
                 BL      uc
                 STRB    R0, [SP,#0x2C+var_20]
                 LDRB    R3, [SP,#0x2C+var_20]
                 STR     R3, [SP,#0x2C+var_1C]
                 LDR     R1, [SP,#0x2C+var_1C]
                 LDR     R3, [SP,#0x2C+var_1C]
                 MOV     R2, R3,ASR#31
                 LDR     R3, [SP,#0x2C+var_28]
                 STR     R3, [SP,#0x2C+var_18]
                 LDR     R3, [SP,#0x2C+var_24]
                 STR     R3, [SP,#0x2C+var_14]
                 LDR     R3, [SP,#0x2C+var_18]
                 STR     R3, [SP,#0x2C+var_10]
                 STR     R1, [SP,#0x2C+var_C]
                 LDR     R3, [SP,#0x2C+var_14]
                 CMP     R3, R2
                 BCC     $LN3_8

 loc_6AC
                 BHI     $LN5_0

 loc_6B0
                 LDR     R2, [SP,#0x2C+var_10]
                 LDR     R3, [SP,#0x2C+var_C]
                 CMP     R2, R3
                 BLS     $LN3_8

 $LN5_0
                 MOV     R3, #1
                 STR     R3, [SP,#0x2C+var_8]
                 B       $LN4_8
 ; ---------------------------------------------------------------------------

 $LN3_8
                                         ; uh_gt_uc(void)+68j
                 MOV     R3, #0
                 STR     R3, [SP,#0x2C+var_8]

 $LN4_8
                 LDR     R3, [SP,#0x2C+var_8]
                 AND     R3, R3, #0xFF
                 STRB    R3, [SP,#0x2C+var_2C]
                 LDRB    R0, [SP,#0x2C+var_2C]
                 ADD     SP, SP, #0x28
                 LDR     PC, [SP+4+var_4],#4
 ; End of function uh_gt_uc(void)

bool __fastcall uh_gt_uc()
{
  unsigned __int64 v0; // ST04_8@1

  v0 = uh();
  return v0 > uc();
}

Conditional instructions

The ARM processor has conditional instructions that can shorten the code but require close attention from the reader. This case is very simple; just note that there is a pair of instructions: MOVNE and LDREQSH. Only one of the two is executed on any given run. This is how a simple if-then-else looks in ARM.

The pseudocode shows it much better and does not require any explanations.

A quiz question: did you notice that MOVNE loads zero to R0? (because I didn't:)

Also note that in the disassembly listing we see var_8 but the location really used is var_A, which corresponds to v4.

; int __cdecl ReadShort(void *, unsigned __int32 offset, int whence)         
 ReadShort                                                                    
                                                                              
 whence          = -0x18                                                      
 var_A           = -0xA                                                       
 var_8           = -8                                                         
                                                                              
                 STMFD   SP!, {R4,LR}                                         
                 SUB     SP, SP, #0x10   ; whence                             
                 MOV     R4, #0
                 ADD     R3, SP, #0x18+var_8
                 STRH    R4, [R3,#-2]!
                 STR     R2, [SP,#0x18+whence] ; whence
                 MOV     R2, R3          ; buffer
                 MOV     R3, #2          ; len
                 BL      ReadData
                 CMP     R0, R4
                 MOVNE   R0, R4
                 LDREQSH R0, [SP,#0x18+var_A]
                 ADD     SP, SP, #0x10
                 LDMFD   SP!, {R4,PC}
 ; End of function ReadShort

int __cdecl ReadShort(void *a1, unsigned __int32 offset, int whence)
{
  int result; // r0@2
  __int16 v4; // [sp+Eh] [bp-Ah]@1

  v4 = 0;
  if ( ReadData(a1, offset, &v4, 2u, whence) )
    result = 0;
  else
    result = v4;
  return result;
}

Conditional instructions - 2

Look, the decompiler output is longer! This is a rare case when the pseudocode is longer than the disassembly listing, but it is for a good cause: to keep it readable. There are so many conditional instructions here that it is very easy to misunderstand the dependencies. For example, did you notice that the first MOVEQ may use the condition codes set by CMP? The subtle detail is that CMPNE may be skipped, so the condition codes set by CMP may reach the MOVEQs.

The decompiler represented it perfectly well. I renamed some variables and set their types, but this was an easy task.

; signed int __fastcall get_next_byte(entry_t *entry)
 get_next_byte                           ; DATA XREF: sub_3BC+30o
                                         ;
                 LDR     R2, [R0,#4]
                 CMP     R2, #0
                 LDRNE   R3, [R0]
                 LDRNEB  R1, [R3],#1
                 CMPNE   R1, #0
                 MOVEQ   R1, #1
                 STREQ   R1, [R0,#0xC]
                 MOVEQ   R0, 0xFFFFFFFF
                 MOVEQ   PC, LR
                 SUB     R2, R2, #1
                 STR     R2, [R0,#4]
                 STR     R3, [R0]
                 MOV     R0, R1
                 RET
 ; End of function get_next_byte

signed int __fastcall get_next_byte(entry_t *entry)
{
  signed int chr; // r1@0
  unsigned __int8 *ptr; // r3@0
  int count; // r2@1
  char done; // zf@1
  signed int result; // r0@4

  count = entry->count;
  done = count == 0;
  if ( count )
  {
    ptr = entry->ptr + 1;
    chr = *entry->ptr;
    done = chr == 0;
  }
  if ( done )
  {
    entry->done = 1;
    result = -1;
  }
  else
  {
    entry->count = count - 1;
    entry->ptr = ptr;
    result = chr;
  }
  return result;
}

Complex instructions

Conditional instructions are just part of the story. ARM is also famous for having a plethora of data movement instructions. They come with a set of possible suffixes that subtly change the meaning of the instruction. Take STMCSIA, for example. It is a STM instruction, but then you have to remember that CS means "carry set" and IA means "increment after".

In short, the disassembly listing is like Chinese. The pseudocode is longer but requires much less time to understand.

; void __fastcall sub_2A38(list_t *ptr, unsigned int a2)
 sub_2A38                                ; CODE XREF: sub_5C8+48p
                                         ; sub_648+5Cp ...
                 MOV     R2, #0
                 STMFD   SP!, {LR}                                            
                 MOV     R3, R2
                 MOV     R12, R2
                 MOV     LR, R2
                 SUBS    R1, R1, #0x20

 loc_2A50                                ; CODE XREF: sub_2A38+24j
                 STMCSIA R0!, {R2,R3,R12,LR}
                 STMCSIA R0!, {R2,R3,R12,LR}
                 SUBCSS  R1, R1, #0x20
                 BCS     loc_2A50
                 MOVS    R1, R1,LSL#28
                 STMCSIA R0!, {R2,R3,R12,LR}
                 STMMIIA R0!, {R2,R3}
                 LDMFD   SP!, {LR}
                 MOVS    R1, R1,LSL#2
                 STRCS   R2, [R0],#4
                 MOVEQ   PC, LR
                 STRMIH  R2, [R0],#2
                 TST     R1, #0x40000000
                 STRNEB  R2, [R0],#1
                 RET
 ; End of function sub_2A38

void __fastcall sub_2A38(list_t *ptr, unsigned int a2)
{
  char copybig; // cf@1
  unsigned int size; // r1@1
  list_t *v4; // r0@3
  int remains; // r1@4
  int final; // r1@8

  copybig = a2 >= 0x20;
  size = a2 - 32;
  do
  {
    if ( !copybig )
      break;
    ptr->dword0 = 0;
    ptr->dword4 = 0;
    ptr->dword8 = 0;
    ptr->dwordC = 0;
    v4 = ptr + 1;
    v4->dword0 = 0;
    v4->dword4 = 0;
    v4->dword8 = 0;
    v4->dwordC = 0;
    ptr = v4 + 1;
    copybig = size >= 0x20;
    size -= 32;
  }
  while ( copybig );
  remains = size << 28;
  if ( copybig )
  {
    ptr->dword0 = 0;
    ptr->dword4 = 0;
    ptr->dword8 = 0;
    ptr->dwordC = 0;
    ++ptr;
  }
  if ( remains < 0 )
  {
    ptr->dword0 = 0;
    ptr->dword4 = 0;
    ptr = (list_t *)((char *)ptr + 8);
  }
  final = 4 * remains;
  if ( copybig )
  {
    ptr->dword0 = 0;
    ptr = (list_t *)((char *)ptr + 4);
  }
  if ( final )
  {
    if ( final < 0 )
    {
      LOWORD(ptr->dword0) = 0;
      ptr = (list_t *)((char *)ptr + 2);
    }
    if ( final & 0x40000000 )
      LOBYTE(ptr->dword0) = 0;
  }
}

Compiler helper functions

Sorry for another long code snippet. Just wanted to show you that the decompiler can handle compiler helper functions (like __divdi3) and handles 64-bit arithmetic quite well.

EXPORT op_two64                                              
     op_two64                                ; CODE XREF: refer_all+31Cp          
                                             ; main+78p                           
                                                                                  
     anonymous_1     = -0x28                                                      
     var_20          = -0x20                                                      
     anonymous_0     = -0x18                                                      
     var_10          = -0x10                                                      
     arg_0           =  4                                                         
                                                                                  
 000                 MOV     R12, SP                                              
 000                 STMFD   SP!, {R4,R11,R12,LR,PC}                              
 014                 SUB     R11, R12, #4                                         
 014                 SUB     SP, SP, #0x18                                        
 02C                 SUB     R4, R11, #-var_10
 02C                 STMDB   R4, {R0,R1}
 02C                 MOV     R1, 0xFFFFFFF0
 02C                 SUB     R12, R11, #-var_10
 02C                 ADD     R1, R12, R1
 02C                 STMIA   R1, {R2,R3}
 02C                 LDR     R3, [R11,#arg_0]
 02C                 CMP     R3, #1
 02C                 BNE     loc_9C44
 02C                 MOV     R3, 0xFFFFFFF0
 02C                 SUB     R0, R11, #-var_10
 02C                 ADD     R3, R0, R3
 02C                 SUB     R4, R11, #-var_10
 02C                 LDMDB   R4, {R1,R2}
 02C                 LDMIA   R3, {R3,R4}
 02C                 ADDS    R3, R3, R1
 02C                 ADC     R4, R4, R2
 02C                 SUB     R12, R11, #-var_20
 02C                 STMDB   R12, {R3,R4}
 02C                 B       loc_9D04
     ; ---------------------------------------------------------------------------

     loc_9C44                                ; CODE XREF: op_two64+30j
 02C                 LDR     R3, [R11,#arg_0]
 02C                 CMP     R3, #2
 02C                 BNE     loc_9C7C
 02C                 MOV     R3, 0xFFFFFFF0
 02C                 SUB     R0, R11, #-var_10
 02C                 ADD     R3, R0, R3
 02C                 SUB     R4, R11, #-var_10
 02C                 LDMDB   R4, {R1,R2}
 02C                 LDMIA   R3, {R3,R4}
 02C                 SUBS    R3, R1, R3
 02C                 SBC     R4, R2, R4
 02C                 SUB     R12, R11, #-var_20
 02C                 STMDB   R12, {R3,R4}
 02C                 B       loc_9D04
     ; ---------------------------------------------------------------------------

     loc_9C7C                                ; CODE XREF: op_two64+68j
 02C                 LDR     R3, [R11,#arg_0]
 02C                 CMP     R3, #3
 02C                 BNE     loc_9CB8
 02C                 MOV     R3, 0xFFFFFFF0
 02C                 SUB     R0, R11, #-var_10
 02C                 ADD     R3, R0, R3
 02C                 SUB     R2, R11, #-var_10
 02C                 LDMDB   R2, {R0,R1}
 02C                 LDMIA   R3, {R2,R3}
 02C                 BL      __muldi3
 02C                 MOV     R4, R1
 02C                 MOV     R3, R0
 02C                 SUB     R12, R11, #-var_20
 02C                 STMDB   R12, {R3,R4}
 02C                 B       loc_9D04
     ; ---------------------------------------------------------------------------

     loc_9CB8                                ; CODE XREF: op_two64+A0j
 02C                 LDR     R3, [R11,#arg_0]
 02C                 CMP     R3, #4
 02C                 BNE     loc_9CF4
 02C                 MOV     R3, 0xFFFFFFF0
 02C                 SUB     R0, R11, #-var_10
 02C                 ADD     R3, R0, R3
 02C                 SUB     R2, R11, #-var_10
 02C                 LDMDB   R2, {R0,R1}
 02C                 LDMIA   R3, {R2,R3}
 02C                 BL      __divdi3
 02C                 MOV     R4, R1
 02C                 MOV     R3, R0
 02C                 SUB     R12, R11, #-var_20
 02C                 STMDB   R12, {R3,R4}
 02C                 B       loc_9D04
     ; ---------------------------------------------------------------------------

     loc_9CF4                                ; CODE XREF: op_two64+DCj
 02C                 MOV     R3, 0xFFFFFFFF
 02C                 MOV     R2, 0xFFFFFFFF
 02C                 SUB     R4, R11, #-var_20
 02C                 STMDB   R4, {R2,R3}

     loc_9D04                                ; CODE XREF: op_two64+5Cj
                                             ; op_two64+94j ...
 02C                 SUB     R12, R11, #-var_20
 02C                 LDMDB   R12, {R0,R1}
 02C                 SUB     SP, R11, #0x10
 014                 LDMFD   SP, {R4,R11,SP,PC}
     ; End of function op_two64

signed __int64 __fastcall op_two64(signed __int64 a1, signed __int64 a2, int a3)
{
  signed __int64 v4; // [sp+0h] [bp-28h]@2

  switch ( a3 )
  {
    case 1:
      v4 = a2 + a1;
      break;
    case 2:
      v4 = a1 - a2;
      break;
    case 3:
      v4 = a1 * a2;
      break;
    case 4:
      v4 = a1 / a2;
      break;
    default:
      v4 = -1LL;
      break;
  }
  return v4;
}

Immediate constants

Since ARM instructions cannot have big immediate constants, sometimes they are loaded with two instructions. There are many 0xFA (250 decimal) constants in the disassembly listing, but all of them are shifted to the left by 2 before use, which yields 0x3E8 (1000 decimal). The decompiler saves you from these petty details.

As an aside: the decompiler can handle ARM mode as well as Thumb mode instructions. It just does not care about the instruction encoding because it is already handled by IDA.

loc_110D6                               ; CODE XREF: sub_10E38+43Cj
                                         ; sub_10E38+442j ...
                 LDR     R1, =(tmin_ptr - 0x1CDB8)
                 LDR     R2, =(tmax_ptr - 0x1CDB8)
                 LDR     R0, =(aRttMinAvgMaxMd - 0x1CDB8)
                 LDR     R6, [R7,R1]
                 LDR     R5, [R7,R2]
                 MOVS    R3, #0xFA
                 LDR     R4, [R6]
                 LSLS    R1, R3, #2
                 LDR     R6, [R5]
                 ADDS    R5, R7, R0      ; "rtt min/avg/max/mdev = %ld.%03ld/%lu.%0"...
                 MOVS    R0, R4
                 BLX     __aeabi_idiv
                 MOV     R8, R0
                 MOVS    R0, R4
                 MOVS    R4, #0xFA
                 LSLS    R1, R4, #2
                 BLX     __aeabi_idivmod
                 LDR     R3, =0
                 LDR     R2, =0x3E8
                 MOVS    R4, R1
                 LDR     R0, [SP,#0x78+var_40]
                 LDR     R1, [SP,#0x78+var_40+4]
                 BLX     __aeabi_ldivmod
                 LDR     R3, =0
                 LDR     R2, =0x3E8
                 STR     R0, [SP,#0x78+var_50]
                 STR     R1, [SP,#0x78+var_4C]
                 LDR     R0, [SP,#0x78+var_40]
                 LDR     R1, [SP,#0x78+var_40+4]
                 BLX     __aeabi_ldivmod
                 MOVS    R1, #0xFA
                 MOVS    R0, R6
                 LSLS    R1, R1, #2
                 STR     R2, [SP,#0x78+var_78]
                 BLX     __aeabi_idiv
                 STR     R0, [SP,#0x78+var_74]
                 MOVS    R0, R6
                 MOVS    R6, #0xFA
                 LSLS    R1, R6, #2
                 BLX     __aeabi_idivmod
                 MOVS    R2, #0xFA
                 STR     R1, [SP,#0x78+var_70]
                 LDR     R0, [SP,#0x78+var_38]
                 LSLS    R1, R2, #2
                 BLX     __aeabi_idiv
                 MOVS    R3, #0xFA
                 STR     R0, [SP,#0x78+var_6C]
                 LSLS    R1, R3, #2
                 LDR     R0, [SP,#0x78+var_38]
                 BLX     __aeabi_idivmod
                 MOVS    R0, R5          ; format
                 STR     R1, [SP,#0x78+var_68]
                 MOVS    R2, R4
                 MOV     R1, R8
                 LDR     R3, [SP,#0x78+var_50]
                 BLX     printf

 printf(
      "rtt min/avg/max/mdev = %ld.%03ld/%lu.%03ld/%ld.%03ld/%ld.%03ld ms",
      tmin / 1000,
      tmin % 1000,
      v27 / 1000,
      v27 % 1000,
      tmax / 1000,
      tmax % 1000,
      v28 / 1000,
      v28 % 1000);

Position independent code

In some cases the disassembly listing can be misleading, especially with PIC (position independent code). While the address of a constant string is loaded into R12, the code does not care about it. It is just how variable addresses are calculated in PIC code (it is .got-someoffset). Such calculations are very frequent in shared objects and unfortunately IDA cannot handle all of them. But the decompiler did a great job of tracing R12.

sub_65768                               ; DATA XREF: .data:007E37A4o         
                                                                              
 var_18          = -0x18                                                      
 var_14          = -0x14                                                      
 var_10          = -0x10                                                      
 arg_0           =  0                                                         
                                                                              
                 PUSH    {LR}                                                 
                 LDR.W   R12, =aResponsetype ; "responseType"
                 SUB     SP, SP, #0x14                                        
                 ADR.W   LR, loc_65774

 loc_65774                               ; DATA XREF: sub_65768+8o
                 ADD     R12, LR
                 LDR.W   LR, [SP,#0x18+arg_0]
                 STR.W   LR, [SP,#0x18+var_18]
                 MOV.W   LR, #0x10
                 STR.W   LR, [SP,#0x18+var_14]
                 LDR.W   LR, =0xFFF0883C
                 ADD     R12, LR
                 STR.W   R12, [SP,#0x18+var_10]
                 BL      sub_65378
                 ADD     SP, SP, #0x14
                 POP     {PC}
 ; End of function sub_65768

int __fastcall sub_65768(int a1, int a2, int a3, int a4, int a5)
{
  return sub_65378(a1, a2, a3, a4, a5, 16, (int)myarray);
}

Comparisons of PowerPC disassembly and decompilation

Here are some side-by-side comparisons of disassembly and decompiler output for PowerPC. Please maximize the window to see both columns simultaneously.

The following examples are displayed on this page:

Simple code
Linear execution
64-bit comparison
System calls
Compiler helpers
Floating point arithmetic
Magic multiplication/division operations
VLE code
Interactive decompiler

Simple code

This simple function calculates the sum of the squares of the first N natural numbers. While the function logic is obvious by just looking at the decompiler output, the assembly listing has too much noise and requires studying it. The decompiler saves your time and allows you to concentrate on more exciting aspects of reverse engineering.

f:
 .set back_chain, -0x20
 .set var_4, -4
                stw       r31, var_4(r1)
                stwu      r1, back_chain(r1)
                mr        r31, r1
                stw       r3, 0x14(r31)
                mr        r4, r3
                cmpwi     r3, 0
                stw       r4, 8(r31)
                bgt       loc_30
                b         loc_24
 loc_24:
                li        r3, 0
                stw       r3, 0x18(r31)
                b         loc_88
 loc_30:
                li        r3, 0
                stw       r3, 0x10(r31)
                stw       r3, 0xC(r31)
                b         loc_40
 loc_40:
                lwz       r3, 0x14(r31)
                lwz       r4, 0xC(r31)
                cmpw      r4, r3
                bge       loc_7C
                b         loc_54
 loc_54:
                lwz       r3, 0xC(r31)
                mullw     r3, r3, r3
                lwz       r4, 0x10(r31)
                add       r3, r4, r3
                stw       r3, 0x10(r31)
                b         loc_6C
 loc_6C:
                lwz       r3, 0xC(r31)
                addi      r3, r3, 1
                stw       r3, 0xC(r31)
                b         loc_40
 loc_7C:
                lwz       r3, 0x10(r31)
                stw       r3, 0x18(r31)
                b         loc_88
 loc_88:
                lwz       r3, 0x18(r31)
                addi      r1, r1, 0x20
                lwz       r31, var_4(r1)
                blr
 # End of       function f

int __fastcall f(int a1)
{
  int i; // [sp+Ch] [-14h]@3
  int v3; // [sp+10h] [-10h]@3

  if ( a1 < 1 )
      return 0;
  v3 = 0;
  for ( i = 0; i < a1; ++i )
    v3 += i * i;
  return v3;
}

Linear execution

The PowerPC processor has a number of instructions which can be used to avoid branches (for example cntlzw). The decompiler restores the conditional logic and makes code easier to understand.

# _DWORD c_eq_s(void)
.globl _Z6c_eq_sv
_Z6c_eq_sv:

.set back_chain, -0x10
.set var_8, -8
.set var_4, -4
.set sender_lr, 4

                stwu      r1, back_chain(r1)
                mflr      r0
                stw       r0, 0x10+sender_lr(r1)
                stw       r30, 0x10+var_8(r1)
                stw       r31, 0x10+var_4(r1)
                mr        r31, r1
                bl        c
                mr        r9, r3
                extsh     r30, r9
                bl        s
                mr        r9, r3
                xor       r9, r30, r9
                cntlzw    r9, r9
                srwi      r9, r9, 5
                clrlwi    r9, r9, 24
                mr        r3, r9
                addi      r11, r31, 0x10
                lwz       r0, 4(r11)
                mtlr      r0
                lwz       r30, -8(r11)
                lwz       r31, -4(r11)
                mr        r1, r11
                blr
# End of        function c_eq_s(void)

bool c_eq_s(void)
{
  int v0; // r30@1

  v0 = c();
  return v0 == s();
}

64-bit comparison

64-bit comparison usually involves several compare and branch instructions which do not improve the code readability.

.globl i_ge_uh
i_ge_uh:

.set back_chain, -0x10
.set var_4, -4

                stwu      r1, back_chain(r1)
                stw       r31, 0x10+var_4(r1)
                mr        r31, r1
                lis       r9, i@ha
                lwz       r9, i@l(r9)
                mr        r8, r9
                srawi     r9, r9, 0x1F
                mr        r7, r9
                lis       r9, uh@ha
                addi      r9, r9, uh@l
                lwz       r10, (uh+4 - uh)(r9)
                lwz       r9, 0(r9)
                cmplw     cr7, r9, r7
                bgt       cr7, loc_7028
                cmplw     cr7, r9, r7
                bne       cr7, loc_7020
                cmplw     cr7, r10, r8
                bgt       cr7, loc_7028
loc_7020:
                li        r9, 1
                b         loc_702C
loc_7028:
                li        r9, 2
loc_702C:
                mr        r3, r9
                addi      r11, r31, 0x10
                lwz       r31, -4(r11)
                mr        r1, r11
                blr
# End of      function i_ge_uh

signed int i_ge_uh()
{
  signed int v0; // r9@2 7029 TYPED

  if ( uh <= (unsigned __int64)i )
      v0 = 1;
  else
      v0 = 2;
  return v0;
}

System calls

System calls are always mysterious, but the decompiler helps you with their names and arguments.

 mr        r3, r26 # set
                bl .sigfillset
                li        r0, 0xAE
                li        r3, 2
                mr        r4, r26
                mr        r5, r29
                li        r6, 8
                sc
                mfcr      r0
                lwz       r5, (off_F9A704C - dword_F9A7130)(r30) # sub_F9920A4 # start_routine
                mr        r4, r31 # attr
                mr        r6, r28 # arg
                addi      r3, r1, 0x180+var_54 # newthread
                bl .pthread_create
                li        r0, 0xAE
                mr        r26, r3
                mr        r4, r29
                li        r3, 2
                li        r5, 0
                li        r6, 8
                sc
                mfcr      r0
                mr        r3, r31 # attr
                bl .pthread_attr_destroy

...
sigset_t v36; // [sp+8h] [-178h]@47 F992C04 TYPED
sigset_t v37; // [sp+88h] [-F8h]@47 F992BEC TYPED
pthread_attr_t v38; // [sp+108h] [-78h]@47 F992BC4 TYPED
__int16 v39; // [sp+12Ch] [-54h]@47 F992C1C
...

_sigfillset(&v37);
v29 = linux_syscall(__NR_rt_sigprocmask, 2, &v37, &v36);
v30 = _pthread_create((pthread_t *)&v39, &v38, (void *(*)(void *))0x93C10018, v11);
v31 = linux_syscall(__NR_rt_sigprocmask, 2, &v36, 0);
_pthread_attr_destroy(&v38);

Compiler helpers

Compilers sometimes use helper functions; the decompiler knows the meaning of many of these helpers and uses that knowledge to simplify the code.

.globl lldiv # weak
lldiv:

.set back_chain, -0x30
.set var_18, -0x18
.set var_14, -0x14
.set var_10, -0x10
.set var_C, -0xC
.set var_8, -8
.set var_4, -4
.set sender_lr, 4

                stwu      r1, back_chain(r1)
                mflr      r0
                stw       r28, 0x30+var_10(r1)
                mr        r28, r5
                stw       r29, 0x30+var_C(r1)
                mr        r29, r6
                stw       r31, 0x30+var_4(r1)
                mr        r5, r7
                mr        r31, r3
                mr        r6, r8
                mr        r3, r28
                mr        r4, r29
                stw       r0, 0x30+sender_lr(r1)
                stw       r26, 0x30+var_18(r1)
                mr        r26, r7
                stw       r27, 0x30+var_14(r1)
                mr        r27, r8
                stw       r30, 0x30+var_8(r1)
                bl        __divdi3
                stw       r3, 0(r31)
                mr        r5, r26
                stw       r4, 4(r31)
                mr        r6, r27
                mr        r3, r28
                mr        r4, r29
                bl        __moddi3
                lwz       r0, 0x30+sender_lr(r1)
                stw       r3, 8(r31)
                mr        r3, r31
                stw       r4, 0xC(r31)
                mtlr      r0
                lwz       r26, 0x30+var_18(r1)
                lwz       r27, 0x30+var_14(r1)
                lwz       r28, 0x30+var_10(r1)
                lwz       r29, 0x30+var_C(r1)
                lwz       r30, 0x30+var_8(r1)
                lwz       r31, 0x30+var_4(r1)
                addi      r1, r1, 0x30
                blr
# End of     function lldiv

__int64 *__fastcall lldiv(__int64 *result, int a2, __int64 a3, __int64 a4)
{
    *result = a3 / a4;
    result[1] = a3 % a4;
    return result;
}

Floating point arithmetic

The PowerPC processor contains a number of complex floating point instructions which perform several operations at once. Recovering an expression from the assembly code is not easy for a human reader, but the decompiler does it for you.

                .globl _x2y2m1f
_x2y2m1f:
                lis	 r9, unk_20@ha
                lfs	 f0, unk_20@l(r9)
                fsub	 f12, f1, f0
                fadd	 f0, f1, f0
                fmul	 f0, f12, f0
                fmadd	 f1, f1, f2, f0
                blr
# End of	function _x2y2m1f

double __fastcall x2y2m1f(double a1, double a2)
{
    return a1 * ((a1 - 1.0) * (a1 + 1.0)) + a2;
}

Magic multiplication/division operations

Compilers can decompose a multiplication/division instruction into a sequence of cheaper instructions (additions, shifts, etc). This example demonstrates how the decompiler recognizes such sequences and folds them back into the original operation.

# __int64 __fastcall int_u_mod_10()
                .globl int_u_mod_10
int_u_mod_10:

.set back_chain, -0x20
.set var_C, -0xC
.set var_8, -8
.set var_4, -4
.set sender_lr, 4

                stwu      r1, back_chain(r1)
                mflr      r0
                stw       r0, 0x20+sender_lr(r1)
                stw       r29, 0x20+var_C(r1)
                stw       r30, 0x20+var_8(r1)
                stw       r31, 0x20+var_4(r1)
                mr        r31, r1
                bl        u
                mr        r10, r3
                lis       r9, -0x3334
                ori       r9, r9, 0xCCCD # 0xCCCCCCCD
                mulhwu    r9, r10, r9
                srwi      r9, r9, 3
                mulli     r9, r9, 0xA
                subf      r9, r9, r10
                mr        r30, r9
                li        r29, 0
                mr        r9, r29
                mr        r10, r30
                mr        r3, r9
                mr        r4, r10
                addi      r11, r31, 0x20
                lwz       r0, 4(r11)
                mtlr      r0
                lwz       r29, -0xC(r11)
                lwz       r30, -8(r11)
                lwz       r31, -4(r11)
                mr        r1, r11
                blr
# End of      function int_u_mod_10

__int64 __fastcall int_u_mod_10()
{
    return u() % 0xAu;
}

VLE code

This example demonstrates that the decompiler can handle VLE code without problems.

sub_498E:
                se_mr     r6, r3
                se_mr     r7, r4
                se_add    r7, r6
                se_subi   r7, 1
                se_li     r5, 0
                se_b      loc_49A2
# ---------------------------------------------------------------------------
loc_499A:
                se_lbz    r4, 0(r6)
                se_add    r5, r4
                se_extzh  r5
                se_addi   r6, 1
loc_49A2:
                se_cmpl   r6, r7
                se_ble    loc_499A
                se_mr     r7, r5
                se_mr     r3, r7
                se_blr
# End of function sub_498E

int __fastcall sub_498E(unsigned __int8 *a1, int a2)
{
  unsigned __int8 *v2; // r6@1 498F TYPED
  int v3; // r5@1 4997

  v2 = a1;
  v3 = 0;
  while ( v2 <= &a1[a2 - 1] )
    v3 = (unsigned __int16)(v3 + *v2++);
  return v3;
}

Interactive decompiler

The pseudocode is not static: the decompiler is interactive in the same way as IDA. You can change variable types and names, change function prototypes, add comments and more. The example below presents the result after such modifications.

Surely the result is not ideal, and there is a lot of room for improvement, but we hope that you got the idea.

And you can compare the result with the original: http://lxr.free-electrons.com/source/fs/fat/namei_msdos.c#L224

# int __fastcall msdos_add_entry(struct inode *_dir, const unsigned __int8 *name, int is_dir, int is_hid,
int cluster, struct timespec *_ts, struct fat_slot_info *_sinfo)
msdos_add_entry:

.set back_chain, -0x50
.set de, -0x48
.set date, -0x28
.set time, -0x26
.set var_14, -0x14
.set sender_lr, 4

                mflr      r0
                stw       r0, sender_lr(r1)
                bl        _mcount
                stwu      r1, back_chain(r1)
                mflr      r0
                stmw      r27, 0x50+var_14(r1)
                stw       r0, 0x50+sender_lr(r1)
                subfic    r5, r5, 0
                mr.       r30, r6
                lwz       r0, 0(r4)
                subfe     r10, r10, r10
                mr        r31, r3
                lwz       r11, 4(r4)
                lwz       r3, 0x1C(r3)
                clrrwi    r10, r10, 4
                mr        r29, r7
                lhz       r5, 8(r4)
                addi      r10, r10, 0x20
                mr        r28, r8
                lbz       r6, 0xA(r4)
                mr        r27, r9
                lwz       r3, 0x2B8(r3)
                stw       r0, 0x50+de(r1)
                stw       r11, 0x50+de.name+4(r1)
                sth       r5, 0x50+de.name+8(r1)
                stb       r6, 0x50+de.name+0xA(r1)
                stb       r10, 0x50+de.attr(r1)
                beq       loc_728
                ori       r10, r10, 2
                li        r9, 0
                li        r7, 0
                addi      r6, r1, 0x50+date
                stb       r10, 0x50+de.attr(r1)
                addi      r5, r1, 0x50+time
                mr        r4, r8
                stb       r9, 0x50+de.lcase(r1)
                bl        fat_time_unix2fat
                lhz       r9, 0x50+time(r1)
                li        r10, 0
                sth       r10, 0x50+de.adate(r1)
                sth       r9, 0x50+de.time(r1)
                lhz       r9, 0x50+date(r1)
                sth       r10, 0x50+de.cdate(r1)
                sth       r10, 0x50+de.ctime(r1)
                stb       r10, 0x50+de.ctime_cs(r1)
                sth       r9, 0x50+de.date(r1)
loc_698:
                addi      r10, r1, 0x50+de.start
                srawi     r9, r29, 0x10
                sthbrx    r29, r0, r10
                addi      r10, r1, 0x50+de.starthi
                mr        r6, r27
                sthbrx    r9, r0, r10
                li        r5, 1
                li        r9, 0
                addi      r4, r1, 0x50+de
                mr        r3, r31
                stw       r9, 0x50+de.size(r1)
                bl        fat_add_entries
                mr.       r30, r3
                bne       loc_710
                lwz       r10, 0(r28)
                lwz       r11, 4(r28)
                stw       r10, 0x48(r31)
                stw       r10, 0x50(r31)
                stw       r11, 0x4C(r31)
                stw       r11, 0x54(r31)
                lwz       r9, 0x1C(r31)
                lwz       r9, 0x34(r9)
                andi.     r10, r9, 0x90
                bne       loc_704
                lwz       r9, 0xC(r31)
                andi.     r10, r9, 0x41
                beq       loc_768
loc_704:
                mr        r3, r31       # struct inode *
                li        r30, 0
                bl        fat_sync_inode
loc_710:
                mr        r3, r30
                lwz       r0, 0x50+sender_lr(r1)
                lmw       r27, 0x50+var_14(r1)
                addi      r1, r1, 0x50
                mtlr      r0
                blr
# ---------------------------------------------------------------------------
loc_728:
                li        r7, 0
                addi      r6, r1, 0x50+date
                stb       r30, 0x50+de.lcase(r1)
                addi      r5, r1, 0x50+time
                mr        r4, r8
                bl        fat_time_unix2fat
                li        r9, 0
                sth       r30, 0x50+de.adate(r1)
                stb       r9, 0x50+de.ctime_cs(r1)
                lhz       r9, 0x50+time(r1)
                sth       r30, 0x50+de.cdate(r1)
                sth       r9, 0x50+de.time(r1)
                lhz       r9, 0x50+date(r1)
                sth       r30, 0x50+de.ctime(r1)
                sth       r9, 0x50+de.date(r1)
                b         loc_698
# ---------------------------------------------------------------------------
loc_768:
                mr        r3, r31       # struct inode *
                li        r4, 7
                bl        __mark_inode_dirty
                mr        r3, r3
                lwz       r0, 0x50+sender_lr(r1)
                lmw       r27, 0x50+var_14(r1)
                addi      r1, r1, 0x50
                mtlr      r0
                blr
# End of       function msdos_add_entry

int __fastcall msdos_add_entry(struct inode *dir, const unsigned __int8 *name, int is_dir, int is_hid,
                               int cluster, struct timespec *ts, struct fat_slot_info *sinfo)
{
  __int16 zero; // r30@1 601
  bool not_hidden; // cr34@1 601 TYPED
  int v10; // r11@1 611
  signed int v11; // r10@1 619 TYPED
  __int16 v13; // r5@1 621
  __u8 node_attrs; // r10@1 625 TYPED
  __u8 v16; // r6@1 62D TYPED
  struct msdos_sb_info *sbi; // r3@1 635 TYPED
  int err; // r30@3 6C9 TYPED
  __time_t sec; // r10@4 6D1 TYPED
  __syscall_slong_t nsec; // r11@4 6D5 TYPED
  struct msdos_dir_entry de; // [sp+8h] [-48h]@1 639 TYPED
  __le16 date; // [sp+28h] [-28h]@2 670 TYPED
  __le16 time; // [sp+2Ah] [-26h]@2 670 TYPED

  zero = is_hid;
  not_hidden = is_hid == 0;
  v10 = *((_DWORD *)name + 1);
  v11 = (unsigned int)is_dir <= 0 ? 0 : -16;
  v13 = *((_WORD *)name + 4);
  node_attrs = v11 + ATTR_ARCH;                 // ATTR_ARCH or ATTR_DIR
  v16 = name[10];
  sbi = (struct msdos_sb_info *)dir->i_sb->s_fs_info;
  *(_DWORD *)&de.name[0] = *(_DWORD *)name;     // memcpy(&de.name[0], name, 12);
  *(_DWORD *)&de.name[4] = v10;                 // ...
  *(_WORD *)&de.name[8] = v13;                  // ...
  de.name[10] = v16;
  de.attr = node_attrs;
  if ( not_hidden )
  {
    de.lcase = zero;                            // = 0
    fat_time_unix2fat(sbi, ts, &time, &date, 0);
    de.adate = zero;
    de.ctime_cs = 0;
    de.cdate = zero;
    de.time = time;
    de.ctime = zero;
    de.date = date;
  }
  else
  {
    de.attr = node_attrs | ATTR_HIDDEN;
    de.lcase = 0;
    fat_time_unix2fat(sbi, ts, &time, &date, 0);
    de.adate = 0;
    de.time = time;
    de.cdate = 0;
    de.ctime = 0;
    de.ctime_cs = 0;
    de.date = date;
  }
  de.start = _byteswap_ushort(cluster);
  de.starthi = _byteswap_ushort(HIWORD(cluster));
  de.size = 0;
  err = fat_add_entries(dir, &de, 1, sinfo);
  if ( err )
    return err;
  sec = ts->tv_sec;
  nsec = ts->tv_nsec;
  dir->i_mtime.tv_sec = ts->tv_sec;
  dir->i_ctime.tv_sec = sec;
  dir->i_mtime.tv_nsec = nsec;
  dir->i_ctime.tv_nsec = nsec;
  if ( dir->i_sb->s_flags & (MS_DIRSYNC|MS_SYNCHRONOUS) || dir->i_flags & (S_DIRSYNC|S_SYNC) )
  {
    err = 0;
    fat_sync_inode(dir);
    return err;
  }
  _mark_inode_dirty(dir);
  return 0;
}

Comparisons of MIPS disassembly and decompilation

Here are some side-by-side comparisons of disassembly and decompiler output for MIPS. Please maximize the window to see both columns simultaneously.

The following examples are displayed on this page:

Simple code
64-bit comparison
Magic divisions
Hard cases with delay slots
Little-endian MIPS
MicroMIPS
Floating-point operations
Compiler helpers

Simple code

This is very simple code to decompile and the output is perfect. The only minor obstacle is the references through the global offset table, but both IDA and the Decompiler handle them well. Please note the difference in the number of lines to read on the left and on the right.

# void __fastcall free_argv(int argc, char **argv)
                .globl _Z9free_argviPPc  # weak
_Z9free_argviPPc:                        # CODE XREF: test_expand_argv(void)+264↑p
                                         # test_expand_argv(void)+51C↑p ...

var_10          = -0x10
var_4           = -4
var_s0          =  0
var_s4          =  4
arg_0           =  8
arg_4           =  0xC

 # __unwind {
                addiu   $sp, -0x28
                sw      $ra, 0x20+var_s4($sp)
                sw      $fp, 0x20+var_s0($sp)
                move    $fp, $sp
                la      $gp, _GLOBAL_OFFSET_TABLE_+0x7FF0
                sw      $gp, 0x20+var_10($sp)
                sw      $a0, 0x20+arg_0($fp)
                sw      $a1, 0x20+arg_4($fp)
                lw      $v0, 0x20+arg_4($fp)
                beqz    $v0, loc_17778
                nop
                sw      $zero, 0x20+var_4($fp)

loc_1770C:                               # CODE XREF: free_argv(int,char **)+80↓j
                lw      $v1, 0x20+var_4($fp)
                lw      $v0, 0x20+arg_0($fp)
                slt     $v0, $v1, $v0
                beqz    $v0, loc_17760
                nop
                lw      $v0, 0x20+var_4($fp)
                sll     $v0, 2
                lw      $v1, 0x20+arg_4($fp)
                addu    $v0, $v1, $v0
                lw      $v0, 0($v0)
                move    $a0, $v0
                lw      $v0, (qfree_ptr-0x7FF0 - _GLOBAL_OFFSET_TABLE_)($gp)
                move    $t9, $v0
                jalr    $t9 ; qfree
                nop
                lw      $gp, 0x20+var_10($fp)
                lw      $v0, 0x20+var_4($fp)
                addiu   $v0, 1
                sw      $v0, 0x20+var_4($fp)
                b       loc_1770C
                nop
 # ---------------------------------------------------------------------------

loc_17760:                               # CODE XREF: free_argv(int,char **)+40↑j
                lw      $a0, 0x20+arg_4($fp)
                lw      $v0, (qfree_ptr-0x7FF0 - _GLOBAL_OFFSET_TABLE_)($gp)
                move    $t9, $v0
                jalr    $t9 ; qfree
                nop
                lw      $gp, 0x20+var_10($fp)

loc_17778:                               # CODE XREF: free_argv(int,char **)+28↑j
                nop
                move    $sp, $fp
                lw      $ra, 0x20+var_s4($sp)
                lw      $fp, 0x20+var_s0($sp)
                addiu   $sp, 0x28
                jr      $ra
                nop
 # } // starts at 176D8

void __fastcall free_argv(int argc, char **argv)
{
  int i; // [sp+1Ch] [+1Ch]

  if ( argv )
  {
    for ( i = 0; i < argc; ++i )
      qfree(argv[i]);
    qfree(argv);
  }
}

64-bit comparison

Sorry for another long assembler listing. It shows that for MIPS, as for other platforms, the decompiler can recognize 64-bit operations and collapse them into very readable constructs.

# =============== S U B R O U T I N E =======================================

# Attributes: bp-based frame fpd=0x18

# _DWORD uh_eq_s(void)
                .globl _Z7uh_eq_sv
_Z7uh_eq_sv:                             # DATA XREF: .eh_frame:000478E4↓o

var_s0          =  0
var_s4          =  4
var_s8          =  8
var_sC          =  0xC
var_s10         =  0x10
var_s14         =  0x14
var_s18         =  0x18
var_s1C         =  0x1C

 # __unwind {
                addiu   $sp, -0x38
                 sw      $ra, 0x18+var_s1C($sp)
                 sw      $fp, 0x18+var_s18($sp)
                 sw      $s5, 0x18+var_s14($sp)
                 sw      $s4, 0x18+var_s10($sp)
                 sw      $s3, 0x18+var_sC($sp)
                 sw      $s2, 0x18+var_s8($sp)
                 sw      $s1, 0x18+var_s4($sp)
                 sw      $s0, 0x18+var_s0($sp)
                 move    $fp, $sp
                 jal     uh
                 nop
                 move    $s5, $v1
                 move    $s4, $v0
                 jal     s
                 nop
                 move    $s3, $v0
                 sra     $v0, 31
                 move    $s2, $v0
                 xor     $s0, $s4, $s2
                 xor     $s1, $s5, $s3
                 or      $v0, $s0, $s1
                 sltiu   $v0, 1
                 andi    $v0, 0xFF
                 move    $sp, $fp
                 lw      $ra, 0x18+var_s1C($sp)
                 lw      $fp, 0x18+var_s18($sp)
                 lw      $s5, 0x18+var_s14($sp)
                 lw      $s4, 0x18+var_s10($sp)
                 lw      $s3, 0x18+var_sC($sp)
                 lw      $s2, 0x18+var_s8($sp)
                 lw      $s1, 0x18+var_s4($sp)
                 lw      $s0, 0x18+var_s0($sp)
                 addiu   $sp, 0x38
                 jr      $ra
                 nop
  # } // starts at 25C

bool uh_eq_s(void)
{
  unsigned __int64 v0; // $v1

  v0 = uh();
  return v0 == s();
}

Magic divisions

We recognize magic divisions for MIPS the same way as for other processors. Note that this listing has a non-trivial delay slot.

.globl smod199
 smod199:                                 # DATA XREF: .eh_frame:0000875C↓o
 # __unwind {
                 lui     $v1, 0x5254
                 sra     $v0, $a0, 31
                 li      $v1, 0x5254E78F
                 mult    $a0, $v1
                 mfhi    $v1
                 sra     $v1, 6
                 subu    $v1, $v0
                 li      $v0, 0xC7
                 mul     $a1, $v1, $v0
                 jr      $ra
                 subu    $v0, $a0, $a1
  # } // starts at 4F2C

int __fastcall smod199(int a1)
{
  return a1 % 199;
}

Hard cases with delay slots

The previous example was a piece of cake. This one shows a tougher nut to crack: there is a jump to a delay slot. A decent decompiler must handle these cases too and produce correct output without misleading the user. This is what we do. (We spent quite a long time inventing and testing various scenarios with delay slots.)

branch_to_b_dslot:                       # CODE XREF: branch_to_bal_dslot+14↓p
                                         # DATA XREF: branch_likely_cond_move+10↓o
                move    $t2, $a0
                addiu   $t3, $t2, -0x18
                bltz    $t3, l1
                li      $a0, 1
                sllv    $a0, $t3
                b       l2

l1:                                      # CODE XREF: branch_to_b_dslot+8↑j
                li      $t4, 0xFFFFFFC0
                li      $t3, 0x18
                subu    $t3, $t2
                srav    $a0, $t3

l2:                                      # CODE XREF: branch_to_b_dslot+14↑j
                jr      $ra
                addu    $v0, $a0, $t4
 # End of function branch_to_b_dslot

int __fastcall branch_to_b_dslot(int a1)
{
  int v1; // $a0

  if ( a1 - 24 < 0 )
    v1 = 1 >> (24 - a1);
  else
    v1 = 1 << (a1 - 24);
  return v1 - 64;
}

Little-endian MIPS

We support both big-endian and little-endian code. Usually they look the same but there may be subtle differences in the assembler. The decompiler keeps track of the bits involved and produces human-readable code.

.globl upd_d2
upd_d2:
                lwl     $v0, 5($a0)
                lwr     $v0, 2($a0)
                addiu   $v0, $v0, 1
                swl     $v0, 5($a0)
                swr     $v0, 2($a0)
                jr      $ra
                lb      $v0, 0($a0)
 # End of function upd_d2

 .globl upd_d2
upd_d2:
                lwl     $v0, 2($a0)
                lwr     $v0, 5($a0)
                addiu   $v0, $v0, 1
                swl     $v0, 2($a0)
                swr     $v0, 5($a0)
                jr      $ra
                lb      $v0, 0($a0)
 # End of function upd_d2

int __fastcall upd_d2(char *a1)
{
  ++*(_DWORD *)(a1 + 2);
  return *a1;
}

MicroMIPS

MicroMIPS, as you have probably guessed, is supported too, with its special instructions and quirks.

 lwm16_sp:

 var_10          = -0x10

                 addiu   $sp, -0x10
                 swm     $ra,$s0-$s2, 0x10+var_10($sp)
                 move    $s0, $a0
                 move    $s1, $a1
                 move    $s2, $a2
                 addu    $s0, $s1
                 addu    $v0, $s0, $s2
                 lwm     $ra,$s0-$s2, 0x10+var_10($sp)
                 jraddiusp 0x10

__int64 __fastcall lwm16_sp(int a1, int a2, int a3)
{
  return a1 + a2 + a3;
}

Floating-point operations

The MIPS processor contains a number of complex floating point instructions, which perform several operations at once. It is not easy to decipher the meaning of the assembler code but the pseudocode is the simplest possible.

x2y2m1f:
                lui     $v0, %hi(dbl_50)
                ldc1    $f1, dbl_50
                sub.d   $f0, $f12, $f1
                add.d   $f1, $f12, $f1
                mul.d   $f0, $f1
                jr      $ra
                madd.d  $f0, $f13, $f0, $f12

double __fastcall x2y2m1f(double a1, double a2)
{
  return a2 * ((a1 - 1.0) * (a1 + 1.0)) + a1;
}

Compiler helpers

A compiler sometimes uses helper functions; our decompiler knows the meaning of many of these helpers and uses that knowledge to simplify the code.

mod4:

var_C           = -0xC
var_s0          =  0

                lui     $gp, %hi(_GLOBAL_OFFSET_TABLE_+0x7FF0)
                addiu   $sp, -0x20
                la      $gp, _GLOBAL_OFFSET_TABLE_+0x7FF0
                li      $a3, 5
                sw      $ra, 0x1C+var_s0($sp)
                sw      $gp, 0x1C+var_C($sp)
                lw      $t9, (__moddi3_ptr-0x7FF0 - _GLOBAL_OFFSET_TABLE_)($gp)
                jalr    $t9 ; __moddi3
                move    $a2, $zero
                lw      $ra, 0x1C+var_s0($sp)
                jr      $ra
                addiu   $sp, 0x20

__int64 __fastcall mod4(__int64 a1)
{
  return a1 % 5;
}

Hex-Rays v7.4 vs. v7.3 Decompiler Comparison Page

Here are some side-by-side comparisons of decompilations for v7.3 and v7.4. Please maximize the window to see both columns simultaneously.

The following examples are displayed on this page:

Better array detection
Support for more floating-point helpers
Automatic variable mapping
Automatic symbolic names
Simplified C++ names
Improved handling of 64-bit arithmetics
Better detection of 64-bit decrements
More meaningful variable names

Better array detection

The text produced by v7.3 is not quite correct because the array at [bp-128h] was not recognized. In general, detecting arrays is a tough task, but we can handle simple cases automatically now.

_BYTE v7[256]; // [sp+0h] [bp-128h]
  __int64 v8; // [sp+120h] [bp-8h]

  v8 = a2;
  v4 = a2;
  memcpy(v7, &v8, sizeof(v7));
  memcpy(a1, v7, 0x100u);

_QWORD *v5; // r4
int v7; // [sp+0h] [bp-128h]
__int64 v8; // [sp+120h] [bp-8h]

  v8 = a2;
  v4 = a2;
  v5 = a1;
  memcpy(&v7, &v8, 0x100u);
  memcpy(v5, &v7, 0x100u);

Support for more floating-point helpers

On the left there is a mysterious call to _extendsfdf2. In fact this is a compiler helper function that just converts a single precision floating point value into a double precision value. However, we do not want to see this call as is. It is much better to translate it into the code that looks more like C. Besides, there is a special treatment for printf-like functions.

void __cdecl printf_float(float a)
{
  printf("%f\n", a);
}

void __cdecl printf_float(float a)
{
  double v1; // r0

  v1 = COERCE_DOUBLE(_extendsfdf2(LODWORD(a)));
  printf("%f\n", v1);
}

Automatic variable mapping

In some cases we can easily prove that one variable can be mapped into another. The new version automatically creates a variable mapping in such cases. This makes the output shorter and easier to read. Needless to say, the user can revert the mapping if necessary.

__int64 sprintf_s(
        char *__ptr64 const _Buffer,
        const unsigned __int64 _BufferCount,
        const char *__ptr64 const _Format,
        ...)
{
  unsigned __int64 *v6; // x0
  __int64 result; // x0
  va_list va; // [xsp+38h] [xbp+38h]

  va_start(va, _Format);
  v6 = _local_stdio_printf_options();
  return _stdio_common_vsprintf_s(*v6, _Buffer, _BufferCount, _Format, 0i64,
                                  (char *__ptr64)va);
}

__int64 sprintf_s(
        char *__ptr64 const _Buffer,
        const unsigned __int64 _BufferCount,
        const char *__ptr64 const _Format,
        ...)
{
  char *v3; // x21
  unsigned __int64 v4; // x20
  const char *v5; // x19
  unsigned __int64 *v6; // x0
  __int64 result; // x0
  va_list va; // [xsp+38h] [xbp+38h]

  va_start(va, _Format);
  v3 = _Buffer;
  v4 = _BufferCount;
  v5 = _Format;
  v6 = _local_stdio_printf_options();
  return _stdio_common_vsprintf_s(*v6, v3, v4, v5, 0i64, (char *__ptr64)va);
}

Automatic symbolic names

The new version automatically applies symbolic constants when necessary. Less manual work.

  if ( operation == ReadKeyNames )
    return BaseDllReadVariableNames(v1, v2);
  if ( operation != ReadSection )
  {
    if ( operation == WriteKeyValue || operation == DeleteKey )
      return BaseDllWriteVariableValue(v1, v2, 0, 0);
    if ( operation == WriteSection || operation == DeleteSection )
      return BaseDllWriteApplicationVariables(v1, v2);

if ( operation == 4 )
    return BaseDllReadVariableNames(v1, v2);
  if ( operation != 6 )
  {
    if ( operation == 2 || operation == 3 )
      return BaseDllWriteVariableValue(v1, v2, 0, 0);
    if ( operation == 7 || operation == 8 )
      return BaseDllWriteApplicationVariables(v1, v2);

Simplified C++ names

This is not the longest C++ function name one may encounter but just compare the left and right sides. In fact the right side could even fit into one line easily, we just kept it multiline to be consistent. By the way, all names in IDA benefit from this simplification, not only the ones displayed by the decompiler. And it is configurable!

std::string *
__fastcall
std::_System_error::_Makestr(
  std::string *result,
  std::error_code _Errcode,
  std::string _Message)

std::basic_string<char,std::char_traits<char>,std::allocator<char> > *
__fastcall
std::_System_error::_Makestr(
  std::basic_string<char,std::char_traits<char>,std::allocator<char> > *result,
  std::error_code _Errcode,
  std::basic_string<char,std::char_traits<char>,std::allocator<char> > _Message)

Improved handling of 64-bit arithmetics

The battle is long but we do not give up. More 64-bit patterns are recognized now.

return h() % 1024;

v0 = h();
return (__int16)((((v0 ^ (SHIDWORD(v0) >> 31)) - (SHIDWORD(v0) >> 31)) & 0x3FF ^ (SHIDWORD(v0) >> 31))
                 - (SHIDWORD(v0) >> 31));

Better detection of 64-bit decrements

Yet another example of 64-bit arithmetics. The code on the left is correct but not useful at all. It can and should be converted into the simple equivalent text on the right.

return a1 - 1;

v1 = a1 + 0xFFFFFFFFLL;
  HIDWORD(v1) = ((unsigned __int64)(a1 + 0xFFFFFFFFLL) >> 32) - 1;

More meaningful variable names

Currently we support only GetProcAddress but we are sure that we will expand this feature in the future.

MessageBoxA_0 = (int (__stdcall *)(HWND, LPCSTR, LPCSTR, UINT))
                    GetProcAddress(v4, "MessageBoxA");
    if ( !MessageBoxA_0 )
      return 0;
    GetActiveWindow = (HWND (__stdcall *)())GetProcAddress(v5, "GetActiveWindow");
    GetLastActivePopup = (HWND (__stdcall *)(HWND))GetProcAddress(v5, "GetLastActivePopup");
  }
  if ( GetActiveWindow )
  {
    v3 = GetActiveWindow();
    if ( v3 )
    {
      if ( GetLastActivePopup )
        v3 = GetLastActivePopup(v3);
    }
  }
  return MessageBoxA_0(v3, a1, a2, a3);

    dword_12313BA8 = (int (__stdcall *)(_DWORD, _DWORD, _DWORD, _DWORD))
                     GetProcAddress(v4, "MessageBoxA");
    if ( !dword_12313BA8 )
      return 0;
    dword_12313BAC = GetProcAddress(v5, "GetActiveWindow");
    dword_12313BB0 = (int (__stdcall *)(_DWORD))GetProcAddress(v5, "GetLastActivePopup");
  }
  if ( dword_12313BAC )
  {
    v3 = dword_12313BAC();
    if ( v3 )
    {
      if ( dword_12313BB0 )
        v3 = dword_12313BB0(v3);
    }
  }
  return dword_12313BA8(v3, a1, a2, a3);

Hex-Rays v7.3 vs. v7.2 Decompiler Comparison Page

Below you will find side-by-side comparisons of v7.2 and v7.3 decompilations. Please maximize the window to see both columns simultaneously.

The following examples are displayed on this page:

NOTE: these are just some selected examples that can be illustrated as side-by-side differences. There are many other improvements and new features that are not mentioned on this page. We just got tired of selecting them. Some of the improvements that did not make it to this page:

objc-related improvements
value range analysis can eliminate more useless code
better resolving of got-relative memory references
too big shift amounts are converted to lower values (e.g. 33->1)
more for-loops
better handling of fragmented variables
many other things...

More hexadecimal numbers in the output

When a constant looks nicer as a hexadecimal number, we print it as a hexadecimal number by default. Naturally, beauty is in the eye of the beholder, but the new behavior will produce more readable code, and you will less frequently feel compelled to change the number representation. By the way, this tiny change is just one of numerous improvements that we keep adding in each release. Most of them go literally unnoticed. It is just that this time we decided to talk about them.

bool __fastcall ge_100000001(__int64 a1)
{
  return a1 >= 0x100000001LL;
}

bool __fastcall ge_100000001(__int64 a1)
{
  return a1 >= 4294967297LL;
}

Support for variable size structures

EfiBootRecord points to a structure that has RecordExtents[0] as the last member. Such structures are considered as variable size structures in C/C++. Now we handle them nicely.

BlockNumber = EfiBootRecord->RecordExtents[ExtentIndex64].BlockNumber;
BlockCount = EfiBootRecord->RecordExtents[ExtentIndex64].BlockCount;

BlockNumber = *(UINT64 *)((char *)&EfiBootRecord[1].BlockHeader.Checksum + ExtentIndex64);
BlockCount = *(UINT64 *)((char *)&EfiBootRecord[1].BlockHeader.ObjectOid + ExtentIndex64);

UTF-32 strings are printed inline

We were already printing UTF-8 and other string types inline, but UTF-32 was not supported yet. Now we print it with the 'U' prefix.

v3 = std::operator<<<std::char_traits<char>>(&std::cout, U"This is U\"Hello\"");

    .rodata:0000000000000120  text "UTF-32LE", 'This is U"Hello"',0
    ...
    v10 = std::ostream::operator<<(v9, aThisIsUHello_0);

Better argument detection for printf

The difference between these outputs is subtle but pleasant. The new version managed to determine the variable types based on the printf format string. While the old version ended up with int a2, int a3, the new version correctly determined them as one __int64 a2.

int __fastcall ididi(int a1, __int64 a2, int a3, __int64 a4, int a5)
{
  int varg_r0; // [sp+28h] [bp-10h]
  __int64 varg_r2; // [sp+30h] [bp-8h]

  varg_r0 = a1;
  varg_r2 = a2;
  my_print("d=%I64d\n", a2);
  my_print("d1=%I64d\n", a4);
  my_print("%d-%I64d-%d-%I64d-%d\n", varg_r0, varg_r2, a3, a4, a5);
  return 0;
}

int __fastcall ididi(int a1, int a2, __int64 a3, int a4, __int64 a5, int a6)
{
  int v6; // r1
  char v8; // [sp+4h] [bp-34h]
  int varg_r0; // [sp+28h] [bp-10h]
  __int64 varg_r2; // [sp+30h] [bp-8h]

  varg_r0 = a1;
  varg_r2 = a3;
  my_print("d=%I64d\n", a2, a3);
  my_print("d1=%I64d\n", v6, a5);
  my_print("%d-%I64d-%d-%I64d-%d\n", varg_r0, varg_r2, a4, v8, a5, a6);
  return 0;
}

Better argument detection for scanf

A similar logic works for scanf-like functions. Please note that the old version was misdetecting the number of arguments. It was possible to correct the misdetected arguments using the Numpad-Minus hotkey but it is always better when there is less routine work on your shoulders, right?

scanf("8: %d%i %x%o %s%s %C%c", &v12, &v7, &v3, &v4, &v2, &v9, &v8, &v13);
scanf("8:   %[ a-z]%c %2c%c %2c%2c %[ a-z]%c", &v12, &v7, &v3, &v4, &v2, &v9, &v8, &v13);

  scanf("8: %d%i %x%o %s%s %C%c", &v12, &v7, &v3, &v4, &v2, &v9, &v8, &v13, &v10, &v0, &v6, &v5, &v1, &v11);
  scanf(
    "8:   %[ a-z]%c %2c%c %2c%2c %[ a-z]%c",
    &v12,
    &v7,
    &v3,
    &v4,
    &v2,
    &v9,
    &v8,
    &v13,
    &v10,
    &v0,
    &v6,
    &v5,
    &v1,
    &v11);

Resolved TEB references

While seasoned reversers know what is located at fs:0, it is still better to have it spelled out. Besides, the type of v15 is automatically detected as struct _EXCEPTION_REGISTRATION_RECORD *.

v15 = NtCurrentTeb()->NtTib.ExceptionList;

v15 = __readfsdword(0);

Better automatic selection of union fields

Again, the user can specify the union field that should be used in the output (the hotkey is Alt-Y) but there are situations when it can be automatically determined based on the access type and size. The above example illustrates this point. JFYI, the type of entry is:

union __XmStringEntryRec
{
  _XmStringEmptyHeader empty;
  _XmStringOptSegHdrRec single;
  _XmStringUnoptSegHdrRec unopt_single;
  _XmStringArraySegHdrRec multiple;
};
struct __XmStringEmptyHeader
{
  unsigned __int32 type : 2;
};
struct __XmStringOptSegHdrRec
{
  unsigned __int32 type : 2;
  unsigned __int32 text_type : 2;
  unsigned __int32 tag_index : 3;
  unsigned __int32 rend_begin : 1;
  unsigned __int8 byte_count;
  unsigned __int32 rend_end : 1;
  unsigned __int32 rend_index : 4;
  unsigned __int32 str_dir : 2;
  unsigned __int32 flipped : 1;
  unsigned __int32 tabs_before : 3;
  unsigned __int32 permanent : 1;
  unsigned __int32 soft_line_break : 1;
  unsigned __int32 immediate : 1;
  unsigned __int32 pad : 2;
};

While we cannot handle bitfields yet, their presence does not prevent using the other, regular fields of the structure.

if ( entry->single.byte_count )

if ( *((_BYTE *)&entry->empty + 1) )

Yet one more example of union fields

I could not resist the temptation to include one more example of automatic union selection. How beautiful the code on the right is!

void __fastcall h_generic_calc_Perm32x8(V256 *res, V256 *argL, V256 *argR)
{
  res->w32[0] = argL->w32[argR->w32[0] & 7];
  res->w32[1] = argL->w32[argR->w32[1] & 7];
  res->w32[2] = argL->w32[argR->w32[2] & 7];
  res->w32[3] = argL->w32[argR->w32[3] & 7];
  res->w32[4] = argL->w32[argR->w32[4] & 7];
  res->w32[5] = argL->w32[argR->w32[5] & 7];
  res->w32[6] = argL->w32[argR->w32[6] & 7];
  res->w32[7] = argL->w32[argR->w32[7] & 7];
}

void __fastcall h_generic_calc_Perm32x8(V256 *res, V256 *argL, V256 *argR)
{
  LODWORD(res->w64[0]) = *((_DWORD *)argL->w64 + (argR->w64[0] & 7));
  HIDWORD(res->w64[0]) = *((_DWORD *)argL->w64 + (HIDWORD(argR->w64[0]) & 7));
  LODWORD(res->w64[1]) = *((_DWORD *)argL->w64 + (argR->w64[1] & 7));
  HIDWORD(res->w64[1]) = *((_DWORD *)argL->w64 + (HIDWORD(argR->w64[1]) & 7));
  LODWORD(res->w64[2]) = *((_DWORD *)argL->w64 + (argR->w64[2] & 7));
  HIDWORD(res->w64[2]) = *((_DWORD *)argL->w64 + (HIDWORD(argR->w64[2]) & 7));
  LODWORD(res->w64[3]) = *((_DWORD *)argL->w64 + (argR->w64[3] & 7));
  HIDWORD(res->w64[3]) = *((_DWORD *)argL->w64 + (HIDWORD(argR->w64[3]) & 7));
}

Improved support for EABI helpers

No comments needed, we hope. The new decompiler managed to fold constant expressions after replacing EABI helpers with corresponding operators.

int __cdecl main(int argc, const char **argv, const char **envp)
{
  printf("r = %d == 42\n", 42);
  printf("r = %lld == 42\n", 42LL);
  printf("ABORT %d\n", 0x40000001);
  return 0;
}

void __fastcall h_generic_calc_Perm32x8(V256 *res, V256 *argL, V256 *argR)
{
  LODWORD(res->w64[0]) = *((_DWORD *)argL->w64 + (argR->w64[0] & 7));
  HIDWORD(res->w64[0]) = *((_DWORD *)argL->w64 + (HIDWORD(argR->w64[0]) & 7));
  LODWORD(res->w64[1]) = *((_DWORD *)argL->w64 + (argR->w64[1] & 7));
  HIDWORD(res->w64[1]) = *((_DWORD *)argL->w64 + (HIDWORD(argR->w64[1]) & 7));
  LODWORD(res->w64[2]) = *((_DWORD *)argL->w64 + (argR->w64[2] & 7));
  HIDWORD(res->w64[2]) = *((_DWORD *)argL->w64 + (HIDWORD(argR->w64[2]) & 7));
  LODWORD(res->w64[3]) = *((_DWORD *)argL->w64 + (argR->w64[3] & 7));
  HIDWORD(res->w64[3]) = *((_DWORD *)argL->w64 + (HIDWORD(argR->w64[3]) & 7));
}

Improved local variable allocation

Now it works better especially in complex cases.

tbd

int __cdecl main(int argc, const char **argv, const char **envp)
{
  int v3; // r0
  int v4; // r0
  int v5; // r0
  int v6; // r0
  int v7; // r0
  __int64 v8; // r0
  int v9; // r2
  __int64 v11; // [sp+0h] [bp-14h]
  int v12; // [sp+Ch] [bp-8h]
  int v13; // [sp+Ch] [bp-8h]

  v3 = _mulvsi3(7, 6, envp);
  v4 = _negvsi2(v3);
  v5 = _addvsi3(v4, 101);
  v12 = _subvsi3(v5, 17);
  printf("r = %d == 42\n", v12);
  v11 = _mulvdi3(7, 0, 6, 0);
  v6 = _negvdi2(v12, v12 >> 31);
  v7 = _addvdi3(v6, v6 >> 31, 101, 0);
  v8 = _subvdi3(v7, v7 >> 31, 17, 0);
  printf("r = %lld == 42\n", HIDWORD(v8), v11);
  v13 = _mulvsi3(0x7FFFFFFF, 0x3FFFFFFF, v9);
  printf("ABORT %d\n", v13);
  return 0;
}

Better recognition of string references

In this case too, the user could set the prototype of sub_1135FC as accepting a char * and this would be enough to reveal string references in the output, but the new decompiler can do it automatically.

  sub_1135FC(-266663568, "This is a long long long string");
  if ( v2 > 0x48u )
  {
    sub_108998("Another str");

  sub_1135FC(-266663568, 89351520);
  if ( v2 > 0x48u )
  {
    sub_108998(89351556);

Better handling of structures returned by value

The code on the left had a very awkward sequence to copy a structure. The code on the right eliminates it as unnecessary and useless.

  _BYTE v1[12]; // rax
  ...
  return *(mystruct *)v1;
}

  _BYTE v1[12]; // ax
  mystruct result; // 0:ax.11

  ...
  *(_QWORD *)result.ca1 = *(_QWORD *)v1;
  result.s1 = *(_WORD *)&v1[8];
  result.c1 = v1[10];
  return result;
}

More while loops

Do you care about this improvement? Probably you do not care because the difference is tiny. However, in addition to being simpler, the code on the right eliminated a temporary variable, v5. A tiny improvement, but an improvement it is.

while ( *++v4 )
    ;

do
    v5 = *++v4;
  while ( v5 );

Shorter code

Another tiny improvement made the output considerably shorter. We like it!

unsigned __int8 *__fastcall otp_memset(unsigned __int8 *pDest, unsigned __int8 val, int size)
{
  unsigned __int8 *i; // r3

  for ( i = pDest; (unsigned int)size-- >= 1; ++i )
    *i = val;
  return pDest;
}

unsigned __int8 *__fastcall otp_memset(unsigned __int8 *pDest, unsigned __int8 val, int size)
{
  unsigned __int8 *i; // r3
  _BOOL1 v4; // cf

  for ( i = pDest; ; ++i )
  {
    v4 = (unsigned int)size-- >= 1;
    if ( !v4 )
      break;
    *i = val;
  }
  return pDest;
}
}

Improved recognition of magic divisions

This is a very special case: a division that uses the rcr instruction. Our microcode does not have the opcode for it but we implemented the logic to handle some special cases, just so you do not waste your time trying to decipher the meaning of convoluted code (yes, rcr means code that is difficult to understand).

unsigned __int64 __fastcall konst_mod251_shr3(unsigned __int64 a1)
{
  return (a1 >> 3) % 0xFB;
}

__int64 __fastcall konst_mod251_shr3(unsigned __int64 a1)
{
  unsigned __int64 v1; // rcx

  v1 = a1 >> 3;
  _RDX = v1 + ((v1 * (unsigned __int128)0x5197F7D73404147ui64) >> 64);
  __asm { rcr     rdx, 1 }
  return v1 - 251 * (_RDX >> 7);
}

Less gotos

Well, we cannot say that we produce fewer gotos in all cases, but there is some improvement for sure. Also note that the return type got improved too: now it is immediately visible that the function returns a boolean (0/1) value.

_BOOL8 __fastcall sub_0(__int64 a1, int *a2)
{
  int v2; // eax
  int v3; // eax
  int v4; // eax

  v2 = *a2;
  if ( *a2 > 522 )
  {
    v4 = v2 - 4143;
    return !v4 || v4 == 40950;
  }
  if ( v2 != 522 )
  {
    v3 = v2 - 71;
    if ( v3 )
    {
      if ( (unsigned int)(v3 - 205) >= 2 )
        return 0;
    }
  }
  return 1;
}

__int64 __fastcall sub_0(__int64 a1, int *a2)
{
  int v2; // eax
  int v3; // eax
  int v4; // eax

  v2 = *a2;
  if ( *a2 > 522 )
  {
    v4 = v2 - 4143;
    if ( !v4 || v4 == 40950 )
      goto LABEL_8;
LABEL_9:
    return 0;
  }
  if ( v2 != 522 )
  {
    v3 = v2 - 71;
    if ( v3 )
    {
      if ( (unsigned int)(v3 - 205) >= 2 )
        goto LABEL_9;
    }
  }
LABEL_8:
  return 1;
}

Division may generate an exception

What a surprise, the code on the right is longer and more complex! Indeed, it is so, and it is because now the decompiler is more careful with the division instructions. They potentially may generate the zero division exception and completely hiding them from the output may be misleading. If you prefer the old behaviour, turn off the division preserving in the configuration file.

__int64 __fastcall sub_4008C0(int a1)
{
  int v1; // ecx
  int v2; // edx
  int v4; // [rsp+0h] [rbp-4h]

  v1 = 2;
  if ( a1 > 2 )
  {
    do
    {
      nanosleep(&rmtp, &rqtp);
      v2 = a1 % v1++;
      v4 = 1 / v2;
    }
    while ( v1 != a1 );
  }
  return 0LL;
}

__int64 __fastcall sub_4008C0(int a1)
{
  int v1; // ecx

  v1 = 2;
  if ( a1 > 2 )
  {
    do
    {
      nanosleep(&rmtp, &rqtp);
      ++v1;
    }
    while ( v1 != a1 );
  }
  return 0LL;
}

Order of variadic arguments

Do you notice the difference? If not, here is a hint: the order of arguments of sub_88 is different. The code on the right is more correct because the format specifiers match the variable types. For example, %f matches float a. At first sight the code on the left looks completely wrong but (surprise!) it works correctly on x64 machines. It is so because floating point and integer arguments are passed at different locations, so the relative order of floating/integer arguments in the call does not matter much. Nevertheless, the code on the right causes less confusion.

int __cdecl func1(const float a, int b, void *c)
{
  return sub_88("%f, %d, %p\n", a, (unsigned int)b, c);
}

int __cdecl func1(const float a, int b, void *c)
{
  return sub_88("%f, %d, %p\n", (unsigned int)b, c, a);
}

Improved division recognition

This is a never ending battle, but we advance!

int int_h_mod_m32ui64(void)
{
  return h() % 32;
}

int int_h_mod_m32ui64(void)
{
  __int64 v0; // r10

  v0 = h();
  return (abs64(v0) & 0x1F ^ (SHIDWORD(v0) >> 31)) - (SHIDWORD(v0) >> 31);
}

Hex-Rays v7.2 vs. v7.1 Decompiler Comparison Page

Below you will find side-by-side comparisons of v7.1 and v7.2 decompilations. Please maximize the window to see both columns simultaneously.

The following examples are displayed on this page:

NOTE: these are just some selected examples that can be illustrated as side-by-side differences. There are many other improvements and new features that are not mentioned on this page.

Magic divisions in 64-bit code

In the past the Decompiler was able to recognize magic divisions in 32-bit code. We now support magic divisions in 64-bit code too.

return 21600 * (t / 21600);

  return 21600
       * (((signed __int64)((unsigned __int128)(1749024623285053783LL
         * (signed __int128)t) >> 64) >> 11) - (t >> 63));

More aggressive 'if' to 'boolean' folding

More aggressive folding of if_one_else_zero constructs; the output is much shorter and easier to grasp.

return a1 << 28 != 0 && (a1 & (unsigned __int8)(a1 - 1)) == 0;

  v1 = 1;
  v2 = 1;
  if ( !(a1 << 28) )
    v2 = 0;
  if ( !((unsigned __int8)a1 & (unsigned __int8)(a1 - 1)) )
    v1 = 0;
  return v2 && !v1;

Better type of 'this' argument

The decompiler tries to guess the type of the first argument of a constructor. This leads to improved listing.

XImage *__fastcall XImage::setHotSpot(XImage *this, int a2, int a3)
{
  LOWORD(this->height) = a2;
  HIWORD(this->height) = a3;
  return this;
}

int __fastcall XImage::setHotSpot(int this, int a2, int a3)
{
  *(_WORD *)(this + 4) = a2;
  *(_WORD *)(this + 6) = a3;
  return this;
}

Improved union field selection

The decompiler has a better algorithm to find the correct union field. This reduces the number of casts in the output.

float __fastcall ret4f(__n128 a1)
{
  return a1.n128_f32[2];
}

float __fastcall ret4f(__n128 a1)
{
  return *(float *)&a1.n128_u32[2];
}

Improved recognition of 'for' loops

We improved recognition of 'for' loops, they are shorter and much easier to understand.

  for ( i = 0; i < 16; ++i )
  {
    printf("%x", *(unsigned __int8 *)(i + v2) >> 4);
    printf("%x", *(_BYTE *)(i + v2) & 0xF);
  }

v3 = 0;
do
{
  printf("%x", (unsigned int)*(unsigned __int8 *)(v3 + v2) >> 4);
  printf("%x", *(_BYTE *)(v3++ + v2) & 0xF);
}
while ( v3 < 16 );

Added support for shifted pointers

Please note that the code on the left is completely illegible; the assembler code is probably easier to work with in this case. However, the code on the right is very neat. JFYI, below is the class hierarchy for this example:

struct __cppobj B1
{
  B1_vtbl *__vftable /*VFT*/;
  char d1[4];
};
struct __cppobj B2
{
  B2_vtbl *__vftable /*VFT*/;
  char d2[4];
};
struct __cppobj A : B1, B2
{
  char d3[4];
};

Also please note that the source code had

A::a2(A *this)

but at the assembler level we have

A::a2(B2 *this)

Visual Studio plays such tricks.

int __thiscall A::a2(B2 *__shifted(A,8) this)
{
  printf("A::a2 %p\n", ADJ(this));
  printf("A::d2 %p\n", ADJ(this)->d2);
  return ADJ(this)->d3[0];
}

int __thiscall A::a2(B2 *this)
{
  B2 *v1; // ST08_4

  v1 = this;
  printf("A::a2 %p\n", this - 1);
  printf("A::d2 %p\n", (char *)v1 + 4);
  return *((char *)v1 + 8);
}

Better recognition of inlined standard functions

Yes, the code on the left and on the right do the same thing. We prefer the right side, very much.

if ( !memcmp(i + 10, "AMIBIOSC", 8u) )
      return i + 10;

    v2 = 0;
    v3 = 1;
    v4 = i + 10;
    v5 = "AMIBIOSC";
    v6 = 8;
    do
    {
      if ( !v6 )
        break;
      v2 = *v4 < (const unsigned __int8)*v5;
      v3 = *v4++ == *v5++;
      --v6;
    }
    while ( v3 );
    if ( (!v2 && !v3) == v2 )
      return i + 10;

Improved application of pre-increment and pre-decrement

Minor stuff, one would say, and we'd completely agree. However, these minor details make reading the output a pleasure.

    v5 = *++v4;
    result = --a4;

    v5 = (v4++)[1];
    result = a4-- - 1;

Added support for RRX addressing mode in ARM

This is a rare addressing mode that is nevertheless used by compilers. Now we support it nicely.

__int64 __fastcall sar64(__int64 a1)
{
  return a1 >> 1;
}

__int64 __fastcall sar64(__int64 a1)
{
  __int64 result; // r0

  SHIDWORD(a1) >>= 1;
  __asm { MOV     R0, R0,RRX }
  return result;
}

Improved constant propagation in global memory

The new decompiler managed to disentangle the obfuscation code and convert it into a nice strcpy().

strcpy((char *)&dword_1005DF9A, "basic_string");

  dword_1005DF9A = 0xADB0A3A3;
  dword_1005DF9E = 0xBCB499A6;
  dword_1005DFA2 = 0xABA5A3BB;
  LOBYTE(dword_1005DF9A) = 'b';
  BYTE1(dword_1005DF9A) ^= 0xC2u;
  HIWORD(dword_1005DF9A) = 'is';
  LOBYTE(dword_1005DF9E) = 'c';
  BYTE1(dword_1005DF9E) ^= 0xC6u;
  HIWORD(dword_1005DF9E) = 'ts';
  LOBYTE(dword_1005DFA2) = 'r';
  BYTE1(dword_1005DFA2) ^= 0xCAu;
  HIWORD(dword_1005DFA2) = 'gn';
  byte_1005DFA6 = 0;

Added support for Objective C blocks

The new version knows about ObjC blocks and can represent them correctly in the output. See Edit, Other, Objective-C submenu in IDA, it contains the necessary actions to analyze the blocks.

__int64 __fastcall sub_181450634(__int64 a1, __int64 a2, __int64 a3)
{
  Block_layout_18145064C blk; // [xsp+0h] [xbp-30h]

  blk.isa = _NSConcreteStackBlock;
  *(_QWORD *)&blk.flags = 0x42000000LL;
  blk.invoke = sub_181450694;
  blk.descriptor = (Block_descriptor_1 *)&unk_1B0668958;
  blk.lvar1 = *(_QWORD *)(a1 + 32);
  blk.lvar2 = a3;
  return sub_18144BD0C(a2, &blk);
}

__int64 __fastcall sub_181450634(__int64 a1, __int64 a2, __int64 a3)
{
  void *(*v4)[32]; // [xsp+0h] [xbp-30h]
  __int64 v5; // [xsp+8h] [xbp-28h]
  __int64 (__fastcall *v6)(); // [xsp+10h] [xbp-20h]
  void *v7; // [xsp+18h] [xbp-18h]
  __int64 v8; // [xsp+20h] [xbp-10h]
  __int64 v9; // [xsp+28h] [xbp-8h]

  v4 = _NSConcreteStackBlock;
  v5 = 1107296256LL;
  v6 = sub_181450694;
  v7 = &unk_1B0668958;
  v8 = *(_QWORD *)(a1 + 32);
  v9 = a3;
  return sub_18144BD0C(a2, &v4);
}

Improved recognition of 64-bit comparisons

We continue to improve recognition of 64-bit arithmetics. While it is impossible to handle all cases, we do not give up.

  gettimeofday(&tv, 0);
  v0 = 90 * (v3 / 1000 + 1000LL * *(_QWORD *)&tv);
  if ( v0 < 0xFFFFFFFFFFFFFFFFLL )
    stamp = 90 * (v3 / 1000 + 1000LL * *(_QWORD *)&tv);

  gettimeofday(&tv, 0);
  v0 = 1000LL * (unsigned int)tv.tv_usec;
  HIDWORD(v0) = (unsigned __int64)(1000LL * *(_QWORD *)&tv) >> 32;
  v1 = 90LL * (unsigned int)(v4 / 1000 + v0);
  HIDWORD(v1) = (unsigned __int64)(90 * (v4 / 1000 + v0)) >> 32;
  if ( HIDWORD(v1) < 0xFFFFFFFF || -1 == HIDWORD(v1) && (unsigned int)stamp > (unsigned int)v1 )
    stamp = v1;

Merged common code in 'if' branches

Yet another optimization rule that lifts common code from 'if' branches. We made it even more aggressive.

    mywcscpy();
    if ( a3 < 0 )
      v4 = -a3;

    if ( a3 >= 0 )
    {
      mywcscpy();
    }
    else
    {
      mywcscpy();
      v4 = -a3;
    }

Added forced stack variables

Sometimes compilers reuse the same stack slot for different purposes. Many of our users asked us to add a feature to handle this situation. The new decompiler addresses this issue by adding a command to force creation of a new variable at the specified point. Currently we support only aliasable stack variables because this is the most common case.

In the sample above the slot of the p_data_format variable is reused. Initially it holds a pointer to an integer (data_format) and then it holds a simple integer (errcode). Previous versions of the decompiler could not handle this situation nicely and the output would necessarily have casts and be quite difficult to read. The two different uses of the slot would be represented just by one variable. You can see it in the left listing.

The new version produces clean code and displays two variables. Naturally it happens after applying the force new variable command.

    data_format = *p_data_format;
    if ( *p_data_format < 0 || data_format > 13 )
    {
      errcode = 2;
      SetError(&this->status, &errcode, "format not one of accepted types");
    }

    data_format = *p_data_format;
    if ( *p_data_format < 0 || data_format > 13 )
    {
      p_data_format = (int *)2;
      SetError(&this->status, (errcode_t *)&p_data_format, "format not one of accepted types");
    }

Added support for virtual calls

Well, these listings require no comments, the new version apparently wins!

void __cdecl test3(D7 *a1)
{
  a1->f1(&a1->A1);
  a1->f2(&a1->D3);
  a1->f3(&a1->D5);
  a1->f4(&a1->A4);
  a1->f5(a1);
  a1->f6(a1);
  a1->g0(&a1->D5);
  a1->g5(&a1->D5);
  a1->g7(a1);
  if ( a1 )
    a1->~D7(a1);
}

void __cdecl test3(D7 *a1)
{
  (**((void (__cdecl ***)(char *))a1 + 12))((char *)a1 + 48);
  (*(void (__cdecl **)(char *))(*((_DWORD *)a1 + 10) + 12))((char *)a1 + 40);
  (**((void (__cdecl ***)(char *))a1 + 6))((char *)a1 + 24);
  (**((void (__cdecl ***)(char *))a1 + 26))((char *)a1 + 104);
  (**(void (__cdecl ***)(D7 *))a1)(a1);
  (*(void (__cdecl **)(D7 *))(*(_DWORD *)a1 + 12))(a1);
  (*(void (__cdecl **)(char *))(*((_DWORD *)a1 + 6) + 4))((char *)a1 + 24);
  (*(void (__cdecl **)(char *))(*((_DWORD *)a1 + 6) + 16))((char *)a1 + 24);
  (*(void (__cdecl **)(D7 *))(*(_DWORD *)a1 + 16))(a1);
  if ( a1 )
    (*(void (__cdecl **)(D7 *))(*(_DWORD *)a1 + 8))(a1);
}

Comparisons of ARM disassembly and decompilation

Here are some side-by-side comparisons of disassembly and decompiler for ARM. Please maximize the window to see both columns simultaneously.

The following examples are displayed on this page:

Simple case
64-bit arithmetics
Conditional instructions
Conditional instructions - 2
Complex instructions
Compiler helper functions
Immediate constants
Position independent code

Simple case

The decompiler saves your time and allows you to concentrate on more exciting aspects of reverse engineering.

; struct_result *__fastcall sub_210DC(struct_result *result)                 
                                         
 var_10          = -0x10                                                      
 var_4           = -4                                                         
                                                                              
                 MOV     R12, SP                                              
                 STMFD   SP!, {R0}                                            
                 STMFD   SP!, {R12,LR}                                        
                 SUB     SP, SP, #4                                           
                 LDR     R2, [SP,#0x10+var_4]
                 MOV     R3, #0
                 STR     R3, [R2]
                 LDR     R3, [SP,#0x10+var_4]
                 ADD     R2, R3, #4
                 MOV     R3, #0
                 STR     R3, [R2]
                 LDR     R3, [SP,#0x10+var_4]
                 ADD     R2, R3, #8
                 MOV     R3, #0
                 STR     R3, [R2]
                 LDR     R3, [SP,#0x10+var_4]
                 STR     R3, [SP,#0x10+var_10]
                 LDR     R0, [SP,#0x10+var_10]
                 ADD     SP, SP, #4
                 LDMFD   SP, {SP,LR}
                 BX      LR
 ; End of function sub_210DC

struct_result *__fastcall sub_210DC(struct_result *result)
{
  result->dword0 = 0;
  result->dword4 = 0;
  result->dword8 = 0;
  return result;
}

64-bit arithmetics

Sorry for a long code snippet, ARM code tends to be longer compared to x86 code. This makes our comparison even more impressive: look at how concise the decompiler output is!

 ; bool __cdecl uh_gt_uc()                                                    
                 EXPORT _uh_gt_uc__YA_NXZ                                     
 _uh_gt_uc__YA_NXZ                       ; DATA XREF: .pdata:$T7452o          
                                                                              
 var_2C          = -0x2C                                                      
 var_28          = -0x28                                                      
 var_24          = -0x24                                                      
 var_20          = -0x20                                                      
 var_1C          = -0x1C                                                      
 var_18          = -0x18                                                      
 var_14          = -0x14                                                      
 var_10          = -0x10                                                      
 var_C           = -0xC                                                       
 var_8           = -8                                                         
 var_4           = -4                                                         
                                                                              
                 STR     LR, [SP,#var_4]! ; $M7441                            
                                         ; $LN8@uh_gt_uc                      
                 SUB     SP, SP, #0x28                                        

 $M7449
                 BL      uh
                 STR     R1, [SP,#0x2C+var_24]
                 STR     R0, [SP,#0x2C+var_28]
                 BL      uc
                 STRB    R0, [SP,#0x2C+var_20]
                 LDRB    R3, [SP,#0x2C+var_20]
                 STR     R3, [SP,#0x2C+var_1C]
                 LDR     R1, [SP,#0x2C+var_1C]
                 LDR     R3, [SP,#0x2C+var_1C]
                 MOV     R2, R3,ASR#31
                 LDR     R3, [SP,#0x2C+var_28]
                 STR     R3, [SP,#0x2C+var_18]
                 LDR     R3, [SP,#0x2C+var_24]
                 STR     R3, [SP,#0x2C+var_14]
                 LDR     R3, [SP,#0x2C+var_18]
                 STR     R3, [SP,#0x2C+var_10]
                 STR     R1, [SP,#0x2C+var_C]
                 LDR     R3, [SP,#0x2C+var_14]
                 CMP     R3, R2
                 BCC     $LN3_8

 loc_6AC
                 BHI     $LN5_0

 loc_6B0
                 LDR     R2, [SP,#0x2C+var_10]
                 LDR     R3, [SP,#0x2C+var_C]
                 CMP     R2, R3
                 BLS     $LN3_8

 $LN5_0
                 MOV     R3, #1
                 STR     R3, [SP,#0x2C+var_8]
                 B       $LN4_8
 ; ---------------------------------------------------------------------------

 $LN3_8
                                         ; uh_gt_uc(void)+68j
                 MOV     R3, #0
                 STR     R3, [SP,#0x2C+var_8]

 $LN4_8
                 LDR     R3, [SP,#0x2C+var_8]
                 AND     R3, R3, #0xFF
                 STRB    R3, [SP,#0x2C+var_2C]
                 LDRB    R0, [SP,#0x2C+var_2C]
                 ADD     SP, SP, #0x28
                 LDR     PC, [SP+4+var_4],#4
 ; End of function uh_gt_uc(void)

bool __fastcall uh_gt_uc()
{
  unsigned __int64 v0; // ST04_8@1

  v0 = uh();
  return v0 > uc();
}

Conditional instructions

The pseudocode shows it much better and does not require any explanations.

A quiz question: did you notice that MOVNE loads zero to R0? (because I didn't:)

Also note that in the disassembly listing we see var_8 but the location really used is var_A, which corresponds to v4.

; int __cdecl ReadShort(void *, unsigned __int32 offset, int whence)         
 ReadShort                                                                    
                                                                              
 whence          = -0x18                                                      
 var_A           = -0xA                                                       
 var_8           = -8                                                         
                                                                              
                 STMFD   SP!, {R4,LR}                                         
                 SUB     SP, SP, #0x10   ; whence                             
                 MOV     R4, #0
                 ADD     R3, SP, #0x18+var_8
                 STRH    R4, [R3,#-2]!
                 STR     R2, [SP,#0x18+whence] ; whence
                 MOV     R2, R3          ; buffer
                 MOV     R3, #2          ; len
                 BL      ReadData
                 CMP     R0, R4
                 MOVNE   R0, R4
                 LDREQSH R0, [SP,#0x18+var_A]
                 ADD     SP, SP, #0x10
                 LDMFD   SP!, {R4,PC}
 ; End of function ReadShort

int __cdecl ReadShort(void *a1, unsigned __int32 offset, int whence)
{
  int result; // r0@2
  __int16 v4; // [sp+Eh] [bp-Ah]@1

  v4 = 0;
  if ( ReadData(a1, offset, &v4, 2u, whence) )
    result = 0;
  else
    result = v4;
  return result;
}

Conditional instructions - 2

The decompiler represented it perfectly well. I renamed some variables and set their types, but this was an easy task.

; signed int __fastcall get_next_byte(entry_t *entry)
 get_next_byte                           ; DATA XREF: sub_3BC+30o
                                         ;
                 LDR     R2, [R0,#4]
                 CMP     R2, #0
                 LDRNE   R3, [R0]
                 LDRNEB  R1, [R3],#1
                 CMPNE   R1, #0
                 MOVEQ   R1, #1
                 STREQ   R1, [R0,#0xC]
                 MOVEQ   R0, 0xFFFFFFFF
                 MOVEQ   PC, LR
                 SUB     R2, R2, #1
                 STR     R2, [R0,#4]
                 STR     R3, [R0]
                 MOV     R0, R1
                 RET
 ; End of function get_next_byte

signed int __fastcall get_next_byte(entry_t *entry)
{
  signed int chr; // r1@0
  unsigned __int8 *ptr; // r3@0
  int count; // r2@1
  char done; // zf@1
  signed int result; // r0@4

  count = entry->count;
  done = count == 0;
  if ( count )
  {
    ptr = entry->ptr + 1;
    chr = *entry->ptr;
    done = chr == 0;
  }
  if ( done )
  {
    entry->done = 1;
    result = -1;
  }
  else
  {
    entry->count = count - 1;
    entry->ptr = ptr;
    result = chr;
  }
  return result;
}

Complex instructions

In short, the disassembly listing is like Chinese. The pseudocode is longer but requires much less time to understand.

; void __fastcall sub_2A38(list_t *ptr, unsigned int a2)
 sub_2A38                                ; CODE XREF: sub_5C8+48p
                                         ; sub_648+5Cp ...
                 MOV     R2, #0
                 STMFD   SP!, {LR}                                            
                 MOV     R3, R2
                 MOV     R12, R2
                 MOV     LR, R2
                 SUBS    R1, R1, #0x20

 loc_2A50                                ; CODE XREF: sub_2A38+24j
                 STMCSIA R0!, {R2,R3,R12,LR}
                 STMCSIA R0!, {R2,R3,R12,LR}
                 SUBCSS  R1, R1, #0x20
                 BCS     loc_2A50
                 MOVS    R1, R1,LSL#28
                 STMCSIA R0!, {R2,R3,R12,LR}
                 STMMIIA R0!, {R2,R3}
                 LDMFD   SP!, {LR}
                 MOVS    R1, R1,LSL#2
                 STRCS   R2, [R0],#4
                 MOVEQ   PC, LR
                 STRMIH  R2, [R0],#2
                 TST     R1, #0x40000000
                 STRNEB  R2, [R0],#1
                 RET
 ; End of function sub_2A38

void __fastcall sub_2A38(list_t *ptr, unsigned int a2)
{
  char copybig; // cf@1
  unsigned int size; // r1@1
  list_t *v4; // r0@3
  int remains; // r1@4
  int final; // r1@8

  copybig = a2 >= 0x20;
  size = a2 - 32;
  do
  {
    if ( !copybig )
      break;
    ptr->dword0 = 0;
    ptr->dword4 = 0;
    ptr->dword8 = 0;
    ptr->dwordC = 0;
    v4 = ptr + 1;
    v4->dword0 = 0;
    v4->dword4 = 0;
    v4->dword8 = 0;
    v4->dwordC = 0;
    ptr = v4 + 1;
    copybig = size >= 0x20;
    size -= 32;
  }
  while ( copybig );
  remains = size << 28;
  if ( copybig )
  {
    ptr->dword0 = 0;
    ptr->dword4 = 0;
    ptr->dword8 = 0;
    ptr->dwordC = 0;
    ++ptr;
  }
  if ( remains < 0 )
  {
    ptr->dword0 = 0;
    ptr->dword4 = 0;
    ptr = (list_t *)((char *)ptr + 8);
  }
  final = 4 * remains;
  if ( copybig )
  {
    ptr->dword0 = 0;
    ptr = (list_t *)((char *)ptr + 4);
  }
  if ( final )
  {
    if ( final < 0 )
    {
      LOWORD(ptr->dword0) = 0;
      ptr = (list_t *)((char *)ptr + 2);
    }
    if ( final & 0x40000000 )
      LOBYTE(ptr->dword0) = 0;
  }
}

Compiler helper functions

Sorry for another long code snippet. Just wanted to show you that the decompiler can handle compiler helper functions (like __divdi3) and handles 64-bit arithmetic quite well.

EXPORT op_two64                                              
     op_two64                                ; CODE XREF: refer_all+31Cp          
                                             ; main+78p                           
                                                                                  
     anonymous_1     = -0x28                                                      
     var_20          = -0x20                                                      
     anonymous_0     = -0x18                                                      
     var_10          = -0x10                                                      
     arg_0           =  4                                                         
                                                                                  
 000                 MOV     R12, SP                                              
 000                 STMFD   SP!, {R4,R11,R12,LR,PC}                              
 014                 SUB     R11, R12, #4                                         
 014                 SUB     SP, SP, #0x18                                        
 02C                 SUB     R4, R11, #-var_10
 02C                 STMDB   R4, {R0,R1}
 02C                 MOV     R1, 0xFFFFFFF0
 02C                 SUB     R12, R11, #-var_10
 02C                 ADD     R1, R12, R1
 02C                 STMIA   R1, {R2,R3}
 02C                 LDR     R3, [R11,#arg_0]
 02C                 CMP     R3, #1
 02C                 BNE     loc_9C44
 02C                 MOV     R3, 0xFFFFFFF0
 02C                 SUB     R0, R11, #-var_10
 02C                 ADD     R3, R0, R3
 02C                 SUB     R4, R11, #-var_10
 02C                 LDMDB   R4, {R1,R2}
 02C                 LDMIA   R3, {R3,R4}
 02C                 ADDS    R3, R3, R1
 02C                 ADC     R4, R4, R2
 02C                 SUB     R12, R11, #-var_20
 02C                 STMDB   R12, {R3,R4}
 02C                 B       loc_9D04
     ; ---------------------------------------------------------------------------

     loc_9C44                                ; CODE XREF: op_two64+30j
 02C                 LDR     R3, [R11,#arg_0]
 02C                 CMP     R3, #2
 02C                 BNE     loc_9C7C
 02C                 MOV     R3, 0xFFFFFFF0
 02C                 SUB     R0, R11, #-var_10
 02C                 ADD     R3, R0, R3
 02C                 SUB     R4, R11, #-var_10
 02C                 LDMDB   R4, {R1,R2}
 02C                 LDMIA   R3, {R3,R4}
 02C                 SUBS    R3, R1, R3
 02C                 SBC     R4, R2, R4
 02C                 SUB     R12, R11, #-var_20
 02C                 STMDB   R12, {R3,R4}
 02C                 B       loc_9D04
     ; ---------------------------------------------------------------------------

     loc_9C7C                                ; CODE XREF: op_two64+68j
 02C                 LDR     R3, [R11,#arg_0]
 02C                 CMP     R3, #3
 02C                 BNE     loc_9CB8
 02C                 MOV     R3, 0xFFFFFFF0
 02C                 SUB     R0, R11, #-var_10
 02C                 ADD     R3, R0, R3
 02C                 SUB     R2, R11, #-var_10
 02C                 LDMDB   R2, {R0,R1}
 02C                 LDMIA   R3, {R2,R3}
 02C                 BL      __muldi3
 02C                 MOV     R4, R1
 02C                 MOV     R3, R0
 02C                 SUB     R12, R11, #-var_20
 02C                 STMDB   R12, {R3,R4}
 02C                 B       loc_9D04
     ; ---------------------------------------------------------------------------

     loc_9CB8                                ; CODE XREF: op_two64+A0j
 02C                 LDR     R3, [R11,#arg_0]
 02C                 CMP     R3, #4
 02C                 BNE     loc_9CF4
 02C                 MOV     R3, 0xFFFFFFF0
 02C                 SUB     R0, R11, #-var_10
 02C                 ADD     R3, R0, R3
 02C                 SUB     R2, R11, #-var_10
 02C                 LDMDB   R2, {R0,R1}
 02C                 LDMIA   R3, {R2,R3}
 02C                 BL      __divdi3
 02C                 MOV     R4, R1
 02C                 MOV     R3, R0
 02C                 SUB     R12, R11, #-var_20
 02C                 STMDB   R12, {R3,R4}
 02C                 B       loc_9D04
     ; ---------------------------------------------------------------------------

     loc_9CF4                                ; CODE XREF: op_two64+DCj
 02C                 MOV     R3, 0xFFFFFFFF
 02C                 MOV     R2, 0xFFFFFFFF
 02C                 SUB     R4, R11, #-var_20
 02C                 STMDB   R4, {R2,R3}

     loc_9D04                                ; CODE XREF: op_two64+5Cj
                                             ; op_two64+94j ...
 02C                 SUB     R12, R11, #-var_20
 02C                 LDMDB   R12, {R0,R1}
 02C                 SUB     SP, R11, #0x10
 014                 LDMFD   SP, {R4,R11,SP,PC}
     ; End of function op_two64

signed __int64 __fastcall op_two64(signed __int64 a1, signed __int64 a2, int a3)
{
  signed __int64 v4; // [sp+0h] [bp-28h]@2

  switch ( a3 )
  {
    case 1:
      v4 = a2 + a1;
      break;
    case 2:
      v4 = a1 - a2;
      break;
    case 3:
      v4 = a1 * a2;
      break;
    case 4:
      v4 = a1 / a2;
      break;
    default:
      v4 = -1LL;
      break;
  }
  return v4;
}

Immediate constants

As an aside: the decompiler can handle ARM mode as well as Thumb mode instructions. It does not care about the instruction encoding because that is already handled by IDA.

loc_110D6                               ; CODE XREF: sub_10E38+43Cj
                                         ; sub_10E38+442j ...
                 LDR     R1, =(tmin_ptr - 0x1CDB8)
                 LDR     R2, =(tmax_ptr - 0x1CDB8)
                 LDR     R0, =(aRttMinAvgMaxMd - 0x1CDB8)
                 LDR     R6, [R7,R1]
                 LDR     R5, [R7,R2]
                 MOVS    R3, #0xFA
                 LDR     R4, [R6]
                 LSLS    R1, R3, #2
                 LDR     R6, [R5]
                 ADDS    R5, R7, R0      ; "rtt min/avg/max/mdev = %ld.%03ld/%lu.%0"...
                 MOVS    R0, R4
                 BLX     __aeabi_idiv
                 MOV     R8, R0
                 MOVS    R0, R4
                 MOVS    R4, #0xFA
                 LSLS    R1, R4, #2
                 BLX     __aeabi_idivmod
                 LDR     R3, =0
                 LDR     R2, =0x3E8
                 MOVS    R4, R1
                 LDR     R0, [SP,#0x78+var_40]
                 LDR     R1, [SP,#0x78+var_40+4]
                 BLX     __aeabi_ldivmod
                 LDR     R3, =0
                 LDR     R2, =0x3E8
                 STR     R0, [SP,#0x78+var_50]
                 STR     R1, [SP,#0x78+var_4C]
                 LDR     R0, [SP,#0x78+var_40]
                 LDR     R1, [SP,#0x78+var_40+4]
                 BLX     __aeabi_ldivmod
                 MOVS    R1, #0xFA
                 MOVS    R0, R6
                 LSLS    R1, R1, #2
                 STR     R2, [SP,#0x78+var_78]
                 BLX     __aeabi_idiv
                 STR     R0, [SP,#0x78+var_74]
                 MOVS    R0, R6
                 MOVS    R6, #0xFA
                 LSLS    R1, R6, #2
                 BLX     __aeabi_idivmod
                 MOVS    R2, #0xFA
                 STR     R1, [SP,#0x78+var_70]
                 LDR     R0, [SP,#0x78+var_38]
                 LSLS    R1, R2, #2
                 BLX     __aeabi_idiv
                 MOVS    R3, #0xFA
                 STR     R0, [SP,#0x78+var_6C]
                 LSLS    R1, R3, #2
                 LDR     R0, [SP,#0x78+var_38]
                 BLX     __aeabi_idivmod
                 MOVS    R0, R5          ; format
                 STR     R1, [SP,#0x78+var_68]
                 MOVS    R2, R4
                 MOV     R1, R8
                 LDR     R3, [SP,#0x78+var_50]
                 BLX     printf

 printf(
      "rtt min/avg/max/mdev = %ld.%03ld/%lu.%03ld/%ld.%03ld/%ld.%03ld ms",
      tmin / 1000,
      tmin % 1000,
      v27 / 1000,
      v27 % 1000,
      tmax / 1000,
      tmax % 1000,
      v28 / 1000,
      v28 % 1000);

Position independent code

sub_65768                               ; DATA XREF: .data:007E37A4o         
                                                                              
 var_18          = -0x18                                                      
 var_14          = -0x14                                                      
 var_10          = -0x10                                                      
 arg_0           =  0                                                         
                                                                              
                 PUSH    {LR}                                                 
                 LDR.W   R12, =aResponsetype ; "responseType"
                 SUB     SP, SP, #0x14                                        
                 ADR.W   LR, loc_65774

 loc_65774                               ; DATA XREF: sub_65768+8o
                 ADD     R12, LR
                 LDR.W   LR, [SP,#0x18+arg_0]
                 STR.W   LR, [SP,#0x18+var_18]
                 MOV.W   LR, #0x10
                 STR.W   LR, [SP,#0x18+var_14]
                 LDR.W   LR, =0xFFF0883C
                 ADD     R12, LR
                 STR.W   R12, [SP,#0x18+var_10]
                 BL      sub_65378
                 ADD     SP, SP, #0x14
                 POP     {PC}
 ; End of function sub_65768

int __fastcall sub_65768(int a1, int a2, int a3, int a4, int a5)
{
  return sub_65378(a1, a2, a3, a4, a5, 16, (int)myarray);
}

Hex-Rays v7.3 vs. v7.2 Decompiler Comparison Page

Below you will find side-by-side comparisons of v7.2 and v7.3 decompilations. Please maximize the window to see both columns simultaneously.

The following examples are displayed on this page:

objc-related improvements
value range analysis can eliminate more useless code
better resolving of got-relative memory references
too big shift amounts are converted to lower values (e.g. 33->1)
more for-loops
better handling of fragmented variables
many other things...

More hexadecimal numbers in the output

bool __fastcall ge_100000001(__int64 a1)
{
  return a1 >= 0x100000001LL;
}

bool __fastcall ge_100000001(__int64 a1)
{
  return a1 >= 4294967297LL;
}

Support for variable size structures

EfiBootRecord points to a structure that has RecordExtents[0] as the last member. Such structures are considered as variable size structures in C/C++. Now we handle them nicely.

BlockNumber = EfiBootRecord->RecordExtents[ExtentIndex64].BlockNumber;
BlockCount = EfiBootRecord->RecordExtents[ExtentIndex64].BlockCount;

BlockNumber = *(UINT64 *)((char *)&EfiBootRecord[1].BlockHeader.Checksum + ExtentIndex64);
BlockCount = *(UINT64 *)((char *)&EfiBootRecord[1].BlockHeader.ObjectOid + ExtentIndex64);

UTF-32 strings are printed inline

We were already printing UTF-8 and other string types inline, but UTF-32 was not supported yet. Now we print it with the 'U' prefix.

v3 = std::operator<<<std::char_traits<char>>(&std::cout, U"This is U\"Hello\"

    .rodata:0000000000000120  text "UTF-32LE", 'This is U"Hello"',0
    ...
    v10 = std::ostream::operator<<(v9, aThisIsUHello_0);

Better argument detection for printf

int __fastcall ididi(int a1, __int64 a2, int a3, __int64 a4, int a5)
{
  int varg_r0; // [sp+28h] [bp-10h]
  __int64 varg_r2; // [sp+30h] [bp-8h]

  varg_r0 = a1;
  varg_r2 = a2;
  my_print("d=%I64d\n", a2);
  my_print("d1=%I64d\n", a4);
  my_print("%d-%I64d-%d-%I64d-%d\n", varg_r0, varg_r2, a3, a4, a5);
  return 0;
}

int __fastcall ididi(int a1, int a2, __int64 a3, int a4, __int64 a5, int a6)
{
  int v6; // r1
  char v8; // [sp+4h] [bp-34h]
  int varg_r0; // [sp+28h] [bp-10h]
  __int64 varg_r2; // [sp+30h] [bp-8h]

  varg_r0 = a1;
  varg_r2 = a3;
  my_print("d=%I64d\n", a2, a3);
  my_print("d1=%I64d\n", v6, a5);
  my_print("%d-%I64d-%d-%I64d-%d\n", varg_r0, varg_r2, a4, v8, a5, a6);
  return 0;
}

Better argument detection for scanf

scanf("8: %d%i %x%o %s%s %C%c", &v12, &v7, &v3, &v4, &v2, &v9, &v8, &v13);
scanf("8:   %[ a-z]%c %2c%c %2c%2c %[ a-z]%c", &v12, &v7, &v3, &v4, &v2, &v9, &v8, &v13);

  scanf("8: %d%i %x%o %s%s %C%c", &v12, &v7, &v3, &v4, &v2, &v9, &v8, &v13, &v10, &v0, &v6, &v5, &v1, &v11);
  scanf(
    "8:   %[ a-z]%c %2c%c %2c%2c %[ a-z]%c",
    &v12,
    &v7,
    &v3,
    &v4,
    &v2,
    &v9,
    &v8,
    &v13,
    &v10,
    &v0,
    &v6,
    &v5,
    &v1,
    &v11);

Resolved TEB references

While seasoned reversers know what is located at fs:0, it is still better to have it spelled out. Besides, the type of v15 is automatically detected as struct _EXCEPTION_REGISTRATION_RECORD *.

v15 = NtCurrentTeb()->NtTib.ExceptionList;

v15 = __readfsdword(0);

Better automatic selection of union fields

union __XmStringEntryRec
{
  _XmStringEmptyHeader empty;
  _XmStringOptSegHdrRec single;
  _XmStringUnoptSegHdrRec unopt_single;
  _XmStringArraySegHdrRec multiple;
};
struct __XmStringEmptyHeader
{
  unsigned __int32 type : 2;
};
struct __XmStringOptSegHdrRec
{
  unsigned __int32 type : 2;
  unsigned __int32 text_type : 2;
  unsigned __int32 tag_index : 3;
  unsigned __int32 rend_begin : 1;
  unsigned __int8 byte_count;
  unsigned __int32 rend_end : 1;
  unsigned __int32 rend_index : 4;
  unsigned __int32 str_dir : 2;
  unsigned __int32 flipped : 1;
  unsigned __int32 tabs_before : 3;
  unsigned __int32 permanent : 1;
  unsigned __int32 soft_line_break : 1;
  unsigned __int32 immediate : 1;
  unsigned __int32 pad : 2;
};

While we cannot handle bitfields yet, their presence does not prevent the use of the other, regular fields of the structure.

if ( entry->single.byte_count )

if ( *((_BYTE *)&entry->empty + 1) )

Yet one more example of union fields

I could not resist the temptation to include one more example of automatic union selection. How beautiful the code on the right is!

void __fastcall h_generic_calc_Perm32x8(V256 *res, V256 *argL, V256 *argR)
{
  res->w32[0] = argL->w32[argR->w32[0] & 7];
  res->w32[1] = argL->w32[argR->w32[1] & 7];
  res->w32[2] = argL->w32[argR->w32[2] & 7];
  res->w32[3] = argL->w32[argR->w32[3] & 7];
  res->w32[4] = argL->w32[argR->w32[4] & 7];
  res->w32[5] = argL->w32[argR->w32[5] & 7];
  res->w32[6] = argL->w32[argR->w32[6] & 7];
  res->w32[7] = argL->w32[argR->w32[7] & 7];
}

void __fastcall h_generic_calc_Perm32x8(V256 *res, V256 *argL, V256 *argR)
{
  LODWORD(res->w64[0]) = *((_DWORD *)argL->w64 + (argR->w64[0] & 7));
  HIDWORD(res->w64[0]) = *((_DWORD *)argL->w64 + (HIDWORD(argR->w64[0]) & 7));
  LODWORD(res->w64[1]) = *((_DWORD *)argL->w64 + (argR->w64[1] & 7));
  HIDWORD(res->w64[1]) = *((_DWORD *)argL->w64 + (HIDWORD(argR->w64[1]) & 7));
  LODWORD(res->w64[2]) = *((_DWORD *)argL->w64 + (argR->w64[2] & 7));
  HIDWORD(res->w64[2]) = *((_DWORD *)argL->w64 + (HIDWORD(argR->w64[2]) & 7));
  LODWORD(res->w64[3]) = *((_DWORD *)argL->w64 + (argR->w64[3] & 7));
  HIDWORD(res->w64[3]) = *((_DWORD *)argL->w64 + (HIDWORD(argR->w64[3]) & 7));
}

Improved support for EABI helpers

No comments needed, we hope. The new decompiler managed to fold constant expressions after replacing EABI helpers with corresponding operators.

int __cdecl main(int argc, const char **argv, const char **envp)
{
  printf("r = %d == 42\n", 42);
  printf("r = %lld == 42\n", 42LL);
  printf("ABORT %d\n", 0x40000001);
  return 0;
}

void __fastcall h_generic_calc_Perm32x8(V256 *res, V256 *argL, V256 *argR)
{
  LODWORD(res->w64[0]) = *((_DWORD *)argL->w64 + (argR->w64[0] & 7));
  HIDWORD(res->w64[0]) = *((_DWORD *)argL->w64 + (HIDWORD(argR->w64[0]) & 7));
  LODWORD(res->w64[1]) = *((_DWORD *)argL->w64 + (argR->w64[1] & 7));
  HIDWORD(res->w64[1]) = *((_DWORD *)argL->w64 + (HIDWORD(argR->w64[1]) & 7));
  LODWORD(res->w64[2]) = *((_DWORD *)argL->w64 + (argR->w64[2] & 7));
  HIDWORD(res->w64[2]) = *((_DWORD *)argL->w64 + (HIDWORD(argR->w64[2]) & 7));
  LODWORD(res->w64[3]) = *((_DWORD *)argL->w64 + (argR->w64[3] & 7));
  HIDWORD(res->w64[3]) = *((_DWORD *)argL->w64 + (HIDWORD(argR->w64[3]) & 7));
}

Improved local variable allocation

Now it works better especially in complex cases.

tbd

int __cdecl main(int argc, const char **argv, const char **envp)
{
  int v3; // r0
  int v4; // r0
  int v5; // r0
  int v6; // r0
  int v7; // r0
  __int64 v8; // r0
  int v9; // r2
  __int64 v11; // [sp+0h] [bp-14h]
  int v12; // [sp+Ch] [bp-8h]
  int v13; // [sp+Ch] [bp-8h]

  v3 = _mulvsi3(7, 6, envp);
  v4 = _negvsi2(v3);
  v5 = _addvsi3(v4, 101);
  v12 = _subvsi3(v5, 17);
  printf("r = %d == 42\n", v12);
  v11 = _mulvdi3(7, 0, 6, 0);
  v6 = _negvdi2(v12, v12 >> 31);
  v7 = _addvdi3(v6, v6 >> 31, 101, 0);
  v8 = _subvdi3(v7, v7 >> 31, 17, 0);
  printf("r = %lld == 42\n", HIDWORD(v8), v11);
  v13 = _mulvsi3(0x7FFFFFFF, 0x3FFFFFFF, v9);
  printf("ABORT %d\n", v13);
  return 0;
}

Better recognition of string references

  sub_1135FC(-266663568, "This is a long long long string");
  if ( v2 > 0x48u )
  {
    sub_108998("Another str");

  sub_1135FC(-266663568, 89351520);
  if ( v2 > 0x48u )
  {
    sub_108998(89351556);

Better handling of structures returned by value

The code on the left had a very awkward sequence to copy a structure. The code on the right eliminates it as unnecessary and useless.

  _BYTE v1[12]; // rax
  ...
  return *(mystruct *)v1;
}

  _BYTE v1[12]; // ax
  mystruct result; // 0:ax.11

  ...
  *(_QWORD *)result.ca1 = *(_QWORD *)v1;
  result.s1 = *(_WORD *)&v1[8];
  result.c1 = v1[10];
  return result;
}

More while loops

while ( *++v4 )
    ;

do
    v5 = *++v4;
  while ( v5 );

Shorter code

Another tiny improvement made the output considerably shorter. We like it!

unsigned __int8 *__fastcall otp_memset(unsigned __int8 *pDest, unsigned __int8 val, int size)
{
  unsigned __int8 *i; // r3

  for ( i = pDest; (unsigned int)size-- >= 1; ++i )
    *i = val;
  return pDest;
}

unsigned __int8 *__fastcall otp_memset(unsigned __int8 *pDest, unsigned __int8 val, int size)
{
  unsigned __int8 *i; // r3
  _BOOL1 v4; // cf

  for ( i = pDest; ; ++i )
  {
    v4 = (unsigned int)size-- >= 1;
    if ( !v4 )
      break;
    *i = val;
  }
  return pDest;
}
}

Improved recognition of magic divisions

unsigned __int64 __fastcall konst_mod251_shr3(unsigned __int64 a1)
{
  return (a1 >> 3) % 0xFB;
}

__int64 __fastcall konst_mod251_shr3(unsigned __int64 a1)
{
  unsigned __int64 v1; // rcx

  v1 = a1 >> 3;
  _RDX = v1 + ((v1 * (unsigned __int128)0x5197F7D73404147ui64) >> 64);
  __asm { rcr     rdx, 1 }
  return v1 - 251 * (_RDX >> 7);
}

Less gotos

_BOOL8 __fastcall sub_0(__int64 a1, int *a2)
{
  int v2; // eax
  int v3; // eax
  int v4; // eax

  v2 = *a2;
  if ( *a2 > 522 )
  {
    v4 = v2 - 4143;
    return !v4 || v4 == 40950;
  }
  if ( v2 != 522 )
  {
    v3 = v2 - 71;
    if ( v3 )
    {
      if ( (unsigned int)(v3 - 205) >= 2 )
        return 0;
    }
  }
  return 1;
}

__int64 __fastcall sub_0(__int64 a1, int *a2)
{
  int v2; // eax
  int v3; // eax
  int v4; // eax

  v2 = *a2;
  if ( *a2 > 522 )
  {
    v4 = v2 - 4143;
    if ( !v4 || v4 == 40950 )
      goto LABEL_8;
LABEL_9:
    return 0;
  }
  if ( v2 != 522 )
  {
    v3 = v2 - 71;
    if ( v3 )
    {
      if ( (unsigned int)(v3 - 205) >= 2 )
        goto LABEL_9;
    }
  }
LABEL_8:
  return 1;
}

Division may generate an exception

__int64 __fastcall sub_4008C0(int a1)
{
  int v1; // ecx
  int v2; // edx
  int v4; // [rsp+0h] [rbp-4h]

  v1 = 2;
  if ( a1 > 2 )
  {
    do
    {
      nanosleep(&rmtp, &rqtp);
      v2 = a1 % v1++;
      v4 = 1 / v2;
    }
    while ( v1 != a1 );
  }
  return 0LL;
}

__int64 __fastcall sub_4008C0(int a1)
{
  int v1; // ecx

  v1 = 2;
  if ( a1 > 2 )
  {
    do
    {
      nanosleep(&rmtp, &rqtp);
      ++v1;
    }
    while ( v1 != a1 );
  }
  return 0LL;
}

Order of variadic arguments

int __cdecl func1(const float a, int b, void *c)
{
  return sub_88("%f, %d, %p\n", a, (unsigned int)b, c);
}

int __cdecl func1(const float a, int b, void *c)
{
  return sub_88("%f, %d, %p\n", (unsigned int)b, c, a);
}

Improved division recognition

This is a never ending battle, but we advance!

int int_h_mod_m32ui64(void)
{
  return h() % 32;
}

int int_h_mod_m32ui64(void)
{
  __int64 v0; // r10

  v0 = h();
  return (abs64(v0) & 0x1F ^ (SHIDWORD(v0) >> 31)) - (SHIDWORD(v0) >> 31);
}

Introduction to Decompilation vs. Disassembly

A decompiler represents executable binary files in a readable form. More precisely, it transforms binary code into text that software developers can read and modify. The software security industry relies on this transformation to analyze and validate programs. The analysis is performed on the binary code because the source code (the text form of the software) is traditionally not available, as it is considered a commercial secret.

Programs to transform binary code into text form have always existed. Simple one-to-one mapping of processor instruction codes into instruction mnemonics is performed by disassemblers. Many disassemblers are available on the market, both free and commercial. The most powerful disassembler is our own IDA Pro. It can handle binary code for a huge number of processors and has an open architecture that allows developers to write add-on analytic modules.

Decompilers are different from disassemblers in one very important aspect. While both generate human readable text, decompilers generate much higher level text which is more concise and much easier to read.

Compared to low level assembly language, high level language representation has several advantages:

It is concise.
It is structured.
It doesn't require developers to know the assembly language.
It recognizes and converts low level idioms into high level notions.
It is less confusing and therefore easier to understand.
It is less repetitive and less distracting.
It uses data flow analysis.

Let's consider these points in detail.

Usually the decompiler's output is five to ten times shorter than the disassembler's output. For example, a typical modern program contains from 400KB to 5MB of binary code. The disassembler's output for such a program will include around 5-100MB of text, which can take anything from several weeks to several months to analyze completely. Analysts cannot spend this much time on a single program for economic reasons.

The decompiler's output for a typical program will be from 400KB to 10MB. Although this is still a big volume to read and understand (about the size of a thick book), the time needed for analysis is divided by 10 or more.

The second big difference is that the decompiler output is structured. Instead of a linear flow of instructions where each line is similar to all the others, the text is indented to make the program logic explicit. Control flow constructs such as conditional statements, loops, and switches are marked with the appropriate keywords.

The decompiler's output is easier to understand than the disassembler's output because it is high level. To be able to use a disassembler, an analyst must know the target processor's assembly language. Mainstream programmers do not use assembly languages for everyday tasks, but virtually everyone uses high level languages today. Decompilers remove the gap between the typical programming languages and the output language. More analysts can use a decompiler than a disassembler.

Decompilers convert assembly level idioms into high-level abstractions. Some idioms can be quite long and time consuming to analyze. The following one-line statement

x = y / 2;

can be transformed by the compiler into a series of 20-30 processor instructions. It takes at least 15-30 seconds for an experienced analyst to recognize the pattern and mentally replace it with the original line. If the code includes many such idioms, an analyst is forced to take notes and mark each pattern with its short representation. All this slows down the analysis tremendously. Decompilers remove this burden from the analysts.

The number of assembler instructions to analyze is huge. They look very similar to each other and their patterns are very repetitive. Reading disassembler output is nothing like reading a captivating story. In a compiler-generated program 95% of the code will be really boring to read and analyze. It is extremely easy for an analyst to confuse two similar-looking snippets of code, and simply lose his way in the output. These two factors (the size and the boring nature of the text) lead to the following phenomenon: binary programs are never fully analyzed. Analysts try to locate suspicious parts by using some heuristics and some automation tools. Exceptions happen when the program is extremely small or an analyst devotes a disproportionately huge amount of time to the analysis. Decompilers alleviate both problems: their output is shorter and less repetitive. The output still contains some repetition, but it is manageable by a human being. Besides, this repetition can be addressed by automating the analysis.

Repetitive patterns in the binary code call for a solution. One obvious solution is to employ the computer to find patterns and somehow reduce them into something shorter and easier for human analysts to grasp. Some disassemblers (including IDA Pro) provide a means to automate analysis. However, the number of available analytical modules stays low, so repetitive code continues to be a problem. The main reason is that recognizing binary patterns is a surprisingly difficult task. Any "simple" action, including basic arithmetic operations such as addition and subtraction, can be represented in an endless number of ways in binary form. The compiler might use the addition operator for subtraction and vice versa. It can store constant numbers somewhere in its memory and load them when needed. It can use the fact that, after some operations, the register value can be proven to be a known constant, and just use the register without reinitializing it. The diversity of methods used explains the small number of available analytical modules.

The situation is different with a decompiler. Automation becomes much easier because the decompiler provides the analyst with high level notions. Many patterns are automatically recognized and replaced with abstract notions. The remaining patterns can be detected easily because of the formalisms the decompiler introduces. For example, the notions of function parameters and calling conventions are strictly formalized. Decompilers make it extremely easy to find the parameters of any function call, even if those parameters are initialized far away from the call instruction. With a disassembler, this is a daunting task, which requires handling each case individually.

Decompilers, in contrast with disassemblers, perform extensive data flow analysis on the input. This means that questions such as, "Where is the variable initialized?" and, "Is this variable used?" can be answered immediately, without doing any extensive search over the function. Analysts routinely pose and answer these questions, and having the answers immediately increases their productivity.

Side-by-side comparisons of disassembly and decompilation

Below you will find side-by-side comparisons of disassembly and decompilation outputs.

The following examples are displayed on this page:

Division by two
Simple enough?
Where's my variable?
Arithmetics is not a rocket science
Sample window procedure
Short-circuit evaluation
Inlined string operations

Division by two

Just note the difference in size! The disassembler output requires you not only to know that compilers generate such convoluted code for signed divisions and modulo operations, but also to spend your time recognizing the patterns. Needless to say, the decompiler makes things really simple.

; =============== S U B R O U T I N E =======================================
; int __cdecl sub_4061C0(char *Str, char *Dest)
sub_4061C0      proc near               ; CODE XREF: sub_4062F0+15p
                                        ; sub_4063D4+21p ...
Str             = dword ptr  4
Dest            = dword ptr  8
                push    esi
                push    offset aSmtp_   ; "smtp."
                push    [esp+8+Dest]    ; Dest
                call    _strcpy
                mov     esi, [esp+0Ch+Str]
                push    esi             ; Str
                call    _strlen
                add     esp, 0Ch
                xor     ecx, ecx
                test    eax, eax
                jle     short loc_4061ED
loc_4061E2:                             ; CODE XREF: sub_4061C0+2Bj
                cmp     byte ptr [ecx+esi], 40h
                jz      short loc_4061ED
                inc     ecx
                cmp     ecx, eax
                jl      short loc_4061E2
loc_4061ED:                             ; CODE XREF: sub_4061C0+20j
                                        ; sub_4061C0+26j
                dec     eax
                cmp     ecx, eax
                jl      short loc_4061F6
                xor     eax, eax
                pop     esi
                retn
; ---------------------------------------------------------------------------
loc_4061F6:                             ; CODE XREF: sub_4061C0+30j
                lea     eax, [ecx+esi+1]
                push    eax             ; Source
                push    [esp+8+Dest]    ; Dest
                call    _strcat
                pop     ecx
                pop     ecx
                push    1
                pop     eax
                pop     esi
                retn
sub_4061C0      endp

signed int __cdecl sub_4061C0(char *Str, char *Dest)
{
  int len; // eax@1
  int i; // ecx@1
  char *str2; // esi@1
  signed int result; // eax@5
  strcpy(Dest, "smtp.");
  str2 = Str;
  len = strlen(Str);
  for ( i = 0; i < len; ++i )
  {
    if ( str2[i] == 64 )
      break;
  }
  if ( i < len - 1 )
  {
    strcat(Dest, &str2[i + 1]);
    result = 1;
  }
  else
  {
    result = 0;
  }
  return result;
}

Simple enough?

Questions like

What are the possible return values of the function?
Does the function use any strings?
What does the function do?

can be answered almost instantaneously by looking at the decompiler output. Needless to say, it looks better because I renamed the local variables. In the disassembler, registers are renamed very rarely because it hides the register use and can lead to confusion.

; =============== S U B R O U T I N E =======================================
; int __cdecl sub_4061C0(char *Str, char *Dest)
sub_4061C0      proc near               ; CODE XREF: sub_4062F0+15p
                                        ; sub_4063D4+21p ...
Str             = dword ptr  4
Dest            = dword ptr  8
                push    esi
                push    offset aSmtp_   ; "smtp."
                push    [esp+8+Dest]    ; Dest
                call    _strcpy
                mov     esi, [esp+0Ch+Str]
                push    esi             ; Str
                call    _strlen
                add     esp, 0Ch
                xor     ecx, ecx
                test    eax, eax
                jle     short loc_4061ED
loc_4061E2:                             ; CODE XREF: sub_4061C0+2Bj
                cmp     byte ptr [ecx+esi], 40h
                jz      short loc_4061ED
                inc     ecx
                cmp     ecx, eax
                jl      short loc_4061E2
loc_4061ED:                             ; CODE XREF: sub_4061C0+20j
                                        ; sub_4061C0+26j
                dec     eax
                cmp     ecx, eax
                jl      short loc_4061F6
                xor     eax, eax
                pop     esi
                retn
; ---------------------------------------------------------------------------
loc_4061F6:                             ; CODE XREF: sub_4061C0+30j
                lea     eax, [ecx+esi+1]
                push    eax             ; Source
                push    [esp+8+Dest]    ; Dest
                call    _strcat
                pop     ecx
                pop     ecx
                push    1
                pop     eax
                pop     esi
                retn
sub_4061C0      endp

signed int __cdecl sub_4061C0(char *Str, char *Dest)
{
  int len; // eax@1
  int i; // ecx@1
  char *str2; // esi@1
  signed int result; // eax@5
  strcpy(Dest, "smtp.");
  str2 = Str;
  len = strlen(Str);
  for ( i = 0; i < len; ++i )
  {
    if ( str2[i] == 64 )
      break;
  }
  if ( i < len - 1 )
  {
    strcat(Dest, &str2[i + 1]);
    result = 1;
  }
  else
  {
    result = 0;
  }
  return result;
}

Where's my variable?

IDA highlights the current identifier. This feature turns out to be much more useful with high level output. In this sample, I tried to trace how the retrieved function pointer is used by the function. In the disassembly output, many wrong eax occurrences are highlighted while the decompiler did exactly what I wanted.

; =============== S U B R O U T I N E =======================================
; int __cdecl myfunc(wchar_t *Str, int)
myfunc          proc near               ; CODE XREF: sub_4060+76p
                                        ; .text:42E4p
Str             = dword ptr  4
arg_4           = dword ptr  8
                mov     eax, dword_1001F608
                cmp     eax, 0FFFFFFFFh
                jnz     short loc_10003AB6
                push    offset aGetsystemwindo ; "GetSystemWindowsDirectoryW"
                push    offset aKernel32_dll ; "KERNEL32.DLL"
                call    ds:GetModuleHandleW
                push    eax             ; hModule
                call    ds:GetProcAddress
                mov     dword_1001F608, eax
loc_10003AB6:                           ; CODE XREF: myfunc+8j
                test    eax, eax
                push    esi
                mov     esi, [esp+4+arg_4]
                push    edi
                mov     edi, [esp+8+Str]
                push    esi
                push    edi
                jz      short loc_10003ACA
                call    eax ; dword_1001F608
                jmp     short loc_10003AD0
; ---------------------------------------------------------------------------
loc_10003ACA:                           ; CODE XREF: myfunc+34j
                call    ds:GetWindowsDirectoryW
loc_10003AD0:                           ; CODE XREF: myfunc+38j
                sub     esi, eax
                cmp     esi, 5
                jnb     short loc_10003ADD
                pop     edi
                add     eax, 5
                pop     esi
                retn
; ---------------------------------------------------------------------------
loc_10003ADD:                           ; CODE XREF: myfunc+45j
                push    offset aInf_0   ; "\\inf"
                push    edi             ; Dest
                call    _wcscat
                push    edi             ; Str
                call    _wcslen
                add     esp, 0Ch
                pop     edi
                pop     esi
                retn
myfunc          endp

size_t __cdecl myfunc(wchar_t *buf, int bufsize)
{
  int (__stdcall *func)(_DWORD, _DWORD); // eax@1
  wchar_t *buf2; // edi@3
  int bufsize; // esi@3
  UINT dirlen; // eax@4
  size_t outlen; // eax@7
  HMODULE h; // eax@2
  func = g_fptr;
  if ( g_fptr == (int (__stdcall *)(_DWORD, _DWORD))-1 )
  {
    h = GetModuleHandleW(L"KERNEL32.DLL");
    func = (int (__stdcall *)(_DWORD, _DWORD))
                GetProcAddress(h, "GetSystemWindowsDirectoryW");
    g_fptr = func;
  }
  bufsize = bufsize;
  buf2 = buf;
  if ( func )
    dirlen = func(buf, bufsize);
  else
    dirlen = GetWindowsDirectoryW(buf, bufsize);
  if ( bufsize - dirlen >= 5 )
  {
    wcscat(buf2, L"\\inf");
    outlen = wcslen(buf2);
  }
  else
  {
    outlen = dirlen + 5;
  }
  return outlen;
}

Arithmetics is not a rocket science

Arithmetics is not a rocket science but it is always better if someone handles it for you. You have more important things to focus on.

; =============== S U B R O U T I N E =======================================
; Attributes: bp-based frame
; sgell(__int64, __int64)
;
; Signed 64-bit "greater than or equal" test on two stack arguments.
;   arg_0 / arg_4 = low / high dword of the first argument
;   arg_8 / arg_C = low / high dword of the second argument
; Result: eax = 1 when first >= second, otherwise 0.
                public @sgell$qjj
@sgell$qjj      proc near
arg_0           = dword ptr  8
arg_4           = dword ptr  0Ch
arg_8           = dword ptr  10h
arg_C           = dword ptr  14h
                push    ebp
                mov     ebp, esp
                mov     eax, [ebp+arg_0]        ; eax = low dword of the first argument
                mov     edx, [ebp+arg_4]        ; edx = high dword of the first argument
                cmp     edx, [ebp+arg_C]        ; compare the high dwords first
                jnz     short loc_10226         ; they differ: the high dwords alone decide
                cmp     eax, [ebp+arg_8]        ; high dwords equal: the low dwords decide
                setnb   al                      ; unsigned >= on the low halves
                jmp     short loc_10229
; ---------------------------------------------------------------------------
loc_10226:                          ; CODE XREF: sgell(__int64,__int64)+Cj
                setnl   al                      ; signed >= on the high halves (flags from cmp edx)
loc_10229:                          ; CODE XREF: sgell(__int64,__int64)+14j
                and     eax, 1                  ; keep only bit 0 so the result is 0 or 1
                pop     ebp
                retn
@sgell$qjj      endp

/* Signed 64-bit comparison: true when a1 is not less than a2. */
bool __cdecl sgell(__int64 a1, __int64 a2)
{
  return !(a1 < a2);
}

Sample window procedure

The decompiler recognized a switch statement and nicely represented the window procedure. Without this little help, the user would have to calculate the message numbers herself. Nothing particularly difficult, just time-consuming and boring. What if she makes a mistake?

; =============== S U B R O U T I N E =======================================
; wndproc: loads string resource 6Ah into Buffer, then dispatches on Msg:
;   Msg == 2     -> loc_4013E8: PostQuitMessage(0)
;   Msg == 0Fh   -> loc_4013B2: BeginPaint / my_paint / EndPaint
;   Msg == 111h  -> loc_401336: second dispatch on the low word of wParam
;   otherwise    -> DefWindowProcA with the original arguments
; The message number is decoded by successive subtraction from a copy of Msg.
; Every path releases the 0A4h bytes of locals and returns with "retn 10h",
; which also removes the four dword arguments from the stack.
wndproc         proc near               ; DATA XREF: sub_4010E0+21o
Paint           = tagPAINTSTRUCT ptr -0A4h
Buffer          = byte ptr -64h
hWnd            = dword ptr  4
Msg             = dword ptr  8
wParam          = dword ptr  0Ch
lParam          = dword ptr  10h
                mov     ecx, hInstance
                sub     esp, 0A4h               ; room for Paint and Buffer
                lea     eax, [esp+0A4h+Buffer]
                push    64h             ; nBufferMax
                push    eax             ; lpBuffer
                push    6Ah             ; uID
                push    ecx             ; hInstance
                call    ds:LoadStringA
                mov     ecx, [esp+0A4h+Msg]
                mov     eax, ecx                ; work on a copy; ecx keeps Msg intact
                sub     eax, 2
                jz      loc_4013E8              ; Msg == 2
                sub     eax, 0Dh
                jz      loc_4013B2              ; Msg == 2 + 0Dh = 0Fh (15)
                sub     eax, 102h
                jz      short loc_401336        ; Msg == 0Fh + 102h = 111h (273)
; Default case: forward the untouched arguments to DefWindowProcA.
                mov     edx, [esp+0A4h+lParam]
                mov     eax, [esp+0A4h+wParam]
                push    edx             ; lParam
                push    eax             ; wParam
                push    ecx             ; Msg
                mov     ecx, [esp+0B0h+hWnd]    ; 0B0h = 0A4h + the three dwords just pushed
                push    ecx             ; hWnd
                call    ds:DefWindowProcA
                add     esp, 0A4h
                retn    10h
; ---------------------------------------------------------------------------
; Msg == 111h: dispatch on the low 16 bits of wParam.
loc_401336:                             ; CODE XREF: wndproc+3Cj
                mov     ecx, [esp+0A4h+wParam]
                mov     eax, ecx
                and     eax, 0FFFFh             ; keep only the low word of wParam
                sub     eax, 68h
                jz      short loc_40138A        ; low word == 68h (104)
                dec     eax
                jz      short loc_401371        ; low word == 69h (105)
; Any other low word: DefWindowProcA with the message number as a constant.
                mov     edx, [esp+0A4h+lParam]
                mov     eax, [esp+0A4h+hWnd]
                push    edx             ; lParam
                push    ecx             ; wParam
                push    111h            ; Msg
                push    eax             ; hWnd
                call    ds:DefWindowProcA
                add     esp, 0A4h
                retn    10h
; ---------------------------------------------------------------------------
; Low word of wParam == 69h: destroy the window and return 0.
loc_401371:                             ; CODE XREF: wndproc+7Aj
                mov     ecx, [esp+0A4h+hWnd]
                push    ecx             ; hWnd
                call    ds:DestroyWindow
                xor     eax, eax
                add     esp, 0A4h
                retn    10h
; ---------------------------------------------------------------------------
; Low word of wParam == 68h: run dialog template 67h and return 0.
loc_40138A:                             ; CODE XREF: wndproc+77j
                mov     edx, [esp+0A4h+hWnd]
                mov     eax, hInstance
                push    0               ; dwInitParam
                push    offset DialogFunc ; lpDialogFunc
                push    edx             ; hWndParent
                push    67h             ; lpTemplateName
                push    eax             ; hInstance
                call    ds:DialogBoxParamA
                xor     eax, eax
                add     esp, 0A4h
                retn    10h
; ---------------------------------------------------------------------------
; Msg == 0Fh: paint the window and return 0.
loc_4013B2:                             ; CODE XREF: wndproc+31j
                push    esi                     ; esi is saved here, so offsets below use 0A8h
                mov     esi, [esp+0A8h+hWnd]
                lea     ecx, [esp+0A8h+Paint]
                push    ecx             ; lpPaint
                push    esi             ; hWnd
                call    ds:BeginPaint
                push    eax             ; HDC
                push    esi             ; hWnd
                call    my_paint
                add     esp, 8                  ; drop the two my_paint arguments
                lea     edx, [esp+0A8h+Paint]
                push    edx             ; lpPaint
                push    esi             ; hWnd
                call    ds:EndPaint
                pop     esi                     ; restore esi saved on entry to this path
                xor     eax, eax
                add     esp, 0A4h
                retn    10h
; ---------------------------------------------------------------------------
; Msg == 2: post the quit message and return 0.
loc_4013E8:                             ; CODE XREF: wndproc+28j
                push    0               ; nExitCode
                call    ds:PostQuitMessage
                xor     eax, eax
                add     esp, 0A4h
                retn    10h
wndproc         endp


/*
 * Window procedure of the sample application.
 *
 * Loads string resource 0x6A into a local buffer on every call, then
 * dispatches on the message number:
 *   2    posts the quit message
 *   15   paints the window through my_paint between BeginPaint/EndPaint
 *   273  acts on the low word of wParam: 104 runs dialog template 0x67,
 *        105 destroys the window, anything else goes to DefWindowProcA
 * Every other message is passed to DefWindowProcA unchanged.
 * (2 / 15 / 273 match WM_DESTROY / WM_PAINT / WM_COMMAND in the Win32
 * headers -- confirm before replacing the literals with the names.)
 *
 * Returns 0 for the messages handled here, otherwise the DefWindowProcA
 * result.
 */
LRESULT __stdcall wndproc(HWND hWnd, UINT Msg, WPARAM wParam, LPARAM lParam)
{
  LRESULT result; // eax@4
  HWND h; // esi@10
  HDC dc; // eax@10
  // Declared with its full size: LoadStringA below is told the buffer holds
  // 100 characters, and the frame reserves 0x64 bytes for it.
  CHAR Buffer[100]; // [sp+40h] [bp-64h]@1
  struct tagPAINTSTRUCT Paint; // [sp+0h] [bp-A4h]@10

  LoadStringA(hInstance, 0x6Au, Buffer, 100);
  switch ( Msg )
  {
    case 2u:
      PostQuitMessage(0);
      result = 0;
      break;
    case 15u:
      h = hWnd;
      dc = BeginPaint(hWnd, &Paint);
      my_paint(h, dc);
      EndPaint(h, &Paint);
      result = 0;
      break;
    case 273u:
      // Only the low 16 bits of wParam select the action.
      if ( (_WORD)wParam == 104 )
      {
        DialogBoxParamA(hInstance, (LPCSTR)0x67, hWnd, DialogFunc, 0);
        result = 0;
      }
      else
      {
        if ( (_WORD)wParam == 105 )
        {
          DestroyWindow(hWnd);
          result = 0;
        }
        else
        {
          result = DefWindowProcA(hWnd, 0x111u, wParam, lParam);
        }
      }
      break;
    default:
      result = DefWindowProcA(hWnd, Msg, wParam, lParam);
      break;
  }
  return result;
}

Short-circuit evaluation

This is an excerpt from a big function to illustrate short-circuit evaluation. Complex things happen in long functions, and it is very handy to have the decompiler represent things in a human-readable way. Please note how the code that was scattered over the address space is concisely displayed in two if statements.

; Excerpt from sub_804BB10. Each strstr call receives dword_804FFD4 in the
; first stack slot (var_28) and a string literal in the second (var_24).
; The fall-through path tests the lower-case literals; the upper-case
; variants are tested in out-of-line blocks (loc_804C4F1, loc_804C495) that
; jump back into the main flow.
loc_804BCC7:                            ; CODE XREF: sub_804BB10+A42j
                mov     [esp+28h+var_24], offset aUnzip ; "unzip"
                xor     eax, eax
                test    esi, esi
                setnz   al                      ; eax = (esi != 0)
                mov     edx, 1
                mov     ds:dword_804FBAC, edx   ; dword_804FBAC = 1
                lea     eax, [eax+eax+1]        ; eax = 2 * (esi != 0) + 1
                mov     ds:dword_804F780, eax
                mov     eax, ds:dword_804FFD4
                mov     [esp+28h+var_28], eax
                call    _strstr
                test    eax, eax
                jz      loc_804C4F1             ; "unzip" not found: try "UNZIP"
; Reached when either "unzip" or "UNZIP" was found.
loc_804BCFF:                            ; CODE XREF: sub_804BB10+9F8j
                mov     eax, 2
                mov     ds:dword_804FBAC, eax   ; dword_804FBAC = 2
loc_804BD09:                            ; CODE XREF: sub_804BB10+9FEj
                mov     [esp+28h+var_24], offset aZ2cat ; "z2cat"
                mov     eax, ds:dword_804FFD4
                mov     [esp+28h+var_28], eax
                call    _strstr
                test    eax, eax
                jz      loc_804C495             ; "z2cat" not found: try the other spellings
; Reached when any of "z2cat", "Z2CAT", "zcat" or "ZCAT" was found.
loc_804BD26:                            ; CODE XREF: sub_804BB10+99Cj
                                        ; sub_804BB10+9B9j ...
                mov     eax, 2
                mov     ds:dword_804FBAC, eax   ; dword_804FBAC = 2
                xor     eax, eax
                test    esi, esi
                setnz   al
                inc     eax                     ; eax = (esi != 0) + 1
                mov     ds:dword_804F780, eax
  .............................. SKIP ............................
; Out-of-line continuation of the "z2cat" test: the first match jumps back
; to loc_804BD26; when none matches, control goes to loc_804BD3D, which is
; not part of this excerpt.
loc_804C495:                            ; CODE XREF: sub_804BB10+210j
                mov     [esp+28h+var_24], offset aZ2cat_0 ; "Z2CAT"
                mov     eax, ds:dword_804FFD4
                mov     [esp+28h+var_28], eax
                call    _strstr
                test    eax, eax
                jnz     loc_804BD26
                mov     [esp+28h+var_24], offset aZcat ; "zcat"
                mov     eax, ds:dword_804FFD4
                mov     [esp+28h+var_28], eax
                call    _strstr
                test    eax, eax
                jnz     loc_804BD26
                mov     [esp+28h+var_24], offset aZcat_0 ; "ZCAT"
                mov     eax, ds:dword_804FFD4
                mov     [esp+28h+var_28], eax
                call    _strstr
                test    eax, eax
                jnz     loc_804BD26
                jmp     loc_804BD3D
; ---------------------------------------------------------------------------
; Out-of-line continuation of the "unzip" test.
loc_804C4F1:                            ; CODE XREF: sub_804BB10+1E9j
                mov     [esp+28h+var_24], offset aUnzip_0 ; "UNZIP"
                mov     eax, ds:dword_804FFD4
                mov     [esp+28h+var_28], eax
                call    _strstr
                test    eax, eax
                jnz     loc_804BCFF             ; found: set dword_804FBAC = 2
                jmp     loc_804BD09             ; not found: continue with the "z2cat" test

// Excerpt from a larger decompiled function; v9 and the dword_* globals are
// declared outside this fragment.
// dword_804F780 becomes 3 when v9 is non-zero and 1 otherwise.
dword_804F780 = 2 * (v9 != 0) + 1;
  // Either spelling of "unzip" inside dword_804FFD4 sets dword_804FBAC to 2.
  // || stops at the first match, so the second strstr runs only when the
  // first one found nothing.
  if ( strstr(dword_804FFD4, "unzip") || strstr(dword_804FFD4, "UNZIP") )
    dword_804FBAC = 2;
  // Any of the four "cat" spellings sets dword_804FBAC to 2 and changes
  // dword_804F780 to 2 when v9 is non-zero and 1 otherwise.
  if ( strstr(dword_804FFD4, "z2cat")
    || strstr(dword_804FFD4, "Z2CAT")
    || strstr(dword_804FFD4, "zcat")
    || strstr(dword_804FFD4, "ZCAT") )
  {
    dword_804FBAC = 2;
    dword_804F780 = (v9 != 0) + 1;
  }

Inlined string operations

The decompiler tries to recognize frequently inlined string functions such as strcmp, strchr, strlen, etc. In this code snippet, calls to the strlen function have been recognized.

 mov     eax, [esp+argc]
                sub     esp, 8
                push    ebx
                push    ebp
                push    esi
                lea     ecx, ds:0Ch[eax*4]
                push    edi
                push    ecx             ; unsigned int
                call    ??2@YAPAXI@Z    ; operator new(uint)
                mov     edx, [esp+1Ch+argv]
                mov     ebp, eax
                or      ecx, 0FFFFFFFFh
                xor     eax, eax
                mov     esi, [edx]
                add     esp, 4
                mov     edi, esi
                repne scasb
                not     ecx
                dec     ecx
                cmp     ecx, 4
                jl      short loc_401064
                cmp     byte ptr [ecx+esi-4], '.'
                jnz     short loc_401064
                mov     al, [ecx+esi-3]
                cmp     al, 'e'
                jz      short loc_401047
                cmp     al, 'E'
                jnz     short loc_401064
loc_401047:                             ; CODE XREF: _main+41j
                mov     al, [ecx+esi-2]
                cmp     al, 'x'
                jz      short loc_401053
                cmp     al, 'X'
                jnz     short loc_401064
loc_401053:                             ; CODE XREF: _main+4Dj
                mov     al, [ecx+esi-1]
                cmp     al, 'e'
                jz      short loc_40105F
                cmp     al, 'E'
                jnz     short loc_401064
loc_40105F:                             ; CODE XREF: _main+59j
                mov     byte ptr [ecx+esi-4], 0
loc_401064:                             ; CODE XREF: _main+32j _main+39j ... 
                mov     edi, esi
                or      ecx, 0FFFFFFFFh
                xor     eax, eax
                repne scasb
                not     ecx
                add     ecx, 3
                push    ecx             ; unsigned int
                call    ??2@YAPAXI@Z    ; operator new(uint)
                mov     edx, eax