Instruction Similarity

Using Evoasm's Evoasm::X64::CPUState class it's possible to analyze how an instruction modifies the CPU's registers.

The following examples finds for each instruction the 32 most similar instructions by running each on a random CPU state and comparing the results.

The result is visualized in this table.

begin
  require 'evoasm'
  require 'evoasm/x64'
  require 'evoasm/x64/cpu_state'
rescue LoadError
  $LOAD_PATH.unshift '../lib'
end

require 'yaml'

instruction_names = Evoasm::X64::instruction_names(:gp, :rflags, :xmm)
instructions = instruction_names.map do |name|
  Evoasm::X64.instruction name
end


buffer = Evoasm::Buffer.new 1024, :mmap

def run(buffer, instruction, parameters, cpu_state)
  #puts "Running #{instruction.name} #{%i(reg0 reg1 reg2 imm0 reg0_high_byte? reg1_high_byte?).map {|p| parameters[p]}.inspect}"
  buffer.reset
  Evoasm::X64.emit_stack_frame buffer do
    cpu_state.emit_load buffer
    instruction.encode parameters, buffer
    cpu_state.emit_store buffer
  end

  begin
    buffer.execute!
  rescue Evoasm::ExceptionError => e
  end
end

dists = Array.new(instructions.size)

COLORS = [[1, 0, 0], [1, 1, 0], [0, 1, 0], [0, 1, 1], [0, 0, 1]].freeze


def text_color(bg_color)
  luminance = 1 - (0.299 * bg_color[0] + 0.587 * bg_color[1] + 0.114 * bg_color[2]) / 255.0
  return 'black' if luminance < 0.5
  'white'
end


def heat_color(dist, min_dist, max_dist)
  return ["black", "white"] if dist.nil?

  normalized_dist = (dist - min_dist) / (max_dist - min_dist)

  distance = normalized_dist * (COLORS.size - 2)
  floor_distance = distance.to_i
  floor_color = COLORS[floor_distance]
  ceil_color = COLORS[floor_distance + 1]
  relative_distance = distance - floor_distance

  heat_color = Array.new(3) do |color_index|
    floor_color[color_index] + relative_distance * (ceil_color[color_index] - floor_color[color_index])
  end

  heat_color.map! do |color|
    (color * 255).to_i
  end

  text_color = text_color(heat_color)
  bg_color = "#%02x%02x%02x".%(heat_color)

  [bg_color, text_color]
end

MAX_ABSDIFF_DIST = 1000
MAX_HAMMING_DIST = 0.15
MAX_TRIES = 50

def select_best(dists)
  dists.sort_by {|hash| hash[:dist]}
    .take(64)
end

IMMS = [0, 1, -1, +2, -2, -8, 8, 0xFF, 0x0F, 0xF0, 0xFF00, 0xFF00FF00, 0b01010101, 0b10101010, 2**10, 2**15, 2**20, 2**30]
PARAMS = %i(reg0 reg1 reg2 reg3 imm0)

param_names_cache = []
imm_insts = []

instructions.each_with_index do |inst, inst_index|
  param_names = inst.parameters.map(&:name)
  param_names_cache[inst_index] = param_names
  imm_insts[inst_index] = param_names.include? :imm0
end

instructions.each_with_index do |inst, index|

  absdiff_dists = []
  hamming_dists = []

  instructions.each_with_index do |other_inst, other_index|

    n = 10

    if !imm_insts[index] && imm_insts[other_index]
      imms = IMMS
    else
      imms = [nil]
    end

    imms.each do |imm|

      mean_absdiff_dist = 0
      mean_hamming_dist = 0

      actual_n = 0
      tries = 0

      while actual_n < n && tries < MAX_TRIES
        parameters = Evoasm::X64::Parameters.random inst, other_inst

        tries += 1

        if parameters.nil?
          next
        end

        #p [parameters[:reg0], parameters[:reg1], parameters[:reg3]]

        parameters[:imm0] = imm if imm

        cpu_state = Evoasm::X64::CPUState.random
        other_cpu_state = cpu_state.clone

        begin
          run buffer, inst, parameters, cpu_state
          run buffer, other_inst, parameters, other_cpu_state

          absdiff_dist = cpu_state.distance(other_cpu_state, :absdiff)
          hamming_dist = cpu_state.distance(other_cpu_state, :hamming)

          if inst.name == other_inst.name
            raise "#{absdiff_dist}" unless absdiff_dist == 0
            raise "#{hamming_dist}" unless hamming_dist == 0
          end

          mean_absdiff_dist += absdiff_dist / n
          mean_hamming_dist += hamming_dist / n
          actual_n += 1
        rescue Evoasm::Error => e
          #puts [e.message, other_instruction.name]
        end
      end

      if actual_n > 0
        if mean_absdiff_dist < MAX_ABSDIFF_DIST || mean_hamming_dist < MAX_HAMMING_DIST

          additional_params = (param_names_cache[other_index] - param_names_cache[index]) & PARAMS

          if mean_absdiff_dist < MAX_ABSDIFF_DIST
            absdiff_dists << {inst: other_inst.name, dist: mean_absdiff_dist, imm: imm, params: additional_params}
          end

          if mean_hamming_dist < MAX_HAMMING_DIST
            hamming_dists << {inst: other_inst.name, dist: mean_hamming_dist, imm: imm, params: additional_params}
          end

        end
      end
    end
  end

  dists[index] = {
    absdiff: select_best(absdiff_dists),
    hamming: select_best(hamming_dists),
  }

  puts "#{(index / instructions.size.to_f * 100).to_i}%"
  puts "#{index} / #{instructions.size}"
  puts
end


html = "<html>"
html << "<table>"
instructions.each_with_index do |instruction, index|

  dists[index].each_with_index do |(_, inst_dists), index|
    html << "<tr>"
    html << %(<th scope="row" rowspan="2">#{instruction.name}</th>) if index == 0

    max_dist = index.zero? ? MAX_ABSDIFF_DIST : MAX_HAMMING_DIST

    inst_dists.each do |hash|
      dist = hash[:dist]
      imm = hash[:imm]
      other_inst_name = hash[:inst]
      bg_color, text_color = heat_color dist, 0, max_dist
      html << %(<td style="background-color: #{bg_color}; color: #{text_color}">#{other_inst_name}/#{dist.round 3} (#{imm})</td>)
    end
    html << "</tr>"
  end


end

html << "</table>"
html << "</html>"

File.write File.join(__dir__, 'inst_dist.yml'), YAML.dump(instruction_names.zip(dists).to_h)
File.write File.join(__dir__, 'inst_dist.html'), html