#!/usr/bin/env ruby
# frozen_string_literal: true

# This helps benchmark current performance of Dalli
# as well as compare performance of optimized and non-optimized calls like multi-set vs set
#
# run with:
# bundle exec bin/benchmark
# RUBY_YJIT_ENABLE=1 BENCH_TARGET=get bundle exec bin/benchmark
require 'bundler/inline'

gemfile do
  source 'https://rubygems.org'
  gem 'benchmark-ips'
  gem 'logger'
end

require 'json'
require 'benchmark/ips'
require 'monitor'
require_relative '../lib/dalli'

##
# Pass-through serializer: values are handed back exactly as received, which
# avoids the overhead of Marshal or JSON.
##
class NoopSerializer
  class << self
    # Returns +value+ untouched.
    def dump(value)
      value
    end

    # Returns +value+ untouched.
    def load(value)
      value
    end
  end
end

# Benchmark configuration, each value overridable through an environment variable:
#   BENCH_CACHE_URL    - server string handed to the Dalli clients
#   BENCH_TARGET       - suite to run: all, set, get, get_multi or set_multi
#   BENCH_TIME         - passed to benchmark-ips as `time:`
#   BENCH_WARMUP       - passed to benchmark-ips as `warmup:`
#   BENCH_PAYLOAD_SIZE - size in bytes of the value used by the single-key benchmarks
dalli_url = ENV['BENCH_CACHE_URL'] || '127.0.0.1:11211'
bench_target = ENV['BENCH_TARGET'] || 'all'
bench_time = (ENV['BENCH_TIME'] || 10).to_i
bench_warmup = (ENV['BENCH_WARMUP'] || 3).to_i
bench_payload_size = (ENV['BENCH_PAYLOAD_SIZE'] || 700_000).to_i
payload = 'B' * bench_payload_size
TERMINATOR = "\r\n"
# RubyVM::YJIT does not exist on every Ruby (builds without YJIT, non-CRuby implementations);
# report false there instead of aborting the whole benchmark with a NameError.
yjit_enabled = defined?(RubyVM::YJIT) && RubyVM::YJIT.enabled?
puts "yjit: #{yjit_enabled || false}"

# All Dalli clients share the same settings (no-op serializer, no compression, raw values);
# only the server list and the protocol differ between them.
client_options = { serializer: NoopSerializer, compress: false, raw: true }
client = Dalli::Client.new(dalli_url, **client_options)
meta_client = Dalli::Client.new(dalli_url, protocol: :meta, **client_options)
multi_client = Dalli::Client.new('localhost:11211,localhost:11222', **client_options)

# A bare TCP connection serves as the baseline: comparing it with the clients above shows
# the performance of dalli and the overhead of the various abstractions in the library.
sock = TCPSocket.new('127.0.0.1', '11211', connect_timeout: 1)
[
  [Socket::IPPROTO_TCP, Socket::TCP_NODELAY],
  [Socket::SOL_SOCKET, Socket::SO_KEEPALIVE]
].each { |level, option| sock.setsockopt(level, option, true) }
# Benchmarks didn't see any performance gains from increasing the SO_RCVBUF buffer size
# sock.setsockopt(Socket::SOL_SOCKET, ::Socket::SO_RCVBUF, 1024 * 1024 * 8)
# Benchmarks did see an improvement in performance when increasing the SO_SNDBUF buffer size
# sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDBUF, 1024 * 1024 * 8)

# Ensure the clients are all connected and round-trip the payload before anything is measured.
client.set('key', payload)
meta_client.set('meta_key', payload)
multi_client.set('multi_key', payload)
# Text-protocol set on the raw socket: command line, data block, terminator.
sock.write("set sock_key 0 3600 #{payload.bytesize}\r\n")
sock.write(payload)
sock.write(TERMINATOR)
sock.flush
# Consume the status line. Anything other than STORED means sock_key does not hold the
# payload, and the raw socket reads further down would wait on data that never arrives.
sock_set_status = sock.readline
raise "sock set failed: #{sock_set_status.strip}" unless sock_set_status.start_with?('STORED')

raise 'dalli client mismatch' if payload != client.get('key')

# The meta client is benchmarked as well, so verify it like the other clients.
raise 'meta dalli client mismatch' if payload != meta_client.get('meta_key')

raise 'multi dalli client mismatch' if payload != multi_client.get('multi_key')

sock.write("mg sock_key v\r\n")
# A hit is announced by a "VA <size>" header line followed by the data block. On any other
# reply there is no data block, so fail here rather than block in the fixed-size read.
sock_get_header = sock.readline
raise "sock get failed: #{sock_get_header.strip}" unless sock_get_header.start_with?('VA ')

sock_value = sock.read(payload.bytesize)
sock.read(TERMINATOR.bytesize)
raise 'sock mismatch' if payload != sock_value

# Seed the data used by the multi-key benchmarks: 100 keys (multi_0 .. multi_99), each
# holding a value one tenth the size of the main payload.
payload_smaller = 'B' * (bench_payload_size / 10)
pairs = (0...100).to_h { |i| ["multi_#{i}", payload_smaller] }
client.quiet do
  pairs.each_pair { |key, value| client.set(key, value, 3600, raw: true) }
end

###
# GC Suite
# Benchmark without GC skewing things: a full collection runs and the collector is
# switched off in the warming/running hooks, and it is switched back on in the
# warmup_stats/add_report hooks.
###
class GCSuite
  # Collects garbage, then leaves the GC disabled. Arguments are ignored.
  def warming(*)
    collect_then_pause_gc
  end
  alias running warming

  # Re-enables the GC. Arguments are ignored.
  def warmup_stats(*)
    GC.enable
  end
  alias add_report warmup_stats

  private

  # Enable first so that GC.start is able to run, then disable again.
  def collect_then_pause_gc
    GC.enable
    GC.start
    GC.disable
  end
end
suite = GCSuite.new

# rubocop:disable Metrics/MethodLength
# rubocop:disable Metrics/PerceivedComplexity
# rubocop:disable Metrics/AbcSize
# rubocop:disable Metrics/CyclomaticComplexity
# Fetches every key of +pairs+ over a raw socket using pipelined meta-get (mg) commands.
#
# Every request except the last carries the quiet flag (q), so misses on those keys produce
# no reply at all. The last request is sent without it; its reply, a value (VA) or a
# miss (EN), therefore marks the end of the response stream.
#
# @param sock [#write, #flush, #readline, #read] connected memcached socket
# @param pairs [Hash] hash whose keys are fetched (the values are not used)
# @return [Hash] key => value for every key the server returned a value for
def sock_get_multi(sock, pairs)
  # Nothing would be written, so there is no reply to wait for.
  return {} if pairs.empty?

  keys = pairs.keys
  final_key = keys.last
  keys.each do |key|
    tail = key.equal?(final_key) ? '' : 'q'
    sock.write("mg #{key} v f k #{tail}\r\n")
  end
  sock.flush

  # read all the memcached responses back and build a hash of key value pairs
  results = {}
  loop do
    # Non-destructive chomp: chomp! returns nil when there is nothing to strip.
    line = sock.readline.chomp(TERMINATOR)
    # Only the non-quiet final request can produce a miss; it carries no value block.
    # Servers answer either a bare "EN" or "EN" followed by echoed flags.
    break if line.empty? || line == 'EN' || line.start_with?('EN ')
    next unless line.start_with?('VA ')

    _, value_length, _flags, key_token = line.split
    key = key_token[1..] # drop the leading "k" of the key flag
    results[key] = sock.read(value_length.to_i)
    sock.read(TERMINATOR.bytesize)
    # The final key's value ends the stream even when earlier keys were (silently) missed.
    break if key == final_key.to_s || results.size == pairs.size
  end
  results
end
# rubocop:enable Metrics/MethodLength
# rubocop:enable Metrics/PerceivedComplexity
# rubocop:enable Metrics/AbcSize
# rubocop:enable Metrics/CyclomaticComplexity

# Single-key write benchmark, run when BENCH_TARGET is "all" or "set". Compares the default
# client, the meta-protocol client and a hand-written meta set (ms) on the raw socket.
if %w[all set].include?(bench_target)
  Benchmark.ips do |x|
    x.config(warmup: bench_warmup, time: bench_time, suite: suite)
    x.report('client set') { client.set('key', payload) }
    x.report('meta client set') { meta_client.set('meta_key', payload) }
    # x.report('multi client set') { multi_client.set('string_key', payload) }
    x.report('raw sock set') do
      # Command line, then the data block, then the terminator.
      # NOTE(review): T3600 looks like the TTL and MS the "set" mode of the meta protocol;
      # confirm against memcached's protocol.txt.
      sock.write("ms sock_key #{payload.bytesize} T3600 MS\r\n")
      sock.write(payload)
      sock.write("\r\n")
      sock.flush
      sock.readline # clear the buffer (consume the reply line before the next iteration)
    end
    x.compare!
  end
end

# Monitor held around the socket exchange in the 'get sock non-blocking' report below.
@lock = Monitor.new
# Single-key read benchmark, run when BENCH_TARGET is "all" or "get". Every report checks
# that the value read back equals the payload.
if %w[all get].include?(bench_target)
  Benchmark.ips do |x|
    x.config(warmup: bench_warmup, time: bench_time, suite: suite)
    x.report('get dalli') do
      result = client.get('key')
      raise 'mismatch' unless result == payload
    end
    x.report('get meta client') do
      result = meta_client.get('meta_key')
      raise 'mismatch' unless result == payload
    end
    # NOTE: this is the fastest variant, but it is not thread safe and it uses blocking
    # reads rather than being IO-sharing friendly
    x.report('get sock') do
      sock.write("mg sock_key v\r\n")
      sock.readline # response header line; its content is not inspected
      result = sock.read(payload.bytesize)
      sock.read(TERMINATOR.bytesize) # consume the terminator after the value
      raise 'mismatch' unless result == payload
    end
    # NOTE: this shows that adding thread safety and non-blocking IO makes us slower in the
    # single process/thread use case
    x.report('get sock non-blocking') do
      @lock.synchronize do
        sock.write("mg sock_key v\r\n")
        sock.readline # response header line; its content is not inspected
        count = payload.bytesize
        value = String.new(capacity: count + 1)
        # Keep reading until the buffer holds `count` bytes; each read asks only for the
        # bytes that are still missing.
        loop do
          begin
            value << sock.read_nonblock(count - value.bytesize)
          rescue Errno::EAGAIN
            # No data available yet: wait until the socket is readable, then read again.
            sock.wait_readable
            retry
          rescue EOFError
            # Connection closed mid-value; the mismatch check below then fails the report.
            puts 'EOFError'
            break
          end
          break if value.bytesize == count
        end
        sock.read(TERMINATOR.bytesize) # consume the terminator after the value
        raise 'mismatch' unless value == payload
      end
    end
    x.compare!
  end
end

# Multi-key read benchmark, run when BENCH_TARGET is "all" or "get_multi". Each report
# fetches all keys of `pairs` and checks that the result equals `pairs`.
if %w[all get_multi].include?(bench_target)
  Benchmark.ips do |x|
    x.config(warmup: bench_warmup, time: bench_time, suite: suite)
    x.report('get 100 keys') do
      result = client.get_multi(pairs.keys)
      raise 'mismatch' unless result == pairs
    end
    x.report('get 100 keys meta client') do
      result = meta_client.get_multi(pairs.keys)
      raise 'mismatch' unless result == pairs
    end
    # Baseline: the same fetch done by hand on the raw socket (see sock_get_multi).
    x.report('get 100 keys raw sock') do
      result = sock_get_multi(sock, pairs)
      raise 'mismatch' unless result == pairs
    end
    x.compare!
  end
end

# Multi-key write benchmark, run when BENCH_TARGET is "all" or "set_multi". Each report
# writes every entry of `pairs`.
if %w[all set_multi].include?(bench_target)
  Benchmark.ips do |x|
    x.config(warmup: bench_warmup, time: bench_time, suite: suite)
    # One set per key inside the client's quiet block.
    x.report('write 100 keys simple') do
      client.quiet do
        pairs.each do |key, value|
          client.set(key, value, 3600, raw: true)
        end
      end
    end
    # Same loop through the meta-protocol client.
    x.report('write 100 keys meta client') do
      meta_client.quiet do
        pairs.each do |key, value|
          meta_client.set(key, value, 3600, raw: true)
        end
      end
    end
    # TODO: uncomment this once the PR adding set_multi has landed
    # x.report('multi client set_multi 100') do
    #   multi_client.set_multi(pairs, 3600, raw: true)
    # end
    x.report('write 100 keys rawsock') do
      count = pairs.length
      # NOTE(review): this initial value is overwritten inside the loop before it is used.
      tail = ''
      value_bytesize = payload_smaller.bytesize
      ttl = 3600

      pairs.each do |key, value|
        # `count` counts down to zero so that only the last command omits the quiet flag (q).
        count -= 1
        tail = count.zero? ? '' : 'q'
        # Command line, value and terminator are joined into one string and sent with a
        # single write; `capacity` presumably sizes the buffer up front to avoid regrowth.
        sock.write(String.new("ms #{key} #{value_bytesize} c F0 T#{ttl} MS #{tail}\r\n",
                              capacity: key.size + value_bytesize + 40) << value << TERMINATOR)
      end
      sock.flush
      sock.gets(TERMINATOR) # clear the buffer (reads one terminated reply line)
    end
    # x.report('write_multi 100 keys') { client.set_multi(pairs, 3600, raw: true) }
    x.compare!
  end
end
