#!/usr/bin/env ruby
# coding: utf-8
require 'open-uri'
require 'nokogiri'
require 'logger'
require 'yaml'

# Base directory of the status site; the run log lives directly in it.
SksStatusDir = '/home/sks/sks-status.gwolf.org/'

# Run log, rotated weekly (the trailing 4 is handed to Logger.new as
# its shift_size argument). Only INFO and more severe messages are kept.
Log = Logger.new(File.join(SksStatusDir, 'running.log'), 'weekly', 4).tap do |logger|
  logger.level = Logger::INFO
end

# Mutex taken by graph() around writes to the Dot file.
M = Mutex.new

# Seed servers the walk starts from, each written as 'host recon-port'
# (the format walk_from() parses). Frozen: the list is only read.
StartFrom = [ 'pgpkeys.eu 11370',
              'sks.pgpkeys.eu 11370',
              'pgp.surf.nl 11370',
              'pgp.pm 11370'
              # pool.sks-keyservers.net no longer resolves as of 2021.06.21.
            ].freeze
# Path of the statistics page, appended to 'http://host:port/' by walk_from().
StatsPage = 'pks/lookup?op=stats'

# Server line => list of its filtered gossip peers, filled in by walk_from().
Servers = {}

# One-character outcome code => list of servers that ended with that outcome.
Status = {}

# Mailsync peers are not verified; they are recognized by the '@' in
# their definition. Some well-known phrases that mean nothing for the
# analysis are skipped as well.
ServerExclude = [
  '@',
  'Outgoing Mailsync Peers',
  'Gossip peers on the left automatically redacted'
]

# Graphviz layout program used to render the generated .dot files.
GraphvizBin = '/home/sks/graphviz/bin/neato'

# Every run writes into its own directory under SksStatusDir, named
# after the start time as YYYYMMDD-HHMMSS.
n = Time.now
OutDir = File.join(SksStatusDir, n.strftime('%Y%m%d-%H%M%S'))
Dir.mkdir(OutDir)

# The full graph is opened read-write ('w+'), the green-only graph
# write-only.
Dot = File.open(File.join(OutDir, 'walk-sks.dot'), 'w+')
GreenDot = File.open(File.join(OutDir, 'walk-sks.green.dot'), 'w')

# Appends +what+ to the full Dot graph file.
#
# Called by concurrent threads in walk_from(), so the write goes
# through the mutex M. Mutex#synchronize releases the lock even when
# the write raises; a bare lock/unlock pair would leave M locked and
# block every other thread that tries to write afterwards.
def graph(what)
  M.synchronize { Dot.puts what }
end

# Appends +what+ (a line, or an array of lines) to the green-only Dot
# graph. Its content is derived from the already-complete full graph,
# so no mutex is taken here.
def green_graph(what)
  GreenDot.puts(what)
end

# Reduces the full Dot graph to its green part.
#
# +data+ is an array of lines from the full Dot graph. Returns a new
# array holding first every green node declaration, then every edge
# whose two endpoints are both green nodes. Lines keep their original
# relative order within each group; +data+ itself is left untouched.
def greenify(data)
  green_node = /^\s+"([^"]+)".+\Wcolor=green/
  edge = /^\s+"([^"]+)" -> "([^"]+)";$/
  is_green = {}

  # Pass 1: keep green node declarations and remember their names.
  # Nodes of any other color, and all connections, are ignored here.
  node_lines = data.select do |line|
    found = green_node.match(line)
    is_green[found[1]] = 1 if found
    found
  end

  # Pass 2: keep the connections that join two green nodes.
  edge_lines = data.select do |line|
    found = edge.match(line)
    found && is_green[found[1]] && is_green[found[2]]
  end

  node_lines + edge_lines
end

# Returns the entries of +peers+ that contain none of the substrings
# listed in ServerExclude.
def filter_peers(peers)
  peers.reject do |peer|
    ServerExclude.any? { |excluded| peer.include?(excluded) }
  end
end

# Picks the HTTP port to query on +host+, given the recon port (a
# String) the server advertises.
#
# What peers advertise is the "Recon port" (defaults to 11370), not
# necessarily the HTTP port. HTTP _usually_ is made available at port
# 80, but some servers will only answer at a different one (defaults
# to 11371). Better way to solve this than a double knock? ☹
#
# Returns 11371 when the advertised recon port is 11370 and a request
# to port 11371 succeeds; 80 in every other case.
def http_port_for(host, recon_port)
  return 80 unless recon_port == '11370'

  # URI.open rather than Kernel#open: since Ruby 3.0 Kernel#open no
  # longer handles URLs, so the knock would always fail and port 80
  # would always be picked. The block form closes the response.
  URI.open('http://%s:%s/' % [host, 11371]) { |_response| nil }
  11371
rescue StandardError
  80
end

# Probes +server+ (a 'host port' or 'host:port' line), records the
# outcome and recursively walks its gossip peers, one thread per peer.
#
# Side effects:
# * Servers[server] is set to the filtered peer list (empty when the
#   server could not be queried); unparseable lines are not recorded.
# * The server is appended to Status under a one-character outcome
#   code, which is also left in Thread.current[:output] for the parent
#   thread to tally: '.' ok, 'R' unreachable/refused, 'T' timeout,
#   'S' socket/HTTP error, 'N' no peers table, '?' anything else.
# * Node and edge lines are written through graph().
#
# Returns nil when the server was already visited or its line cannot
# be parsed. Raises RuntimeError when +server+ is not a String.
def walk_from(server)
  raise RuntimeError, 'wrong data format' unless server.is_a? String

  # Check and mark as visited in one step: the probe below takes a
  # network round-trip, during which other threads would otherwise
  # start walking the very same server.
  visited = M.synchronize do
    next if Servers.has_key?(server) # Already visited

    Servers[server] = []
    Servers.size - 1
  end
  return nil unless visited

  Log.info("%3d visited; walking from %s" % [visited, server])

  # SKS reports as 'server port', Hockeypuck as 'server:port' -- and
  # some Hockeypuck operators alter their output to match the SKS
  # style.
  #
  # And we also have to cater for IPv6 addresses... :-P
  unless server =~ /^(.+)[\s:]+(\d+)$/
    Log.warn('Could not parse server line: «%s». Ignoring.' % server)
    M.synchronize { Servers.delete(server) }
    return nil
  end
  host, recon_port = $1, $2

  uri = 'http://%s:%s/%s' % [host, http_port_for(host, recon_port), StatsPage]

  color, fontcolor, status = 'green', 'black', ''
  peers = []
  begin
    Log.info('Opening server at %s' % uri)
    stats = URI.open(uri) { |page| Nokogiri(page) }

    raw_peers = stats.search('table').find { |t|
      t.inner_text =~ /Gossip Peers/
    }.search('td').map { |td| td.inner_text }
  rescue Errno::ENETUNREACH, Errno::ECONNREFUSED
    color, status, outcome = 'yellow', 'UNREACH/REFUSED', 'R'
  rescue Net::OpenTimeout
    color, status, outcome = 'red', 'Timeout', 'T'
  rescue SocketError, OpenURI::HTTPError
    color, status, outcome = 'orange', 'Sock/HTTPErr', 'S'
  rescue NoMethodError
    # Table with peers is nil (does not serve ?op=stats perhaps?)
    color, status, outcome = 'blue', 'Nil', 'N'
  rescue StandardError => e
    # Anything else is reported under its class name. Exceptions outside
    # StandardError (interrupts, exit requests, out of memory) are
    # deliberately left to propagate.
    color, fontcolor, status, outcome = 'black', 'white', e.class, '?'
  else
    graph '        "%s" [label="%s"];' % [server, host]
    outcome = '.'
    # Only the filtered peers are stored, graphed and walked; mailsync
    # entries and boilerplate phrases are not servers.
    peers = filter_peers(raw_peers)
    Servers[server] = peers
  end
  Thread.current[:output] = outcome
  M.synchronize { (Status[outcome] ||= []) << server }
  graph '        "%s" [color=%s, fontcolor=%s, label="%s\\n%s"];' % [server, color, fontcolor, host, status]

  threads = peers.map do |peer|
    graph '        "%s" -> "%s";' % [server, peer]
    Thread.new { walk_from(peer) }
  end

  # Tally the outcome codes of the child threads. A child that returned
  # early (already visited, unparseable) has no code and counts under nil.
  thr_status = Hash.new(0)
  threads.each do |t|
    t.join
    thr_status[t[:output]] += 1
  end
  my_threads = thr_status.map { |k, v| "#{k}: #{v}" }.join(' - ')
  Log.info("Thread for #{server} joined (#{thr_status.size}): #{my_threads}")
end

Log.warn('Starting SKS network probe and analysis')
# Graph preamble, shared by the full and the green-only graph.
['strict digraph G {',
 '        edge[len=5];',
 '        node[style=filled];'
].each do |lin|
  graph lin
  green_graph lin
end

# One walker thread per seed server; each walker spawns further
# threads for the peers it discovers and joins them before returning.
walkers = StartFrom.map { |src| Thread.new { walk_from(src) } }
walkers.each(&:join) # wait for all threads to finish
graph '}'

# Might seem backwards to generate the green graph from this
# data... but it was actually easier ;-) Sorry to anybody who wants to
# follow my mental processes.
Dot.seek(0)
green_graph greenify(Dot.readlines)
green_graph '}'

Dot.close
GreenDot.close

Log.warn('Analysis done. Finishing outputs and rendering.')

File.open(File.join(OutDir, 'walk-sks.yaml'), 'w') { |f| f.puts [Servers, Status].to_yaml }
[Dot, GreenDot].each do |dot_file|
  # Kernel#system returns false on a non-zero exit status and nil when
  # the command could not be executed; either way the SVG is missing,
  # so leave a trace in the log instead of failing silently.
  next if system(GraphvizBin, '-Tsvg', '-O', dot_file.path)

  Log.error('Graphviz failed to render %s' % dot_file.path)
end
Log.warn('Done!')
