Sunday, 27 July 2014

Self-Organizing Map in Scala

My Scala code of the day relates to Self-Organising Maps. The end result can be seen on this short video. Let's start with some basic definition
case class Coord(x: Int, y: Int)
case class Weight(r: Double, g: Double, b: Double) // Represents the color vector
case class Node(coord: Coord, weight: Weight) // The node in the lattice
Some utility class, wrapped in a Scala object. As you can see, Euclidean distance is used for weight proximity (r, g, b) and nodes (x, y)
object SOMUtils {
  private val rnd = new java.security.SecureRandom()
  def rndWeight = Weight(rnd.nextDouble(), rnd.nextDouble(), rnd.nextDouble())
  def squa(d: Double) = d*d
  def euclDist(w1: Weight, w2: Weight) = sqrt(squa(w1.r-w2.r)+squa(w1.g-w2.g)+squa(w1.b-w2.b)) // Euclidian distance
  def euclDist(n1: Node, n2: Node) = sqrt(squa(n1.coord.x-n2.coord.x)+squa(n1.coord.y-n2.coord.y)) // Euclidian distance
  def rndElem[T](list: List[T]):T = list(rnd.nextInt(list.size))
}
Follows the definition of the lattice
import SOMUtils._

class Lattice(val size: Int, val nodes: List[Node])
object Lattice {
  def apply(size: Int) = new Lattice(size, (for(x<-0 until size; y<-0 until size) yield Node(Coord(x, y), rndWeight)).toList)
}
And finally the core of the algorithm
class SOM(val size: Int, val numIterations: Int, val trainingSet: List[Weight]) {
  val mapRadius = size / 2.0
  val timeConstant = numIterations / log(mapRadius)
  def neighbourhoodRadius(iter: Double) = mapRadius * exp(-iter/timeConstant)
  def bmu(input: Weight, lattice: Lattice): Node = {
    val sortedNodesByDist = lattice.nodes.sortBy(n => euclDist(input, n.weight))
    sortedNodesByDist(0)
  }
  def bmuNeighbours(radius: Double, bmu: Node, lattice: Lattice): (List[(Node, Double)], List[(Node, Double)]) =
    lattice.nodes.map(n => (n, euclDist(n, bmu))).partition(n => n._2 <= radius)
  def learningRate(iter: Double) = 0.072 * exp(-iter/numIterations) // decays over time
  def theta(d2bmu: Double, radius: Double) = exp(-squa(d2bmu)/(2.0*squa(radius))) // learning proportional to distance
  def adjust(input: Weight, weight: Weight, learningRate: Double, theta: Double): Weight = {
    def adjust(iW: Double, nW: Double) = nW + learningRate * theta * (iW - nW)
    Weight(adjust(input.r, weight.r), adjust(input.g, weight.g), adjust(input.b, weight.b))
  }

  def nextLattice(iter: Int, lattice: Lattice): Lattice = {
    val randomInput = rndElem(trainingSet)
    val bmuNode = bmu(randomInput, lattice)
    val radius = neighbourhoodRadius(iter)
    val allNodes = bmuNeighbours(radius, bmuNode, lattice)
    val lrate = learningRate(iter)
    val adjustedNodes = allNodes._1.par.map(t => {
      val tTheta = theta(t._2, radius)
      val nWeight = adjust(randomInput, t._1.weight, lrate, tTheta)
      Node(t._1.coord, nWeight)
    }).toList
    new Lattice(lattice.size, adjustedNodes ++ allNodes._2.map(t => t._1))
  }

  def compute {
    @tailrec
    def helper(iter: Int, lattice: Lattice): Lattice =
      if (iter >= numIterations) lattice else helper(iter+1, nextLattice(iter, lattice))

    val endLattice = helper(0, Lattice(size))
    UIUtils.persist(endLattice, "lattice")
  }
}
That's it. Hope you enjoyed that. Check it out and learn more. Next step: to implement the 2-D grid version - still using Scala and JavaFX, develop a generic SOM-Scala lib, and finally, re-implement the 2-D version using Three.js

Wednesday, 23 July 2014

Scala Mandel

package jts.mandel

import scala.annotation.tailrec

object Mandel {
  type Pixel = (Int, Int)
  val maxIter = 255
  val size = 1024
  val palette = {
    val rnd = new java.security.SecureRandom
    ((for(n <- 0 until maxIter) yield rnd.nextInt()).toList) ++ List(0)
  }
  
  def mandel():Map[Pixel, Int] = {
    @tailrec
    def compute(p: Pixel, x: Double = 0, y: Double = 0, iter: Int = 0): Int = {
      def x2mx(lx: Int, w: Int) = -2.5 + (lx * 3.5) / w
      def y2my(ly: Int, h: Int) = 1.0 - ly * 2.0 / h
      val xmy = (x2mx(p._1, size), y2my(p._2, size))
    
      if (x*x+y*y > 4 || iter >= maxIter) 
        iter
      else
        compute(p, x*x-y*y+xmy._1, 2*x*y+xmy._2, iter+1)
    }
    
    val area = (for(x <- 0 until size; y <- 0 until size) yield (x, y))
    area.par.map(e => (e, compute(e))).toList.toMap
  }
  
  def persistToFile(pixels: Map[Pixel, Int]) {
    import java.awt.image.{BufferedImage => BI}
    val im = new BI(size, size, BI.TYPE_INT_RGB)
    pixels.par.foreach(e => {im.setRGB(e._1._1, e._1._2, palette(e._2))})
    javax.imageio.ImageIO.write(im, "jpg", new java.io.File("C:/EclipseWS/ScalaInvestigations/mandel.jpg"))
  }
  
  def main(args: Array[String]):Unit = {
    val t0 = System.currentTimeMillis()
    persistToFile(mandel())
    val tf = (System.currentTimeMillis() - t0)
    println(s"Done in $tf millis")
  }
}

Friday, 4 July 2014

Palindromes in Scala... there is always a better way

What does one do while having dessert? Write some Scala code to find out the longest palindrome in a string. Some piece of code that makes you humble... :-) I came up with this (based on a double array in O(N^2))
  def longestPalindrome(in: String): Option[String] = {
    val inLength = in.length
    val arr = Array.ofDim[Boolean](inLength, inLength)
    for(i <- 0 until in.length) {
      arr(i)(i) = true
      if (i < inLength - 1 && in(i) == in(i+1)) arr(i)(i+1) = true
    }
    
    def helper(k: Int, i: Int, pal: Option[String]): Option[String] = {
      val j = i + k - 1
      if (in(i) == in(j)) {
         arr(i)(j) = arr(i+1)(j-1)
         if (arr(i)(j)) {
           val npal = in.substring(i, j+1)
           pal match {
             case None => Some(npal)
             case Some(s: String) => if (npal.size > s.size) Some(npal) else pal
           }
         } else pal
      } else {
        arr(i)(j) = false
        pal
      }
    }
    
    val allPals = for(k <- 3 to inLength; i <- 0 until inLength - k) yield helper(k, i, None)
    val validPals = allPals.filter(o => o.isDefined).map(o => o.get)
    if (validPals.isEmpty)
      None
    else
      Some(validPals.maxBy(s => s.size))
  }                                               //> longestPalindrome: (in: String)Option[String]
  
  val str = "sdfsdfbbbgggbbggthierryjjyrreihtjdfdscdcdvdv"
                                                  //> str  : String = sdfsdfbbbgggbbggthierryjjyrreihtjdfdscdcdvdv
  longestPalindrome(str)                          //> res0: Option[String] = Some(thierryjjyrreiht)
  
But then I found out on StackOverFlow, a much better way of doing this..... I was LOL when I saw this piece of code:
  (for{i <- 2 to str.size; s <- str.sliding(i) if (s == s.reverse)} yield s).maxBy(s => s.size)
                                                  //> res1: String = thierryjjyrreiht
;-) There is always a better way

Thursday, 1 May 2014

Parsing XML data without vars

Another code snippet - was trying to parse a XML document in Scala without vars... Here is the XML:
<apm>
  <entry>
    <service>GOOGLE</service>
    <id>G-ID</id>
    <pwd>G-PWD</pwd>
    <url>www.google.com</url>
  </entry>
  
  <entry>
    <service>AMAZON</service>
    <id>A-ID</id>
    <pwd>A-PWD</pwd>
    <url>www.amazon.com</url>
  </entry>
</apm>
In the scala code that uses pattern matching and recursion.
package scala.fun

import scala.io.Source
import scala.xml.pull.XMLEventReader
import scala.xml.pull.XMLEvent
import scala.xml.pull.EvElemStart
import scala.xml.pull.EvText
import scala.xml.pull.EvElemEnd
case class Label(val lbl: String)

case class Service(val svr: String)
case class Id(val id: String)
case class Pwd(pwd: String)
case class URL(url: String)

case class Entry(val svr: Service, val id: Id, val pwd: Pwd, val url: URL)

object XmlP {
  def parse(xmlf: String): List[Entry] = {
    def ip(xmlr: XMLEventReader, xmle: XMLEvent, start: Boolean = true, label: Label = Label(""),
           service: Service = Service(""), id: Id = Id(""), pwd: Pwd = Pwd(""), url: URL = URL(""),
           entries: List[Entry] = List()): List[Entry] = {
      xmle match {
        case EvElemStart(pre, lbl, attrs, scope) => ip(xmlr, xmlr.next, true, Label(lbl), service, id, pwd, url, entries)
        case EvText(txt) =>
          if (start)
            label.lbl match {
              case "service" => ip(xmlr, xmlr.next, true, label, Service(txt.trim), id, pwd, url, entries)
              case "id" => ip(xmlr, xmlr.next, true, label, service, Id(txt.trim), pwd, url, entries)
              case "pwd" => ip(xmlr, xmlr.next, true, label, service, id, Pwd(txt.trim), url, entries)
              case "url" => ip(xmlr, xmlr.next, true, label, service, id, pwd, URL(txt.trim), entries)
              case _ => ip(xmlr, xmlr.next, false, label, service, id, pwd, url, entries)
            }
          else ip(xmlr, xmlr.next, false, label, service, id, pwd, url, entries)
        case EvElemEnd(pre, lbl) => {
          val newEntries = if ("entry".equals(lbl)) Entry(service, id, pwd, url) +: entries else entries
          if (xmlr.hasNext) ip(xmlr, xmlr.next, false, label, service, id, pwd, url, newEntries) else entries
        }
        case _ => if (xmlr.hasNext) ip(xmlr, xmlr.next, false, label, service, id, pwd, url, entries) else entries
      } // xmle match
    } //ip
    
    val xml = new XMLEventReader(Source.fromFile(xmlf))
    ip(xml, xml.next)
  }
  
  val result = parse("apm.xml")
  println(s"result: $result")

  def main(args: Array[String]): Unit = {}
}

Wednesday, 23 April 2014

A neat Scala code snippet for depth aggregation

case class Price(val value: Double) extends AnyVal
case class Qty(val value: Int) extends AnyVal
case class Depth(price: Price, qty: Qty)

object Agg {
  def aggregate(in: List[Depth]) = in.foldLeft(List[Depth]())((l, c) => {
    if (l.isEmpty) List(c)
    else if (l.head.price == c.price) l updated (0, Depth(l.head.price, Qty(l.head.qty.value+c.qty.value)))
    else c +: l
  }).reverse
}

Thursday, 27 February 2014

GAs speed x10

For the astute observer, you will notice that in my previous post, there is a slight performance problem if the evaluation function is expensive. This line of code:
val sortedChromos = population.sortBy(w => evaluate(decode(w), target))
Actually, it should not be even coded like this, the evaluation should happen once, and then we should sort. That's one thing. The other thing, is that we could simply parallelise this... First time I have actually found a good usage for .par ;-) With this in mind, here is a new implementation of the above:
def parSort(population: Population): Population = {
  val parPopulation = population.par
  val evaledParPopulation = parPopulation.map(e => (e, evaluate(decode(e), target))).toList
  val sortedParPopulation = evaledParPopulation.sortBy(e => e._2).map(e => e._1)
  sortedParPopulation
}

val sortedChromos = parSort(population)
The third problem with a maximum of 2,000 iterations with the first sort/eval takes 51 seconds, with the par/sort/eval, 5 seconds :-)

Sunday, 23 February 2014

GAs in Scala

For tonight, what about a small, generic algorithm Scala lib? Let's start with some basic definition:
case class NbChromos(val nb: Int) extends AnyVal
case class NbGenes(val nb: Int) extends AnyVal
Then the core lib, an abstract class that will be implemented for specific solutions. T is the type of the underlying gene, U, the type of the target (solution) You can see the implementations for the methods cross, mutates for a gene, and the whole chromosome. The 3 methods that must be implemented for a specific problem are (1) the basic gene mutation ~~(), (2) the decoder to decode the chromosome def decoder and (3) the evaluator function def evaluator.
abstract class GA[T, U] {
  import scala.language.implicitConversions
  implicit def nbgenes2int(obj: NbGenes): Int = obj.nb
  implicit def nbchromos2int(obj: NbChromos): Int = obj.nb
  
  protected val rnd = new scala.util.Random
  protected def nextInt(upper: Int) = rnd.nextInt(upper)
  
  type Gene = T
  type Chromo = Seq[Gene]
  type Population = Seq[Chromo]
  
  def ~~(): Gene // Generates a random gene
  def ~~(gene: Gene): Gene = ~~() // Mutates a given gene
  def xx(c1: Chromo, c2: Chromo): (Chromo, Chromo) = { // Crosses two chromosomes
    require(c1.length == c2.length, s"Crossing chromos of different lengths ${c1.length} != ${c2.length}")
    val idx = nextInt(c1.length)
    val rmi = c1.length - idx
    (c1.take(idx) ++ c2.takeRight(rmi), c2.take(idx) ++ c1.takeRight(rmi))
  }
  def ~~(c: Chromo): Chromo = { // Mutates a chromosome
    val idx = nextInt(c.length)
    val mc = ~~(c(idx))
    c.take(idx) ++ List(mc) ++ c.takeRight(c.length - 1 - idx)
  }
  def ~#(nbGenes: NbGenes): Chromo = // Generates a random chromosome 
    (for(i <- 0 until nbGenes) yield ~~())
  def ~#(nbChromos: NbChromos, nbGenes: NbGenes): Population = { // Generates a random population
    val nbc = if (nbChromos%2 == 0) nbChromos.nb else 1 + nbChromos.nb // Makes sure we have an even number
    for(i <- 0 until nbc) yield ~#(nbGenes)
  }
  
  // The algorithm uses a 'decoder' (to decoded a chromo into the target),
  // an 'evaluator' to evaluate a solution towards its target
  // and a generic 'solve' method
  def decoder(chromo: Chromo): U
  def evaluator(solution: U, target: Option[U]): Double // The higher the number the worst, 0 is the best
  
  def solve(decode: Chromo => U)(evaluate: (U, Option[U]) => Double)
    (nbChromos: NbChromos, nbGenes: NbGenes, target: Option[U], crossOverRate:Double = 0.7, mutationRate:Double = 0.2, maxIterations:Double = 10000): (Chromo, U, Double) = {
    val nbBests = 2
    val initialPopulation = ~#(nbChromos, nbGenes)
    
    def ~!#(in: Population, out: Population = Seq()): Population = {
      val rndDouble = rnd.nextDouble
      
      if (in.isEmpty) out else {
        val c1h = in.head
        val rcs = in.tail
        val c2h = rcs.head
        if (rndDouble < mutationRate) { // Let's mutate
          val mc1 = ~~(c1h)
          val mc2 = ~~(c2h)
          ~!#(rcs.tail, List(mc1, mc2) ++ out)
        } else if (rndDouble < crossOverRate) {
          val cxs = xx(c1h, c2h)
          ~!#(rcs.tail, List(cxs._1, cxs._2) ++ out)
        } else
          ~!#(rcs.tail, List(c1h, c2h) ++ out)
      }
    } // crosses, mutates, or copies the population into a new population
    
    def solve(population: Population, iter: Int = 0): Chromo = {
      val sortedChromos = population.sortBy(w => evaluate(decode(w), target))
      
      if (iter > maxIterations) {
        sortedChromos(0)
      } else {
        val bests = sortedChromos.slice(0, nbBests)
        val best = bests(0)
        val bestDecoded = decode(best)
        val evaled = evaluator(bestDecoded, target)
        if (iter%1000 == 0) println(s"Current iter $iter, found $bestDecoded")
        if (evaled == 0) {
          println(s"Found a solution in $iter iterations")
          best
        } else {
          val shuffledChromos = rnd.shuffle(sortedChromos.take(sortedChromos.length-nbBests))
          val nextChromos = ~!#(shuffledChromos)
          val newPopulation = bests ++ nextChromos
          solve(newPopulation, iter+1)
        }
      }
    } // solves the problem
    
    val best = solve(initialPopulation)
    val bestDecoded = decode(best)
    val bestScore = evaluate(bestDecoded, target)
    (best, bestDecoded, bestScore)
  } // def solve
}
That is it for the "core" lib. Let's try this on 3 problems: a word finder, a formula finder and a circle fitter (like the one at the bottom of this page) Problem one: Word finder Given a random set of characters, find a target string.
package org.jts.ga

class PhraseGA extends GA[Char, String] {
  private val from = 'a'; val to = 'z'
  private def rndChar: Char = (rnd.nextInt(to-from+1)+from).toChar
  
  override def ~~(): Gene = rndChar
  override def decoder(chromo: Chromo) = chromo.map(g => g.toString).mkString
  override def evaluator(sol: String, tgt: Option[String]): Double =
    if (sol.length() != tgt.get.length()) Double.MaxValue else
      (for(i <- 0 until sol.length) yield if (sol(i).equals(tgt.get(i))) 0 else 100).sum
}

object WordFinder {
  def main(args: Array[String]): Unit = {
    val sga = new PhraseGA
    val target = "abcdefghijklmnopqrstuvwxyz"
    val targetLength = target.length
    val result = sga.solve(sga.decoder)(sga.evaluator)(NbChromos(100), NbGenes(targetLength), Some(target))
    println(s"result: $result")
  }
}
Problem two: Formula finder Given a sets of digits and operands, find a formula that results into a given number.
package org.jts.ga

import org.mvel2.MVEL

class FormulaFinder extends GA[Char, String] {
  private val digits = List('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')
  private val operands = List('+', '-', '/', '*')
  private val domain = digits ++ operands
  private def rndElem = domain(rnd.nextInt(domain.length))
  
  override def ~~(): Gene = rndElem
  override def decoder(chromo: Chromo) = chromo.map(g => g.toString()).mkString
  override def evaluator(sol: String, tgt: Option[String]): Double = {
    try {
      val eval = MVEL.eval(sol+"+0.0")
      val tgtSol = eval.asInstanceOf[Double]
      val tgtAsD = tgt.get.toDouble
      Math.abs(tgtSol-tgtAsD)
    } catch {
      case t: Throwable => Double.MaxValue
    }
  }
}

object FormulaFinder {
  def main(args: Array[String]): Unit = {
    val sga = new FormulaFinder
    val target = "123456"
    val result = sga.solve(sga.decoder)(sga.evaluator)(NbChromos(100), NbGenes(9), Some(target))
    println(s"result: $result")
  }
}
Problem three: Circle Fitter Fits the bigger circle possible in an area full of circles. This problem is the one at the bottom of this page.
package org.jts.ga

case class Circle(val x: Int, val y: Int, val radius: Int) {
  val dxy = (x - radius, y - radius)
  val dwh = (radius * 2, radius * 2)
  val surface = Math.PI * radius.toDouble * radius.toDouble
  
  def dist(other: Circle) = Math.sqrt((x - other.x) * (x - other.x) + (y - other.y) * (y - other.y))
  def intersect(other: Circle) = (radius + other.radius) > dist(other)
  
  def draw(g2d: java.awt.Graphics2D) = g2d.drawOval(dxy._1, dxy._2, dwh._1, dwh._2)
  override def toString = s"C($x, $y, $radius)"
}

case class Area(val w: Int, val h: Int, val max: Int) {
  val maxVal = Math.max(w, h)
  val rnd = new scala.util.Random
  val circles = for(i <- 0 until max) yield Circle(ni(w), ni(h), ni((w*.2).toInt))
  val nbCircles = circles.length
  
  def ni(i: Int) = rnd.nextInt(i)
  def persist2file(best: Circle) {
    val image = new java.awt.image.BufferedImage(w, h, java.awt.image.BufferedImage.TYPE_INT_ARGB)
    val g2d = image.createGraphics
    g2d.setColor(java.awt.Color.BLACK)
    circles.foreach(c => {
      c.draw(g2d)
      g2d.drawString(c.toString, c.x, c.y)
    })
    g2d.setColor(java.awt.Color.RED)
    best.draw(g2d)
    g2d.drawString(best.toString, best.x, best.y)
    javax.imageio.ImageIO.write(image, "png", new java.io.File("area.png"))
    println("Drawn circles")
  }
}

class CircleFitter(val area: Area) extends GA[Int, Circle] {
  private def rndVal = rnd.nextInt(area.maxVal)
  private val maxRadius = Math.min(area.h/2, area.w/2)
  private val maxSurface = Math.PI * maxRadius * maxRadius
  
   override def ~~(): Gene = rndVal
   override def decoder(chromo: Chromo) = Circle(chromo(0), chromo(1), chromo(2))
   override def evaluator(sol: Circle, tgt: Option[Circle]): Double = {
    // bigger the surface, the better
    val surfaceScore = Math.abs(1.0 - sol.surface / maxSurface)
    // fewer number of intersections, the better
    val nbIntersects = (for(c <- area.circles) yield if (sol.intersect(c)) 1.0 else 0.0).sum
    val interesectScore = nbIntersects / area.nbCircles
    // the solution must be strongly inside
    val insideScore = if ((sol.x-sol.radius < 0) || (sol.x+sol.radius > area.w) ||
                          (sol.y-sol.radius < 0) || (sol.y+sol.radius > area.h)) 10.0 else 0.0
    surfaceScore + interesectScore * 3.0 + insideScore
  }
}

object CircleFitter {
  def main(args: Array[String]): Unit = {
    val area = Area(800, 600, 25)
    val circleFitter = new CircleFitter(area)
    val best = circleFitter.solve(circleFitter.decoder)(circleFitter.evaluator)(NbChromos(1000), NbGenes(3), None, .7, .2, 1000)
    area.persist2file(best._2)
  }
}
Have fun.