Biology

Needleman-Wunsch

See the Wikipedia page on the Needleman-Wunsch algorithm.

Example

Imports and implicits

import org.jblas.DoubleMatrix

import cats.implicits._

import spire.algebra.Ring
import spire.algebra.NRoot
import spire.algebra.Field

import axle.algebra._
import axle.algebra.functors._
import axle.bio._
import NeedlemanWunsch.optimalAlignment
import NeedlemanWunschDefaults._

// Spire's DoubleAlgebra is exposed under the NRoot[Double] type so it can be found implicitly.
implicit val nrootDouble: NRoot[Double] = spire.implicits.DoubleAlgebra
// Likewise, Spire's IntAlgebra is exposed as the implicit Ring[Int].
implicit val ringInt: Ring[Int] = spire.implicits.IntAlgebra
// NOTE(review): presumably a module instance relating Double and Int that the calls below need — confirm.
import axle.algebra.modules.doubleIntModule

// Instance returned by axle.jblas.linearAlgebraDoubleMatrix for element type Double.
// NOTE(review): the name ends in "Int" although the type argument is Double — confirm the intended name.
implicit val laJblasInt = {
  // Field[Double] is declared inside this block, so it is only in implicit scope while the instance is built.
  implicit val fieldDouble: Field[Double] = spire.implicits.DoubleAlgebra
  axle.jblas.linearAlgebraDoubleMatrix[Double]
}
// The two DNA sequences used by the rest of the Needleman-Wunsch example.
val dna1 = "ATGCGGCC"
val dna2 = "ATCGCCGG"

Setup

// Align dna1 against dna2. `similarity`, `gap` and `gapPenalty` are not defined in
// this snippet (presumably supplied by the NeedlemanWunschDefaults import — confirm).
val nwAlignment =
  optimalAlignment[IndexedSeq, Char, DoubleMatrix, Int, Double](
    dna1,
    dna2,
    similarity,
    gap,
    gapPenalty
  )
// nwAlignment: (IndexedSeq[Char], IndexedSeq[Char]) = (
//   Vector('A', 'T', 'G', 'C', 'G', 'G', 'C', 'C', '-', '-'),
//   Vector('A', 'T', '-', 'C', '-', 'G', 'C', 'C', 'G', 'G')
// )

Score alignment

import NeedlemanWunsch.alignmentScore

// Score the pair of aligned sequences held in nwAlignment.
alignmentScore(
  nwAlignment._1,
  nwAlignment._2,
  gap,
  similarity,
  gapPenalty
)
// res0: Double = 32.0

Compute similarity

// Build a similarity space from the base similarity function and the gap penalty.
val space =
  NeedlemanWunschSimilaritySpace[IndexedSeq, Char, DoubleMatrix, Int, Double](
    similarity,
    gapPenalty
  )
// space: NeedlemanWunschSimilaritySpace[IndexedSeq, Char, DoubleMatrix, Int, Double] = NeedlemanWunschSimilaritySpace(
//   baseSimilarity = <function2>,
//   gapPenalty = -5.0
// )

// Ask the space for the similarity of the two original (unaligned) sequences.
space.similarity(dna1, dna2)
// res1: Double = 32.0

Smith-Waterman

See the Wikipedia page on the Smith-Waterman algorithm.

Smith-Waterman Example

Imports and implicits

import org.jblas.DoubleMatrix

import cats.implicits._

import spire.algebra.Ring
import spire.algebra.NRoot

import axle.bio._
import SmithWatermanDefaults._
import SmithWaterman.optimalAlignment

// Spire's IntAlgebra serves as both the Ring[Int] and the NRoot[Int] instance.
implicit val ringInt: Ring[Int] = spire.implicits.IntAlgebra
implicit val nrootInt: NRoot[Int] = spire.implicits.IntAlgebra
// Instance returned by axle.jblas.linearAlgebraDoubleMatrix for element type Int.
// It is declared after the two implicits above, which it presumably relies on — confirm.
implicit val laJblasInt = axle.jblas.linearAlgebraDoubleMatrix[Int]

Setup

// The two DNA sequences compared in the Smith-Waterman example.
val dna3 = "ACACACTA"
val dna4 = "AGCACACA"

Align the sequences

// Align dna3 against dna4. `w`, `mismatchPenalty` and `gap` are not defined in
// this snippet (presumably supplied by the SmithWatermanDefaults import — confirm).
val swAlignment =
  optimalAlignment[IndexedSeq, Char, DoubleMatrix, Int, Int](
    dna3,
    dna4,
    w,
    mismatchPenalty,
    gap
  )
// swAlignment: (IndexedSeq[Char], IndexedSeq[Char]) = (
//   Vector('A', '-', 'C', 'A', 'C', 'A', 'C', 'T', 'A'),
//   Vector('A', 'G', 'C', 'A', 'C', 'A', 'C', '-', 'A')
// )

Compute similarity of the sequences

// Build a similarity space from the weight function `w` and the mismatch penalty.
val space =
  SmithWatermanSimilaritySpace[IndexedSeq, Char, DoubleMatrix, Int, Int](
    w,
    mismatchPenalty
  )
// space: SmithWatermanSimilaritySpace[IndexedSeq, Char, DoubleMatrix, Int, Int] = SmithWatermanSimilaritySpace(
//   w = <function3>,
//   mismatchPenalty = -1
// )

// Ask the space for the similarity of the two original (unaligned) sequences.
space.similarity(dna3, dna4)
// res3: Int = 12