Skip to content

Commit

Permalink
java implementation of sortedIntersectionCount that avoids unnecessar…
Browse files Browse the repository at this point in the history
…y boxing (#36)

For whatever reason, this function always boxes the Ints when implemented in Scala. When I moved it to Java, I found that it doesn't box them, which brings the Kosarak benchmark down from ~105 sec to ~62 sec on my laptop. Also includes a few other cosmetic changes.
  • Loading branch information
alexklibisz authored Feb 15, 2020
1 parent 3c2d25c commit fed9110
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 36 deletions.
7 changes: 7 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
- Improved exact Jaccard performance by implementing a critical path in Java so that it uses primitive `int[]` arrays instead of boxed integers in Scala.
---
- Fixed performance regression.
---
- Client and core library interface improvements.
- Added use_cache parameter to KNearestNeighborsQuery which signals that the vectors should only be read once from Lucene and then cached in memory.
---
- Releasing versioned python client library to PyPi.
---
- Releasing versioned elastiknn plugin zip file.
Expand Down
1 change: 1 addition & 0 deletions core/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ protobuf {
scalapb {
// add any ScalaPB generator options here. See: https://scalapb.github.io/scalapbc.html#passing-generator-parameters
option 'flat_package'
option 'no_lenses'
}
}
}
Expand Down
45 changes: 45 additions & 0 deletions core/src/main/java/com/klibisz/elastiknn/Hotspots.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package com.klibisz.elastiknn;

/**
* Java implementations of some particularly performance-critical code paths.
*/
public class Hotspots {

    /**
     * Raises the error used to report an input array that is not in non-decreasing order.
     *
     * @param lit the element that was found out of order
     * @param big the largest element seen before it in the same array
     * @throws IllegalArgumentException always
     */
    private static void unsortedException(int lit, int big) {
        throw new IllegalArgumentException(String.format("Called on unsorted array: %d came after %d", lit, big));
    }

    /**
     * Counts the positions at which two non-decreasing int arrays can be paired up on equal values,
     * using a single merge-style pass over both arrays. Works on primitive {@code int[]} so no boxing occurs.
     *
     * Both arrays are validated in full: every element must be greater than or equal to the
     * element before it, including elements left over after the other array has been exhausted.
     *
     * @param xs first array, in non-decreasing order
     * @param ys second array, in non-decreasing order
     * @return the number of matched pairs of equal values
     * @throws IllegalArgumentException if either array contains an element smaller than one preceding it
     */
    public static int sortedIntersectionCount(int [] xs, int [] ys) {
        int n = 0;
        int xi = 0;
        int yi = 0;
        // Largest value seen so far in each array; used to detect out-of-order elements.
        int xmax = Integer.MIN_VALUE;
        int ymax = Integer.MIN_VALUE;
        while (xi < xs.length && yi < ys.length) {
            int x = xs[xi];
            int y = ys[yi];
            if (x < xmax) unsortedException(x, xmax);
            else xmax = x;
            if (y < ymax) unsortedException(y, ymax);
            else ymax = y;
            if (x < y) xi += 1;
            else if (x > y) yi += 1;
            else {
                n += 1;
                xi += 1;
                yi += 1;
            }
        }
        // The leftover elements cannot add to the count, but they still have to be validated.
        // The running max must keep advancing here, otherwise an element that is out of order
        // relative to another leftover element (e.g. {1, 5, 3}) would go unnoticed.
        while (xi < xs.length) {
            int x = xs[xi];
            if (x < xmax) unsortedException(x, xmax);
            else xmax = x;
            xi += 1;
        }
        while (yi < ys.length) {
            int y = ys[yi];
            if (y < ymax) unsortedException(y, ymax);
            else ymax = y;
            yi += 1;
        }
        return n;
    }

}
6 changes: 0 additions & 6 deletions core/src/main/java/com/klibisz/elastiknn/JavaDummy.java

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package com.klibisz.elastiknn.utils

import com.klibisz.elastiknn.Hotspots

import scala.annotation.tailrec
import scala.util.Try

Expand All @@ -14,33 +16,7 @@ trait PerformanceUtils {

/**
  * Convenience overload of `fastfor` that forwards to the three-argument overload with `_ + 1`
  * as the third argument.
  *
  * NOTE(review): the three-argument overload is not visible in this excerpt; presumably the third
  * argument is the index-advancing function, making this the "step by one" variant. Confirm against it.
  *
  * @param i    forwarded unchanged as the first argument (presumably the starting index)
  * @param pred forwarded unchanged as the second argument (presumably the continue-condition on the index)
  * @param f    forwarded unchanged as the body applied to an index
  */
final def fastfor(i: Int, pred: Int => Boolean)(f: Int => Unit): Unit = fastfor(i, pred, _ + 1)(f)

/**
  * Fails with the error used to report an out-of-order element in a supposedly sorted input.
  *
  * @param little the element that was found out of order
  * @param big    the value it was compared against
  * @throws IllegalArgumentException always
  */
private def unsortedException(little: Int, big: Int): Unit = {
  val message = s"Called on unsorted array: $little came after $big"
  throw new IllegalArgumentException(message)
}

/**
  * Counts the positions at which two non-decreasing sequences can be paired up on equal values,
  * using a single merge-style pass over both sequences.
  *
  * Both inputs are validated in full: every element must be greater than or equal to the element
  * before it, including elements left over after the other sequence has been exhausted.
  *
  * @param xs first sequence, in non-decreasing order
  * @param ys second sequence, in non-decreasing order
  * @return `Success` with the number of matched pairs, or `Failure` wrapping the
  *         `IllegalArgumentException` raised when either input is found to be out of order
  */
private[elastiknn] def sortedIntersectionCount(xs: IndexedSeq[Int], ys: IndexedSeq[Int]): Try[Int] = Try {
  // Plain vars rather than tuple destructuring: no tuple is allocated per loop iteration.
  var n = 0
  var xi = 0
  var yi = 0
  // Largest value seen so far in each input; used to detect out-of-order elements.
  var xmax = Int.MinValue
  var ymax = Int.MinValue
  while (xi < xs.length && yi < ys.length) {
    val x = xs(xi)
    val y = ys(yi)
    if (x < xmax) unsortedException(x, xmax) else xmax = x
    if (y < ymax) unsortedException(y, ymax) else ymax = y
    if (x < y) xi += 1
    else if (x > y) yi += 1
    else {
      n += 1
      xi += 1
      yi += 1
    }
  }
  // Leftover elements cannot add to the count but still have to be validated. The running max
  // keeps advancing so that disorder among the leftover elements themselves is detected.
  while (xi < xs.length) {
    val x = xs(xi)
    if (x < xmax) unsortedException(x, xmax) else xmax = x
    xi += 1
  }
  // The ys tail is checked against ymax (its own running max), not xmax.
  while (yi < ys.length) {
    val y = ys(yi)
    if (y < ymax) unsortedException(y, ymax) else ymax = y
    yi += 1
  }
  n
}
/**
  * Scala-facing wrapper around `Hotspots.sortedIntersectionCount`, which operates on primitive arrays.
  *
  * @param xs first array, passed straight through to the Java implementation
  * @param ys second array, passed straight through to the Java implementation
  * @return `Success` with the count returned by the Java implementation, or `Failure` wrapping
  *         any non-fatal exception it throws
  */
private[elastiknn] def sortedIntersectionCount(xs: Array[Int], ys: Array[Int]): Try[Int] =
  Try {
    Hotspots.sortedIntersectionCount(xs, ys)
  }

}

Expand Down
4 changes: 2 additions & 2 deletions reference/build.gradle
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
dependencies {
implementation project(':core')
runtime 'org.scala-lang:scala-library:2.12.8'
implementation 'org.scala-lang:scala-library:2.12.8'
runtime "org.scala-lang:scala-library:${scalaVersion}"
implementation "org.scala-lang:scala-library:${scalaVersion}"
implementation 'org.apache.commons:commons-math3:3.6.1'
implementation 'org.apache.spark:spark-mllib_2.12:2.4.4'
}
2 changes: 1 addition & 1 deletion version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.1.0-PRE5
0.1.0-PRE6

0 comments on commit fed9110

Please sign in to comment.