2020-03-21 00:37:09 +00:00
|
|
|
package de.wwwu.awolf.presenter.algorithms.advanced;
|
|
|
|
|
|
|
|
import de.wwwu.awolf.model.Interval;
|
|
|
|
import de.wwwu.awolf.model.Line;
|
|
|
|
import de.wwwu.awolf.model.Point;
|
|
|
|
import de.wwwu.awolf.model.communication.AlgorithmData;
|
|
|
|
import de.wwwu.awolf.model.communication.Data;
|
|
|
|
import de.wwwu.awolf.model.communication.SubscriberType;
|
|
|
|
import de.wwwu.awolf.presenter.Presenter;
|
|
|
|
import de.wwwu.awolf.presenter.algorithms.Algorithm;
|
|
|
|
import de.wwwu.awolf.presenter.util.BinomialCoeffizient;
|
|
|
|
import de.wwwu.awolf.presenter.util.FastElementSelector;
|
|
|
|
import de.wwwu.awolf.presenter.util.IntersectionCounter;
|
|
|
|
import de.wwwu.awolf.presenter.util.RandomSampler;
|
|
|
|
|
|
|
|
import java.util.ArrayList;
|
|
|
|
import java.util.LinkedHashSet;
|
|
|
|
import java.util.List;
|
|
|
|
import java.util.Set;
|
|
|
|
import java.util.concurrent.Flow;
|
2017-06-26 14:01:54 +00:00
|
|
|
|
2017-05-28 12:00:01 +00:00
|
|
|
/**
|
|
|
|
* Implementierung verschiedener Algorithmen zur Berechnung von Ausgleichsgeraden.
|
|
|
|
*
|
|
|
|
* @Author: Armin Wolf
|
|
|
|
* @Email: a_wolf28@uni-muenster.de
|
|
|
|
* @Date: 28.05.2017.
|
|
|
|
*/
|
2020-03-21 00:37:09 +00:00
|
|
|
public class TheilSenEstimator implements Algorithm, Flow.Publisher<Data> {
|
2017-06-26 14:01:54 +00:00
|
|
|
|
2019-08-01 05:19:04 +00:00
|
|
|
private final double POSITIV_INF = 99999.0;
|
|
|
|
private final double NEGATIV_INF = -99999.0;
|
|
|
|
private final double EPSILON = 0.00001;
|
2020-03-21 00:37:09 +00:00
|
|
|
private List<Line> setOfLines;
|
|
|
|
private List<Point> setOfIntersections;
|
|
|
|
private List<Point> intervalIntersections;
|
|
|
|
private List<Double> sampledIntersections;
|
2017-06-29 15:32:54 +00:00
|
|
|
//wird benötigt um den y Achsenabschnitt zu Berechnen
|
2020-03-21 00:37:09 +00:00
|
|
|
private List<Double> yCoordinates;
|
|
|
|
private List<Double> xCoordinates;
|
2017-06-29 15:32:54 +00:00
|
|
|
//Hilfsvariablen (siehe original Paper)
|
2019-08-01 05:19:04 +00:00
|
|
|
private double j;
|
|
|
|
private int jA;
|
|
|
|
private int jB;
|
|
|
|
private double r;
|
|
|
|
private int n;
|
|
|
|
private double N;
|
|
|
|
private int k;
|
2017-06-29 15:32:54 +00:00
|
|
|
//Intervall und die temporaeren Grenzen
|
|
|
|
private Interval interval;
|
2019-08-01 05:19:04 +00:00
|
|
|
private double aVariant;
|
|
|
|
private double bVariant;
|
|
|
|
private double slope;
|
|
|
|
private double yInterception;
|
2020-03-21 00:37:09 +00:00
|
|
|
private Flow.Subscriber<? super AlgorithmData> subscriber;
|
2017-08-01 19:59:33 +00:00
|
|
|
|
2017-10-15 13:21:53 +00:00
|
|
|
/**
|
|
|
|
* Konstruktor
|
2020-03-20 17:08:18 +00:00
|
|
|
*
|
|
|
|
* @param setOfLines Liste der Geraden
|
|
|
|
* @param setOfIntersections Liste der Schnittpunkte
|
|
|
|
* @param presenter Presenter (Beobachter)
|
2017-10-15 13:21:53 +00:00
|
|
|
*/
|
2020-03-21 00:37:09 +00:00
|
|
|
public TheilSenEstimator(List<Line> setOfLines, List<Point> setOfIntersections, Presenter presenter) {
|
|
|
|
|
2017-06-29 15:32:54 +00:00
|
|
|
this.setOfLines = new ArrayList<>(setOfLines);
|
|
|
|
this.setOfIntersections = new ArrayList<>(setOfIntersections);
|
|
|
|
this.intervalIntersections = new ArrayList<>(setOfIntersections);
|
|
|
|
|
|
|
|
this.n = setOfLines.size();
|
|
|
|
this.sampledIntersections = new ArrayList<>();
|
|
|
|
this.yCoordinates = new ArrayList<>();
|
|
|
|
this.xCoordinates = new ArrayList<>();
|
|
|
|
this.N = BinomialCoeffizient.run(n, 2);
|
2017-09-23 12:13:09 +00:00
|
|
|
//this.k = Integer.valueOf((int) (N * 0.5)) - 1;
|
|
|
|
this.k = (int) (N / 2);
|
2017-10-23 15:48:36 +00:00
|
|
|
|
|
|
|
interval = new Interval(NEGATIV_INF, POSITIV_INF);
|
2020-03-21 00:37:09 +00:00
|
|
|
subscribe(presenter);
|
2017-06-29 15:32:54 +00:00
|
|
|
}
|
|
|
|
|
2017-10-15 13:21:53 +00:00
|
|
|
/**
|
|
|
|
* Konstruktor
|
2020-03-20 17:08:18 +00:00
|
|
|
*
|
|
|
|
* @param setOfLines Liste der Geraden
|
|
|
|
* @param setOfIntersections Liste der Schnittpunkte
|
2017-10-15 13:21:53 +00:00
|
|
|
*/
|
2020-03-21 00:37:09 +00:00
|
|
|
public TheilSenEstimator(List<Line> setOfLines, List<Point> setOfIntersections) {
|
2017-09-10 15:45:47 +00:00
|
|
|
this(setOfLines, setOfIntersections, null);
|
2017-08-01 19:59:33 +00:00
|
|
|
}
|
|
|
|
|
2017-06-29 15:32:54 +00:00
|
|
|
/**
|
|
|
|
* Randomisierter Algorithmus zur Berechnung des Theil-Sen Schätzers.
|
|
|
|
* Algorithmus stammt aus dem Paper:
|
|
|
|
* "Jiri Matousek, Randomized optimal algorithm for slope selection,
|
|
|
|
* Information Processing Letters 39 (1991) 183-187
|
|
|
|
*/
|
2020-03-21 19:54:03 +00:00
|
|
|
public Line call() {
|
2017-06-29 15:32:54 +00:00
|
|
|
//damit eine initiale Ordnung herscht
|
2017-06-30 08:34:01 +00:00
|
|
|
//Collections.sort(intervalIntersections);
|
2017-06-29 15:32:54 +00:00
|
|
|
|
2020-03-20 17:08:18 +00:00
|
|
|
r = n;
|
2017-06-29 15:32:54 +00:00
|
|
|
while (true) {
|
2017-09-09 17:41:32 +00:00
|
|
|
if (this.N <= n || (Math.abs(interval.getUpper() - interval.getLower())) < EPSILON) {
|
2017-06-29 15:32:54 +00:00
|
|
|
break;
|
|
|
|
} else {
|
|
|
|
//Anzahl der Schnittpunkte im Intervall [-Inf, a)
|
2017-09-07 09:21:48 +00:00
|
|
|
int numberOfIntersections = getOpenIntervalSize(NEGATIV_INF, interval.getLower(), setOfIntersections);
|
2017-06-29 15:32:54 +00:00
|
|
|
|
|
|
|
//Randomized Interpolating Search
|
|
|
|
j = (r / N) * (double) (k - numberOfIntersections);
|
2017-09-09 17:41:32 +00:00
|
|
|
jA = (int) Math.max(1, Math.floor(j - (1.5 * Math.sqrt(r))));
|
|
|
|
jB = (int) Math.min(r, Math.floor(j + (1.5 * Math.sqrt(r))));
|
2017-06-29 15:32:54 +00:00
|
|
|
|
|
|
|
|
|
|
|
/* Suche nach einem passenderen und kleineren Intervall
|
|
|
|
Schleife terminiert wenn die das k-te Elemnet zwischen aVariant und bVariant liegt und
|
|
|
|
das Intrvall weniger als 11*N / sqrt(r) Elemente besitzt */
|
|
|
|
do {
|
|
|
|
//zufällige Stichprobe
|
2017-08-02 20:42:01 +00:00
|
|
|
sampledIntersections = RandomSampler.run(intervalIntersections, r);
|
2017-06-29 21:22:34 +00:00
|
|
|
aVariant = FastElementSelector.randomizedSelect(sampledIntersections, jA);
|
|
|
|
bVariant = FastElementSelector.randomizedSelect(sampledIntersections, jB);
|
2017-06-29 15:32:54 +00:00
|
|
|
} while (!checkCondition());
|
|
|
|
|
|
|
|
interval.setLower(aVariant);
|
|
|
|
interval.setUpper(bVariant);
|
2017-09-07 09:21:48 +00:00
|
|
|
intervalIntersections = getOpenIntervalElements(interval.getLower(), interval.getUpper());
|
2020-03-20 17:08:18 +00:00
|
|
|
N = getOpenIntervalSize(interval.getLower(), interval.getUpper(), intervalIntersections);
|
2017-06-29 15:32:54 +00:00
|
|
|
}
|
|
|
|
}
|
2019-08-01 05:19:04 +00:00
|
|
|
|
2020-03-21 19:54:03 +00:00
|
|
|
return pepareResult();
|
2017-06-29 15:32:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Diese Funktion überprüft ob die Bedingung für das Interval erfüllt ist. Dabei muss der k-te
|
|
|
|
* Schnittpunkt in diesem Interval enthalten sein. des weiteren soll die Anzahl der Schnittpunkte
|
|
|
|
* im Interval kleiner oder gleich dem Term: (11*N)/sqrt(r) sein.
|
|
|
|
*
|
|
|
|
* @return Boolscher Wert ob die Bedingung erfüllt ist
|
|
|
|
*/
|
|
|
|
private Boolean checkCondition() {
|
2017-08-03 06:27:38 +00:00
|
|
|
//Double kthElement = FastElementSelector.randomizedSelect(xCoordinates, k);
|
|
|
|
//Boolean cond1 = (kthElement > aVariant) && (kthElement <= bVariant);
|
2017-09-07 09:21:48 +00:00
|
|
|
|
2017-09-23 12:13:09 +00:00
|
|
|
int lowerCount = getIntervalSize(NEGATIV_INF, aVariant);
|
|
|
|
int higherCount = getIntervalSize(NEGATIV_INF, bVariant);
|
2017-09-07 09:21:48 +00:00
|
|
|
|
|
|
|
Boolean conda = k > lowerCount;
|
|
|
|
Boolean condb = k <= higherCount;
|
2017-08-03 06:27:38 +00:00
|
|
|
|
|
|
|
Boolean cond1 = conda && condb;
|
|
|
|
|
2017-09-10 15:45:47 +00:00
|
|
|
Boolean cond2 = (higherCount - lowerCount) <= ((11 * N) / Math.sqrt(r));
|
2017-06-29 21:22:34 +00:00
|
|
|
|
2017-09-10 15:45:47 +00:00
|
|
|
return (cond1 && cond2) || (aVariant == bVariant);
|
2017-06-29 15:32:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Berechne wieviele von den Schnittpunkten in dem Interval zwischen <code>a</code> und <code>b</code>
|
|
|
|
* enthalten sind.
|
|
|
|
*
|
|
|
|
* @param a untere Grenze des Intervals
|
|
|
|
* @param b obere Grenze des Intrvals
|
|
|
|
* @return Anzahl der Schnittpunkte im Interval [a,b)
|
|
|
|
*/
|
2017-09-23 12:13:09 +00:00
|
|
|
public int getIntervalSize(double a, double b) {
|
2020-03-20 17:08:18 +00:00
|
|
|
IntersectionCounter ic = new IntersectionCounter();
|
|
|
|
return ic.run(setOfLines, new Interval(a, b));
|
2017-06-26 14:01:54 +00:00
|
|
|
}
|
|
|
|
|
2017-09-07 09:21:48 +00:00
|
|
|
/**
|
|
|
|
* Berechne wieviele von den Schnittpunkten in dem Interval zwischen <code>a</code> und <code>b</code>
|
|
|
|
* enthalten sind.
|
2020-03-20 17:08:18 +00:00
|
|
|
* <p>
|
2017-09-23 12:13:09 +00:00
|
|
|
* Inspiriert durch:
|
2020-03-20 17:08:18 +00:00
|
|
|
* <url>https://stackoverflow.com/questions/136474/best-way-to-pick-a-random-subset-from-a-collection</url>
|
2017-09-23 12:13:09 +00:00
|
|
|
*
|
2017-09-07 09:21:48 +00:00
|
|
|
* @param a untere Grenze des Intervals
|
|
|
|
* @param b obere Grenze des Intrvals
|
|
|
|
* @return Anzahl der Schnittpunkte im Interval (a,b)
|
|
|
|
*/
|
2020-03-21 00:37:09 +00:00
|
|
|
public int getOpenIntervalSize(double a, double b, List<Point> set) {
|
2017-09-07 09:21:48 +00:00
|
|
|
int counter = 0;
|
2017-09-10 15:45:47 +00:00
|
|
|
for (int i = 0; i < set.size(); i++) {
|
2017-09-09 17:41:32 +00:00
|
|
|
Point x = set.get(i);
|
2017-09-07 09:21:48 +00:00
|
|
|
if (x.getX() > a && x.getX() < b) {
|
2017-06-29 15:32:54 +00:00
|
|
|
counter++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return counter;
|
2017-06-26 14:01:54 +00:00
|
|
|
}
|
2017-06-29 15:32:54 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Berechne wieviele von den Schnittpunkten in dem Interval zwischen <code>a</code> und <code>b</code>
|
|
|
|
* enthalten sind. Zusätzlich werden diese Schnittpunkte in einer Liste festgehalten und diese werden
|
|
|
|
* zurückgeliefert.
|
|
|
|
*
|
|
|
|
* @param a untere Grenze des Intervals
|
|
|
|
* @param b obere Grenze des Intrvals
|
2017-09-07 09:21:48 +00:00
|
|
|
* @return Liste der Schnittpunkte die im Interval (a,b) vertreten sind
|
2017-06-29 15:32:54 +00:00
|
|
|
*/
|
2020-03-21 00:37:09 +00:00
|
|
|
public List<Point> getOpenIntervalElements(double a, double b) {
|
|
|
|
List<Point> list = new ArrayList<>();
|
2017-09-10 15:45:47 +00:00
|
|
|
for (int i = 0; i < intervalIntersections.size(); i++) {
|
2017-06-29 15:32:54 +00:00
|
|
|
Point x = intervalIntersections.get(i);
|
2017-09-09 17:41:32 +00:00
|
|
|
if ((x.getX() > a && x.getX() < b) || (Math.abs(interval.getUpper() - interval.getLower())) < EPSILON) {
|
2017-06-29 15:32:54 +00:00
|
|
|
list.add(x);
|
|
|
|
}
|
|
|
|
}
|
2017-09-07 09:21:48 +00:00
|
|
|
intervalIntersections.clear();
|
|
|
|
intervalIntersections = null;
|
2017-06-29 15:32:54 +00:00
|
|
|
return list;
|
2017-06-27 08:08:32 +00:00
|
|
|
}
|
|
|
|
|
2020-03-21 19:54:03 +00:00
|
|
|
private Line pepareResult() {
|
2017-08-02 05:40:08 +00:00
|
|
|
double m, x;
|
|
|
|
double b, y;
|
|
|
|
|
2020-03-21 00:37:09 +00:00
|
|
|
List<Point> resultSt = getOpenIntervalElements(interval.getLower(), interval.getUpper());
|
|
|
|
List<Double> resultAbscissas = new ArrayList<>();
|
2017-09-09 17:41:32 +00:00
|
|
|
|
2017-09-10 15:45:47 +00:00
|
|
|
for (Point p : resultSt) {
|
2017-09-09 17:41:32 +00:00
|
|
|
resultAbscissas.add(p.getX());
|
2017-08-02 05:40:08 +00:00
|
|
|
}
|
2017-06-29 15:32:54 +00:00
|
|
|
|
2017-09-10 15:45:47 +00:00
|
|
|
for (Point p : setOfIntersections) {
|
2017-09-09 17:41:32 +00:00
|
|
|
yCoordinates.add(p.getY());
|
2017-08-02 05:40:08 +00:00
|
|
|
}
|
2017-06-29 15:32:54 +00:00
|
|
|
|
2020-03-21 00:37:09 +00:00
|
|
|
double pseudoIndex = getOpenIntervalSize(NEGATIV_INF, interval.getLower(), setOfIntersections) * 1.0;
|
2017-09-10 15:45:47 +00:00
|
|
|
m = FastElementSelector.randomizedSelect(resultAbscissas, k - pseudoIndex);
|
2017-06-29 15:32:54 +00:00
|
|
|
|
2017-09-09 17:41:32 +00:00
|
|
|
Set<Double> unique = new LinkedHashSet<>(yCoordinates);
|
|
|
|
yCoordinates.clear();
|
|
|
|
yCoordinates.addAll(unique);
|
2017-09-10 15:45:47 +00:00
|
|
|
b = FastElementSelector.randomizedSelect(yCoordinates, yCoordinates.size() * 0.5) * -1;
|
2017-08-02 05:40:08 +00:00
|
|
|
slope = m;
|
|
|
|
yInterception = b;
|
2017-06-29 15:32:54 +00:00
|
|
|
|
2020-03-21 00:37:09 +00:00
|
|
|
if (this.subscriber != null) {
|
|
|
|
AlgorithmData data = new AlgorithmData();
|
|
|
|
data.setType(SubscriberType.TS);
|
|
|
|
data.setLineData(new Line(m, b));
|
|
|
|
this.subscriber.onNext(data);
|
2017-06-29 15:32:54 +00:00
|
|
|
}
|
2020-03-21 19:54:03 +00:00
|
|
|
|
|
|
|
return new Line(getSlope(), getYInterception());
|
2017-06-29 11:10:15 +00:00
|
|
|
}
|
2017-06-29 15:32:54 +00:00
|
|
|
|
2017-10-15 13:21:53 +00:00
|
|
|
/**
|
|
|
|
* @return Steigung
|
|
|
|
*/
|
2017-08-01 19:59:33 +00:00
|
|
|
public Double getSlope() {
|
|
|
|
return slope;
|
|
|
|
}
|
|
|
|
|
2017-10-15 13:21:53 +00:00
|
|
|
/**
|
|
|
|
* @return y-Achsenabschnitt
|
|
|
|
*/
|
2020-03-21 19:54:03 +00:00
|
|
|
public Double getYInterception() {
|
2017-08-01 19:59:33 +00:00
|
|
|
return yInterception;
|
|
|
|
}
|
2020-03-21 00:37:09 +00:00
|
|
|
|
|
|
|
@Override
|
|
|
|
public void subscribe(Flow.Subscriber<? super Data> subscriber) {
|
|
|
|
this.subscriber = subscriber;
|
|
|
|
}
|
2017-05-28 12:00:01 +00:00
|
|
|
}
|