add Java implementations

This commit is contained in:
Denis-Cosmin Nutiu 2024-11-16 00:27:50 +02:00
parent ceb50c824b
commit 9cd29b1d2e
15 changed files with 1258 additions and 4 deletions

View file

@ -1,4 +1,3 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4"> <project version="4">
<component name="ExternalStorageConfigurationManager" enabled="true" /> <component name="ExternalStorageConfigurationManager" enabled="true" />
<component name="FrameworkDetectionExcludesConfiguration"> <component name="FrameworkDetectionExcludesConfiguration">

124
.idea/uiDesigner.xml Normal file
View file

@ -0,0 +1,124 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Palette2">
<group name="Swing">
<item class="com.intellij.uiDesigner.HSpacer" tooltip-text="Horizontal Spacer" icon="/com/intellij/uiDesigner/icons/hspacer.svg" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="1" hsize-policy="6" anchor="0" fill="1" />
</item>
<item class="com.intellij.uiDesigner.VSpacer" tooltip-text="Vertical Spacer" icon="/com/intellij/uiDesigner/icons/vspacer.svg" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="6" hsize-policy="1" anchor="0" fill="2" />
</item>
<item class="javax.swing.JPanel" icon="/com/intellij/uiDesigner/icons/panel.svg" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3" />
</item>
<item class="javax.swing.JScrollPane" icon="/com/intellij/uiDesigner/icons/scrollPane.svg" removable="false" auto-create-binding="false" can-attach-label="true">
<default-constraints vsize-policy="7" hsize-policy="7" anchor="0" fill="3" />
</item>
<item class="javax.swing.JButton" icon="/com/intellij/uiDesigner/icons/button.svg" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="3" anchor="0" fill="1" />
<initial-values>
<property name="text" value="Button" />
</initial-values>
</item>
<item class="javax.swing.JRadioButton" icon="/com/intellij/uiDesigner/icons/radioButton.svg" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
<initial-values>
<property name="text" value="RadioButton" />
</initial-values>
</item>
<item class="javax.swing.JCheckBox" icon="/com/intellij/uiDesigner/icons/checkBox.svg" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
<initial-values>
<property name="text" value="CheckBox" />
</initial-values>
</item>
<item class="javax.swing.JLabel" icon="/com/intellij/uiDesigner/icons/label.svg" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="0" anchor="8" fill="0" />
<initial-values>
<property name="text" value="Label" />
</initial-values>
</item>
<item class="javax.swing.JTextField" icon="/com/intellij/uiDesigner/icons/textField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
<preferred-size width="150" height="-1" />
</default-constraints>
</item>
<item class="javax.swing.JPasswordField" icon="/com/intellij/uiDesigner/icons/passwordField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
<preferred-size width="150" height="-1" />
</default-constraints>
</item>
<item class="javax.swing.JFormattedTextField" icon="/com/intellij/uiDesigner/icons/formattedTextField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
<preferred-size width="150" height="-1" />
</default-constraints>
</item>
<item class="javax.swing.JTextArea" icon="/com/intellij/uiDesigner/icons/textArea.svg" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
<preferred-size width="150" height="50" />
</default-constraints>
</item>
<item class="javax.swing.JTextPane" icon="/com/intellij/uiDesigner/icons/textPane.svg" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
<preferred-size width="150" height="50" />
</default-constraints>
</item>
<item class="javax.swing.JEditorPane" icon="/com/intellij/uiDesigner/icons/editorPane.svg" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
<preferred-size width="150" height="50" />
</default-constraints>
</item>
<item class="javax.swing.JComboBox" icon="/com/intellij/uiDesigner/icons/comboBox.svg" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="0" hsize-policy="2" anchor="8" fill="1" />
</item>
<item class="javax.swing.JTable" icon="/com/intellij/uiDesigner/icons/table.svg" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
<preferred-size width="150" height="50" />
</default-constraints>
</item>
<item class="javax.swing.JList" icon="/com/intellij/uiDesigner/icons/list.svg" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="6" hsize-policy="2" anchor="0" fill="3">
<preferred-size width="150" height="50" />
</default-constraints>
</item>
<item class="javax.swing.JTree" icon="/com/intellij/uiDesigner/icons/tree.svg" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
<preferred-size width="150" height="50" />
</default-constraints>
</item>
<item class="javax.swing.JTabbedPane" icon="/com/intellij/uiDesigner/icons/tabbedPane.svg" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
<preferred-size width="200" height="200" />
</default-constraints>
</item>
<item class="javax.swing.JSplitPane" icon="/com/intellij/uiDesigner/icons/splitPane.svg" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
<preferred-size width="200" height="200" />
</default-constraints>
</item>
<item class="javax.swing.JSpinner" icon="/com/intellij/uiDesigner/icons/spinner.svg" removable="false" auto-create-binding="true" can-attach-label="true">
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
</item>
<item class="javax.swing.JSlider" icon="/com/intellij/uiDesigner/icons/slider.svg" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
</item>
<item class="javax.swing.JSeparator" icon="/com/intellij/uiDesigner/icons/separator.svg" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3" />
</item>
<item class="javax.swing.JProgressBar" icon="/com/intellij/uiDesigner/icons/progressbar.svg" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1" />
</item>
<item class="javax.swing.JToolBar" icon="/com/intellij/uiDesigner/icons/toolbar.svg" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1">
<preferred-size width="-1" height="20" />
</default-constraints>
</item>
<item class="javax.swing.JToolBar$Separator" icon="/com/intellij/uiDesigner/icons/toolbarSeparator.svg" removable="false" auto-create-binding="false" can-attach-label="false">
<default-constraints vsize-policy="0" hsize-policy="0" anchor="0" fill="1" />
</item>
<item class="javax.swing.JScrollBar" icon="/com/intellij/uiDesigner/icons/scrollbar.svg" removable="false" auto-create-binding="true" can-attach-label="false">
<default-constraints vsize-policy="6" hsize-policy="0" anchor="0" fill="2" />
</item>
</group>
</component>
</project>

0
gradlew vendored Normal file → Executable file
View file

View file

@ -1,14 +1,14 @@
# Data Structures and Algorithms # Data Structures and Algorithms
This repository is my self study guide for data structures and algorithms. I implement them from scratch in Kotlin This repository is my self study guide for data structures and algorithms. I implement them from scratch in Kotlin & Java
and then write unit tests for them. and then write unit tests for them.
What better way is there to learn a language and new concepts than by practicing them? What better way is there to learn a language and new concepts than by practicing them?
## Data Structures ## Data Structures
* Linked List [Implementation](https://github.com/dnutiu/dsa/blob/master/src/main/kotlin/data_structures/linked_list/LinkedList.kt) | [Tests](https://github.com/dnutiu/dsa/blob/master/src/test/kotlin/data_structures/linked_list/LinkedListTest.kt) * Linked List (Kotlin) [Implementation](https://github.com/dnutiu/dsa/blob/master/src/main/kotlin/data_structures/linked_list/LinkedList.kt) | [Tests](https://github.com/dnutiu/dsa/blob/master/src/test/kotlin/data_structures/linked_list/LinkedListTest.kt)
## Ranking ## Ranking
* BM25+ [Implementation](https://github.com/dnutiu/dsa/blob/master/src/main/kotlin/ranking/bm25/Bm25Plus.kt) | [Tests](https://github.com/dnutiu/dsa/blob/master/src/test/kotlin/ranking/bm25/BM25PlusTest.kt) | [Reference Paper](http://www.cs.otago.ac.nz/homepages/andrew/papers/2014-2.pdf) * BM25+ (Kotlin) [Implementation](https://github.com/dnutiu/dsa/blob/master/src/main/kotlin/ranking/bm25/Bm25Plus.kt) | [Tests](https://github.com/dnutiu/dsa/blob/master/src/test/kotlin/ranking/bm25/BM25PlusTest.kt) | [Reference Paper](http://www.cs.otago.ac.nz/homepages/andrew/papers/2014-2.pdf)

View file

@ -0,0 +1,271 @@
package data_structures.linked_list;
import java.util.*;
import java.util.function.Consumer;
/**
 * LinkedList models a simple singly linked list.
 * <p>
 * The list keeps a reference to both its first and its last node together with its size, so
 * appending a value and reading the size do not require walking the nodes.
 *
 * @param <T> - The type of the list.
 */
public class LinkedList<T> implements Iterable<T> {
    private Node<T> head = null;
    private Node<T> tail = null;
    private int size = 0;

    /**
     * Returns an empty linked list.
     *
     * @param <T> - The list type.
     * @return - Empty list.
     */
    public static <T> LinkedList<T> emptyList() {
        return new LinkedList<>();
    }

    /**
     * Builds a list of given values, preserving their order.
     *
     * @param values the values
     * @param <T>    - The value type
     * @return The linked list.
     */
    @SafeVarargs
    public static <T> LinkedList<T> of(T... values) {
        var list = new LinkedList<T>();
        for (T value : values) {
            list.append(value);
        }
        return list;
    }

    /**
     * Returns the head of the list.
     *
     * @return The first node, or an empty optional when the list has no elements.
     */
    public Optional<Node<T>> getFirst() {
        return Optional.ofNullable(head);
    }

    /**
     * Returns the tail of the list.
     *
     * @return The last node, or an empty optional when the list has no elements.
     */
    public Optional<Node<T>> getLast() {
        return Optional.ofNullable(tail);
    }

    /**
     * Appends the value to the end of the list.
     *
     * @param value - The value to append.
     */
    public void append(T value) {
        var newNode = Node.of(value);
        if (head == null) {
            head = newNode;
        } else {
            tail.setNext(newNode);
        }
        tail = newNode;
        size += 1;
    }

    /**
     * Appends the values to the list, in the order given.
     *
     * @param values - The values to append.
     */
    @SafeVarargs
    public final void append(T... values) {
        for (T value : values) {
            append(value);
        }
    }

    /**
     * Insert value at position in the list. The element currently at that position, and every
     * element after it, moves one position towards the end.
     *
     * @param value    - The value.
     * @param position - The position, between 0 and the list size (inclusive).
     * @throws IllegalArgumentException when the position is negative or greater than the size.
     */
    public void insert(T value, int position) {
        if (position < 0 || position > size) {
            throw new IllegalArgumentException("invalid position given");
        }
        // inserting at the end is an append; this also covers inserting into an empty list
        if (position == size) {
            append(value);
            return;
        }
        var newNode = Node.of(value);
        if (position == 0) {
            newNode.setNext(head);
            head = newNode;
        } else {
            // walk to the node that sits right before the insertion point
            var previousNode = head;
            for (int i = 1; i < position; i++) {
                previousNode = previousNode.getNext();
            }
            newNode.setNext(previousNode.getNext());
            previousNode.setNext(newNode);
        }
        size += 1;
    }

    /**
     * Returns the size of the list.
     *
     * @return The list size.
     */
    public int getSize() {
        return size;
    }

    /**
     * Returns the element at the given position.
     *
     * @param position - The position
     * @return - The node at that position, or an empty optional when the position is out of range.
     */
    public Optional<Node<T>> get(int position) {
        if (position < 0 || position >= size) {
            return Optional.empty();
        }
        var currentNode = head;
        for (int i = 0; i < position; i++) {
            currentNode = currentNode.getNext();
        }
        return Optional.of(currentNode);
    }

    /**
     * Deletes an element at the given position.
     *
     * @param position - The position, between 0 and size - 1.
     * @throws IllegalArgumentException when no element exists at the position; this includes
     *                                  any position on an empty list.
     */
    public void delete(int position) {
        // position == size is rejected too: there is no node there to unlink
        if (position < 0 || position >= size) {
            throw new IllegalArgumentException("invalid position given");
        }
        if (position == 0) {
            head = head.getNext();
            // the list had a single element, so there is no tail anymore
            if (head == null) {
                tail = null;
            }
        } else {
            // walk to the node that sits right before the one being removed
            var previousNode = head;
            for (int i = 1; i < position; i++) {
                previousNode = previousNode.getNext();
            }
            var removedNode = previousNode.getNext();
            previousNode.setNext(removedNode.getNext());
            // we deleted the tail, so the previous node becomes the new tail
            if (removedNode == tail) {
                tail = previousNode;
            }
        }
        size -= 1;
    }

    /**
     * Converts the LinkedList to a List.
     *
     * @return - A new list holding the values in list order.
     */
    public List<T> toList() {
        var list = new ArrayList<T>(size);
        for (var node = head; node != null; node = node.getNext()) {
            list.add(node.value());
        }
        return list;
    }

    /**
     * Converts the LinkedList to an array.
     * <p>
     * The array is created as an {@code Object[]}, so assigning the result to a variable of a
     * more specific array type (for example {@code String[]}) fails with a
     * {@link ClassCastException}.
     *
     * @return The array
     */
    @SuppressWarnings("unchecked") // only values of type T are ever stored in the array
    public T[] toArray() {
        var array = new Object[size];
        var index = 0;
        for (var node = head; node != null; node = node.getNext()) {
            array[index] = node.value();
            index += 1;
        }
        return (T[]) array;
    }

    /**
     * Reverses the list in-place.
     */
    public void reverse() {
        // nothing to do for an empty or single element list
        if (size < 2) {
            return;
        }
        tail = head;
        Node<T> previousNode = null;
        var currentNode = head;
        while (currentNode != null) {
            // remember the rest of the list before re-pointing the current node backwards
            var next = currentNode.getNext();
            currentNode.setNext(previousNode);
            previousNode = currentNode;
            currentNode = next;
        }
        // the last node visited is the new head
        head = previousNode;
    }

    @Override
    public Iterator<T> iterator() {
        return new NodeIterator<>(this);
    }

    @Override
    public void forEach(Consumer<? super T> action) {
        Iterable.super.forEach(action);
    }

    @Override
    public Spliterator<T> spliterator() {
        return Iterable.super.spliterator();
    }
}

View file

@ -0,0 +1,55 @@
package data_structures.linked_list;
/**
* Node represents a linked list node.
* @param <T> - The type of the Node's value.
*/
public class Node<T> {
final private T value;
private Node<T> next;
/**
* Constructs a Node instance.
* @param value - The value of the node.
* @param next - A pointer to the next node.
*/
private Node(T value, Node<T> next) {
this.value = value;
this.next = next;
}
/**
* Constructs a Node instance of a given value.
* @param value - The node value
* @return - The node instance.
* @param <T> - The type of the node value.
*/
public static <T> Node<T> of(T value) {
return new Node<>(value, null);
}
/**
* Sets the next pointer.
* @param next - The node.
*/
public void setNext(Node<T> next) {
this.next = next;
}
/**
* Gets the next node in the list.
* @return The node optional.
*/
public Node<T> getNext() {
return next;
}
/**
* Returns the value of the node.
* @return - The value.
*/
public T value() {
return this.value;
}
}

View file

@ -0,0 +1,23 @@
package data_structures.linked_list;
import java.util.Iterator;
import java.util.NoSuchElementException;
/**
 * Iterator over the values of a {@link LinkedList}, starting at the node returned by
 * {@link LinkedList#getFirst()} and following the next pointers.
 *
 * @param <T> - The type of the values in the list.
 */
public class NodeIterator<T> implements Iterator<T> {
    private Node<T> currentNode;

    /**
     * Creates an iterator positioned on the first node of the list.
     *
     * @param linkedList - The list to iterate over.
     */
    public NodeIterator(LinkedList<T> linkedList) {
        currentNode = linkedList.getFirst().orElse(null);
    }

    @Override
    public boolean hasNext() {
        return currentNode != null;
    }

    /**
     * Returns the value of the current node and advances to the following node.
     *
     * @return The next value.
     * @throws NoSuchElementException when the iteration has no more elements.
     */
    @Override
    public T next() {
        // The Iterator contract requires NoSuchElementException rather than a NullPointerException.
        if (currentNode == null) {
            throw new NoSuchElementException("no more elements in the list");
        }
        var node = currentNode;
        currentNode = currentNode.getNext();
        return node.value();
    }
}

View file

@ -0,0 +1,92 @@
package data_structures.set;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
/**
 * Set is a set of items with simple operations.
 * <p>
 * Elements are distributed over a fixed number of buckets chosen from their hash code; elements
 * that land in the same bucket are kept in a list and told apart with {@code equals}.
 *
 * @param <T> - The type of the elements stored in the set.
 */
public class Set<T> {
    private final List<T>[] hashTable;
    private final int capacity;
    // Indexes of the non-empty buckets, in the order in which they were first used.
    private final List<Integer> usedBuckets;

    @SuppressWarnings("unchecked") // generic arrays cannot be created directly
    private Set(int capacity) {
        // a zero capacity would make the bucket computation divide by zero
        if (capacity <= 0) {
            throw new IllegalArgumentException("capacity must be positive, got " + capacity);
        }
        hashTable = new List[capacity];
        usedBuckets = new ArrayList<>();
        this.capacity = capacity;
    }

    /**
     * Inserts an element into the set. Inserting an element that is already present has no effect.
     *
     * @param element - The element.
     * @throws NullPointerException when the element is null.
     */
    public void insert(T element) {
        var bucket = getHash(element);
        var list = hashTable[bucket];
        if (list == null) {
            list = new ArrayList<>();
            hashTable[bucket] = list;
            usedBuckets.add(bucket);
        } else if (list.contains(element)) {
            return;
        }
        list.add(element);
    }

    /**
     * Checks if the element is present in the set.
     *
     * @param element - The element.
     * @return true when an equal element was inserted before, false otherwise.
     * @throws NullPointerException when the element is null.
     */
    public boolean contains(T element) {
        var list = hashTable[getHash(element)];
        return list != null && list.contains(element);
    }

    /**
     * Returns all the values from the set.
     * <p>
     * Values are grouped by bucket: buckets appear in the order in which they were first used and
     * the values of one bucket appear in insertion order.
     *
     * @return A list of values.
     */
    public List<T> getValues() {
        return usedBuckets.stream()
                .flatMap(bucket -> hashTable[bucket].stream())
                .collect(Collectors.toList());
    }

    /**
     * Maps the element to the index of its bucket, a value between 0 and capacity - 1.
     */
    private int getHash(T element) {
        // Take the remainder first: Math.abs(Integer.MIN_VALUE) is still negative, while the
        // remainder is always strictly between -capacity and capacity.
        return Math.abs(element.hashCode() % capacity);
    }

    /**
     * Constructs a new {@code Set<T>} of given capacity.
     *
     * @param capacity - The number of buckets of the set, must be positive.
     * @param <T>      - The type of the set.
     * @return - The set object.
     * @throws IllegalArgumentException when the capacity is zero or negative.
     */
    public static <T> Set<T> of(int capacity) {
        return new Set<>(capacity);
    }

    /**
     * Constructs a new {@code Set<T>} with a default capacity of 1024 buckets.
     *
     * @param <T> - The type of the set.
     * @return - The set object.
     */
    public static <T> Set<T> construct() {
        return new Set<>(1024);
    }
}

View file

@ -0,0 +1,161 @@
package ranking.bm25;
import utils.Pair;
import java.util.*;
import java.util.stream.Collectors;
/**
 * Implements the BM25+ ranking algorithm.
 * <p>
 * Documents are tokenized when they are indexed. Queries are matched against the lower-cased
 * tokens and the matching documents are returned ordered by descending score.
 */
public class Bm25Plus {
    /**
     * The tuning parameters are used to tune the result of the algorithm.
     * <p>
     * These values were taken directly from the paper.
     */
    private static final double TUNING_PARAMETER_B = 0.3;
    private static final double TUNING_PARAMETER_K1 = 1.6;
    private static final double TUNING_PARAMETER_DELTA = 0.7;

    /**
     * The storage holds a mapping of document id -> document.
     */
    private final Map<Integer, TokenizedDocument> storage = new HashMap<>();
    /**
     * The term frequency index holds a mapping of term -> ids of the documents in which the term occurs.
     */
    private final Map<String, Set<Integer>> termFrequencyIndex = new HashMap<>();

    private int totalTokens = 0;
    private double meanDocumentLengths = 0.0;

    /**
     * Returns the index size.
     *
     * @return The number of indexed documents.
     */
    public int getIndexSize() {
        return storage.size();
    }

    /**
     * Indexes a document. A document whose id is already present in the index is ignored.
     *
     * @param document - The document.
     */
    public void index(Document document) {
        if (storage.containsKey(document.documentId())) {
            return;
        }
        // Tokenize the document, for educational purposes and simplicity we will consider tokens only
        // the words delimited by a space and transform them into lowercase.
        TokenizedDocument tokenizedDocument = TokenizedDocument.fromDocument(document);
        storage.put(document.documentId(), tokenizedDocument);
        totalTokens += tokenizedDocument.getTokens().size();
        meanDocumentLengths = (double) totalTokens / storage.size();
        // Index all tokens
        for (String token : tokenizedDocument.getTokens()) {
            termFrequencyIndex
                    .computeIfAbsent(token.toLowerCase(), key -> new HashSet<>())
                    .add(document.documentId());
        }
    }

    /**
     * Indexes all documents.
     *
     * @param documents - The documents.
     */
    public void indexAll(Document... documents) {
        for (Document document : documents) {
            index(document);
        }
    }

    /**
     * Executes a term query against the index and ranks the results using bm25+.
     *
     * @param term - The term, matched case-insensitively.
     * @return Pairs of (score, document) ordered by descending score; empty when no document
     * contains the term.
     */
    public List<Pair<Double, Document>> termQuery(String term) {
        String normalizedTerm = term.toLowerCase();
        Set<Integer> documentIds = termFrequencyIndex.get(normalizedTerm);
        if (documentIds == null) {
            return Collections.emptyList();
        }
        List<Pair<Double, Document>> results = new ArrayList<>();
        for (Integer id : documentIds) {
            TokenizedDocument document = storage.get(id);
            if (document == null) {
                continue;
            }
            results.add(new Pair<>(computeRsv(normalizedTerm, document), document.getDocument()));
        }
        return rank(results);
    }

    /**
     * Executes a terms query against the index and ranks the results using bm25+.
     * <p>
     * Every document containing at least one of the terms is returned; its score is the sum of
     * the scores of the individual terms. Terms that occur in no indexed document are ignored.
     *
     * @param terms - The terms, matched case-insensitively.
     * @return Pairs of (score, document) ordered by descending score.
     */
    public List<Pair<Double, Document>> termsQuery(String... terms) {
        // Normalise like termQuery does and drop unknown terms: their document frequency is zero,
        // which would turn the score of every document into a non-finite value.
        List<String> knownTerms = Arrays.stream(terms)
                .map(String::toLowerCase)
                .filter(termFrequencyIndex::containsKey)
                .collect(Collectors.toList());

        // Collect the ids into a fresh set so the posting sets stored in the index are never modified.
        Set<Integer> documentIds = new HashSet<>();
        for (String term : knownTerms) {
            documentIds.addAll(termFrequencyIndex.get(term));
        }

        List<Pair<Double, Document>> results = new ArrayList<>();
        for (Integer id : documentIds) {
            TokenizedDocument document = storage.get(id);
            // Sum the RSV of each term.
            double rsvSum = 0;
            for (String term : knownTerms) {
                rsvSum += computeRsv(term, document);
            }
            results.add(new Pair<>(rsvSum, document.getDocument()));
        }
        return rank(results);
    }

    /**
     * Drops the entries whose score is not a finite number and orders the rest by descending score.
     */
    private List<Pair<Double, Document>> rank(List<Pair<Double, Document>> results) {
        results.removeIf(entry -> !Double.isFinite(entry.first()));
        results.sort((a, b) -> Double.compare(b.first(), a.first()));
        return results;
    }

    /**
     * Computes the inverse document frequency for a given term.
     * <p>
     * The IDF is defined as the total number of documents (N) divided by the documents that contain the term (dft).
     * In the BM25+ version the IDF is the (N+1)/(dft)
     * <p>
     * The result is infinite when no document contains the term.
     */
    private double computeInverseDocumentFrequency(String term) {
        int numberOfDocumentsContainingTheTerm = termFrequencyIndex.getOrDefault(term, Set.of()).size();
        return (storage.size() + 1) / (double) numberOfDocumentsContainingTheTerm;
    }

    /**
     * Computes the RSV for the given term and document.
     * The RSV (Retrieval Status Value) is computed for every document using the BM25+ formula from the paper.
     *
     * @param term     - The lower-cased term.
     * @param document - The document to score.
     */
    private double computeRsv(String term, TokenizedDocument document) {
        double inverseDocumentFrequencyLog = Math.log10(computeInverseDocumentFrequency(term));
        double termOccurringInDocumentFrequency = (double) document.getTokens().stream()
                .filter(token -> token.equals(term))
                .count();
        double documentLength = document.getTokens().size();
        return inverseDocumentFrequencyLog *
                (((TUNING_PARAMETER_K1 + 1) * termOccurringInDocumentFrequency) /
                        ((TUNING_PARAMETER_K1 * ((1 - TUNING_PARAMETER_B) + TUNING_PARAMETER_B * (documentLength / meanDocumentLengths))) + termOccurringInDocumentFrequency)
                        + TUNING_PARAMETER_DELTA);
    }
}

View file

@ -0,0 +1,4 @@
package ranking.bm25;
/**
 * An immutable piece of text together with a numeric identifier.
 *
 * @param documentId - The identifier of the document.
 * @param text       - The text of the document.
 */
public record Document(int documentId, String text) {}

View file

@ -0,0 +1,56 @@
package ranking.bm25;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
/**
 * A {@link Document} together with the tokens extracted from its text.
 * <p>
 * The text is split on whitespace; from every piece only letters and digits are kept and the
 * result is lower-cased. Pieces that end up empty are dropped.
 */
public class TokenizedDocument {
    private final Document document;
    private final List<String> tokens;

    private TokenizedDocument(Document document) {
        this.document = document;
        // Split on any run of whitespace: splitting on a single space only would glue words
        // separated by a tab or a line break into one token once the separator is stripped.
        this.tokens = Arrays.stream(document.text().split("\\s+"))
                .map(TokenizedDocument::keepLettersAndDigits)
                .filter(token -> !token.isEmpty())
                .map(String::toLowerCase)
                .collect(Collectors.toUnmodifiableList());
    }

    /**
     * Removes every character that is neither a letter nor a digit from the word.
     */
    private static String keepLettersAndDigits(String word) {
        // Work on code points so letters outside the Basic Multilingual Plane are kept intact.
        return word.codePoints()
                .filter(Character::isLetterOrDigit)
                .collect(StringBuilder::new, StringBuilder::appendCodePoint, StringBuilder::append)
                .toString();
    }

    /**
     * Tokenizes the given document.
     *
     * @param document - The document to tokenize.
     * @return The tokenized document.
     */
    public static TokenizedDocument fromDocument(Document document) {
        return new TokenizedDocument(document);
    }

    /**
     * Returns the tokenized tokens
     *
     * @return tokens list, in text order; the list cannot be modified.
     */
    public List<String> getTokens() {
        return tokens;
    }

    /**
     * Returns the document.
     */
    public Document getDocument() {
        return document;
    }

    /**
     * Two tokenized documents are equal when their underlying documents are equal.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        TokenizedDocument that = (TokenizedDocument) o;
        return Objects.equals(document, that.document);
    }

    @Override
    public int hashCode() {
        return Objects.hashCode(document);
    }
}

View file

@ -0,0 +1,10 @@
package utils;
/**
 * An immutable holder of two values.
 *
 * @param first  - The first item of the pair.
 * @param second - The second item of the pair.
 * @param <T>    - The type of the first item.
 * @param <V>    - The type of the second item.
 */
public record Pair<T, V>(T first, V second) {
}

View file

@ -0,0 +1,253 @@
package data_structures.linked_list;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
/**
 * Unit tests for the Java {@link LinkedList} implementation.
 */
public class JavaLinkedListTest {

    /**
     * Builds the three element list used by most tests.
     */
    private static LinkedList<String> firstSecondThird() {
        var linkedList = new LinkedList<String>();
        linkedList.append("First");
        linkedList.append("Second");
        linkedList.append("Third");
        return linkedList;
    }

    @Test
    public void test_getFirst_EmptyList() {
        // Setup
        var linkedList = new LinkedList<String>();

        // Test
        Assertions.assertTrue(linkedList.getFirst().isEmpty());
    }

    @Test
    public void test_getFirst_empty() {
        // Setup
        var linkedList = LinkedList.emptyList();

        // Test
        Assertions.assertTrue(linkedList.getFirst().isEmpty());
    }

    @Test
    public void test_getFirst() {
        // Setup
        var linkedList = firstSecondThird();

        // Test
        Assertions.assertEquals("First", linkedList.getFirst().orElseThrow().value());
    }

    @Test
    public void test_getFirst_of() {
        // Setup
        var linkedList = LinkedList.of("First", "Second", "Third");

        // Test
        Assertions.assertEquals("First", linkedList.getFirst().orElseThrow().value());
    }

    @Test
    public void test_getLast() {
        // Setup
        var linkedList = firstSecondThird();

        // Test
        Assertions.assertEquals("Third", linkedList.getLast().orElseThrow().value());
    }

    @Test
    public void test_get() {
        // Setup
        var linkedList = firstSecondThird();

        // Test
        Assertions.assertEquals("First", linkedList.get(0).orElseThrow().value());
        Assertions.assertEquals("Second", linkedList.get(1).orElseThrow().value());
        Assertions.assertEquals("Third", linkedList.get(2).orElseThrow().value());
        Assertions.assertTrue(linkedList.get(3).isEmpty());
    }

    @Test
    public void test_deleteHead() {
        var linkedList = firstSecondThird();

        // Test
        linkedList.delete(0);

        // Assert
        Assertions.assertEquals(2, linkedList.getSize());
        Assertions.assertEquals("Second", linkedList.getFirst().orElseThrow().value());
        Assertions.assertEquals("Third", linkedList.getLast().orElseThrow().value());
    }

    @Test
    public void test_deleteMiddle() {
        var linkedList = firstSecondThird();

        // Test
        linkedList.delete(1);

        // Assert
        Assertions.assertEquals(2, linkedList.getSize());
        Assertions.assertEquals("First", linkedList.getFirst().orElseThrow().value());
        Assertions.assertEquals("Third", linkedList.getLast().orElseThrow().value());
    }

    @Test
    public void test_deleteLast() {
        var linkedList = firstSecondThird();

        // Test
        linkedList.delete(2);

        // Assert
        Assertions.assertEquals(2, linkedList.getSize());
        Assertions.assertEquals("First", linkedList.getFirst().orElseThrow().value());
        Assertions.assertEquals("Second", linkedList.getLast().orElseThrow().value());
    }

    @Test
    public void test_deleteInvalidPosition() {
        // Setup
        var linkedList = new LinkedList<String>();
        linkedList.append("First");

        // Test
        Assertions.assertThrows(IllegalArgumentException.class, () -> linkedList.delete(-1));
        Assertions.assertThrows(IllegalArgumentException.class, () -> linkedList.delete(99));
    }

    @Test
    public void test_insertFirst() {
        // Setup
        var linkedList = new LinkedList<String>();

        // Test
        linkedList.insert("First", 0);

        // Assert
        Assertions.assertEquals(1, linkedList.getSize());
        Assertions.assertEquals("First", linkedList.get(0).orElseThrow().value());
    }

    @Test
    public void test_insertLast() {
        // Setup
        var linkedList = new LinkedList<String>();
        linkedList.insert("First", 0);

        // Test
        linkedList.insert("Second", linkedList.getSize());

        // Assert
        Assertions.assertEquals(2, linkedList.getSize());
        Assertions.assertEquals("Second", linkedList.get(1).orElseThrow().value());
    }

    @Test
    public void test_insertMiddle() {
        // Setup
        var linkedList = new LinkedList<String>();
        linkedList.insert("First", 0);
        linkedList.insert("Last", linkedList.getSize());

        // Test
        linkedList.insert("Middle", linkedList.getSize() - 1);

        // Assert
        Assertions.assertEquals(3, linkedList.getSize());
        Assertions.assertEquals("First", linkedList.get(0).orElseThrow().value());
        Assertions.assertEquals("Middle", linkedList.get(1).orElseThrow().value());
        Assertions.assertEquals("Last", linkedList.get(2).orElseThrow().value());
    }

    @Test
    public void test_toList() {
        // Given
        var list = LinkedList.of("One", "Two", "Three");

        // Then
        Assertions.assertEquals(List.of("One", "Two", "Three"), list.toList());
    }

    @Test
    public void test_toArray() {
        // Given
        var list = LinkedList.of("One", "Two", "Three");

        // Then
        Assertions.assertArrayEquals(List.of("One", "Two", "Three").toArray(), list.toArray());
    }

    @Test
    public void test_reverseOneElement() {
        // Setup
        var linkedList = new LinkedList<String>();
        linkedList.append("First");

        // Test
        linkedList.reverse();

        // Assert
        Assertions.assertEquals(List.of("First"), linkedList.toList());
    }

    @Test
    public void test_reverseTwoElement() {
        // Setup
        var linkedList = new LinkedList<String>();
        linkedList.append("First", "Second");

        // Test
        linkedList.reverse();

        // Assert
        Assertions.assertEquals(List.of("Second", "First"), linkedList.toList());
    }

    @Test
    public void test_reverseFiveElement() {
        // Setup
        var linkedList = new LinkedList<String>();
        linkedList.append("First", "Second", "Third", "Fourth", "Fifth");

        // Test
        linkedList.reverse();

        // Assert
        Assertions.assertEquals(List.of("Fifth", "Fourth", "Third", "Second", "First"), linkedList.toList());
        // reversing must also swap the head and the tail
        Assertions.assertEquals("Fifth", linkedList.getFirst().orElseThrow().value());
        Assertions.assertEquals("First", linkedList.getLast().orElseThrow().value());
    }

    @Test
    public void test_linkedList_iteration() {
        // Setup
        var linkedList = LinkedList.of("First", "Second", "Third", "Fourth");
        Iterable<String> iterable = linkedList::iterator;
        Stream<String> targetStream = StreamSupport.stream(iterable.spliterator(), false);

        // Test
        var resultingList = targetStream.map(i -> String.format("%sX", i)).collect(Collectors.toCollection(ArrayList::new));

        // Assert
        Assertions.assertEquals(List.of("FirstX", "SecondX", "ThirdX", "FourthX"), resultingList);
    }
}

View file

@ -0,0 +1,116 @@
package data_structures.set;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import java.util.List;
import java.util.Objects;
/**
 * Test fixture whose hash code only distinguishes the name "Denis" from every other name, so
 * that differently named persons end up with the same hash code.
 */
class TestPerson {
    private final String name;

    public TestPerson(String name) {
        this.name = name;
    }

    /**
     * Persons are equal when they are of the same class and carry the same name.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        return Objects.equals(name, ((TestPerson) o).name);
    }

    /**
     * Returns 2 for the name "Denis" and 1 for any other name.
     */
    @Override
    public int hashCode() {
        return name.equals("Denis") ? 2 : 1;
    }

    @Override
    public String toString() {
        return "TestPerson{name='" + name + "'}";
    }
}
/**
 * Unit tests for {@link Set}.
 */
public class SetTest {
    @Test
    public void testInsertElement() {
        // Given
        var set = Set.<String>construct();

        // Then
        set.insert("SomeElement");

        // Assert
        Assertions.assertTrue(set.contains("SomeElement"));
        Assertions.assertEquals(List.of("SomeElement"), set.getValues());
    }

    @Test
    public void testInsertDuplicateElement() {
        // Setup
        var set = Set.<String>construct();

        // Test
        set.insert("SomeElement");
        set.insert("SomeElement");

        // Assert
        Assertions.assertEquals(List.of("SomeElement"), set.getValues());
    }

    @Test
    public void testContainsElementTrue() {
        // Setup
        var set = Set.<String>construct();

        // Test
        set.insert("SomeElement");

        // Assert
        Assertions.assertTrue(set.contains("SomeElement"));
    }

    @Test
    public void testContainsElementFalse() {
        // Setup
        var set = Set.<String>construct();

        // Test
        set.insert("SomeElement");

        // Assert
        Assertions.assertFalse(set.contains("AnotherElement"));
    }

    @Test
    public void testContainsElementWithCollision() {
        // Setup
        var set = Set.<TestPerson>construct();

        // Test
        set.insert(new TestPerson("Denis"));
        set.insert(new TestPerson("Alex"));

        // Assert: "Paul" shares its hash code with "Alex" but is not equal to it
        Assertions.assertTrue(set.contains(new TestPerson("Alex")));
        Assertions.assertFalse(set.contains(new TestPerson("Paul")));
    }

    @Test
    public void testGetValues() {
        // Setup
        var set = Set.<String>construct();

        // Test
        set.insert("SomeElement");
        set.insert("AnotherElement");

        // Assert
        Assertions.assertEquals(List.of("SomeElement", "AnotherElement"), set.getValues());
    }

    @Test
    public void testGetValuesWithCollision() {
        // Setup
        var set = Set.<TestPerson>construct();

        // Test
        set.insert(new TestPerson("Denis"));
        set.insert(new TestPerson("Alex"));
        set.insert(new TestPerson("Paul"));

        // Assert
        Assertions.assertEquals(List.of(new TestPerson("Denis"), new TestPerson("Alex"), new TestPerson("Paul")), set.getValues());
    }
}

View file

@ -0,0 +1,90 @@
package ranking;
import ranking.bm25.Bm25Plus;
import ranking.bm25.Document;
import utils.Pair;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import java.util.*;
public class Bm25PlusTest {
@Test
public void test_index_and_indexSize() {
// Setup
Bm25Plus bm25Plus = new Bm25Plus();
Document document1 = new Document(1, "Ana are mere");
Document document2 = new Document(2, "Ana Ana Ana Ana Ana Ana Ana Ana");
// Test
bm25Plus.indexAll(document1, document2);
// Assert
Assertions.assertEquals(2, bm25Plus.getIndexSize());
}
@Test
public void test_termQuery() {
// Given
Bm25Plus bm25Plus = new Bm25Plus();
Document document1 = new Document(1, "Ana are mere");
Document document2 = new Document(2, "Ana Ana Ana Ana Ana Ana Ana Ana");
bm25Plus.indexAll(document1, document2);
// Then
Assertions.assertEquals(
new ArrayList<>(Arrays.asList(
new Pair<>(0.4963164745976794, document2),
new Pair<>(0.3154856374073922, document1)
)),
bm25Plus.termQuery("Ana")
);
Assertions.assertEquals(
new ArrayList<>(List.of(
new Pair<>(0.8548118968145402, document1)
)),
bm25Plus.termQuery("mere")
);
Assertions.assertEquals(
Collections.emptyList(),
bm25Plus.termQuery("batman")
);
Assertions.assertEquals(
new ArrayList<>(Arrays.asList(
new Pair<>(0.4963164745976794, document2),
new Pair<>(0.3154856374073922, document1)
)),
bm25Plus.termQuery("ana")
);
}
@Test
public void test_termsQuery() {
// Given
Bm25Plus bm25Plus = new Bm25Plus();
Document document1 = new Document(1, "A linked list is a fundamental data structure which consists of Nodes that are connected to each other.");
Document document2 = new Document(2, "The Linked List data structure permits the storage of data in an efficient manner.");
Document document3 = new Document(3, "The space and time complexity of the linked list operations depends on the implementation.");
Document document4 = new Document(4, "The operations that take O(N) time takes this much because you have to traverse the lists for at least N nodes in order to perform it successfully. On the other hand, operations that take O(1) time do not require any traversals because the list holds pointers to the head first Node and tail last Node.");
bm25Plus.indexAll(document1, document2, document3, document4);
// Then
Assertions.assertEquals(
new ArrayList<>(List.of(
new Pair<>(1.5977607472650388, document3),
new Pair<>(0.8361444686814765, document2),
new Pair<>(0.8296222299960145, document1),
new Pair<>(0.704549447544239, document4)
)),
bm25Plus.termsQuery("linked", "list", "complexity")
);
}
}