From 9cd29b1d2e1c5a36a793b79655863b521fc5774a Mon Sep 17 00:00:00 2001 From: Denis Nutiu Date: Sat, 16 Nov 2024 00:27:50 +0200 Subject: [PATCH] add Java implementations --- .idea/misc.xml | 1 - .idea/uiDesigner.xml | 124 ++++++++ gradlew | 0 readme.md | 6 +- .../linked_list/LinkedList.java | 271 ++++++++++++++++++ .../data_structures/linked_list/Node.java | 55 ++++ .../linked_list/NodeIterator.java | 23 ++ src/main/java/data_structures/set/Set.java | 92 ++++++ src/main/java/ranking/bm25/Bm25Plus.java | 161 +++++++++++ src/main/java/ranking/bm25/Document.java | 4 + .../java/ranking/bm25/TokenizedDocument.java | 56 ++++ src/main/java/utils/Pair.java | 10 + .../linked_list/JavaLinkedListTest.java | 253 ++++++++++++++++ .../java/data_structures/set/SetTest.java | 116 ++++++++ src/test/java/ranking/Bm25PlusTest.java | 90 ++++++ 15 files changed, 1258 insertions(+), 4 deletions(-) create mode 100644 .idea/uiDesigner.xml mode change 100644 => 100755 gradlew create mode 100644 src/main/java/data_structures/linked_list/LinkedList.java create mode 100644 src/main/java/data_structures/linked_list/Node.java create mode 100644 src/main/java/data_structures/linked_list/NodeIterator.java create mode 100644 src/main/java/data_structures/set/Set.java create mode 100644 src/main/java/ranking/bm25/Bm25Plus.java create mode 100644 src/main/java/ranking/bm25/Document.java create mode 100644 src/main/java/ranking/bm25/TokenizedDocument.java create mode 100644 src/main/java/utils/Pair.java create mode 100644 src/test/java/data_structures/linked_list/JavaLinkedListTest.java create mode 100644 src/test/java/data_structures/set/SetTest.java create mode 100644 src/test/java/ranking/Bm25PlusTest.java diff --git a/.idea/misc.xml b/.idea/misc.xml index 67f77c0..059db73 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,4 +1,3 @@ - diff --git a/.idea/uiDesigner.xml b/.idea/uiDesigner.xml new file mode 100644 index 0000000..2b63946 --- /dev/null +++ b/.idea/uiDesigner.xml @@ -0,0 +1,124 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/gradlew b/gradlew old mode 100644 new mode 100755 diff --git a/readme.md b/readme.md index 63bbd2b..dc2f626 100644 --- a/readme.md +++ b/readme.md @@ -1,14 +1,14 @@ # Data Structures and Algorithms -This repository is my self study guide for data structures and algorithms. I implement them from scratch in Kotlin +This repository is my self study guide for data structures and algorithms. I implement them from scratch in Kotlin & Java and then write unit test for them. What better way to learn a language and new concepts exists other than practicing them. ## Data Structures -* Linked List [Implementation](https://github.com/dnutiu/dsa/blob/master/src/main/kotlin/data_structures/linked_list/LinkedList.kt) | [Tests](https://github.com/dnutiu/dsa/blob/master/src/test/kotlin/data_structures/linked_list/LinkedListTest.kt) +* Linked List (Kotlin) [Implementation](https://github.com/dnutiu/dsa/blob/master/src/main/kotlin/data_structures/linked_list/LinkedList.kt) | [Tests](https://github.com/dnutiu/dsa/blob/master/src/test/kotlin/data_structures/linked_list/LinkedListTest.kt) ## Ranking -* BM25+ [Implementation](https://github.com/dnutiu/dsa/blob/master/src/main/kotlin/ranking/bm25/Bm25Plus.kt) | [Tests](https://github.com/dnutiu/dsa/blob/master/src/test/kotlin/ranking/bm25/BM25PlusTest.kt) | [Reference Paper](http://www.cs.otago.ac.nz/homepages/andrew/papers/2014-2.pdf) \ No newline at end of file +* BM25+ (Kotlin) [Implementation](https://github.com/dnutiu/dsa/blob/master/src/main/kotlin/ranking/bm25/Bm25Plus.kt) | [Tests](https://github.com/dnutiu/dsa/blob/master/src/test/kotlin/ranking/bm25/BM25PlusTest.kt) | [Reference Paper](http://www.cs.otago.ac.nz/homepages/andrew/papers/2014-2.pdf) \ No newline at end of file diff --git a/src/main/java/data_structures/linked_list/LinkedList.java b/src/main/java/data_structures/linked_list/LinkedList.java new file mode 100644 index 0000000..60989df --- /dev/null +++ b/src/main/java/data_structures/linked_list/LinkedList.java @@ -0,0 +1,271 @@ +package data_structures.linked_list; + + +import java.util.*; +import java.util.function.Consumer; + +/** + * LinkedList models a simple linked list. + * + * @param - The type of the list. + */ +public class LinkedList implements Iterable { + private Node head = null; + private Node tail = null; + private int size = 0; + + /** + * Returns an empty linked list. + * @return - Empty list. + * @param - The list type. + */ + public static LinkedList emptyList() { + return new LinkedList<>(); + } + + /** + * Builds a list of given values. + * @param values the values + * @return The linked list. + * @param - The value type + */ + @SafeVarargs + public static LinkedList of(T ...values) { + var list = new LinkedList(); + for (T value : values) { + list.append(value); + } + return list; + } + + /** + * Returns the head of the list. + */ + public Optional> getFirst() { + if (head == null) { + return Optional.empty(); + } + return Optional.of(head); + } + + /** + * Returns the tail of the list. + */ + public Optional> getLast() { + if (tail == null) { + return Optional.empty(); + } + return Optional.of(tail); + } + + /** + * Appends the value to the list. + * + * @param value - The value to append. + */ + public void append(T value) { + var newNode = Node.of(value); + if (head == null) { + head = newNode; + } else { + tail.setNext(newNode); + } + tail = newNode; + size += 1; + } + + /** + * Appends the values to the list. + * + * @param values - The value to append. + */ + @SafeVarargs + public final void append(T... values) { + for (T value : values) { + append(value); + } + } + + /** + * Insert value at position in the list. + * @param value - The value. + * @param position - The position. + */ + public void insert(T value, int position) { + if (position < 0 || position > size) { + throw new IllegalArgumentException("invalid position given"); + } + // handle insert last + if (position == size) { + append(value); + return; + } + // handle insert first + if (position == 0) { + var newNode = Node.of(value); + newNode.setNext(head); + head = newNode; + size += 1; + return; + } + // handle insert at position + var currentPosition = 0; + var currentNode = head; + Node previousNode = null; + // search for position to insert at + while (true) { + if (currentPosition == position) { + var newNode = Node.of(value); + newNode.setNext(currentNode); + previousNode.setNext(newNode); + size += 1; + break; + } + currentPosition += 1; + previousNode = currentNode; + currentNode = currentNode.getNext(); + } + } + + /** + * Returns the size of the list. + * + * @return The list size. + */ + public int getSize() { + return size; + } + + /** + * Returns the element at the given position. + * @param position - The position + * @return - The element. + */ + public Optional> get(int position) { + var currentPosition = 0; + var currentNode = head; + while (currentPosition <= position && currentNode != null) { + if (currentPosition == position) { + return Optional.of(currentNode); + } + currentNode = currentNode.getNext(); + currentPosition += 1; + } + return Optional.empty(); + } + + /** + * Deletes an element at the given position. + * @param position - The position + */ + public void delete(int position) { + if (position < 0 || position > size) { + throw new IllegalArgumentException("invalid position given"); + } + // Delete head + if (position == 0) { + head = head.getNext(); + // if the list size is 1 then we don't have elements anymore + if (size == 1) { + tail = null; + } + size -= 1; + return; + } + // delete anything from tail + var currentNode = head; + Node previousNode = null; + var currentPosition = 0; + while (true) { + previousNode = currentNode; + currentNode = currentNode.getNext(); + currentPosition += 1; + // we found element at position N which is about to get deleted + if (currentPosition == position) { + previousNode.setNext(currentNode.getNext()); + + // we deleted the tail, so we need to update tail var. + if (currentPosition == size - 1) { + tail = previousNode; + } + break; + } + } + size -= 1; + } + + /** + * Converts the LinkedList to a List. + * @return - The List. + */ + public List toList() { + var list = new ArrayList(); + + var currentNode = head; + while (currentNode != null) { + list.add(currentNode.value()); + currentNode = currentNode.getNext(); + } + + return list; + } + + /** + * Converts the LinkedList to an array. + * @return The array + */ + public T[] toArray() { + var array = new Object[getSize()]; + + var currentNode = head; + var index = 0; + while (currentNode != null) { + array[index] = currentNode.value(); + currentNode = currentNode.getNext(); + index += 1; + } + + return (T[]) array; + } + + /** + * Reverses the list in-place. + */ + public void reverse() { + if (size == 1) { + return; + } + tail = head; + var currentNode = head; + Node previousNode = null; + var next = head; + // we iterate through the list and updates next accordingly, until we reach the tail. + while (next != null) { + // save the next + next = currentNode.getNext(); + // current node's next will be set to previous node. + currentNode.setNext(previousNode); + // track previous node by settings it to current node + previousNode = currentNode; + // track the current node by setting it to next + currentNode = next; + } + // update the head + head = previousNode; + } + + @Override + public Iterator iterator() { + return new NodeIterator(this); + } + + @Override + public void forEach(Consumer action) { + Iterable.super.forEach(action); + } + + @Override + public Spliterator spliterator() { + return Iterable.super.spliterator(); + } +} diff --git a/src/main/java/data_structures/linked_list/Node.java b/src/main/java/data_structures/linked_list/Node.java new file mode 100644 index 0000000..f056b33 --- /dev/null +++ b/src/main/java/data_structures/linked_list/Node.java @@ -0,0 +1,55 @@ +package data_structures.linked_list; + +/** + * Node represents a linked list node. + * @param - The type of the Node's value. + */ +public class Node { + + final private T value; + private Node next; + + /** + * Constructs a Node instance. + * @param value - The value of the node. + * @param next - A pointer to the next node. + */ + private Node(T value, Node next) { + this.value = value; + this.next = next; + } + + /** + * Constructs a Node instance of a given value. + * @param value - The node value + * @return - The node instance. + * @param - The type of the node value. + */ + public static Node of(T value) { + return new Node<>(value, null); + } + + /** + * Sets the next pointer. + * @param next - The node. + */ + public void setNext(Node next) { + this.next = next; + } + + /** + * Gets the next node in the list. + * @return The node optional. + */ + public Node getNext() { + return next; + } + + /** + * Returns the value of the node. + * @return - The value. + */ + public T value() { + return this.value; + } +} diff --git a/src/main/java/data_structures/linked_list/NodeIterator.java b/src/main/java/data_structures/linked_list/NodeIterator.java new file mode 100644 index 0000000..a541d3b --- /dev/null +++ b/src/main/java/data_structures/linked_list/NodeIterator.java @@ -0,0 +1,23 @@ +package data_structures.linked_list; + +import java.util.Iterator; + +public class NodeIterator implements Iterator { + private Node currentNode; + + public NodeIterator(LinkedList linkedList) { + currentNode = linkedList.getFirst().orElse(null); + } + + @Override + public boolean hasNext() { + return currentNode != null; + } + + @Override + public T next() { + var node = currentNode; + currentNode = currentNode.getNext(); + return node.value(); + } +} diff --git a/src/main/java/data_structures/set/Set.java b/src/main/java/data_structures/set/Set.java new file mode 100644 index 0000000..1e9d68b --- /dev/null +++ b/src/main/java/data_structures/set/Set.java @@ -0,0 +1,92 @@ +package data_structures.set; + +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; + +/** + * Set is a set of items with simple operations. + * + * @param + */ +public class Set { + private final ArrayList[] hashTable; + private final int capacity; + private final ArrayList values; + + @SuppressWarnings("unchecked") + private Set(int capacity) { + hashTable = new ArrayList[capacity]; + values = new ArrayList<>(); + this.capacity = capacity; + } + + /** + * Insets an element into the set. + * + * @param element - The element. + */ + public void insert(T element) { + if (this.contains(element)) { + return; + } + var hash = getHash(element); + var list = hashTable[hash]; + if (list == null) { + hashTable[hash] = new ArrayList(); + hashTable[hash].add(element); + + values.add(hash); + } else { + list.add(element); + } + } + + /** + * Checks if the element is present in the set. + * + * @param element - The element. + */ + public boolean contains(T element) { + var list = hashTable[getHash(element)]; + if (list != null) { + return list.contains(element); + } else { + return false; + } + } + + /** + * Returns all the values from the set. + * + * @return A list of values. + */ + public List getValues() { + return this.values.stream().flatMap(hash -> this.hashTable[hash].stream()).collect(Collectors.toList()); + } + + private int getHash(T element) { + return Math.abs(element.hashCode()) % capacity; + } + + /** + * Constructs a new Set of given capacity. + * + * @param capacity - The capacity of the set. + * @param - The type of the set. + * @return - The set object. + */ + public static Set of(int capacity) { + return new Set<>(capacity); + } + + /** + * Constructs a new Set with a default capacity. + * + * @param - The type of the set. + * @return - The set object. + */ + public static Set construct() { + return new Set<>(1024); + } +} diff --git a/src/main/java/ranking/bm25/Bm25Plus.java b/src/main/java/ranking/bm25/Bm25Plus.java new file mode 100644 index 0000000..df4900b --- /dev/null +++ b/src/main/java/ranking/bm25/Bm25Plus.java @@ -0,0 +1,161 @@ +package ranking.bm25; + +import utils.Pair; + +import java.util.*; +import java.util.stream.Collectors; + +/** + * Implements the BM25+ ranking algorithm. + */ +public class Bm25Plus { + /** + * The storage holds a mapping of document id -> document. + */ + final private HashMap storage = new HashMap<>(); + + /** + * The term frequency index holds a mapping of term -> list of documents in which the term occurs. + */ + final private HashMap> termFrequencyIndex = new HashMap<>(); + + /** + * The tuning parameters are used to tune the result of the algorithm. + *

+ * These values were taken directly from the paper. + */ + final private Double tuningParameterB = 0.3; + final private Double tuningParameterK1 = 1.6; + final private Double tuningParameterDelta = 0.7; + private Integer totalTokens = 0; + private Double meanDocumentLengths = 0.0; + + /** + * Returns the index size. + */ + public int getIndexSize() { + return storage.size(); + } + + /** + * Indexes a document + */ + public void index(Document document) { + // Tokenize the document, for educational purposes and simplicity we will consider tokens only + // the words delimited by a space and transform them into lowercase. + TokenizedDocument tokenizedDocument = TokenizedDocument.fromDocument(document); + + // Document does not exist in index + if (!storage.containsKey(document.documentId())) { + storage.put(document.documentId(), tokenizedDocument); + + totalTokens += tokenizedDocument.getTokens().size(); + meanDocumentLengths = (double) totalTokens / storage.size(); + + // Index all tokens + for (String token : tokenizedDocument.getTokens()) { + if (termFrequencyIndex.containsKey(token.toLowerCase())) { + termFrequencyIndex.get(token.toLowerCase()).add(document.documentId()); + } else { + Set documentIds = new HashSet<>(); + documentIds.add(document.documentId()); + termFrequencyIndex.put(token.toLowerCase(), documentIds); + } + } + } + } + + /** + * Indexes all documents. + * + * @param documents - The documents. + */ + public void indexAll(Document... documents) { + Arrays.stream(documents).forEach(this::index); + } + + /** + * Executes a term query against the index and ranks the results using bm25+. + * + * @param term - The term + */ + public List> termQuery(String term) { + Set documentIds = termFrequencyIndex.get(term.toLowerCase()); + if (documentIds == null) { + return Collections.emptyList(); + } + + List> results = new ArrayList<>(); + + for (Integer id : documentIds) { + TokenizedDocument document = storage.get(id); + if (document == null) { + continue; + } + double documentRsv = computeRsv(term.toLowerCase(), document); + results.add(new Pair<>(documentRsv, document.getDocument())); + } + + results.removeIf(entry -> !Double.isFinite(entry.first())); + results.sort((a, b) -> Double.compare(b.first(), a.first())); + + return results; + } + + /** + * Executes a terms query against the index and ranks the results using bm25+. + * + * @param terms - The terms + */ + public List> termsQuery(String... terms) { + var documentIds = Arrays.stream(terms).map(term -> termFrequencyIndex.getOrDefault(term.toLowerCase(), Set.of())).reduce((acc, value) -> { + acc.addAll(value); + return acc; + }).orElse(Set.of()); + + var results = documentIds.stream().map(i -> { + var document = storage.get(i); + + // Sum the RSV of each term. + double rsvSum = 0; + for (String term : terms) { + rsvSum += computeRsv(term, document); + } + + return new Pair<>(rsvSum, document.getDocument()); + }).collect(Collectors.toCollection(ArrayList::new)); + + results.removeIf(entry -> !Double.isFinite(entry.first())); + results.sort((a, b) -> Double.compare(b.first(), a.first())); + + return results; + } + + /** + * Computes the inverse document frequency for a given term. + *

+ * The IDF is defined as the total number of documents (N) divided by the documents that contain the term (dft). + * In the BM25+ version the IDF is the (N+1)/(dft) + */ + private double computeInverseDocumentFrequency(String term) { + int numberOfDocumentsContainingTheTerm = termFrequencyIndex.containsKey(term) ? termFrequencyIndex.get(term).size() : 0; + return (storage.size() + 1) / (double) numberOfDocumentsContainingTheTerm; + } + + /** + * Computes the RSV for the given term and document. + * The RSV (Retrieval Status Value) is computed for every document using the BM25+ formula from the paper. + */ + private double computeRsv(String term, TokenizedDocument document) { + double inverseDocumentFrequencyLog = Math.log10(computeInverseDocumentFrequency(term)); + double termOccurringInDocumentFrequency = (double) document.getTokens().stream() + .filter(token -> token.equals(term)) + .count(); + double documentLength = document.getTokens().size(); + + return inverseDocumentFrequencyLog * + (((tuningParameterK1 + 1) * termOccurringInDocumentFrequency) / + ((tuningParameterK1 * ((1 - tuningParameterB) + tuningParameterB * (documentLength / meanDocumentLengths))) + termOccurringInDocumentFrequency) + + tuningParameterDelta); + } +} diff --git a/src/main/java/ranking/bm25/Document.java b/src/main/java/ranking/bm25/Document.java new file mode 100644 index 0000000..c596371 --- /dev/null +++ b/src/main/java/ranking/bm25/Document.java @@ -0,0 +1,4 @@ +package ranking.bm25; + +public record Document(int documentId, String text) { +} diff --git a/src/main/java/ranking/bm25/TokenizedDocument.java b/src/main/java/ranking/bm25/TokenizedDocument.java new file mode 100644 index 0000000..0404c5f --- /dev/null +++ b/src/main/java/ranking/bm25/TokenizedDocument.java @@ -0,0 +1,56 @@ +package ranking.bm25; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; +import java.util.stream.Collectors; + +public class TokenizedDocument { + final private Document document; + final private List tokens = new ArrayList<>(); + + private TokenizedDocument(Document document) { + this.document = document; + this.tokens.addAll(Arrays.stream(document.text().split(" ")) + .map( + i -> i.chars() + .filter(Character::isLetterOrDigit) + .mapToObj(j -> String.valueOf((char) j)) + .collect(Collectors.joining()) + ) + .filter(i -> !i.isEmpty()).map(String::toLowerCase).collect(Collectors.toCollection(ArrayList::new))); + } + + public static TokenizedDocument fromDocument(Document document) { + return new TokenizedDocument(document); + } + + /** + * Returns the tokenized tokens + * @return tokens list + */ + public List getTokens() { + return tokens; + } + + /** + * Returns the document. + */ + public Document getDocument() { + return document; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + TokenizedDocument that = (TokenizedDocument) o; + return Objects.equals(document, that.document); + } + + @Override + public int hashCode() { + return Objects.hashCode(document); + } +} diff --git a/src/main/java/utils/Pair.java b/src/main/java/utils/Pair.java new file mode 100644 index 0000000..e696103 --- /dev/null +++ b/src/main/java/utils/Pair.java @@ -0,0 +1,10 @@ +package utils; + +/** + * Represents a simple pair. + * @param first - The first item from the pair. + * @param second - The second item from the pair + * @param - The type of the first item. + * @param - The type of the second item. + */ +public record Pair(T first, V second) { } diff --git a/src/test/java/data_structures/linked_list/JavaLinkedListTest.java b/src/test/java/data_structures/linked_list/JavaLinkedListTest.java new file mode 100644 index 0000000..d664cb4 --- /dev/null +++ b/src/test/java/data_structures/linked_list/JavaLinkedListTest.java @@ -0,0 +1,253 @@ +package data_structures.linked_list; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +public class JavaLinkedListTest { + + + @Test + public void test_getFirst_EmptyList() { + // Setup + var linkedList = new LinkedList(); + + // Test + Assertions.assertTrue(linkedList.getFirst().isEmpty()); + } + + @Test + public void test_getFirst_empty() { + // Setup + var linkedList = LinkedList.emptyList(); + + // Test + Assertions.assertTrue(linkedList.getFirst().isEmpty()); + } + + @Test + public void test_getFirst() { + // Setup + var linkedList = new LinkedList(); + linkedList.append("First"); + linkedList.append("Second"); + linkedList.append("Third"); + + // Test + Assertions.assertEquals("First", linkedList.getFirst().orElseThrow().value()); + } + + @Test + public void test_getFirst_of() { + // Setup + var linkedList = LinkedList.of("First", "Second", "Third"); + + // Test + Assertions.assertEquals("First", linkedList.getFirst().orElseThrow().value()); + } + + @Test + public void test_getLast() { + // Setup + var linkedList = new LinkedList(); + linkedList.append("First"); + linkedList.append("Second"); + linkedList.append("Third"); + + // Test + Assertions.assertEquals("Third", linkedList.getLast().orElseThrow().value()); + } + + @Test + public void test_get() { + // Setup + var linkedList = new LinkedList(); + linkedList.append("First"); + linkedList.append("Second"); + linkedList.append("Third"); + + // Test + Assertions.assertEquals("First", linkedList.get(0).orElseThrow().value()); + Assertions.assertEquals("Second", linkedList.get(1).orElseThrow().value()); + Assertions.assertEquals("Third", linkedList.get(2).orElseThrow().value()); + Assertions.assertTrue(linkedList.get(3).isEmpty()); + } + + + @Test + public void test_deleteHead() { + var linkedList = new LinkedList(); + linkedList.append("First"); + linkedList.append("Second"); + linkedList.append("Third"); + + // Test + linkedList.delete(0); + + // Assert + Assertions.assertEquals("Second", linkedList.getFirst().orElseThrow().value()); + Assertions.assertEquals("Third", linkedList.getLast().orElseThrow().value()); + } + + @Test + public void test_deleteMiddle() { + var linkedList = new LinkedList(); + linkedList.append("First"); + linkedList.append("Second"); + linkedList.append("Third"); + + // Test + linkedList.delete(1); + + // Assert + Assertions.assertEquals("First", linkedList.getFirst().orElseThrow().value()); + Assertions.assertEquals("Third", linkedList.getLast().orElseThrow().value()); + } + + @Test + public void test_deleteLast() { + var linkedList = new LinkedList(); + linkedList.append("First"); + linkedList.append("Second"); + linkedList.append("Third"); + + // Test + linkedList.delete(2); + + // Assert + Assertions.assertEquals("First", linkedList.getFirst().orElseThrow().value()); + Assertions.assertEquals("Second", linkedList.getLast().orElseThrow().value()); + } + + @Test + public void test_deleteInvalidPosition() { + // Setup + var linkedList = new LinkedList(); + linkedList.append("First"); + + // Test + Assertions.assertThrows(IllegalArgumentException.class, () -> linkedList.delete(-1)); + Assertions.assertThrows(IllegalArgumentException.class, () -> linkedList.delete(99)); + } + + @Test + public void test_insertFirst() { + // Setup + var linkedList = new LinkedList(); + + // Test + linkedList.insert("First", 0); + + // Assert + Assertions.assertEquals(1, linkedList.getSize()); + Assertions.assertEquals("First", linkedList.get(0).get().value()); + } + + @Test + public void test_insertLast() { + // Setup + var linkedList = new LinkedList(); + linkedList.insert("First", 0); + + // Test + linkedList.insert("Second", linkedList.getSize()); + + // Assert + Assertions.assertEquals(2, linkedList.getSize()); + Assertions.assertEquals("Second", linkedList.get(1).get().value()); + } + + @Test + public void test_insertMiddle() { + // Setup + var linkedList = new LinkedList(); + linkedList.insert("First", 0); + linkedList.insert("Last", linkedList.getSize()); + + // Test + linkedList.insert("Middle", linkedList.getSize() - 1); + + // Assert + Assertions.assertEquals(3, linkedList.getSize()); + Assertions.assertEquals("First", linkedList.get(0).get().value()); + Assertions.assertEquals("Middle", linkedList.get(1).get().value()); + Assertions.assertEquals("Last", linkedList.get(2).get().value()); + } + + @Test + public void test_toList() { + // Given + var list = LinkedList.of("One", "Two", "Three"); + + // Then + Assertions.assertEquals(List.of("One", "Two", "Three"), list.toList()); + } + + @Test + public void test_toArray() { + // Given + var list = LinkedList.of("One", "Two", "Three"); + + // Then + Assertions.assertArrayEquals(List.of("One", "Two", "Three").toArray(), list.toArray()); + } + + + @Test + public void test_reverseOneElement() { + // Setup + var linkedList = new LinkedList(); + linkedList.append("First"); + + // Test + linkedList.reverse(); + + // Assert + Assertions.assertEquals(List.of("First"), linkedList.toList()); + } + + @Test + public void test_reverseTwoElement() { + // Setup + var linkedList = new LinkedList(); + linkedList.append("First", "Second"); + + // Test + linkedList.reverse(); + + // Assert + Assertions.assertEquals(List.of("Second", "First"), linkedList.toList()); + } + + @Test + public void test_reverseFiveElement() { + // Setup + var linkedList = new LinkedList(); + linkedList.append("First", "Second", "Third", "Fourth"); + + // Test + linkedList.reverse(); + + // Assert + Assertions.assertEquals(List.of("Fourth", "Third", "Second", "First"), linkedList.toList()); + } + + @Test + public void test_linkedList_iteration() { + // Setup + var linkedList = LinkedList.of("First", "Second", "Third", "Fourth"); + Iterable iterable = linkedList::iterator; + Stream targetStream = StreamSupport.stream(iterable.spliterator(), false); + + // Test + var resultingList = targetStream.map(i -> String.format("%sX", i)).collect(Collectors.toCollection(ArrayList::new)); + + // Assert + Assertions.assertEquals(List.of("FirstX", "SecondX", "ThirdX", "FourthX"), resultingList); + } +} diff --git a/src/test/java/data_structures/set/SetTest.java b/src/test/java/data_structures/set/SetTest.java new file mode 100644 index 0000000..1cafd2f --- /dev/null +++ b/src/test/java/data_structures/set/SetTest.java @@ -0,0 +1,116 @@ +package data_structures.set; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.List; +import java.util.Objects; + +class TestPerson { + private final String name; + + public TestPerson(String name) { + this.name = name; + } + + @Override + public int hashCode() { + if (name.equals("Denis")) { + return 2; + } + return 1; + } + + @Override + public String toString() { + return "TestPerson{" + + "name='" + name + '\'' + + '}'; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + TestPerson that = (TestPerson) o; + return Objects.equals(name, that.name); + } +} + + +public class SetTest { + @Test + public void testInsertElement() { + // Given + var set = Set.construct(); + + // Then + set.insert("SomeElement"); + } + + @Test + public void testContainsElementTrue() { + // Setup + var set = Set.construct(); + + // Test + set.insert("SomeElement"); + + // Assert + Assertions.assertTrue(set.contains("SomeElement")); + } + + @Test + public void testContainsElementFalse() { + // Setup + var set = Set.construct(); + + // Test + set.insert("SomeElement"); + + // Assert + Assertions.assertFalse(set.contains("AnotherElement")); + } + + @Test + public void testContainsElementWithCollision() { + // Setup + var set = Set.construct(); + + // Test + set.insert(new TestPerson("Denis")); + set.insert(new TestPerson("Alex")); + + // Assert + Assertions.assertTrue(set.contains(new TestPerson("Alex"))); + Assertions.assertFalse(set.contains(new TestPerson("Paul"))); + } + + @Test + public void testGetValues() { + // Setup + var set = Set.construct(); + + // Test + set.insert("SomeElement"); + set.insert("AnotherElement"); + + // Assert + Assertions.assertEquals(List.of("SomeElement", "AnotherElement"), set.getValues()); + } + + @Test + public void testGetValuesWithCollision() { + // Setup + var set = Set.construct(); + + // Test + set.insert(new TestPerson("Denis")); + set.insert(new TestPerson("Alex")); + set.insert(new TestPerson("Paul")); + + // Assert + Assertions.assertEquals(List.of(new TestPerson("Denis"), new TestPerson("Alex"), new TestPerson("Paul")), set.getValues()); + + } +} diff --git a/src/test/java/ranking/Bm25PlusTest.java b/src/test/java/ranking/Bm25PlusTest.java new file mode 100644 index 0000000..4f79838 --- /dev/null +++ b/src/test/java/ranking/Bm25PlusTest.java @@ -0,0 +1,90 @@ +package ranking; + +import ranking.bm25.Bm25Plus; +import ranking.bm25.Document; +import utils.Pair; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.*; + +public class Bm25PlusTest { + @Test + public void test_index_and_indexSize() { + // Setup + Bm25Plus bm25Plus = new Bm25Plus(); + + Document document1 = new Document(1, "Ana are mere"); + Document document2 = new Document(2, "Ana Ana Ana Ana Ana Ana Ana Ana"); + + // Test + bm25Plus.indexAll(document1, document2); + + // Assert + Assertions.assertEquals(2, bm25Plus.getIndexSize()); + } + + @Test + public void test_termQuery() { + // Given + Bm25Plus bm25Plus = new Bm25Plus(); + + Document document1 = new Document(1, "Ana are mere"); + Document document2 = new Document(2, "Ana Ana Ana Ana Ana Ana Ana Ana"); + bm25Plus.indexAll(document1, document2); + + // Then + Assertions.assertEquals( + new ArrayList<>(Arrays.asList( + new Pair<>(0.4963164745976794, document2), + new Pair<>(0.3154856374073922, document1) + )), + bm25Plus.termQuery("Ana") + ); + + Assertions.assertEquals( + new ArrayList<>(List.of( + new Pair<>(0.8548118968145402, document1) + )), + bm25Plus.termQuery("mere") + ); + + Assertions.assertEquals( + Collections.emptyList(), + bm25Plus.termQuery("batman") + ); + + Assertions.assertEquals( + new ArrayList<>(Arrays.asList( + new Pair<>(0.4963164745976794, document2), + new Pair<>(0.3154856374073922, document1) + )), + bm25Plus.termQuery("ana") + ); + } + + @Test + public void test_termsQuery() { + // Given + Bm25Plus bm25Plus = new Bm25Plus(); + + Document document1 = new Document(1, "A linked list is a fundamental data structure which consists of Nodes that are connected to each other."); + Document document2 = new Document(2, "The Linked List data structure permits the storage of data in an efficient manner."); + Document document3 = new Document(3, "The space and time complexity of the linked list operations depends on the implementation."); + Document document4 = new Document(4, "The operations that take O(N) time takes this much because you have to traverse the list’s for at least N nodes in order to perform it successfully. On the other hand, operations that take O(1) time do not require any traversals because the list holds pointers to the head first Node and tail last Node."); + bm25Plus.indexAll(document1, document2, document3, document4); + + + // Then + Assertions.assertEquals( + new ArrayList<>(List.of( + new Pair<>(1.5977607472650388, document3), + new Pair<>(0.8361444686814765, document2), + new Pair<>(0.8296222299960145, document1), + new Pair<>(0.704549447544239, document4) + )), + bm25Plus.termsQuery("linked", "list", "complexity") + ); + } +} +