diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 0c16d1afa53d24841833d143bc0dc4b1f9fda5cb..0b825bb3b47deef45fcf9c58ef2b9435eb3582dc 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -40,3 +40,4 @@ include:
   - local: "/java-diamond/.gitlab-ci.yml"
   - local: "/diamond-ts/.gitlab-ci.yml"
   - local: "/java-memoizers/.gitlab-ci.yml"
+  - local: "/markov-chain/.gitlab-ci.yml"
diff --git a/markov-chain/.gitlab-ci.yml b/markov-chain/.gitlab-ci.yml
new file mode 100644
index 0000000000000000000000000000000000000000..47fe16779864979df896be2533293211a91e17a7
--- /dev/null
+++ b/markov-chain/.gitlab-ci.yml
@@ -0,0 +1,11 @@
+package-markov-chain:
+  variables:
+    PROJECT_FOLDER: "markov-chain"
+  extends: .java
+  only:
+    refs:
+      - master
+      - merge_requests
+    changes:
+      - ".gitlab-common-ci.yml"
+      - "markov-chain/**/*"
diff --git a/markov-chain/README.md b/markov-chain/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..ba4c301cb989f7880da67270cb77e1b7541a7264
--- /dev/null
+++ b/markov-chain/README.md
@@ -0,0 +1,9 @@
+# Markov Chain
+
+Découverte du kata [Markov Chain](https://codingdojo.org/kata/MarkovChain/)
+
+- **Auteurs** : Anthony REY et Colin DAMON
+- **Date** : 06/09/2021
+- **Langage** : Java
+- **Niveau** : Moyen
+- **Replay** : [Twitch](https://www.twitch.tv/videos/1140906031)
diff --git a/markov-chain/pom.xml b/markov-chain/pom.xml
new file mode 100644
index 0000000000000000000000000000000000000000..dc53032376d0c3234c8646264f605ee8d0e16885
--- /dev/null
+++ b/markov-chain/pom.xml
@@ -0,0 +1,29 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <version>1.0.0</version>
+    <groupId>fr.ippon.kata</groupId>
+    <artifactId>java-parent</artifactId>
+    <relativePath>../java-parent</relativePath>
+  </parent>
+
+  <version>1.0.0-SNAPSHOT</version>
+  <artifactId>markov-chain</artifactId>
+
+  <name>MarkovChain</name>
+
+  <developers>
+    <developer>
+      <email>arey@ippon.fr</email>
+      <name>Anthony REY</name>
+    </developer>
+    <developer>
+      <email>cdamon@ippon.fr</email>
+      <name>Colin DAMON</name>
+    </developer>
+  </developers>
+</project>
diff --git a/markov-chain/src/main/java/fr/ippon/markov/MarkovChain.java b/markov-chain/src/main/java/fr/ippon/markov/MarkovChain.java
new file mode 100644
--- /dev/null
+++ b/markov-chain/src/main/java/fr/ippon/markov/MarkovChain.java
@@ -0,0 +1,66 @@
+package fr.ippon.markov;
+
+import java.util.Collections;
+import java.util.Locale;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+/**
+ * Learns, for every word of the texts it is given, which words follow it and
+ * how often.
+ *
+ * <p>Words are looked up case-insensitively: keys are lower-cased with
+ * {@link Locale#ROOT}. Followers are recorded exactly as they appear in the
+ * text.
+ */
+public class MarkovChain {
+
+  private final Map<String, WordStats> stats = new ConcurrentHashMap<>();
+
+  /**
+   * Records every pair of consecutive words found in {@code text}.
+   *
+   * <p>Words are separated by runs of whitespace, so repeated spaces, tabs or
+   * line breaks never produce empty words. A text with fewer than two words
+   * teaches nothing.
+   *
+   * @param text the text to learn from
+   */
+  public void learn(String text) {
+    String[] words = text.strip().split("\\s+");
+    for (int i = 0; i < words.length - 1; i++) {
+      append(words[i], words[i + 1]);
+    }
+  }
+
+  private void append(String current, String next) {
+    stats.computeIfAbsent(normalize(current), key -> new WordStats())
+      .add(next);
+  }
+
+  /**
+   * Returns the followers recorded for {@code word}.
+   *
+   * @param word the word to look up, in any case
+   * @return the statistics of the word, or new empty statistics when no
+   *         follower was recorded for it
+   */
+  public WordStats stats(String word) {
+    return stats.getOrDefault(normalize(word), new WordStats());
+  }
+
+  /**
+   * Returns all the learned statistics, indexed by lower-cased word.
+   *
+   * @return a read-only view of the statistics
+   */
+  public Map<String, WordStats> stats() {
+    return Collections.unmodifiableMap(stats);
+  }
+
+  // Locale.ROOT keeps the keys independent from the default locale of the
+  // JVM: with a Turkish default locale, "I" would lower-case to a dotless i.
+  private static String normalize(String word) {
+    return word.toLowerCase(Locale.ROOT);
+  }
+}
diff --git a/markov-chain/src/main/java/fr/ippon/markov/Stat.java b/markov-chain/src/main/java/fr/ippon/markov/Stat.java
new file mode 100644
--- /dev/null
+++ b/markov-chain/src/main/java/fr/ippon/markov/Stat.java
@@ -0,0 +1,12 @@
+package fr.ippon.markov;
+
+/**
+ * Share of the words counted by a {@link WordStats} taken by one word.
+ *
+ * @param word the counted word
+ * @param percentage the share of the word, as a ratio between 0 and 1
+ *        (0.5 means half of the counted words) and not as a value out of 100
+ */
+public record Stat(String word, float percentage) {
+
+}
diff --git a/markov-chain/src/main/java/fr/ippon/markov/WordStats.java b/markov-chain/src/main/java/fr/ippon/markov/WordStats.java
new file mode 100644
--- /dev/null
+++ b/markov-chain/src/main/java/fr/ippon/markov/WordStats.java
@@ -0,0 +1,45 @@
+package fr.ippon.markov;
+
+import java.util.Collection;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * Counts occurrences of words and exposes the share taken by each of them.
+ */
+public class WordStats {
+
+  private final AtomicInteger size = new AtomicInteger();
+  private final Map<String, AtomicInteger> stats = new ConcurrentHashMap<>();
+
+  /**
+   * Records one more occurrence of {@code word}.
+   *
+   * @param word the word to count
+   */
+  public void add(String word) {
+    size.incrementAndGet();
+    stats.computeIfAbsent(word, key -> new AtomicInteger())
+      .incrementAndGet();
+  }
+
+  /**
+   * Computes the share of each word among all the recorded occurrences.
+   *
+   * @return one {@link Stat} per distinct word, in no guaranteed order;
+   *         empty when nothing was recorded
+   */
+  public Collection<Stat> get() {
+    // Read the total once so that every ratio shares the same denominator.
+    float total = size.floatValue();
+
+    return stats.entrySet()
+      .stream()
+      .map(entry -> new Stat(entry.getKey(),
+        entry.getValue()
+          .floatValue() / total))
+      .toList();
+  }
+
+}
diff --git a/markov-chain/src/test/java/fr/ippon/markov/MarkovChainTest.java b/markov-chain/src/test/java/fr/ippon/markov/MarkovChainTest.java
new file mode 100644
--- /dev/null
+++ b/markov-chain/src/test/java/fr/ippon/markov/MarkovChainTest.java
@@ -0,0 +1,118 @@
+package fr.ippon.markov;
+
+import static org.assertj.core.api.Assertions.*;
+
+import java.util.Map;
+
+import org.junit.jupiter.api.Test;
+
+class MarkovChainTest {
+  private final MarkovChain markov = new MarkovChain();
+
+  @Test
+  void shouldGetStatsForOneWord() {
+    markov.learn("Les");
+
+    assertThat(markov.stats("Les")
+      .get()).isEmpty();
+  }
+
+  @Test
+  void shouldGetStatsForTwoWords() {
+    markov.learn("Les hommes");
+
+    assertThat(markov.stats("Les")
+      .get()).containsExactly(stat("hommes", 1f));
+  }
+
+  @Test
+  void shouldIgnoreExtraWhitespace() {
+    markov.learn("  Les   hommes ");
+
+    assertThat(markov.stats("Les")
+      .get()).containsExactly(stat("hommes", 1f));
+    assertThat(markov.stats()).containsOnlyKeys("les");
+  }
+
+  @Test
+  void shouldGetStatsWithMultipleFollowers() {
+    markov.learn("Les hommes les plus grands");
+
+    // WordStats is backed by a hash map: the order of the stats is not
+    // guaranteed, so it must not be part of the assertion.
+    assertThat(markov.stats("Les")
+      .get()).containsExactlyInAnyOrder(stat("hommes", 0.5f),
+        stat("plus", 0.5f));
+    assertThat(markov.stats("hommes")
+      .get()).containsExactly(stat("les", 1f));
+  }
+
+  @Test
+  void shouldGetStats() {
+    markov.learn("Les hommes les plus grands");
+
+    assertThatStats(markov.stats()).hasWord("les")
+      .withStat("hommes", 0.5f)
+      .withStat("plus", 0.5f)
+      .and()
+      .hasWord("hommes")
+      .withStat("les", 1f);
+  }
+
+  private static Stat stat(String word, float stat) {
+    return new Stat(word, stat);
+  }
+
+  private static StatsAsserter assertThatStats(
+    Map<String, WordStats> stats) {
+    return new StatsAsserter(stats);
+  }
+
+  private static class StatsAsserter {
+
+    private final Map<String, WordStats> stats;
+
+    public StatsAsserter(Map<String, WordStats> stats) {
+      this.stats = stats;
+    }
+
+    public WordStatAsserter hasWord(String word) {
+      assertThat(stats).containsKey(word);
+
+      return new WordStatAsserter(stats.get(word), this);
+    }
+
+  }
+
+  private static class WordStatAsserter {
+
+    private final WordStats wordStats;
+    private final StatsAsserter source;
+
+    public WordStatAsserter(WordStats wordStats,
+      StatsAsserter source) {
+      this.wordStats = wordStats;
+      this.source = source;
+    }
+
+    public WordStatAsserter withStat(String word,
+      float percent) {
+      float result = wordStats.get()
+        .stream()
+        .filter(stat -> stat.word()
+          .equals(word))
+        .findFirst()
+        .orElseThrow(() -> new AssertionError(
+          "No stat recorded for follower: " + word))
+        .percentage();
+
+      assertThat(result).isEqualTo(percent);
+
+      return this;
+    }
+
+    public StatsAsserter and() {
+      return source;
+    }
+  }
+}
diff --git a/readme.md b/readme.md
index 92959e1494258b38e38dae96f9d23cff0bd340da..26510f9806ec1c9bf6927e150a0c9051ba9b2962 100644
--- a/readme.md
+++ b/readme.md
@@ -44,6 +44,7 @@ Un kata de code est un petit exercice pensé pour s'entrainer jusqu'à maitriser
 - [Tennis Refactoring kata](/tennis/refactoring)
 - [Puzzles](java-puzzles)
 - [Diamond](java-diamond)
+- [Markov chain](markov-chain)
 
 ### Énervé
 