package de.duehl.basics.text.extract;

import static org.junit.Assert.*;

import java.util.List;

import org.junit.Test;

/**
 * Tests for {@link WordExtractor}: each test feeds a sample text into the extractor and
 * compares the list returned by {@code getWords()} with a fixed list of expected words.
 *
 * The expected lists contain every word only once and are ordered alphabetically without
 * regard to upper and lower case (for example "auf", "Bananen", "dem", "Drei").
 */
public class WordExtractorTest {

    /** A simple sentence: all six words are expected, the trailing full stop is not. */
    @Test
    public void extract1() {
        List<String> words = new WordExtractor("Drei Bananen liegen auf dem Tisch.").getWords();

        assertWords(words, "auf", "Bananen", "dem", "Drei", "liegen", "Tisch");
    }

    /** The word "drei" occurs twice in the text but is expected only once in the result. */
    @Test
    public void extractNoDoubleWords() {
        List<String> words =
                new WordExtractor("Die drei Bananen liegen auf drei Tischen.").getWords();

        assertWords(words, "auf", "Bananen", "Die", "drei", "liegen", "Tischen");
    }

    /**
     * A text made of an account name, a mail address and hyphenated fragments: only
     * "christian" and "de" are expected in the result.
     */
    @Test
    public void extractNoRubbish() {
        List<String> words = new WordExtractor(
                "@Account christian.duehl@heinsundpartner.de bef- -angen.").getWords();

        assertWords(words, "christian", "de");
    }

    /**
     * A longer passage with umlauts, a number and words that occur in both capitalised and
     * lower case form ("Der"/"der", "Doch"/"doch", ...): 77 distinct words are expected.
     */
    @Test
    public void extractLongText() {
        String passage = "Sie musste über diese Fragen schlafen. Doch heute war schon die zehnte "
                + "Regenhand. Es war einfach zum aus der Haut fahren! Der 13. der Tag ihres "
                + "vierzigsten Geburtstags, der wie eine graue Wand immer näher kam, war kaum "
                + "mehr zu leugnen. Schon am kommenden Samstag würde es so weit sein. Und es gab "
                + "nicht mal jemanden, der ihr gratulieren oder mit ihr feiern würde. Falls sie "
                + "mit Thomas noch verbunden war, was sie sehr hoffte, so war dies doch seit "
                + "längerem eine eher theoretische Angelegenheit, da sie sich ganz praktisch sehr "
                + "lange nicht mehr gesehen hatten.";

        List<String> words = new WordExtractor(passage).getWords();

        assertWords(words,
                "am", "Angelegenheit", "aus",
                "da", "Der", "der", "die", "dies", "diese", "Doch", "doch",
                "eher", "eine", "einfach", "Es", "es",
                "fahren", "Falls", "feiern", "Fragen",
                "gab", "ganz", "Geburtstags", "gesehen", "gratulieren", "graue",
                "hatten", "Haut", "heute", "hoffte",
                "ihr", "ihres", "immer",
                "jemanden",
                "kam", "kaum", "kommenden",
                "längerem", "lange", "leugnen",
                "mal", "mehr", "mit", "musste",
                "näher", "nicht", "noch",
                "oder",
                "praktisch",
                "Regenhand",
                "Samstag", "schlafen", "Schon", "schon", "sehr", "sein", "seit", "sich", "Sie",
                "sie", "so",
                "Tag", "theoretische", "Thomas",
                "über", "Und",
                "verbunden", "vierzigsten",
                "Wand", "war", "was", "weit", "wie", "würde",
                "zehnte", "zu", "zum");
    }

    /**
     * Checks that the extracted words are exactly the expected ones: first the number of
     * words is compared, then every position in ascending order.
     *
     * @param words
     *            The words returned by the extractor.
     * @param expected
     *            The words expected, in the expected order.
     */
    private static void assertWords(List<String> words, String... expected) {
        assertEquals(expected.length, words.size());

        for (int position = 0; position < expected.length; ++position) {
            assertEquals(expected[position], words.get(position));
        }
    }

    /**
     * Development helper, not called by any test: prints one assertEquals statement per word
     * to standard out. Call it temporarily from a test to inspect the actual result or to
     * produce expectations for a new test.
     *
     * @param words
     *            The words to print the statements for.
     */
    @SuppressWarnings("unused")
    private void printAsserts(List<String> words) {
        int position = 0;
        for (String word : words) {
            System.out.println("assertEquals(\"" + word + "\", words.get(" + position + "));");
            ++position;
        }
    }

}
