package dev.langchain4j.data.document.splitter;

import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.Metadata;
import dev.langchain4j.data.segment.TextSegment;
import java.util.List;
import org.assertj.core.api.Assertions;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;

/* loaded from: input_file:dev/langchain4j/data/document/splitter/DocumentByRegexSplitterTest.class */
/**
 * Tests for {@link DocumentByRegexSplitter}.
 *
 * <p>Every test splits a small document carrying the metadata entry {@code document=0} and checks
 * two things: that no produced segment's text is longer than the limit passed to the splitter, and
 * that the produced segments (text plus {@code index}/{@code document} metadata) match the expected
 * list exactly and in order.
 *
 * <p>NOTE(review): the splitter constructor arguments appear to be (regex, join delimiter,
 * max segment size, max overlap[, sub-splitter]); this is inferred from the values and expected
 * results used below — confirm against {@code DocumentByRegexSplitter}.
 */
class DocumentByRegexSplitterTest {

    /**
     * Splits {@code "one<str>two<str>three"} using {@code str} as both the first and the second
     * constructor argument, with a limit of 5, and expects one segment per word.
     *
     * @param str the separator under test; it is placed between the words of the document and
     *            passed to the splitter
     */
    @ParameterizedTest
    @ValueSource(strings = {" ", ",", "\n", "\n\n"})
    void should_split_by(String str) {
        int maxSegmentSize = 5;
        DocumentByRegexSplitter splitter = new DocumentByRegexSplitter(str, str, maxSegmentSize, 0);
        Document document = Document.from(
                String.format("one%stwo%sthree", str, str), Metadata.metadata("document", "0"));

        // A typed list is required: with a raw List the lambda parameter below would be an Object
        // and textSegment.text() would not compile.
        List<TextSegment> segments = splitter.split(document);

        segments.forEach(textSegment ->
                Assertions.assertThat(textSegment.text().length()).isLessThanOrEqualTo(maxSegmentSize));
        Assertions.assertThat(segments).containsExactly(
                TextSegment.textSegment("one", Metadata.metadata("index", "0").put("document", "0")),
                TextSegment.textSegment("two", Metadata.metadata("index", "1").put("document", "0")),
                TextSegment.textSegment("three", Metadata.metadata("index", "2").put("document", "0")));
    }

    /**
     * Splits {@code "one two three"} on a space with a limit of 10 and expects the first two words
     * to share one segment, joined by the {@code "\n"} passed as the second constructor argument,
     * while the third word gets a segment of its own.
     */
    @Test
    void should_fit_multiple_parts_into_the_same_segment() {
        int maxSegmentSize = 10;
        DocumentByRegexSplitter splitter = new DocumentByRegexSplitter(" ", "\n", maxSegmentSize, 0);
        Document document = Document.from("one two three", Metadata.metadata("document", "0"));

        List<TextSegment> segments = splitter.split(document);

        segments.forEach(textSegment ->
                Assertions.assertThat(textSegment.text().length()).isLessThanOrEqualTo(maxSegmentSize));
        Assertions.assertThat(segments).containsExactly(
                TextSegment.textSegment("one\ntwo", Metadata.metadata("index", "0").put("document", "0")),
                TextSegment.textSegment("three", Metadata.metadata("index", "1").put("document", "0")));
    }

    /**
     * Splits three lines, each longer than the limit of 15, with a {@link DocumentByWordSplitter}
     * supplied as the fifth constructor argument, and expects every line to be broken into two
     * segments that each respect the limit.
     */
    @Test
    void should_split_part_into_sub_parts_if_it_does_not_fit_into_segment() {
        int maxSegmentSize = 15;
        DocumentByRegexSplitter splitter = new DocumentByRegexSplitter(
                "\n", "\n", maxSegmentSize, 0, new DocumentByWordSplitter(maxSegmentSize, 0));
        Document document = Document.from(
                "This is a first line.\nThis is a second line.\n\nThis is a third line.",
                Metadata.metadata("document", "0"));

        List<TextSegment> segments = splitter.split(document);

        segments.forEach(textSegment ->
                Assertions.assertThat(textSegment.text().length()).isLessThanOrEqualTo(maxSegmentSize));
        Assertions.assertThat(segments).containsExactly(
                TextSegment.textSegment("This is a first", Metadata.metadata("index", "0").put("document", "0")),
                TextSegment.textSegment("line.", Metadata.metadata("index", "1").put("document", "0")),
                TextSegment.textSegment("This is a", Metadata.metadata("index", "2").put("document", "0")),
                TextSegment.textSegment("second line.", Metadata.metadata("index", "3").put("document", "0")),
                TextSegment.textSegment("This is a third", Metadata.metadata("index", "4").put("document", "0")),
                TextSegment.textSegment("line.", Metadata.metadata("index", "5").put("document", "0")));
    }
}
