package dev.langchain4j.data.document.splitter;

import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.Metadata;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.openai.OpenAiTokenizer;
import java.util.List;
import org.assertj.core.api.Assertions;
import org.junit.jupiter.api.Test;

/* loaded from: input_file:dev/langchain4j/data/document/splitter/DocumentBySentenceSplitterTest.class */
/**
 * Tests for {@link DocumentBySentenceSplitter}.
 *
 * <p>Every test builds a document from a handful of sentences, splits it with a given size limit
 * and then verifies two things: that no produced segment exceeds the limit, and that the produced
 * segments (text plus {@code index}/{@code document} metadata) are exactly the expected ones.
 */
class DocumentBySentenceSplitterTest {

    /** Value stored under the {@code document} metadata key of every document used in these tests. */
    private static final String DOCUMENT_ID = "0";

    @Test
    void should_split_into_segments_with_one_sentence_per_segment() {
        int maxSegmentSize = 30;

        // Preconditions: each sentence fits into a segment on its own, both together do not.
        String firstSentence = "This is a first sentence.";
        Assertions.assertThat(firstSentence).hasSizeLessThan(maxSegmentSize);

        String secondSentence = "This is a second sentence.";
        Assertions.assertThat(secondSentence).hasSizeLessThan(maxSegmentSize);

        Assertions.assertThat(firstSentence + " " + secondSentence).hasSizeGreaterThan(maxSegmentSize);

        // Extra whitespace around the sentences is deliberate: the expected segments contain none of it.
        Document document = Document.from(
                String.format(" %s  %s ", firstSentence, secondSentence),
                Metadata.metadata("document", DOCUMENT_ID));

        List<TextSegment> segments = new DocumentBySentenceSplitter(maxSegmentSize, 0).split(document);

        segments.forEach(segment ->
                Assertions.assertThat(segment.text().length()).isLessThanOrEqualTo(maxSegmentSize));
        Assertions.assertThat(segments).containsExactly(
                expectedSegment(firstSentence, 0),
                expectedSegment(secondSentence, 1));
    }

    @Test
    void should_split_into_segments_with_multiple_sentences_per_segment() {
        int maxSegmentSize = 60;

        // Preconditions: two sentences fit into one segment, three do not.
        String firstSentence = "This is a first sentence.";
        String secondSentence = "This is a second sentence.";
        Assertions.assertThat(firstSentence + " " + secondSentence).hasSizeLessThan(maxSegmentSize);

        String thirdSentence = "This is a third sentence.";
        Assertions.assertThat(firstSentence + " " + secondSentence + " " + thirdSentence)
                .hasSizeGreaterThan(maxSegmentSize);

        Document document = Document.from(
                String.format(" %s  %s  %s ", firstSentence, secondSentence, thirdSentence),
                Metadata.metadata("document", DOCUMENT_ID));

        List<TextSegment> segments = new DocumentBySentenceSplitter(maxSegmentSize, 0).split(document);

        segments.forEach(segment ->
                Assertions.assertThat(segment.text().length()).isLessThanOrEqualTo(maxSegmentSize));
        Assertions.assertThat(segments).containsExactly(
                expectedSegment(firstSentence + " " + secondSentence, 0),
                expectedSegment(thirdSentence, 1));
    }

    @Test
    void should_split_sentence_if_it_does_not_fit_into_segment() {
        int maxSegmentSize = 40;

        // Preconditions: only the middle sentence is longer than a single segment.
        String firstSentence = "This is a short sentence.";
        Assertions.assertThat(firstSentence).hasSizeLessThan(maxSegmentSize);

        String secondSentence = "This is a very long sentence that does not fit into segment.";
        Assertions.assertThat(secondSentence).hasSizeGreaterThan(maxSegmentSize);

        String thirdSentence = "This is another short sentence.";
        Assertions.assertThat(thirdSentence).hasSizeLessThan(maxSegmentSize);

        Document document = Document.from(
                String.format(" %s  %s  %s ", firstSentence, secondSentence, thirdSentence),
                Metadata.metadata("document", DOCUMENT_ID));

        List<TextSegment> segments = new DocumentBySentenceSplitter(maxSegmentSize, 0).split(document);

        segments.forEach(segment ->
                Assertions.assertThat(segment.text().length()).isLessThanOrEqualTo(maxSegmentSize));
        // The over-long sentence is expected to be broken into two segments of its own.
        Assertions.assertThat(segments).containsExactly(
                expectedSegment(firstSentence, 0),
                expectedSegment("This is a very long sentence that does", 1),
                expectedSegment("not fit into segment.", 2),
                expectedSegment(thirdSentence, 3));
    }

    @Test
    void should_split_sample_text() {
        String s1 = "In a sleepy hamlet, where the trees towered high, there lived a young boy named Elias.";
        String s2 = "He loved exploring.";
        String s3 = "Fields of gold stretched as far as the eye could see, punctuated by tiny blossoms.";
        String s4 = "The wind whispered.";
        // s5p1 and s5p2 together form one sentence of the document; they are kept apart here
        // because the expected result places them in two separate segments.
        String s5p1 = "Sometimes, it would carry fragrances from the neighboring towns, which included chocolate, freshly baked bread, and the salty tang of";
        String s5p2 = "the sea.";
        String s6 = "In the middle of the town, a single lamppost stood.";
        String s7 = "Cats lounged beneath it, stretching languidly in the dappled sunlight.";
        String s8 = "Elias had a dream: to build a flying machine.";
        String s9 = "Some days, it felt impossible.";
        String s10 = "Yet, every evening, he would pull out his sketches, tinkering and toiling away.";
        String s11 = "There was a resilience in his spirit.";
        String s12 = "Birds often stopped to watch.";
        String s13 = "Curiosity is the spark of invention.";
        String s14 = "He believed.";
        String s15 = "And one day, with the town gathered around him, Elias soared.";
        String s16 = "The horizon awaited.";
        String s17 = "Life is full of surprises.";
        String s18 = "Embrace them.";

        // All fragments separated by exactly one space.
        Document document = Document.from(
                String.join(" ",
                        s1, s2, s3, s4, s5p1, s5p2, s6, s7, s8, s9,
                        s10, s11, s12, s13, s14, s15, s16, s17, s18),
                Metadata.metadata("document", DOCUMENT_ID));

        // Unlike the other tests, the limit here is checked in tokens as estimated by the tokenizer.
        int maxSegmentSize = 26;
        OpenAiTokenizer tokenizer = new OpenAiTokenizer("gpt-3.5-turbo");

        List<TextSegment> segments = new DocumentBySentenceSplitter(maxSegmentSize, 0, tokenizer).split(document);

        segments.forEach(segment ->
                Assertions.assertThat(tokenizer.estimateTokenCountInText(segment.text()))
                        .isLessThanOrEqualTo(maxSegmentSize));
        Assertions.assertThat(segments).containsExactly(
                expectedSegment(s1 + " " + s2, 0),
                expectedSegment(s3 + " " + s4, 1),
                expectedSegment(s5p1, 2),
                expectedSegment(s5p2, 3),
                expectedSegment(s6, 4),
                expectedSegment(s7, 5),
                expectedSegment(s8 + " " + s9, 6),
                expectedSegment(s10, 7),
                expectedSegment(s11 + " " + s12 + " " + s13 + " " + s14, 8),
                expectedSegment(s15 + " " + s16 + " " + s17, 9),
                expectedSegment(s18, 10));
    }

    /**
     * Builds the segment a test expects at a given position of the split result.
     *
     * @param text  expected segment text
     * @param index expected position, stored as a string under the {@code index} metadata key
     * @return a segment with the given text whose metadata holds {@code index} and {@code document}
     */
    private static TextSegment expectedSegment(String text, int index) {
        return TextSegment.textSegment(
                text,
                Metadata.metadata("index", String.valueOf(index)).put("document", DOCUMENT_ID));
    }
}
