/*
 * Decompiled with CFR 0.152.
 */
package dev.langchain4j.data.document.splitter;

import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.DocumentSplitter;
import dev.langchain4j.data.document.Metadata;
import dev.langchain4j.data.document.splitter.SegmentBuilder;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.internal.Utils;
import dev.langchain4j.internal.ValidationUtils;
import dev.langchain4j.model.Tokenizer;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;

public abstract class HierarchicalDocumentSplitter
implements DocumentSplitter {
    private static final String INDEX = "index";
    protected final int maxSegmentSize;
    protected final Tokenizer tokenizer;
    protected final DocumentSplitter subSplitter;

    protected HierarchicalDocumentSplitter(int maxSegmentSizeInChars) {
        this(maxSegmentSizeInChars, null, null);
    }

    protected HierarchicalDocumentSplitter(int maxSegmentSizeInChars, DocumentSplitter subSplitter) {
        this(maxSegmentSizeInChars, null, subSplitter);
    }

    protected HierarchicalDocumentSplitter(int maxSegmentSizeInTokens, Tokenizer tokenizer) {
        this(maxSegmentSizeInTokens, tokenizer, null);
    }

    protected HierarchicalDocumentSplitter(int maxSegmentSizeInTokens, Tokenizer tokenizer, DocumentSplitter subSplitter) {
        this.maxSegmentSize = ValidationUtils.ensureGreaterThanZero((Integer)maxSegmentSizeInTokens, (String)"maxSegmentSize");
        this.tokenizer = tokenizer;
        this.subSplitter = subSplitter == null ? this.defaultSubSplitter() : subSplitter;
    }

    protected abstract String[] split(String var1);

    protected abstract String joinDelimiter();

    protected abstract DocumentSplitter defaultSubSplitter();

    public List<TextSegment> split(Document document) {
        String[] parts;
        ValidationUtils.ensureNotNull((Object)document, (String)"document");
        ArrayList<TextSegment> segments = new ArrayList<TextSegment>();
        SegmentBuilder segmentBuilder = new SegmentBuilder(this.maxSegmentSize, this::sizeOf, this.joinDelimiter());
        AtomicInteger index = new AtomicInteger(0);
        for (String part : parts = this.split(document.text())) {
            if (segmentBuilder.hasSpaceFor(part)) {
                segmentBuilder.append(part);
                continue;
            }
            if (segmentBuilder.isNotEmpty()) {
                segments.add(HierarchicalDocumentSplitter.createSegment(segmentBuilder.build(), document, index.getAndIncrement()));
                segmentBuilder.reset();
            }
            if (segmentBuilder.hasSpaceFor(part)) {
                segmentBuilder.append(part);
                continue;
            }
            if (this.subSplitter == null) {
                throw new RuntimeException(String.format("The text \"%s...\" (%s %s long) doesn't fit into the maximum segment size (%s %s), and there is no subSplitter defined to split it further.", Utils.firstChars((String)part, (int)30), this.sizeOf(part), this.tokenizer == null ? "characters" : "tokens", this.maxSegmentSize, this.tokenizer == null ? "characters" : "tokens"));
            }
            for (TextSegment segment : this.subSplitter.split(Document.from((String)part))) {
                segments.add(HierarchicalDocumentSplitter.createSegment(segment.text(), document, index.getAndIncrement()));
                segmentBuilder.reset();
            }
        }
        if (segmentBuilder.isNotEmpty()) {
            segments.add(HierarchicalDocumentSplitter.createSegment(segmentBuilder.build(), document, index.getAndIncrement()));
            segmentBuilder.reset();
        }
        return segments;
    }

    private int sizeOf(String text) {
        if (this.tokenizer != null) {
            return this.tokenizer.estimateTokenCountInText(text);
        }
        return text.length();
    }

    private static TextSegment createSegment(String text, Document document, int index) {
        Metadata metadata = document.metadata().copy().add(INDEX, (Object)index);
        return TextSegment.from((String)text, (Metadata)metadata);
    }
}

