package com.github.tjake.jlama.model.gemma;

import com.github.tjake.jlama.safetensors.tokenizer.BPETokenizer;
import java.nio.file.Path;
import java.util.Optional;
import java.util.regex.Pattern;

/* loaded from: input_file:com/github/tjake/jlama/model/gemma/GemmaTokenizer.class */
/**
 * {@link BPETokenizer} specialisation that overrides the byte-fallback and
 * text pre/post-processing hooks.
 *
 * <p>Spaces are swapped for {@link #SPIECE_UNDERLINE} before encoding and swapped
 * back when individual tokens are post-processed. Byte-fallback tokens are mapped
 * to and from token ids by a fixed offset.
 */
public class GemmaTokenizer extends BPETokenizer {
    /** Marker substituted for the space character during pre-processing. */
    static final String SPIECE_UNDERLINE = "▁";

    /**
     * Value added to an unsigned byte to obtain its byte-fallback token id.
     * Declared as a compile-time constant so that the overridden hooks can never
     * observe an unassigned value, whenever they happen to be invoked.
     */
    private static final int BYTE_FALLBACK_ENCODING_OFFSET = 217;

    /** Number of distinct unsigned byte values (0..255). */
    private static final int BYTE_VALUE_COUNT = 256;

    /** Matches the literal markers {@code <s>} and {@code </s>}; compiled once and reused for every token. */
    private static final Pattern SENTENCE_MARKER = Pattern.compile("</?s>");

    /**
     * Creates the tokenizer.
     *
     * @param path forwarded unchanged to the {@link BPETokenizer} constructor
     */
    public GemmaTokenizer(Path path) {
        super(path);
    }

    /**
     * Maps a raw byte to its byte-fallback token id.
     *
     * @param b the byte to encode; interpreted as unsigned (0..255)
     * @return the unsigned value of {@code b} plus the fixed fallback offset
     */
    @Override
    protected long encodeCharacterAsToken(byte b) {
        return Byte.toUnsignedLong(b) + BYTE_FALLBACK_ENCODING_OFFSET;
    }

    /**
     * Inverse of {@link #encodeCharacterAsToken(byte)}.
     *
     * @param j the token id to inspect
     * @return the character whose code is {@code j} minus the fallback offset when
     *         the model's {@code byteFallback} flag is set and {@code j} lies in the
     *         256-wide fallback id range; otherwise {@link Optional#empty()}
     */
    @Override
    protected Optional<Character> maybeDecodeTokenAsCharacter(long j) {
        if (!this.model.byteFallback) {
            return Optional.empty();
        }
        long byteValue = j - BYTE_FALLBACK_ENCODING_OFFSET;
        if (byteValue < 0 || byteValue >= BYTE_VALUE_COUNT) {
            return Optional.empty();
        }
        return Optional.of(Character.valueOf((char) byteValue));
    }

    /**
     * Replaces every space in the input with {@link #SPIECE_UNDERLINE}.
     *
     * @param str the text about to be encoded
     * @return {@code str} with each {@code " "} replaced by the marker
     */
    @Override
    protected String preProcess(String str) {
        return str.replace(" ", SPIECE_UNDERLINE);
    }

    /**
     * Removes leading whitespace from the given text.
     *
     * @param str the text to clean up
     * @return {@code str} without leading whitespace
     */
    @Override
    protected String postProcess(String str) {
        return str.stripLeading();
    }

    /**
     * Cleans up the text of a single token: removes {@code <s>} / {@code </s>}
     * markers and turns {@link #SPIECE_UNDERLINE} back into a space.
     *
     * @param str the token text, or {@code null}, in which case the model's
     *            {@code unkToken} is used instead
     * @return the cleaned token text
     */
    @Override
    protected String postProcessToken(String str) {
        String token = (str == null) ? this.model.unkToken : str;
        String withoutMarkers = SENTENCE_MARKER.matcher(token).replaceAll("");
        // Literal replacement: the marker contains no regex metacharacters, so no regex engine is needed.
        return withoutMarkers.replace(SPIECE_UNDERLINE, " ");
    }
}
