package org.textmining.extraction.word;

import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.util.List;
import org.apache.fop.render.afp.modca.AFPConstants;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.LittleEndian;
import org.textmining.extraction.TextExtractor;
import org.textmining.extraction.word.model.CHPX;
import org.textmining.extraction.word.model.PieceDescriptor;
import org.textmining.extraction.word.model.TextPiece;

/* loaded from: input_file:WEB-INF/lib/tm-extractors-1.0-patched.jar:org/textmining/extraction/word/WordTextExtractor.class */
/**
 * Base class for text extractors that work on the {@code "WordDocument"} stream of a
 * POIFS container.
 *
 * <p>The class loads the complete stream into {@link #_header}, derives the
 * {@link #_fastSave} flag from it, and offers a shared routine that emits the
 * non-deleted character runs of every text piece through a {@link WordTextScrubber}.
 * Subclasses decide how a deleted run is recognised ({@link #isDeleted(byte[])}) and
 * whether two-byte text is decoded ({@link #supportsUnicode()}).
 */
public abstract class WordTextExtractor implements TextExtractor {
    /** Name of the POIFS entry whose bytes are loaded into {@link #_header}. */
    private static final String WORD_DOCUMENT_ENTRY = "WordDocument";

    /** Byte offset inside {@link #_header} of the 16-bit option flags read by {@link #initOptions()}. */
    private static final int OPTIONS_OFFSET = 10;

    /** Option bit that sets {@link #_fastSave}. */
    private static final int FAST_SAVE_MASK = 4;

    /** Option bit that makes {@link #initOptions()} reject the document as password protected. */
    private static final int PASSWORD_PROTECTED_MASK = 256;

    /** Charset name used when a piece is decoded as two-byte text. */
    private static final String UNICODE_ENCODING = "UTF-16LE";

    /** Complete contents of the {@code "WordDocument"} stream; filled by {@link #initWordHeader(InputStream)}. */
    protected byte[] _header;

    /** {@code true} when bit {@code 4} of the option flags is set. */
    protected boolean _fastSave;

    /** File system opened by {@link #initWordHeader(InputStream)}. */
    protected POIFSFileSystem _fsys;

    /**
     * Writes the text of every piece in {@code list}, skipping character runs that
     * {@link #isDeleted(byte[])} reports as deleted.
     *
     * <p>For each piece the byte range {@code [filePosition, pieceEnd)} inside
     * {@link #_header} is computed, using two bytes per character when the piece is
     * flagged as Unicode and {@link #supportsUnicode()} is {@code true}, one byte
     * otherwise. Each run in {@code list2} is shifted by {@code i}, intersected with
     * that range, decoded and handed to {@code wordTextScrubber}.
     *
     * @param writer           destination passed through to the scrubber
     * @param i                offset added to the start and end of every run before it is
     *                         compared with the piece's file position
     * @param list             {@link TextPiece} elements, processed in list order
     * @param list2            {@link CHPX} elements, scanned in list order for every piece
     * @param wordTextScrubber receives each decoded fragment together with {@code writer}
     * @throws UnsupportedEncodingException if the charset used for decoding is not available
     * @throws IOException                  declared for the scrubber's output to {@code writer}
     */
    protected void doFastSaveExtraction(Writer writer, int i, List list, List list2, WordTextScrubber wordTextScrubber) throws UnsupportedEncodingException, IOException {
        for (int pieceIndex = 0; pieceIndex < list.size(); pieceIndex++) {
            TextPiece piece = (TextPiece) list.get(pieceIndex);
            PieceDescriptor descriptor = piece.getPieceDescriptor();

            // Decided once per piece: it drives both the byte width and the decoding charset.
            boolean unicode = descriptor.isUnicode() && supportsUnicode();
            int bytesPerChar = unicode ? 2 : 1;
            // NOTE(review): the single-byte charset name is borrowed from Apache FOP's
            // AFPConstants; this looks like a decompiler substitution for a string
            // literal - confirm its value and consider inlining it.
            String encoding = unicode ? UNICODE_ENCODING : AFPConstants.ASCII_ENCODING;

            int pieceStart = descriptor.getFilePosition();
            int pieceEnd = pieceStart + ((piece.getEnd() - piece.getStart()) * bytesPerChar);

            for (int runIndex = 0; runIndex < list2.size(); runIndex++) {
                CHPX run = (CHPX) list2.get(runIndex);
                int runStart = run.getStart() + i;
                int runEnd = run.getEnd() + i;

                boolean reachesIntoPiece = runStart >= pieceStart || runEnd > pieceStart;
                boolean beginsBeforePieceEnds = runStart < pieceEnd || runEnd <= pieceEnd;
                if (!reachesIntoPiece || !beginsBeforePieceEnds) {
                    continue;
                }
                // isDeleted is only consulted for runs that pass the range test above.
                if (isDeleted(run.getGrpprl())) {
                    continue;
                }

                // Clip the run to the piece so only bytes belonging to this piece are decoded.
                int from = Math.max(runStart, pieceStart);
                int length = Math.min(runEnd, pieceEnd) - from;
                wordTextScrubber.append(writer, new String(this._header, from, length, encoding));
            }
        }
    }

    /**
     * Tells whether the character run carrying the given property bytes must be skipped.
     *
     * @param bArr the value of {@link CHPX#getGrpprl()} for the run under inspection
     * @return {@code true} to leave the run out of the extracted text
     */
    protected abstract boolean isDeleted(byte[] bArr);

    /**
     * Tells whether pieces flagged as Unicode are decoded as two-byte text.
     *
     * @return {@code false} in this base implementation
     */
    protected boolean supportsUnicode() {
        return false;
    }

    /**
     * Opens {@code inputStream} as a POIFS file system, loads the whole
     * {@code "WordDocument"} stream into {@link #_header} and then evaluates the
     * option flags via {@link #initOptions()}.
     *
     * <p>The stream is read in a loop until the buffer is full, because a single
     * {@code read} call is allowed to return fewer bytes than requested, and the
     * document stream is closed even when reading fails.
     *
     * @param inputStream source of the POIFS container
     * @throws IOException                if the container cannot be read or the stream ends
     *                                    before the size announced by its entry is reached
     * @throws PasswordProtectedException if the option flags mark the document as protected
     */
    public void initWordHeader(InputStream inputStream) throws IOException, PasswordProtectedException {
        this._fsys = new POIFSFileSystem(inputStream);
        DocumentEntry documentEntry = (DocumentEntry) this._fsys.getRoot().getEntry(WORD_DOCUMENT_ENTRY);
        DocumentInputStream documentStream = this._fsys.createDocumentInputStream(WORD_DOCUMENT_ENTRY);
        try {
            byte[] buffer = new byte[documentEntry.getSize()];
            int filled = 0;
            while (filled < buffer.length) {
                int count = documentStream.read(buffer, filled, buffer.length - filled);
                if (count < 0) {
                    throw new IOException("Unexpected end of " + WORD_DOCUMENT_ENTRY + " stream: read "
                            + filled + " of " + buffer.length + " bytes");
                }
                filled += count;
            }
            this._header = buffer;
        } finally {
            documentStream.close();
        }
        initOptions();
    }

    /**
     * Reads the little-endian 16-bit option flags at byte offset {@code 10} of
     * {@link #_header}, stores bit {@code 4} in {@link #_fastSave} and rejects the
     * document when bit {@code 256} is set.
     *
     * @throws PasswordProtectedException if bit {@code 256} of the option flags is set
     */
    protected void initOptions() throws PasswordProtectedException {
        short options = LittleEndian.getShort(this._header, OPTIONS_OFFSET);
        this._fastSave = (options & FAST_SAVE_MASK) != 0;
        if ((options & PASSWORD_PROTECTED_MASK) != 0) {
            throw new PasswordProtectedException("This document is password protected");
        }
    }
}
