test_pdf_utils.f90 Source File


Source Code

module test_pdf_utils
    use, intrinsic :: iso_fortran_env, only: int8, int64
    use fortplot_zlib_core, only: zlib_decompress
    implicit none
contains
    !> Read a PDF file and concatenate the payload of every `stream` ... `endstream`
    !! section it contains.
    !!
    !! Each payload is first handed to `zlib_decompress`; when that reports success
    !! (status 0) and yields at least one byte, the inflated bytes are appended to
    !! `stream_text`. Otherwise the raw payload bytes are appended unchanged.
    !!
    !! The payload runs from just after the `stream` keyword (skipping one CR, CRLF
    !! or LF) up to the `endstream` keyword, so any end-of-line marker that precedes
    !! `endstream` is part of the payload.
    !!
    !! @param filename     Path of the PDF file to read.
    !! @param stream_text  Concatenated stream contents; always allocated on return
    !!                     (zero length when nothing was extracted).
    !! @param status       0 on success (also for an empty file, or when the file
    !!                     size is reported as non-positive),
    !!                     -1 if the file could not be opened,
    !!                     -2 if reading the file failed,
    !!                     -3 if the file is too large to index with default integers.
    subroutine extract_pdf_stream_text(filename, stream_text, status)
        character(len=*), intent(in) :: filename
        character(len=:), allocatable, intent(out) :: stream_text
        integer, intent(out) :: status

        character(len=*), parameter :: stream_kw = 'stream'
        character(len=*), parameter :: endstream_kw = 'endstream'
        character(len=1), parameter :: cr = achar(13)
        character(len=1), parameter :: lf = achar(10)

        character(len=1), allocatable :: file_bytes(:)
        integer(int64) :: fsize
        integer :: nbytes
        integer :: content_begin, content_len
        integer :: ios, pos
        integer :: stream_start, stream_end
        integer :: unit

        status = 0
        allocate(character(len=0) :: stream_text)

        ! The file is only ever read, so request read access explicitly.
        open(newunit=unit, file=filename, access='stream', form='unformatted', &
             status='old', action='read', iostat=ios)
        if (ios /= 0) then
            status = -1
            return
        end if

        inquire(unit=unit, size=fsize)
        if (fsize <= 0_int64) then
            close(unit)
            return
        end if

        ! All positions below are default integers; refuse files they cannot index
        ! instead of letting int(fsize) overflow.
        if (fsize > int(huge(0), int64)) then
            close(unit)
            status = -3
            return
        end if
        nbytes = int(fsize)

        allocate(character(len=1) :: file_bytes(nbytes))
        read(unit, iostat=ios) file_bytes
        close(unit)
        if (ios /= 0) then
            status = -2
            return
        end if

        pos = 1
        do
            stream_start = find_subsequence(file_bytes, fsize, stream_kw, pos)
            if (stream_start < 0) exit
            content_begin = stream_start + len(stream_kw)
            stream_end = find_subsequence(file_bytes, fsize, endstream_kw, content_begin)
            if (stream_end < 0) exit

            ! Skip the single end-of-line marker (CR, CRLF or LF) after 'stream'.
            if (content_begin <= nbytes) then
                if (file_bytes(content_begin) == cr) then
                    content_begin = content_begin + 1
                    if (content_begin <= nbytes) then
                        if (file_bytes(content_begin) == lf) content_begin = content_begin + 1
                    end if
                else if (file_bytes(content_begin) == lf) then
                    content_begin = content_begin + 1
                end if
            end if

            content_len = stream_end - content_begin
            if (content_len > 0) then
                block
                    integer(int8), allocatable :: compressed(:), inflated(:)
                    character(len=:), allocatable :: chunk_text
                    integer :: j, code
                    integer :: status_decomp
                    logical :: use_inflated

                    allocate(compressed(content_len))
                    do j = 1, content_len
                        ! Map byte codes 128..255 onto -128..-1 explicitly: converting
                        ! an out-of-range value with int(..., int8) is not portable.
                        code = iachar(file_bytes(content_begin + j - 1))
                        if (code > 127) code = code - 256
                        compressed(j) = int(code, int8)
                    end do
                    ! NOTE(review): the meaning of the trailing .false. argument is
                    ! defined by fortplot_zlib_core, not visible here.
                    inflated = zlib_decompress(compressed, content_len, &
                        status_decomp, .false.)

                    ! Fortran does not guarantee short-circuit evaluation of .and.,
                    ! so only query size() once the array is known to be usable.
                    use_inflated = .false.
                    if (status_decomp == 0 .and. allocated(inflated)) then
                        use_inflated = size(inflated) > 0
                    end if

                    if (use_inflated) then
                        allocate(character(len=size(inflated)) :: chunk_text)
                        call bytes_to_string(inflated, chunk_text)
                    else
                        ! Not (successfully) inflated: keep the raw payload bytes.
                        allocate(character(len=content_len) :: chunk_text)
                        do j = 1, content_len
                            chunk_text(j:j) = file_bytes(content_begin + j - 1)
                        end do
                    end if

                    call append_string(stream_text, chunk_text)
                end block
            end if

            ! Resume the search after this 'endstream' so that the 'stream' it
            ! contains is not mistaken for the start of another section.
            pos = stream_end + len(endstream_kw)
        end do
    end subroutine extract_pdf_stream_text

    !> Append `chunk` to the end of the deferred-length string `target`.
    !!
    !! An unallocated `target` is treated as an empty string, so the routine can be
    !! used to start a buffer as well as to grow one. All `len(chunk)` characters
    !! are appended, trailing blanks included.
    !!
    !! @param target  String to extend; allocated on return.
    !! @param chunk   Text to append.
    subroutine append_string(target, chunk)
        character(len=:), allocatable, intent(inout) :: target
        character(len=*), intent(in) :: chunk
        character(len=:), allocatable :: combined
        integer :: new_len
        integer :: old_len

        ! len() of an unallocated deferred-length string is undefined.
        old_len = 0
        if (allocated(target)) old_len = len(target)
        new_len = old_len + len(chunk)

        allocate(character(len=new_len) :: combined)
        if (old_len > 0) combined(1:old_len) = target
        if (len(chunk) > 0) combined(old_len+1:new_len) = chunk
        ! Hand the new buffer over without a second copy.
        call move_alloc(combined, target)
    end subroutine append_string

    !> Copy signed 8-bit values into a character string, one character per byte.
    !!
    !! Each byte is reduced to its unsigned code 0..255 before being converted with
    !! `achar`. Only `min(size(bytes), len(text))` characters are written; if `text`
    !! is longer than `bytes`, the remainder is blank-filled.
    !!
    !! @param bytes  Source bytes.
    !! @param text   Destination string.
    pure subroutine bytes_to_string(bytes, text)
        integer(int8), intent(in) :: bytes(:)
        character(len=*), intent(out) :: text
        integer :: i

        ! Define the whole intent(out) string, not just the part that gets filled.
        text = ''
        ! Never write past the end of text when bytes is the longer of the two.
        do i = 1, min(size(bytes), len(text))
            text(i:i) = achar(iand(int(bytes(i)), 255))
        end do
    end subroutine bytes_to_string

    !> Locate the first occurrence of `pat` in the character array `arr`.
    !!
    !! Trailing blanks of `pat` are ignored (its length is taken with `len_trim`).
    !! The search starts at `max(1, start_idx)`.
    !!
    !! @param arr        Characters to search, one per element.
    !! @param n          Number of elements of `arr` to consider.
    !! @param pat        Pattern to look for.
    !! @param start_idx  1-based index at which to start searching.
    !! @return pos       1-based index of the first match, or -1 when there is no
    !!                   match or the trimmed pattern is empty.
    pure integer function find_subsequence(arr, n, pat, start_idx) result(pos)
        ! n must be declared before it is used as the bound of arr.
        integer(int64), intent(in) :: n
        character(len=1), intent(in) :: arr(n)
        character(len=*), intent(in) :: pat
        integer, intent(in) :: start_idx
        integer :: first, j, last_start, pat_len

        pos = -1
        pat_len = len_trim(pat)
        if (pat_len <= 0) return

        ! Last index at which the whole pattern still fits inside arr.
        last_start = int(n) - pat_len + 1

        candidates: do first = max(1, start_idx), last_start
            do j = 1, pat_len
                if (arr(first + j - 1) /= pat(j:j)) cycle candidates
            end do
            ! Every character matched.
            pos = first
            return
        end do candidates
    end function find_subsequence

end module test_pdf_utils