Last active
September 13, 2019 09:13
-
-
Save uenoB/185202f006725aa70f96ff7b47620b10 to your computer and use it in GitHub Desktop.
encapsulation of jpeg in tiff
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env ruby | |
| # To the extent possible under law, the author has waived all copyright | |
| # and related or neighboring rights to this software by associating the | |
| # CC0 1.0 (http://creativecommons.org/publicdomain/zero/1.0/) with it. | |
| # | |
| # This script embeds (possibly multiple) JPEG files into a (possibly | |
| # multi-page) TIFF file. Each of the given files must be a baseline | |
| # YCbCr JPEG file. '-' means stdin. Each JPEG file is embedded in the | |
| # TIFF file as a huge single strip, which is allowed but not recommended | |
| # according to TIFF 6.0 TechNote2. The output is written to the stdout. | |
# Embeds baseline YCbCr JPEG images into a little-endian TIFF stream.
# Every image becomes one IFD whose single strip is the untouched JPEG
# byte stream (Compression = 7).
class JPG2TIF
  # Raised for any input that cannot be embedded.
  class Error < StandardError; end

  private

  # Aborts processing by raising JPG2TIF::Error with message +s+.
  def die(s)
    raise Error, s
  end

  # Collects the chunks that follow an IFD while tracking the absolute
  # file offset at which the next chunk will land.
  class Buffer
    # n is the absolute offset of the first byte that will be written.
    def initialize(n = 0)
      @pos = n
      @a = []
    end

    # Appends s and returns the offset just past it, i.e. the offset at
    # which the NEXT chunk will start. Counted in bytes, not characters.
    def write(s)
      @a.push(s)
      @pos += s.bytesize
    end

    # Pads with zero bytes up to a multiple of n; returns the new offset.
    def align(n)
      write([0].pack('c') * (n - 1 - (@pos + n - 1) % n))
    end

    attr_reader :pos, :a
  end

  # Walks the marker segments of the JPEG byte string src, yielding the
  # keywords pos: (offset of the marker), mark: (16-bit marker code) and
  # len: (value of the length field, 0 for SOI/EOI) for each segment.
  # Raises Error when src is not a well-formed JPEG marker stream.
  def parse_jpeg(src)
    pos = 0
    begin
      a = (src[pos, 4] || '').unpack('nn')
      case a[0]
      when 0xffff then pos += 1; next # fill byte before a marker
      when 0xffd8, 0xffd9 then a[1] = 0 # start of image, end of image
      when 0..0xff00, nil then die 'not in jpeg format'
      else die 'segment without length found' unless a[1] && a[1] >= 2
      end
      # Real keywords are passed here: since Ruby 3.0 a positional Hash is
      # no longer converted to keywords for the receiving block, which
      # would leave mark/pos/len at their defaults.
      yield(pos: pos, mark: a[0], len: a[1])
      pos += 2 + a[1]
      if a[0] == 0xffda # start of scan
        # Entropy-coded data runs until the next real marker: 0xff that
        # is followed by neither a stuffed 0x00 nor a restart marker.
        m = /(?=\xff[^\x00\xd0-\xd7])/mn.match(src, pos)
        pos = m ? m.begin(0) : die('unterminated entropy-coded segment')
      end
    end while pos < src.length
  end

  # Scans all segments of jpeg and returns [sof, icc]: sof is the SOF0
  # payload (nil when absent); icc is nil or a Hash with :pos/:len of a
  # single-chunk ICC profile inside jpeg, plus :copy holding the profile
  # bytes when the segment starts at an odd offset.
  # Raises Error on duplicate SOF0/ICC segments or non-baseline markers.
  def scan_segments(jpeg)
    icc = sof = nil
    parse_jpeg(jpeg) do |mark: 0, pos: 0, len: 0|
      $stderr.printf "pos %08x mark %04x len %04x\n", pos, mark, len if $VERBOSE
      case mark
      when 0xffc0 # SOF0
        die 'duplicate SOF0 found' if sof
        sof = jpeg[pos + 4, len - 2]
      when 0xffe2 # ICCProfile
        if /\AICC_PROFILE\x00\x01\x01/n =~ jpeg[pos + 4, len - 2]
          die 'duplicate ICC_PROFILE found' if icc
          # 18 = marker (2) + length (2) + "ICC_PROFILE\0" (12) + seq/count (2)
          icc = { :pos => pos + 18, :len => len - 16 }
          icc[:copy] = jpeg[pos + 18, len - 16] if pos % 2 != 0
        end
      when 0xffd8..0xffdb, 0xffdd then next # SOI, EOI, SOS, DQT, DRI
      when 0xffc4, 0xffcc then next # DHT, DAC
      when 0xffe0..0xffef, 0xfffe then next # APP, COM
      else die('not in baseline: %04x' % mark)
      end
    end
    [sof, icc]
  end

  # Validates the SOF0 payload (8-bit, components 1/2/3 with the second
  # and third sampled 1x1) and returns the YCbCrSubSampling value: the
  # first component's horizontal factor in the low 16 bits and its
  # vertical factor in the high 16 bits.
  def subsampling(sof)
    case sof
    when /\A\x08....\x03\x01(.).\x02\x11.\x03\x11.\z/mn
      hv = Regexp.last_match(1).getbyte(0)
      ((hv & 0xf) << 16) | ((hv & 0xf0) >> 4)
    when nil then die 'no SOF0 found'
    else die 'not YCbCr 24 bit color with good subsampling'
    end
  end

  # Writes one TIFF page for jpeg to output.
  #
  # output   - object responding to #write
  # jpeg     - String holding one JPEG file (handled as raw bytes)
  # start    - absolute offset of this page's IFD; 0 for the first page,
  #            in which case the 8-byte TIFF header is written as well
  # has_next - true when another page follows
  #
  # Returns the offset of the next IFD, or 0 when has_next is false.
  # Raises Error when jpeg is not an acceptable baseline YCbCr JPEG.
  def process(output, jpeg, start, has_next)
    # All offsets below are byte offsets, so force a binary view of the data.
    jpeg = jpeg.b unless jpeg.encoding == Encoding::ASCII_8BIT
    sof, icc = scan_segments(jpeg)
    sampling = subsampling(sof)
    height, width = sof[1, 4].unpack('nn')
    bitsoff, rbwoff, jpegoff, iccoff, nextifd = 0, 0, 0, 0, 0
    # The IFD is rendered twice: once with placeholder offsets to learn
    # its size, and again after the real offsets have been computed.
    ifd = lambda do
      e = [
        [0x0100, 3, 1, width],         # ImageWidth
        [0x0101, 3, 1, height],        # ImageHeight
        [0x0102, 3, 3, bitsoff],       # BitsPerSample
        [0x0103, 3, 1, 7],             # Compression: JPEG
        [0x0106, 3, 1, 6],             # Photometric: YCbCr
        [0x0111, 4, 1, jpegoff],       # StripOffsets
        [0x0115, 3, 1, 3],             # SamplesPerPixel: 3
        [0x0116, 3, 1, height],        # RowsPerStrip
        [0x0117, 4, 1, jpeg.bytesize], # StripByteCounts
        [0x011c, 3, 1, 1],             # PlanarConfiguration: Chunky
        [0x0212, 3, 2, sampling],      # YCbCrSubSampling
        [0x0214, 5, 6, rbwoff],        # ReferenceBlackWhite
      ]
      # Without a separate copy the profile is referenced in place inside the strip.
      iccoff = icc[:pos] + jpegoff if icc && !icc[:copy]
      e.push [0x8773, 7, icc[:len], iccoff] if icc # ICCProfile
      [[e.size].pack('v'), e.map { |i| i.pack('vvVV') },
       [nextifd].pack('V')].join
    end
    output.write [0x4949, 0x2a, 8].pack('vvV') if start == 0
    start = 8 if start == 0
    buf = Buffer.new(start + ifd.call.size)
    bitsoff = buf.pos
    # Buffer#write returns the offset following the chunk just written,
    # which is exactly where the next chunk begins.
    rbwoff = buf.write([8, 8, 8].pack('v*'))
    jpegoff = buf.write([0, 1, 255, 1, 128, 1, 255, 1, 128, 1, 255, 1].pack('V*'))
    buf.write jpeg
    if icc && icc[:copy]
      iccoff = buf.align(2)
      buf.write icc[:copy]
    end
    nextifd = buf.align(2) if has_next
    output.write ifd.call
    buf.a.each { |i| output.write i }
    nextifd
  end

  public

  # Writes a TIFF containing one page per element of args to output.
  #
  # output - object responding to #write
  # args   - Array of JPEG byte strings, or of arbitrary keys that the
  #          optional block turns into JPEG byte strings
  #
  # Raises JPG2TIF::Error when an image cannot be embedded.
  def create(output, args)
    off = 0
    args.each_with_index do |bin, i|
      $stderr.printf "image %d offset %d\n", i + 1, off if $VERBOSE
      jpeg = block_given? ? yield(bin) : bin
      off = process(output, jpeg, off, i + 1 < args.size)
    end
  end
end
# Command-line entry point: every argument names a JPEG file ('-' means
# stdin, which is also the default); the resulting TIFF goes to stdout.
if __FILE__ == $0
  # Both streams carry raw image bytes. binmode turns off newline and
  # encoding translation; set_encoding alone only retags the data and
  # leaves stdin in text mode.
  $stdout.binmode
  $stdin.binmode
  files = ARGV.empty? ? ['-'] : ARGV
  JPG2TIF.new.create($stdout, files) do |input|
    input == '-' ? $stdin.read : File.binread(input)
  end
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env ruby | |
| # To the extent possible under law, the author has waived all copyright | |
| # and related or neighboring rights to this software by associating the | |
| # CC0 1.0 (http://creativecommons.org/publicdomain/zero/1.0/) with it. | |
| # | |
| # This script extracts JPEG files embedded in a TIFF file by jpg2tif.rb. | |
| # It reads a TIFF file from stdin and writes each JPEG image to each file | |
| # specified in the arguments. '-' means stdout. To extract more than one | |
| # image from a multi-page TIFF file, multiple output filenames must be | |
| # specified in the arguments. The N-th page in the TIFF file is written | |
| # to the N-th file. | |
# Extracts the single-strip payload of every page from a little-endian
# TIFF byte string.
class TIF2JPG
  # Raised when the input is not a TIFF this reader can handle.
  class Error < StandardError; end

  private

  # Aborts processing by raising TIF2JPG::Error with message +s+.
  def die(s)
    raise Error, s
  end

  public

  # Walks the IFD chain of tiff and yields the strip bytes of each page.
  #
  # tiff - String holding a complete little-endian ("II") TIFF file
  #
  # Returns nil after the last page, or an Enumerator over the strips
  # when no block is given.
  # Raises Error when the header, an IFD or a strip is missing, truncated
  # or malformed, or when the IFD chain loops back on itself.
  def read(tiff)
    return enum_for(:read, tiff) unless block_given?

    sig, ver, off = tiff.unpack('vvV')
    # off is nil when the header is shorter than 8 bytes.
    die 'not a tiff file' unless sig == 0x4949 && ver == 0x2a && off && off > 0
    seen = {}
    while off > 0
      # A corrupt next-IFD pointer could otherwise make this loop endless.
      die 'IFD loop detected' if seen[off]
      seen[off] = true
      num_entries = (tiff.byteslice(off, 2) || '').unpack('v')[0]
      off += 2
      die 'IFD not found' unless num_entries
      strip_offset = strip_length = nil
      num_entries.times do
        tag, ty, count, offset = (tiff.byteslice(off, 12) || '').unpack('vvVV')
        # The last field is nil when fewer than 12 bytes were available.
        die 'truncated IFD' unless offset
        off += 12
        $stderr.printf "tag %04x type %04x count %08x offset %08x\n",
                       tag, ty, count, offset if $VERBOSE
        case tag
        when 0x0111 # StripOffsets
          die 'unexpected StripOffsets' unless ty == 4 && count == 1
          strip_offset = offset
        when 0x0117 # StripByteCounts
          die 'unexpected StripByteCounts' unless ty == 4 && count == 1
          strip_length = offset
        end
      end
      die 'no strip found' unless strip_offset && strip_length
      strip = tiff.byteslice(strip_offset, strip_length)
      die 'strip out of range' unless strip && strip.bytesize == strip_length
      # A missing next-IFD field is treated as the end of the chain.
      off = (tiff.byteslice(off, 4) || '').unpack('V')[0] || 0
      yield strip
    end
  end
end
# Command-line entry point: reads a TIFF from stdin and writes the N-th
# page to the N-th file named on the command line ('-' means stdout,
# which is also the default). Pages beyond the last name are skipped.
if __FILE__ == $0
  # Both streams carry raw image bytes. binmode turns off newline and
  # encoding translation; set_encoding alone only retags the data and
  # leaves stdin in text mode.
  $stdout.binmode
  $stdin.binmode
  files = ARGV.empty? ? ['-'] : ARGV
  TIF2JPG.new.read($stdin.read) do |jpeg|
    break if files.empty?
    output = files.shift
    if output == '-'
      $stdout.write jpeg
    else
      File.binwrite output, jpeg
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment