/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.compress.harmony.pack200;

import java.io.IOException;
import java.io.InputStream;

/**
 * A Codec converts between a sequence of bytes and integer values, in either direction.
 *
 * A handful of standard Codecs ({@link #UDELTA5}, {@link #UNSIGNED5}, {@link #BYTE1}, {@link #CHAR3}) are used for
 * many bands, but a variety of others exist, and the specification assumes that other combinations of values can
 * yield more specific and efficient formats. The Pack200 specification also defines a sequence of canonical
 * encodings, which allows a Codec to be referred to by its canonical number; see
 * {@link CodecEncoding#getCodec(int, InputStream, Codec)}.
 */
public abstract class Codec {

    /**
     * BCI5 = (5,4): Used for storing branching information in bytecode.
     */
    public static final BHSDCodec BCI5 = new BHSDCodec(5, 4);

    /**
     * BRANCH5 = (5,4,2): Used for storing branching information in bytecode.
     */
    public static final BHSDCodec BRANCH5 = new BHSDCodec(5, 4, 2);

    /**
     * BYTE1 = (1,256): Used for storing plain bytes.
     */
    public static final BHSDCodec BYTE1 = new BHSDCodec(1, 256);

    /**
     * CHAR3 = (3,128): Used for storing text (UTF-8) strings. NB: this is not quite the same as UTF-8, but it has
     * similar properties; ASCII characters below 127 are stored in a single byte.
     */
    public static final BHSDCodec CHAR3 = new BHSDCodec(3, 128);

    /**
     * DELTA5 = (5,64,1,1): Used for the majority of numerical codings where there is a correlated sequence of
     * signed values.
     */
    public static final BHSDCodec DELTA5 = new BHSDCodec(5, 64, 1, 1);

    /**
     * MDELTA5 = (5,64,2,1): Used for the majority of numerical codings where there is a correlated sequence of
     * signed values, most of which are expected to be non-negative.
     */
    public static final BHSDCodec MDELTA5 = new BHSDCodec(5, 64, 2, 1);

    /**
     * SIGNED5 = (5,64,1): Used for small signed values.
     */
    public static final BHSDCodec SIGNED5 = new BHSDCodec(5, 64, 1);

    /**
     * UDELTA5 = (5,64,0,1): Used for the majority of numerical codings where there is a correlated sequence of
     * unsigned values.
     */
    public static final BHSDCodec UDELTA5 = new BHSDCodec(5, 64, 0, 1);

    /**
     * UNSIGNED5 = (5,64): Used for small unsigned values.
     */
    public static final BHSDCodec UNSIGNED5 = new BHSDCodec(5, 64);

    /**
     * Band-length bookkeeping; reset to zero at the start of {@link #decodeInts(int, InputStream)}.
     *
     * NOTE(review): presumably concrete codecs update this while decoding to record how much input the most recent
     * band consumed - confirm against the subclasses, as this class never writes any other value to it.
     */
    public int lastBandLength;

    /**
     * Decodes a single value from the given input stream. Note that this method can only be applied to non-delta
     * encodings.
     *
     * @param in the input stream to read from
     * @return the decoded value
     * @throws IOException if there is a problem reading from the underlying input stream
     * @throws Pack200Exception if the encoding is a delta encoding
     */
    public abstract int decode(InputStream in) throws IOException, Pack200Exception;

    /**
     * Encodes a single value into a sequence of bytes.
     *
     * @param value the value to encode
     * @param last the previous value encoded (for delta encodings)
     * @return the encoded bytes
     * @throws Pack200Exception if there is a problem encoding the value
     */
    public abstract byte[] encode(int value, int last) throws Pack200Exception;

    /**
     * Encodes a single value into a sequence of bytes. Note that this method can only be used for non-delta
     * encodings.
     *
     * @param value the value to encode
     * @return the encoded bytes
     * @throws Pack200Exception if there is a problem encoding the value
     */
    public abstract byte[] encode(int value) throws Pack200Exception;

    /**
     * Decodes a single value from the given input stream. If this encoding is a delta encoding (d=1) then the
     * previous value must be passed in as a parameter. If it is a non-delta encoding, the value passed in does not
     * matter, so callers can simply thread the previous result through with code similar to:
     *
     * <pre>
     * long last = 0;
     * while (condition) {
     *     last = codec.decode(in, last);
     *     // do something with last
     * }
     * </pre>
     *
     * @param in the input stream to read from
     * @param last the previous value read, which must be supplied if the codec is a delta encoding
     * @return the decoded value
     * @throws IOException if there is a problem reading from the underlying input stream
     * @throws Pack200Exception if there is a problem decoding the value or the value is invalid
     */
    public abstract int decode(InputStream in, long last) throws IOException, Pack200Exception;

    /**
     * Decodes a sequence of {@code n} values from {@code in}. This should probably be used in most cases, since
     * some codecs (such as {@link PopulationCodec}) only work when the number of values to be read is known.
     *
     * @param n the number of values to decode
     * @param in the input stream to read from
     * @return an array of {@code int} values corresponding to the values decoded
     * @throws IOException if there is a problem reading from the underlying input stream
     * @throws Pack200Exception if there is a problem decoding a value or a value is invalid
     */
    public int[] decodeInts(final int n, final InputStream in) throws IOException, Pack200Exception {
        lastBandLength = 0;
        final int[] values = new int[n];
        // Each decoded value is fed back in as the "previous" value for the next one; the sequence starts from 0.
        int previous = 0;
        for (int i = 0; i < n; i++) {
            previous = decode(in, previous);
            values[i] = previous;
        }
        return values;
    }

    /**
     * Decodes a sequence of {@code n} values from {@code in}, for a band whose first value has already been read.
     *
     * @param n the number of values to decode
     * @param in the input stream to read from
     * @param firstValue the first value in the band, which has already been read
     * @return an array of {@code n + 1} {@code int} values: {@code firstValue} followed by the values decoded
     * @throws IOException if there is a problem reading from the underlying input stream
     * @throws Pack200Exception if there is a problem decoding a value or a value is invalid
     */
    public int[] decodeInts(final int n, final InputStream in, final int firstValue)
        throws IOException, Pack200Exception {
        final int[] values = new int[n + 1];
        values[0] = firstValue;
        // The already-read first value seeds the "previous" value for the first decode call.
        int previous = firstValue;
        for (int i = 1; i <= n; i++) {
            previous = decode(in, previous);
            values[i] = previous;
        }
        return values;
    }

    /**
     * Encodes a sequence of integers into a single byte array.
     *
     * @param ints the values to encode
     * @return the encoded bytes of every value, concatenated in order
     * @throws Pack200Exception if there is a problem encoding any of the values
     */
    public byte[] encode(final int[] ints) throws Pack200Exception {
        final byte[][] chunks = new byte[ints.length][];
        int size = 0;
        // The first value is encoded against 0; every later value against its predecessor.
        int previous = 0;
        for (int i = 0; i < ints.length; i++) {
            final byte[] chunk = encode(ints[i], previous);
            chunks[i] = chunk;
            size += chunk.length;
            previous = ints[i];
        }
        // Concatenate the per-value encodings into one contiguous array.
        final byte[] joined = new byte[size];
        int offset = 0;
        for (final byte[] chunk : chunks) {
            System.arraycopy(chunk, 0, joined, offset, chunk.length);
            offset += chunk.length;
        }
        return joined;
    }
}