001// License: GPL. For details, see LICENSE file. 002package org.openstreetmap.josm.io; 003 004import java.io.IOException; 005import java.io.InputStream; 006import java.io.InputStreamReader; 007import java.io.PushbackInputStream; 008import java.io.UnsupportedEncodingException; 009import java.util.Optional; 010 011/** 012 * Detects the different UTF encodings from byte order mark. 013 * @since 3372 014 */ 015public final class UTFInputStreamReader extends InputStreamReader { 016 017 private UTFInputStreamReader(InputStream in, String cs) throws UnsupportedEncodingException { 018 super(in, cs); 019 } 020 021 /** 022 * Creates a new {@link InputStreamReader} from the {@link InputStream} with UTF-8 as default encoding. 023 * @param input input stream 024 * @return A reader with the correct encoding. Starts to read after the BOM. 025 * @throws IOException if any I/O error occurs 026 * @see #create(java.io.InputStream, String) 027 */ 028 public static UTFInputStreamReader create(InputStream input) throws IOException { 029 return create(input, "UTF-8"); 030 } 031 032 /** 033 * Creates a new {@link InputStreamReader} from the {@link InputStream}. 034 * @param input input stream 035 * @param defaultEncoding Used, when no BOM was recognized. Can be null. 036 * @return A reader with the correct encoding. Starts to read after the BOM. 037 * @throws IOException if any I/O error occurs 038 */ 039 public static UTFInputStreamReader create(InputStream input, String defaultEncoding) throws IOException { 040 byte[] bom = new byte[4]; 041 String encoding = defaultEncoding; 042 int unread; 043 PushbackInputStream pushbackStream = new PushbackInputStream(input, 4); 044 int n = pushbackStream.read(bom, 0, 4); 045 046 if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) && (bom[2] == (byte) 0xBF)) { 047 encoding = "UTF-8"; 048 unread = n - 3; 049 } else if ((bom[0] == (byte) 0x00) && (bom[1] == (byte) 0x00) && (bom[2] == (byte) 0xFE) && (bom[3] == (byte) 0xFF)) { 050 encoding = "UTF-32BE"; 051 unread = n - 4; 052 } else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE) && (bom[2] == (byte) 0x00) && (bom[3] == (byte) 0x00)) { 053 encoding = "UTF-32LE"; 054 unread = n - 4; 055 } else if ((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) { 056 encoding = "UTF-16BE"; 057 unread = n - 2; 058 } else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) { 059 encoding = "UTF-16LE"; 060 unread = n - 2; 061 } else { 062 unread = n; 063 } 064 065 if (unread > 0) { 066 pushbackStream.unread(bom, n - unread, unread); 067 } else if (unread < -1) { 068 pushbackStream.unread(bom, 0, 0); 069 } 070 return new UTFInputStreamReader(pushbackStream, Optional.ofNullable(encoding).orElse("UTF-8")); 071 } 072}