Package org.jcodings
Class Encoding
- java.lang.Object
-
- org.jcodings.Encoding
-
- All Implemented Interfaces:
java.lang.Cloneable
- Direct Known Subclasses:
AbstractEncoding
public abstract class Encoding extends java.lang.Object implements java.lang.Cloneable
-
-
Field Summary
Fields Modifier and Type Field Description static int
CHAR_INVALID
protected java.nio.charset.Charset
charset
private static int
count
protected int
hashCode
private int
index
protected boolean
isAsciiCompatible
protected boolean
isDummy
protected boolean
isFixedWidth
protected boolean
isSingleByte
protected int
maxLength
protected int
minLength
protected byte[]
name
static byte
NEW_LINE
-
Method Summary
All Methods Static Methods Instance Methods Abstract Methods Concrete Methods Modifier and Type Method Description abstract void
applyAllCaseFold(int flag, ApplyAllCaseFoldFunction fun, java.lang.Object arg)
Expand case folds given a character class (used for case insensitive matching)
static byte
asciiToLower(int c)
static byte
asciiToUpper(int c)
abstract CaseFoldCodeItem[]
caseFoldCodesByString(int flag, byte[] bytes, int p, int end)
Expand AST string nodes into their folded alternatives (look at: Analyser.expandCaseFoldString). Oniguruma equivalent: get_case_fold_codes_by_str
abstract int
codeToMbc(int code, byte[] bytes, int p)
Extracts code point into its multibyte representation
abstract int
codeToMbcLength(int code)
Returns character length given a code point. Oniguruma equivalent: code_to_mbclen
abstract int[]
ctypeCodeRange(int ctype, IntHolder sbOut)
Returns code range for a given character type. Oniguruma equivalent: get_ctype_code_range
static int
digitVal(int code)
boolean
equals(java.lang.Object other)
java.nio.charset.Charset
getCharset()
If this encoding is capable of being represented by a Java Charset then provide it.
java.lang.String
getCharsetName()
int
getIndex()
byte[]
getName()
int
hashCode()
boolean
isAlnum(int code)
boolean
isAlpha(int code)
static boolean
isAscii(byte b)
static boolean
isAscii(int code)
boolean
isAsciiCompatible()
boolean
isBlank(int code)
boolean
isCntrl(int code)
abstract boolean
isCodeCType(int code, int ctype)
Perform a check whether given code is of given character type (e.g. used by isWord(someByte) and similar methods)
boolean
isDigit(int code)
boolean
isDummy()
boolean
isFixedWidth()
boolean
isGraph(int code)
boolean
isLower(int code)
static boolean
isMbcAscii(byte b)
boolean
isMbcCrnl(byte[] bytes, int p, int end)
boolean
isMbcHead(byte[] bytes, int p, int end)
boolean
isMbcWord(byte[] bytes, int p, int end)
abstract boolean
isNewLine(byte[] bytes, int p, int end)
Returns true if bytes[p] is a head of a new line character. Oniguruma equivalent: is_mbc_newline
boolean
isNewLine(int code)
boolean
isPrint(int code)
boolean
isPunct(int code)
abstract boolean
isReverseMatchAllowed(byte[] bytes, int p, int end)
Returns true if it's safe to use the reverse Boyer-Moore search fail-fast algorithm. Oniguruma equivalent: is_allowed_reverse_match
boolean
isSbWord(int code)
boolean
isSingleByte()
boolean
isSpace(int code)
boolean
isUpper(int code)
boolean
isWord(int code)
static boolean
isWordGraphPrint(int ctype)
boolean
isXDigit(int code)
abstract int
leftAdjustCharHead(byte[] bytes, int p, int s, int end)
Seeks the previous character head in a stream. Oniguruma equivalent: left_adjust_char_head
abstract int
length(byte c)
Returns character length given character head; returns 1 for singlebyte encodings or performs direct length table lookup for multibyte ones.
abstract int
length(byte[] bytes, int p, int end)
Returns character length given stream, character position and stream end; returns 1 for singlebyte encodings, or performs sanity validations for multibyte ones and returns the character length, or the missing characters in the stream otherwise
static Encoding
load(java.lang.String name)
int
maxLength()
Returns maximum character byte length that can appear in an encoding. Oniguruma equivalent: max_enc_len
int
maxLengthDistance()
abstract int
mbcCaseFold(int flag, byte[] bytes, IntHolder pp, int end, byte[] to)
Performs case folding for a character at bytes[pp.value]
int
mbcodeStartPosition()
abstract int
mbcToCode(byte[] bytes, int p, int end)
Returns code point for a character. Oniguruma equivalent: mbc_to_code
int
minLength()
Returns minimum character byte length that can appear in an encoding. Oniguruma equivalent: min_enc_len
static int
odigitVal(int code)
int
prevCharHead(byte[] bytes, int p, int s, int end)
abstract int
propertyNameToCType(byte[] bytes, int p, int end)
Returns character type given character type name (used when e.g. \p{Alpha})
Encoding
replicate(byte[] name)
int
rightAdjustCharHead(byte[] bytes, int p, int s, int end)
int
rightAdjustCharHeadWithPrev(byte[] bytes, int p, int s, int end, IntHolder prev)
protected void
setName(byte[] name)
protected void
setName(java.lang.String name)
int
step(byte[] bytes, int p, int end, int n)
int
stepBack(byte[] bytes, int p, int s, int end, int n)
int
strByteLengthNull(byte[] bytes, int p, int end)
abstract int
strCodeAt(byte[] bytes, int p, int end, int index)
abstract int
strLength(byte[] bytes, int p, int end)
int
strLengthNull(byte[] bytes, int p, int end)
int
strNCmp(byte[] bytes, int p, int end, byte[] ascii, int asciiP, int n)
byte[]
toLowerCaseTable()
Returns lower case table if it's safe to use it directly, otherwise null. Used for fast case insensitive matching for some singlebyte encodings
java.lang.String
toString()
int
xdigitVal(int code)
-
-
-
Field Detail
-
CHAR_INVALID
public static final int CHAR_INVALID
- See Also:
- Constant Field Values
-
count
private static int count
-
minLength
protected final int minLength
-
maxLength
protected final int maxLength
-
isFixedWidth
protected final boolean isFixedWidth
-
isSingleByte
protected final boolean isSingleByte
-
isDummy
protected final boolean isDummy
-
isAsciiCompatible
protected final boolean isAsciiCompatible
-
name
protected byte[] name
-
hashCode
protected int hashCode
-
index
private int index
-
charset
protected java.nio.charset.Charset charset
-
NEW_LINE
public static final byte NEW_LINE
- See Also:
- Constant Field Values
-
-
Method Detail
-
setName
protected final void setName(java.lang.String name)
-
setName
protected final void setName(byte[] name)
-
toString
public final java.lang.String toString()
- Overrides:
toString
in class java.lang.Object
-
equals
public final boolean equals(java.lang.Object other)
- Overrides:
equals
in class java.lang.Object
-
hashCode
public final int hashCode()
- Overrides:
hashCode
in class java.lang.Object
-
getIndex
public final int getIndex()
-
getName
public final byte[] getName()
-
isDummy
public final boolean isDummy()
-
isAsciiCompatible
public final boolean isAsciiCompatible()
-
getCharset
public java.nio.charset.Charset getCharset()
If this encoding is capable of being represented by a Java Charset then provide it.
-
getCharsetName
public java.lang.String getCharsetName()
-
replicate
public Encoding replicate(byte[] name)
-
length
public abstract int length(byte c)
Returns character length given character head; returns 1 for singlebyte encodings or performs direct length table lookup for multibyte ones.
- Parameters:
c - Character head
Oniguruma equivalent: mbc_enc_len
To be deprecated very soon (use the length(byte[] bytes, int p, int end) version)
-
length
public abstract int length(byte[] bytes, int p, int end)
Returns character length given stream, character position and stream end; returns 1 for singlebyte encodings, or performs sanity validations for multibyte ones and returns the character length, or the missing characters in the stream otherwise
- Returns:
- 0 Never
> 0 Valid character, length returned
-1 Illegal/malformed character
< -1 (-1 - n) Number of missing bytes for character in p...end range
Oniguruma equivalent: mbc_enc_len, modified for 1.9 purposes
-
maxLength
public final int maxLength()
Returns maximum character byte length that can appear in an encoding. Oniguruma equivalent: max_enc_len
-
maxLengthDistance
public final int maxLengthDistance()
-
minLength
public final int minLength()
Returns minimum character byte length that can appear in an encoding. Oniguruma equivalent: min_enc_len
-
isNewLine
public abstract boolean isNewLine(byte[] bytes, int p, int end)
Returns true if bytes[p] is a head of a new line character. Oniguruma equivalent: is_mbc_newline
-
mbcToCode
public abstract int mbcToCode(byte[] bytes, int p, int end)
Returns code point for a character. Oniguruma equivalent: mbc_to_code
-
codeToMbcLength
public abstract int codeToMbcLength(int code)
Returns character length given a code point. Oniguruma equivalent: code_to_mbclen
-
codeToMbc
public abstract int codeToMbc(int code, byte[] bytes, int p)
Extracts code point into its multibyte representation
- Returns:
- character length for the given code point
Oniguruma equivalent: code_to_mbc
-
mbcCaseFold
public abstract int mbcCaseFold(int flag, byte[] bytes, IntHolder pp, int end, byte[] to)
Performs case folding for a character at bytes[pp.value]
- Parameters:
flag - case fold flag
pp - an IntHolder that points at character head
to - a buffer where to extract case folded character
Oniguruma equivalent: mbc_case_fold
-
toLowerCaseTable
public byte[] toLowerCaseTable()
Returns lower case table if it's safe to use it directly, otherwise null. Used for fast case insensitive matching for some singlebyte encodings
- Returns:
- lower case table
-
applyAllCaseFold
public abstract void applyAllCaseFold(int flag, ApplyAllCaseFoldFunction fun, java.lang.Object arg)
Expand case folds given a character class (used for case insensitive matching)
- Parameters:
flag - case fold flag
fun - case folding functor (look at: ApplyCaseFold)
arg - case folding functor argument (look at: ApplyCaseFoldArg)
Oniguruma equivalent: apply_all_case_fold
-
caseFoldCodesByString
public abstract CaseFoldCodeItem[] caseFoldCodesByString(int flag, byte[] bytes, int p, int end)
Expand AST string nodes into their folded alternatives (look at: Analyser.expandCaseFoldString). Oniguruma equivalent: get_case_fold_codes_by_str
-
propertyNameToCType
public abstract int propertyNameToCType(byte[] bytes, int p, int end)
Returns character type given character type name (used when e.g. \p{Alpha}). Oniguruma equivalent: property_name_to_ctype
-
isCodeCType
public abstract boolean isCodeCType(int code, int ctype)
Perform a check whether given code is of given character type (e.g. used by isWord(someByte) and similar methods)
- Parameters:
code - a code point of a character
ctype - a character type to check against
Oniguruma equivalent: is_code_ctype
-
ctypeCodeRange
public abstract int[] ctypeCodeRange(int ctype, IntHolder sbOut)
Returns code range for a given character type. Oniguruma equivalent: get_ctype_code_range
-
leftAdjustCharHead
public abstract int leftAdjustCharHead(byte[] bytes, int p, int s, int end)
Seeks the previous character head in a stream. Oniguruma equivalent: left_adjust_char_head
- Parameters:
bytes - byte stream
p - position
s - stop
end - end
-
isReverseMatchAllowed
public abstract boolean isReverseMatchAllowed(byte[] bytes, int p, int end)
Returns true if it's safe to use the reverse Boyer-Moore search fail-fast algorithm. Oniguruma equivalent: is_allowed_reverse_match
-
rightAdjustCharHead
public final int rightAdjustCharHead(byte[] bytes, int p, int s, int end)
-
rightAdjustCharHeadWithPrev
public final int rightAdjustCharHeadWithPrev(byte[] bytes, int p, int s, int end, IntHolder prev)
-
prevCharHead
public final int prevCharHead(byte[] bytes, int p, int s, int end)
-
stepBack
public final int stepBack(byte[] bytes, int p, int s, int end, int n)
-
step
public final int step(byte[] bytes, int p, int end, int n)
-
strLength
public abstract int strLength(byte[] bytes, int p, int end)
-
strCodeAt
public abstract int strCodeAt(byte[] bytes, int p, int end, int index)
-
strLengthNull
public final int strLengthNull(byte[] bytes, int p, int end)
-
strByteLengthNull
public final int strByteLengthNull(byte[] bytes, int p, int end)
-
strNCmp
public final int strNCmp(byte[] bytes, int p, int end, byte[] ascii, int asciiP, int n)
-
isNewLine
public final boolean isNewLine(int code)
-
isGraph
public final boolean isGraph(int code)
-
isPrint
public final boolean isPrint(int code)
-
isAlnum
public final boolean isAlnum(int code)
-
isAlpha
public final boolean isAlpha(int code)
-
isLower
public final boolean isLower(int code)
-
isUpper
public final boolean isUpper(int code)
-
isCntrl
public final boolean isCntrl(int code)
-
isPunct
public final boolean isPunct(int code)
-
isSpace
public final boolean isSpace(int code)
-
isBlank
public final boolean isBlank(int code)
-
isDigit
public final boolean isDigit(int code)
-
isXDigit
public final boolean isXDigit(int code)
-
isWord
public final boolean isWord(int code)
-
isMbcWord
public final boolean isMbcWord(byte[] bytes, int p, int end)
-
isSbWord
public final boolean isSbWord(int code)
-
isMbcHead
public final boolean isMbcHead(byte[] bytes, int p, int end)
-
isMbcCrnl
public boolean isMbcCrnl(byte[] bytes, int p, int end)
-
digitVal
public static int digitVal(int code)
-
odigitVal
public static int odigitVal(int code)
-
xdigitVal
public final int xdigitVal(int code)
-
isMbcAscii
public static boolean isMbcAscii(byte b)
-
isAscii
public static boolean isAscii(int code)
-
isAscii
public static boolean isAscii(byte b)
-
asciiToLower
public static byte asciiToLower(int c)
-
asciiToUpper
public static byte asciiToUpper(int c)
-
isWordGraphPrint
public static boolean isWordGraphPrint(int ctype)
-
mbcodeStartPosition
public final int mbcodeStartPosition()
-
isSingleByte
public final boolean isSingleByte()
-
isFixedWidth
public final boolean isFixedWidth()
-
load
public static Encoding load(java.lang.String name)
-
-