package org.python.core;
import org.python.core.buffer.SimpleBuffer;
import java.util.AbstractList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
/**
* Base class for Jython bytearray (and bytes in due course) that provides
* most of the Java API, including Java {@link List} behaviour. Attempts to modify the contents
* through this API will throw a TypeError if the actual type of the object is not
* mutable. It is possible for a Java client to treat this class as a
* List<PyInteger>, obtaining equivalent functionality to the Python interface in a
* Java paradigm.
*
* Subclasses must define (from {@link PySequence}): *
* Many of the methods implemented here are inherited or thinly wrapped by {@link PyByteArray},
* which offers them as Java API, or exposes them as Python methods. These prototype Python methods
* mostly accept a {@link PyObject} as argument, where you might have expected a byte[]
* or BaseBytes, in order to accommodate the full range of types accepted by the Python
* equivalent: usually, any PyObject that implements {@link BufferProtocol}, providing
* a one-dimensional array of bytes, is an acceptable argument. In the documentation, the reader
* will often see the terms "byte array" or "object viewable as bytes" instead of
* BaseBytes when this broader scope is intended.
*
* Where the methods return a
* The following idiom may be used:
*
*
* Note that the way this is written at the moment, if
* The elements of the
* The elements of the
* The elements of the
* The elements of the
* The elements of the
* The elements of the
* The elements of the
* The semantics of
* The elements of the
* The elements of the
* The elements of the
* Runs of consecutive whitespace are regarded as a single separator, and the result will
* contain no empty strings at the start or end if the string has leading or trailing
* whitespace. Consequently, splitting an empty string or a string consisting of just whitespace
* with a
* The elements of the
* The elements of the
* The elements of the
* If
* If
* The elements of the
* The elements of the
* The elements of the
* Runs of consecutive whitespace are regarded as a single separator, and the result will
* contain no empty strings at the start or end if the string has leading or trailing
* whitespace. Consequently, splitting an empty string or a string consisting of just whitespace
* with a
* The elements of the
* The elements of the
* The elements of the
* The elements of the
* The actual class of the returned object is determined by {@link #getBuilder(int)}.
*
* @param tabsize number of character positions between tab stops
* @return copy of this byte array with tabs expanded
*/
final BaseBytes basebytes_expandtabs(int tabsize) {
    // The true output size is only known after doing the work, so start from a guess
    // (the input size plus one eighth) and let the Builder re-size if that is not enough.
    int estimatedSize = size + size / 8;
    Builder builder = getBuilder(estimatedSize);
    // Column the next output byte will occupy on the current line.
    int carriagePosition = 0;
    int limit = offset + size;
    for (int i = offset; i < limit; i++) {
        byte c = storage[i];
        if (c == '\t') {
            /*
             * With a positive tab size, pad with 1..tabsize spaces up to the next tab stop.
             * A zero or negative tab size expands each tab to nothing (the tab is dropped and
             * the column does not move). The guard also prevents the division by zero that
             * "carriagePosition % tabsize" would otherwise raise when tabsize == 0.
             */
            if (tabsize > 0) {
                int spaces = tabsize - carriagePosition % tabsize;
                builder.repeat((byte)' ', spaces);
                carriagePosition += spaces;
            }
        } else {
            // Transfer the character, but if it is a line break, reset the carriage
            builder.append(c);
            carriagePosition = (c == '\n' || c == '\r') ? 0 : carriagePosition + 1;
        }
    }
    return builder.getResult();
}
/**
* Ready-to-expose implementation of Python
*
* Implementation note: This can be done in a type-safe way but, in the present design,
* only by making
* It is intended the client call this method only once to get the result of a series of
* append operations. A second call to {@link #getStorage()}, before any further appending,
* returns a zero-length array. This is to ensure that the same array is not given out
* twice. However, {@link #getSize()} continues to return the number of bytes accumulated
* until an append next occurs.
*
* @return an array containing the accumulated result
*/
byte[] getStorage() {
    // Hand over the accumulated array, then forget it so that the same array cannot be
    // given out by a later call. The size field is deliberately left untouched here.
    try {
        return storage;
    } finally {
        storage = emptyStorage;
    }
}
/**
 * Number of bytes accumulated so far. In conjunction with {@link #getStorage()}, this
 * describes the result. Unlike {@link #getStorage()}, calling it changes nothing.
 *
 * @return number of bytes accumulated
 */
final int getSize() {
    return this.size;
}
/**
 * Append one byte to the accumulated value, growing the storage first if necessary.
 *
 * @param b the byte to add at the end
 */
void append(byte b) {
    makeRoomFor(1);
    // Claim the next free slot (advancing size) and then fill it.
    final int slot = size++;
    storage[slot] = b;
}
/**
 * Append a run of identical bytes to the accumulated value, for example as padding.
 *
 * @param b byte to repeat
 * @param n number of repeats (nothing is appended if n&lt;=0)
 */
void repeat(byte b, int n) {
    if (n <= 0) {
        // Nothing to add: do not even touch the storage.
        return;
    }
    makeRoomFor(n);
    for (int remaining = n; remaining > 0; remaining--) {
        storage[size++] = b;
    }
}
/**
 * Append the whole content of the given byte array, by delegating to
 * {@link #append(BaseBytes, int, int)} with the range {@code [0, b.size)}.
 *
 * @param b byte array whose content is appended
 */
void append(BaseBytes b) {
    final int whole = b.size;
    append(b, 0, whole);
}
/**
 * Append a slice of the given byte array. Indices are relative to the start of the content
 * of {@code b} (its own storage offset is added here).
 *
 * @param b byte array supplying the bytes
 * @param start index of first byte copied
 * @param end index of first byte not copied
 */
void append(BaseBytes b, int start, int end) {
    final int count = end - start;
    makeRoomFor(count);
    // Source position is expressed in b's backing array; destination is our current end.
    final int from = b.offset + start;
    System.arraycopy(b.storage, from, storage, size, count);
    size += count;
}
/**
 * Append the contents of the given {@link PyBuffer}.
 *
 * @param v buffer whose {@code getLen()} bytes are copied to the end of the accumulated value
 */
void append(PyBuffer v) {
    final int length = v.getLen();
    makeRoomFor(length);
    // Let the buffer write itself into our storage at the current end.
    v.copyTo(storage, size);
    size += length;
}
// Ensure there is enough free space for n more bytes, allocating or growing storage if not.
void makeRoomFor(int n) throws PyException {
    final int required = size + n;
    if (required <= storage.length) {
        // Already enough capacity: nothing to do.
        return;
    }
    try {
        if (storage != emptyStorage) {
            // We are expanding an existing allocation: be imaginative
            byte[] previous = storage;
            storage = new byte[roundUp(required)];
            System.arraycopy(previous, 0, storage, 0, size);
        } else {
            /*
             * We arrive here on the first (non-empty) append after construction or after
             * getStorage(). In the latter case size still holds its old value even though
             * storage was reset to emptyStorage, so it is zeroed now.
             */
            size = 0;
            if (n > 0) {
                // When previously empty (including the constructor) allocate exactly n.
                storage = new byte[n];
            }
        }
    } catch (OutOfMemoryError e) {
        /*
         * MemoryError is right for most clients. Some (e.g. bytearray.replace()) should
         * convert it to an overflow, with a customised message.
         */
        throw Py.MemoryError(e.getMessage());
    }
}
}
/**
* Choose a size appropriate to store the given number of bytes, with some room for growth, when
* allocating storage for mutable types or BaseBytes, this is will normally be an instance of the
* class of the object on which the method was actually called. For example {@link #capitalize()},
* defined in BaseBytes to return a BaseBytes, actually returns a {@link PyByteArray}
* when applied to a bytearray. Or it may be that the method returns a
* PyList of instances of the target type, for example {@link #rpartition(PyObject)}.
* This is achieved by the sub-class defining {@link PySequence#getslice(int, int, int)} and
* {@link #getBuilder(int)} to return instances of its own type. See the documentation of particular
* methods for more information.
*/
@Untraversable
public abstract class BaseBytes extends PySequence implements ListBaseBytes of explicitly-specified sub-type.
*
* @param type explicit Jython type
*/
public BaseBytes(PyType type) {
    super(type, null);
    // Slice assignment and deletion are routed through our own index delegate.
    this.delegator = new IndexDelegate();
    // Start out with the shared empty array as content.
    setStorage(emptyStorage);
}
/**
 * Constructs a byte array of the given type and size, with storage obtained from
 * {@code newStorage(size)}.
 *
 * @param type explicit Jython type
 * @param size required number of bytes
 */
public BaseBytes(PyType type, int size) {
    super(type, null);
    // Slice assignment and deletion are routed through our own index delegate.
    this.delegator = new IndexDelegate();
    newStorage(size);
}
/**
 * Constructs a byte array of defined type by copying values from an {@code int[]}. Each value
 * is passed through {@code byteCheck(int)} before being stored.
 *
 * @param type explicit Jython type
 * @param value source of values (and size)
 */
public BaseBytes(PyType type, int[] value) {
    super(type, null);
    this.delegator = new IndexDelegate();
    final int count = value.length;
    newStorage(count);
    // Element j of the source lands j places after the storage offset.
    for (int j = 0; j < count; j++) {
        storage[offset + j] = byteCheck(value[j]);
    }
}
/**
 * Constructs a byte array of defined type by copying character values from a String. Each
 * character code is passed through {@code byteCheck} and so has to be in the Python byte
 * range 0 to 255.
 *
 * @param type explicit Jython type
 * @param value source of characters
 * @throws PyException if any value[i] &gt; 255
 */
protected BaseBytes(PyType type, String value) throws PyException {
    super(type, null);
    this.delegator = new IndexDelegate();
    final int count = value.length();
    newStorage(count);
    // Character j of the source lands j places after the storage offset.
    for (int j = 0; j < count; j++) {
        storage[offset + j] = byteCheck(value.charAt(j));
    }
}
/**
 * Helper for constructors and methods that manipulate the storage in mutable subclasses. It
 * also permits shared storage between objects, which in general is unsafe if the storage is
 * subject to modification independent of the object now being created. Immutable types may
 * share storage (safely).
 *
 * @param storage byte array allocated by client
 * @param size number of bytes actually used
 * @param offset index of first byte used
 * @throws IllegalArgumentException if the range [offset:offset+size] is not within the array
 *             bounds of storage, or size&lt;0 or offset&lt;0.
 */
protected void setStorage(byte[] storage, int size, int offset) throws IllegalArgumentException {
    /*
     * The upper bound is tested as size > length - offset rather than offset + size > length:
     * the sum of two large non-negative ints can wrap to a negative value and would then
     * wrongly pass the check. The subtraction cannot overflow because offset >= 0 is
     * established first.
     */
    if (size < 0 || offset < 0 || size > storage.length - offset) {
        throw new IllegalArgumentException();
    } else {
        this.storage = storage;
        this.size = size;
        this.offset = offset;
    }
}
/**
 * Helper for constructors and methods that manipulate the storage in mutable subclasses, for
 * the case where the content is the first {@code size} bytes of the given array (offset zero).
 *
 * @param storage byte array allocated by client
 * @param size number of bytes actually used
 * @throws IllegalArgumentException if the range [0:size] is not within the array bounds of
 *             storage.
 */
protected void setStorage(byte[] storage, int size) throws IllegalArgumentException {
    final boolean inBounds = size >= 0 && size <= storage.length;
    if (!inBounds) {
        throw new IllegalArgumentException();
    }
    this.storage = storage;
    this.size = size;
    this.offset = 0;
}
/**
 * Helper for constructors and methods that manipulate the storage in mutable subclasses, for
 * the case where the content is exactly the whole of the given array: offset becomes zero and
 * size becomes the array length.
 *
 * @param storage byte array allocated by client
 */
protected void setStorage(byte[] storage) {
    this.offset = 0;
    this.size = storage.length;
    this.storage = storage;
}
/*
* ============================================================================================
* Support for construction and initialisation
* ============================================================================================
*
* Methods here help subclasses set the initial state. They are designed with bytearray in mind,
* but note that from Python 3, bytes() has the same set of calls and behaviours. In Peterson's
* "sort of backport" to Python 2.x, bytes is effectively an alias for str and it shows.
*/
/**
 * Helper for {@code __new__} and {@code __init__} and the Java API constructor from
 * {@code PyObject} in subclasses. The argument is examined in a fixed order of precedence and
 * the matching specialised {@code init} overload is invoked.
 *
 * @param arg primary argument from which value is taken (may be null for an empty result)
 */
protected void init(PyObject arg) {
    if (arg == null) {
        // bytearray(): construct a zero-length bytearray.
        setStorage(emptyStorage);
        return;
    }
    if (arg instanceof PyUnicode) {
        // bytearray(string): construct from a text string using the default encoding and
        // error policy. Explicit encoding/errors are dealt with elsewhere.
        init((PyUnicode)arg, (String)null, (String)null); // Casts select right init()
        return;
    }
    if (arg.isIndex()) {
        // bytearray(int): construct a bytearray of the given length.
        init(arg.asIndex(Py.OverflowError)); // overflow if too big to be Java int
        return;
    }
    if (arg instanceof BaseBytes) {
        // Copy of a bytearray (or bytes): done efficiently by array copy.
        init((BaseBytes)arg);
        return;
    }
    if (arg instanceof BufferProtocol) {
        // Copy of an object supporting the Jython implementation of PEP 3118.
        init((BufferProtocol)arg);
        return;
    }
    // The remaining alternative is an iterable returning (hopefully) right-sized ints. If it
    // isn't one, we get an exception about not being iterable, or about the values.
    init(arg.asIterable());
}
/**
 * Helper for {@code __new__} and {@code __init__} and the Java API constructor from a text
 * string with the specified encoding in subclasses. The encoding and errors arguments are
 * converted with {@code asString()} (null stays null) and the work is passed to
 * {@link #init(PyUnicode, String, String)}.
 *
 * @param arg primary argument from which value is taken
 * @param encoding name of optional encoding (must be a string type), or null
 * @param errors name of optional errors policy (must be a string type), or null
 */
protected void init(PyUnicode arg, PyObject encoding, PyObject errors) {
    String encodingName = null;
    if (encoding != null) {
        encodingName = encoding.asString();
    }
    String errorsName = null;
    if (errors != null) {
        errorsName = errors.asString();
    }
    init(arg, encodingName, errorsName);
}
/**
 * Helper for {@code __new__} and {@code __init__} and the Java API constructor from a text
 * string with the specified encoding in subclasses. The text is encoded with
 * {@link #encode(PyObject, String, String)} and the resulting character codes become the
 * content of this byte array.
 *
 * @param arg primary argument from which value is taken
 * @param encoding name of optional encoding
 * @param errors name of optional errors policy
 */
protected void init(PyUnicode arg, String encoding, String errors) {
    // Jython encode emits a String (not byte[]): one character per byte of the result.
    final String bytesAsChars = encode(arg, encoding, errors);
    final int length = bytesAsChars.length();
    newStorage(length);
    setBytes(0, bytesAsChars);
}
/**
 * Helper that produces the String of byte values for a given object. A {@link PyUnicode} is
 * encoded through the codecs module, using the default encoding of the current system state
 * when {@code encoding} is null; any other object contributes its {@code toString()}.
 *
 * @param arg primary argument from which value is taken
 * @param encoding name of optional encoding (null selects the default for a PyUnicode)
 * @param errors name of optional errors policy
 * @return encoded string
 * @throws PyException if raised by the codec
 */
protected static String encode(PyObject arg, String encoding, String errors) throws PyException {
    // Jython encode emits a String (not byte[])
    if (!(arg instanceof PyUnicode)) {
        return arg.toString();
    }
    String codecName = encoding;
    if (codecName == null) {
        codecName = Py.getSystemState().getCodecState().getDefaultEncoding();
    }
    return codecs.encode(arg, codecName, errors);
}
/**
 * Fill a contiguous section of this byte array by copying character values from a String.
 * Each character code is passed through {@code byteCheck} and so has to be in the Python byte
 * range 0 to 255.
 *
 * @param start index in this byte array at which the first character code lands
 * @param value source of characters
 * @throws PyException (ValueError) if any value[i] &gt; 255
 */
protected void setBytes(int start, String value) throws PyException {
    final int count = value.length();
    final int base = offset + start;
    for (int j = 0; j < count; j++) {
        storage[base + j] = byteCheck(value.charAt(j));
    }
}
/**
 * Fill a strided slice of this byte array by copying character values from a String. Each
 * character code is passed through {@code byteCheck} and so has to be in the Python byte
 * range 0 to 255.
 *
 * @param start index in this byte array at which the first character code lands
 * @param step distance in this byte array between successive destination indices
 * @param value source of characters
 * @throws PyException (ValueError) if any value[i] &gt; 255
 */
protected void setBytes(int start, int step, String value) throws PyException {
    final int count = value.length();
    final int base = offset + start;
    for (int j = 0; j < count; j++) {
        // Character j goes j strides after the first destination.
        storage[base + j * step] = byteCheck(value.charAt(j));
    }
}
/**
 * Helper for {@code __new__} and {@code __init__} and the Java API constructor from
 * {@code int} in subclasses. Allocates new storage of the specified size.
 *
 * @param n size of the array
 * @throws PyException (ValueError) if n is negative
 */
protected void init(int n) {
    if (n >= 0) {
        newStorage(n);
    } else {
        throw Py.ValueError("negative count");
    }
}
/**
 * Helper for {@code __new__} and {@code __init__} and the Java API constructor from objects
 * supporting the Jython implementation of PEP 3118 (Buffer API) in subclasses.
 *
 * @param value an object bearing the Buffer API
 * @throws PyException if the buffer cannot be obtained
 */
protected void init(BufferProtocol value) throws PyException {
    // The view is closed automatically when we are done with it.
    try (PyBuffer source = value.getBuffer(PyBUF.FULL_RO)) {
        // Size our storage to the exported data, then have the view copy itself in.
        final int length = source.getLen();
        newStorage(length);
        source.copyTo(storage, offset);
    }
}
/**
 * Helper for {@code __new__} and {@code __init__} and the Java API constructor from
 * bytearray or bytes in subclasses. New storage is allocated and the content of the source
 * is copied into it.
 *
 * @param source bytearray (or bytes) to copy
 */
protected void init(BaseBytes source) {
    final int length = source.size;
    newStorage(length);
    // Copy from the start of the source's content to the start of ours.
    System.arraycopy(source.storage, source.offset, this.storage, this.offset, this.size);
}
/**
* Helper for __new__ and __init__ and the Java API constructor from
* an arbitrary iterable Python type in subclasses. This will include generators and lists.
*
* @param iter iterable source of values to enter in the array
*/
protected void init(Iterable extends PyObject> iter) {
/*
* Different strategy is needed from that suited to "random append" operations. We shall
* have a stream of append operations, and it might be long.
*/
FragmentList fragList = new FragmentList();
fragList.loadFrom(iter);
// Now, aggregate all those fragments.
//
if (fragList.totalCount > 0) {
if (fragList.size() == 1) {
// Note that the first fragment is small: negligible waste if stolen directly.
Fragment frag = fragList.getFirst();
setStorage(frag.storage, frag.count);
} else {
// Stitch the fragments together in new storage of sufficient size
newStorage(fragList.totalCount);
fragList.emptyInto(storage, offset);
}
} else {
// Nothing in the iterator
setStorage(emptyStorage);
}
}
/**
 * A fragment of temporary storage, for use when we do not know how many bytes to allocate
 * and we are reading in some kind of iterable stream.
 */
protected static class Fragment {

    static final int MINSIZE = 8;
    static final int MAXSIZE = 1024;

    /** Bytes held by this fragment; only the first {@code count} are in use. */
    byte[] storage;
    /** Number of bytes stored so far. */
    int count = 0;

    Fragment(int size) {
        this.storage = new byte[size];
    }

    /**
     * Convert the value to a byte and add it to this fragment.
     *
     * @param value to convert (by {@code byteCheck}) and store
     * @return true if the fragment is full after the addition
     */
    boolean isFilledBy(PyObject value) {
        // Claim the slot before converting, so count advances even if the conversion throws.
        final int slot = count++;
        storage[slot] = byteCheck(value);
        return storage.length == count;
    }
}
/**
* A container of temporary storage when we do not know how many bytes to allocate, and we are
* reading in some kind of iterable stream.
*/
protected static class FragmentList extends LinkedList
*
*
* @param value to convert.
* @throws PyException (TypeError) if not acceptable type
* @throws PyException (ValueError) if value<0 or value>255 or string length!=1
*/
protected static final byte byteCheck(PyObject value) throws PyException {
    if (!value.isIndex()) {
        // Not usable as an index, so it cannot supply a byte value.
        throw Py.TypeError("an integer or string of size 1 is required");
    }
    // This will possibly produce Py.OverflowError("long int too large to convert")
    final int asInt = value.asIndex();
    return byteCheck(asInt);
}
/**
 * Return a buffer exported by the argument, or return {@code null} if it does not bear the
 * buffer API. The caller is responsible for calling {@link PyBuffer#release()} on the buffer,
 * if the return value is not {@code null}. A {@link PyLong} that does not itself export a
 * buffer is accepted as a single byte value and wrapped in a one-byte buffer.
 *
 * @param b object to wrap
 * @return byte-oriented view or null
 * @throws PyException (ValueError) if b is a PyLong outside range(0, 256)
 */
protected static PyBuffer getView(PyObject b) {
    if (b == null) {
        return null;
    } else if (b instanceof PyUnicode) {
        /*
         * PyUnicode has the BufferProtocol interface as it extends PyBytes. (It would bring
         * you 0xff&charAt(i) in practice.) However, in CPython the unicode string does not have
         * the buffer API.
         */
        return null;
    } else if (b instanceof BufferProtocol) {
        return ((BufferProtocol) b).getBuffer(PyBUF.FULL_RO);
    } else if (b instanceof PyLong) {
        /*
         * Range-check the full-precision value. Narrowing to int first would discard the
         * high-order bits, so that a large value such as 2**32 + 65 would be mistaken for 65.
         * A non-negative value fits in a byte exactly when its bit length is at most 8.
         */
        java.math.BigInteger value = ((PyLong) b).getValue();
        if (value.signum() < 0 || value.bitLength() > 8) {
            throw Py.ValueError("byte must be in range(0, 256)");
        }
        return new SimpleBuffer(new byte[]{(byte) value.intValue()});
    } else {
        return null;
    }
}
/**
 * Return a buffer exported by the argument or raise an exception if it does not bear the buffer
 * API. The caller is responsible for calling {@link PyBuffer#release()} on the buffer. The
 * return value is never {@code null}. This form uses a standard error message.
 *
 * @param b object to wrap
 * @return byte-oriented view
 */
protected static PyBuffer getViewOrError(PyObject b) {
    return getViewOrError(b, "a bytes-like object is required, not '%s'");
}
// As getViewOrError(PyObject), but the TypeError message is built from the caller's format,
// into which the type name of b is substituted.
protected static PyBuffer getViewOrError(PyObject b, String fmt) {
    final PyBuffer view = getView(b);
    if (view == null) {
        throw Py.TypeError(String.format(fmt, b.getType().fastGetName()));
    }
    return view;
}
/*
* ============================================================================================
* API for org.python.core.PySequence
* ============================================================================================
*/
@Override
protected PyInteger pyget(int index) {
    // Fetch the element as an int (via intAt) and box it as a Python integer.
    final int value = intAt(index);
    return new PyInteger(value);
}
/*
* We're not implementing these here, but we can give a stronger guarantee about the return type
* and save some casting and type anxiety.
*/
// Re-declared abstract with the return type narrowed to BaseBytes; subclasses implement it.
@Override
public abstract BaseBytes getslice(int start, int stop, int step);
// Re-declared abstract with the return type narrowed to BaseBytes; subclasses implement it.
@Override
protected abstract BaseBytes repeat(int count);
/*
* And this extension point should be overridden in mutable subclasses
*/
/**
 * Insert the element (interpreted as a Python byte value) at the given index. The default
 * implementation does not insert anything: it is intended for immutable types and simply
 * delegates to {@code pyset(index, element)}, which is expected to raise the appropriate
 * Python error. Mutable types must override it.
 *
 * @param index to insert at
 * @param element to insert (by value)
 * @throws PyException (IndexError) if the index is outside the array bounds
 * @throws PyException (ValueError) if element&lt;0 or element&gt;255
 * @throws PyException (TypeError) if the subclass is immutable
 */
public void pyinsert(int index, PyObject element) {
    // This won't succeed: it just produces the right error.
    this.pyset(index, element);
}
/**
 * Specialisation of {@link PySequence#getslice(int, int, int)} to contiguous slices (of step
 * size 1) for brevity and efficiency. The default implementation is
 * {@code getslice(start, stop, 1)} but it is worth overriding.
 *
 * @param start the position of the first element.
 * @param stop one more than the position of the last element.
 * @return a subclass instance of BaseBytes corresponding to the given range of elements.
 */
@Override
public BaseBytes getslice(int start, int stop) {
    final int unitStep = 1;
    return getslice(start, stop, unitStep);
}
/**
 * Class defining the behaviour of bytearray with respect to slice assignment, etc., which
 * differs from the default (list) behaviour in small ways.
 */
private class IndexDelegate extends PySequence.DefaultIndexDelegate {

    /**
     * bytearray treats assignment of a zero-length object to a slice as equivalent to deletion,
     * unlike list, even for an extended slice.
     */
    @Override
    public void checkIdxAndSetSlice(PySlice slice, PyObject value) {
        final boolean emptyValue = value.__len__() == 0;
        if (emptyValue) {
            // Treat as deletion
            checkIdxAndDelItem(slice);
            return;
        }
        // Proceed as default
        super.checkIdxAndSetSlice(slice, value);
    }

    /** Forward the four slice indices to the enclosing object's four-argument delslice. */
    @Override
    protected void delSlice(int[] indices) {
        BaseBytes.this.delslice(indices[0], indices[1], indices[2], indices[3]);
    }
};
/*
* ============================================================================================
* Support for Python API common to mutable and immutable subclasses
* ============================================================================================
*/
// The Python length is the number of bytes in use (not the capacity of the storage array).
@Override
public int __len__() {
    return this.size;
}
/**
 * Comparison function between a byte array and a buffer of bytes exported by some other object,
 * presented as a PyBuffer, returning 1, 0 or -1 as a&gt;b, a==b, or a&lt;b respectively. The
 * comparison is by value, using Python unsigned byte conventions, left-to-right (low to high
 * index). When one operand is a prefix of the other, the shorter one is the lesser, so
 * trailing zero bytes are significant and an empty array is less than every non-empty b.
 *
 * @param a left-hand array in the comparison
 * @param b right-hand wrapped object in the comparison
 * @return 1, 0 or -1 as a&gt;b, a==b, or a&lt;b respectively
 */
private static int compare(BaseBytes a, PyBuffer b) {
    final int aLen = a.size;
    final int bLen = b.getLen();
    final int common = Math.min(aLen, bLen);
    // The first differing byte within the common length decides the result.
    for (int k = 0; k < common; k++) {
        final int aVal = a.storage[a.offset + k] & 0xff;
        final int bVal = b.intAt(k);
        if (aVal != bVal) {
            return aVal < bVal ? -1 : 1;
        }
    }
    // One is a prefix of the other (or they are identical): the length decides.
    if (aLen == bLen) {
        return 0;
    }
    return aLen > bLen ? 1 : -1;
}
/**
 * Comparison function between byte array types and any other object. The "rich comparison"
 * operators are based on this.
 *
 * @param b object to compare with
 * @return 1, 0 or -1 as this&gt;b, this==b, or this&lt;b respectively, or -2 if the comparison
 *         is not implemented
 */
private synchronized int basebytes_cmp(PyObject b) {
    if (this == b) {
        // Same object: quick result
        return 0;
    }
    // Try to get a byte-oriented view; it is released automatically on exit.
    try (PyBuffer view = getView(b)) {
        // No view signifies a type mis-match. See PyObject._cmp_unsafe() and related code.
        return view == null ? -2 : compare(this, view);
    }
}
/**
 * Fail-fast comparison function between byte array types and any other object, for when the
 * test is only for equality: operands of different length are reported unequal without
 * examining their content.
 *
 * @param b object to compare with
 * @return 0 if this==b, or +1 or -1 if this!=b, or -2 if the comparison is not implemented
 */
private synchronized int basebytes_cmpeq(PyObject b) {
    if (this == b) {
        // Same object: quick result
        return 0;
    }
    if (b instanceof PyUnicode) {
        return -2;
    }
    // Try to get a byte-oriented view; it is released automatically on exit.
    try (PyBuffer view = getView(b)) {
        if (view == null) {
            // Signifies a type mis-match. See PyObject._cmp_unsafe() and related code.
            return -2;
        }
        // Different size: can't be equal, and we don't care which is bigger. Otherwise
        // compare this with the other object viewed as a buffer.
        return view.getLen() == size ? compare(this, view) : 1;
    }
}
/**
 * Implementation of __eq__ (equality) operator, capable of comparison with another byte array.
 * Delegates to {@link #richCompare(PyObject, CompareOp)} with {@code CompareOp.EQ}.
 *
 * @param other Python object to compare with
 * @return result of the rich comparison (NOTE(review): presumably null when the comparison is
 *         not implemented for the other type -- confirm against CompareOp.bool)
 */
final PyObject basebytes___eq__(PyObject other) {
    final CompareOp op = CompareOp.EQ;
    return richCompare(other, op);
}
/**
 * Implementation of __ne__ (not equals) operator, capable of comparison with another byte
 * array. Delegates to {@link #richCompare(PyObject, CompareOp)} with {@code CompareOp.NE}.
 *
 * @param other Python object to compare with
 * @return result of the rich comparison (NOTE(review): presumably null when the comparison is
 *         not implemented for the other type -- confirm against CompareOp.bool)
 */
final PyObject basebytes___ne__(PyObject other) {
    final CompareOp op = CompareOp.NE;
    return richCompare(other, op);
}
// Three-way compare with the other object (-2 meaning "not implemented"), then let the
// operator translate that code into the Python result.
@Override
public PyObject richCompare(PyObject other, CompareOp op) {
    return op.bool(basebytes_cmp(other));
}
/**
 * Implementation of __lt__ (less than) operator, capable of comparison with another byte array.
 * Delegates to {@link #richCompare(PyObject, CompareOp)} with {@code CompareOp.LT}.
 *
 * @param other Python object to compare with
 * @return result of the rich comparison (NOTE(review): presumably null when the comparison is
 *         not implemented for the other type -- confirm against CompareOp.bool)
 */
final PyObject basebytes___lt__(PyObject other) {
    final CompareOp op = CompareOp.LT;
    return richCompare(other, op);
}
/**
 * Implementation of __le__ (less than or equal to) operator, capable of comparison with another
 * byte array. Comparison with an invalid type returns null.
 *
 * @param other Python object to compare with
 * @return Python boolean result or null if not implemented for the other type.
 */
final PyObject basebytes___le__(PyObject other) {
    final int cmp = basebytes_cmp(other);
    if (cmp <= -2) {
        // Comparison not implemented for this type
        return null;
    }
    return cmp > 0 ? Py.False : Py.True;
}
/**
 * Implementation of __ge__ (greater than or equal to) operator, capable of comparison with
 * another byte array. Comparison with an invalid type returns null.
 *
 * @param other Python object to compare with
 * @return Python boolean result or null if not implemented for the other type.
 */
final PyObject basebytes___ge__(PyObject other) {
    final int cmp = basebytes_cmp(other);
    if (cmp <= -2) {
        // Comparison not implemented for this type
        return null;
    }
    return cmp >= 0 ? Py.True : Py.False;
}
/**
 * Implementation of __gt__ (greater than) operator, capable of comparison with another byte
 * array. Comparison with an invalid type returns null.
 *
 * @param other Python object to compare with
 * @return Python boolean result or null if not implemented for the other type.
 */
final PyObject basebytes___gt__(PyObject other) {
    final int cmp = basebytes_cmp(other);
    if (cmp <= -2) {
        // Comparison not implemented for this type
        return null;
    }
    return cmp > 0 ? Py.True : Py.False;
}
/**
 * Search for the target in this byte array, returning true if found and false if not. The
 * target must either be convertible to an integer in the Python byte range, or be capable of
 * being viewed as a byte array.
 *
 * @param target byte value, or byte sequence, to search for
 * @return true iff found
 */
protected final synchronized boolean basebytes___contains__(PyObject target) {
    if (!target.isIndex()) {
        // Caller is treating this as a byte-string and looking for substring 'target'
        try (PyBuffer pattern = getViewOrError(target)) {
            final Finder finder = new Finder(pattern);
            finder.setText(this);
            return finder.nextIndex() >= 0;
        }
    }
    // Caller is treating this as an array of integers, so the value has to be in range.
    final byte wanted = byteCheck(target.asIndex());
    return index(wanted) >= 0;
}
/**
 * Almost ready-to-expose implementation serving both Python
 * {@code startswith( prefix [, start [, end ]] )} and
 * {@code endswith( suffix [, start [, end ]] )}. An extra boolean argument specifies which
 * to implement on a given call, that is, whether the target is a suffix or prefix. The target
 * may also be a tuple of targets, in which case one matching member is enough.
 *
 * @param target prefix or suffix sequence to find (of a type viewable as a byte sequence) or a
 *            tuple of those.
 * @param ostart of slice to search.
 * @param oend of slice to search.
 * @param endswith true if we are doing endswith, false if startswith.
 * @return true if and only if the slice of this byte array starts or ends (as selected) with
 *         (one of) target.
 */
protected final synchronized boolean basebytes_starts_or_endswith(PyObject target,
        PyObject ostart, PyObject oend, boolean endswith) {
    /*
     * This cheap 'endswith' trick saves us from maintaining two almost identical methods and
     * mirrors CPython's _bytearray_tailmatch().
     */
    final int[] range = indicesEx(ostart, oend); // [ start, end, 1, end-start ]
    final int pos = range[0];
    final int length = range[3];
    if (!(target instanceof PyTuple)) {
        // A single target: a failure to view it as bytes is reported in terms of the method.
        final String fmt = (endswith ? "endswith" : "startswith")
                + " first arg must be bytes or a tuple of bytes, not '%s'";
        return match(target, pos, length, endswith, fmt);
    }
    // target is a tuple of suffixes/prefixes and only one need match
    for (PyObject candidate : ((PyTuple)target).getList()) {
        if (match(candidate, pos, length, endswith)) {
            return true;
        }
    }
    return false; // None of them matched
}
/**
 * Test whether the slice {@code [pos:pos+n]} of this byte array matches the given target
 * object (accessed as a {@link PyBuffer}) at one end or the other. That is, if
 * {@code endswith==false} test whether the bytes from index {@code pos} match all the bytes
 * of the target; if {@code endswith==true} test whether the bytes up to index
 * {@code pos+n-1} match all the bytes of the target. By implication, the test returns false
 * if the target is bigger than n. The caller guarantees that the slice {@code [pos:pos+n]}
 * is within the byte array. A target that cannot be viewed as bytes produces a TypeError
 * with a standard message.
 *
 * @param target pattern to match
 * @param pos index at which the slice begins
 * @param n length of the slice
 * @param endswith true to match at the end of the slice, false to match at its start
 * @return true if and only if the selected end of the slice matches the given target
 */
private boolean match(PyObject target, int pos, int n, boolean endswith) {
    final String standardMessage = "a bytes-like object is required, not '%s'";
    return match(target, pos, n, endswith, standardMessage);
}
// Test whether the target matches the start (endswith==false) or the end (endswith==true) of
// the slice [pos:pos+n]. The error argument is the format of the TypeError message used when
// the target cannot be viewed as bytes.
private boolean match(PyObject target, int pos, int n, boolean endswith, String error) {
    // Error if not something we can treat as a view of bytes
    try (PyBuffer pattern = getViewOrError(target, error)) {
        final int len = pattern.getLen();
        if (len > n) {
            // The target is bigger than the slice, so it cannot match at either end.
            return false;
        }
        // Storage index of the first byte to compare: at the start of the slice, or len
        // bytes before its end.
        final int base = offset + pos + (endswith ? n - len : 0);
        // Last resort: we have actually to look at the bytes!
        for (int i = 0; i < len; i++) {
            if (storage[base + i] != pattern.byteAt(i)) {
                return false;
            }
        }
        return true; // They must all have matched
    }
}
/**
* Helper to convert [ostart:oend] to integers with slice semantics relative to this byte array.
* The returned array of ints contains [ start, end, 1, end-start ].
*
* @param ostart of slice to define.
* @param oend of slice to define.
* @return [ start, end, 1, end-start ]
*/
private int[] indicesEx(PyObject ostart, PyObject oend) {
    // Build the slice [ostart:oend] (no step given) and resolve it against the current size,
    // giving [ start, end, 1, end-start ].
    return new PySlice(ostart, oend, null).indicesEx(size);
}
/**
* Present the bytes of a byte array, with no decoding, as a Java String. The bytes are treated
* as unsigned character codes, and copied to the characters of a String with no change
* in ordinal value. This could also be described as 'latin-1' or 'ISO-8859-1' decoding of the
* byte array to a String, since this character encoding is numerically equal to Unicode.
*
* @return the byte array as a String
*/
@Override
public synchronized String asString() {
    final char[] chars = new char[size];
    // Widen each byte to its unsigned value (0..255) and use that as the character code.
    for (int i = 0; i < size; i++) {
        chars[i] = (char)(storage[offset + i] & 0xff);
    }
    return new String(chars);
}
/**
* Decode the byte array to a Unicode string according to the default encoding. The returned
* PyObject should be a PyUnicode, since the default codec is well-behaved.
*
* @return object containing the decoded characters
*/
public PyObject decode() {
    // Naming neither a codec nor an error policy leaves both to their defaults.
    final String encoding = null;
    final String errors = null;
    return decode(encoding, errors);
}
/**
* Decode the byte array to a Unicode string according to the specified encoding and default
* error policy. The returned PyObject will usually be a PyUnicode, but in practice
* it is whatever the decode method of the codec decides.
*
* @param encoding the name of the codec (uses default codec if null)
* @return object containing the decoded characters
*/
public PyObject decode(String encoding) {
    // No error policy is named by this overload, so null is passed for it.
    final String errors = null;
    return decode(encoding, errors);
}
/**
* Decode the byte array to a Unicode string according to the specified encoding and error
* policy. The returned PyObject will usually be a PyUnicode, but in practice it is
* whatever the decode method of the codec decides.
*
* @param encoding the name of the codec (uses default codec if null)
* @param errors the name of the error policy (uses 'strict' if null)
* @return object containing the decoded characters
*/
public PyObject decode(String encoding, String errors) {
    /*
     * The decode method of a codec is given a PyBytes here, built from the String in which
     * each character carries the value of one byte of this array (see asString()).
     */
    return codecs.decode(new PyBytes(asString()), encoding, errors);
}
/**
* Ready-to-expose implementation of decode( [ encoding [, errors ]] )
*
* @param args Python argument list
* @param keywords Associated keywords
* @return object containing the decoded characters
*/
protected final PyObject basebytes_decode(PyObject[] args, String[] keywords) {
    // Accept decode([encoding[, errors]]) with the arguments given by position or keyword.
    final ArgParser parser = new ArgParser("decode", args, keywords, "encoding", "errors");
    // When absent, encoding falls back to "utf-8" and errors to null.
    return decode(parser.getString(0, "utf-8"), parser.getString(1, null));
}
/**
* Convenience method to create a TypeError PyException with the message
* "can't concat {type} to {toType}"
*
* @param type
* @param toType
* @return PyException (TypeError) as detailed
*/
static PyException ConcatenationTypeError(PyType type, PyType toType) {
    // The message names the two types involved, in the order given.
    final String fromName = type.fastGetName();
    final String toName = toType.fastGetName();
    return Py.TypeError(String.format("can't concat %s to %s", fromName, toName));
}
/**
* Support for pickling byte arrays: reduce a byte array to the actual type, arguments for
* (re-)construction of the object, and the dictionary of any user-defined sub-class.
*
* @return PyTuple that is first stage in pickling byte array
*/
@Override
public PyObject __reduce__() {
    // The ready-to-expose implementation does the work; its tuple is returned unchanged.
    final PyTuple reduced = basebytes___reduce__();
    return reduced;
}
/**
* Ready-to-expose implementation of Python __reduce__() method used in pickle (persistence) of
* Python objects.
*
* @return required tuple of type, arguments needed by init, and any user-added attributes.
*/
final PyTuple basebytes___reduce__() {
    // Arguments for re-construction: the bytes as a one-character-per-byte string, and the
    // name of the encoding that turns that string back into the same bytes.
    final PyObject args = new PyTuple(new PyUnicode(this.asString()), getPickleEncoding());
    // The instance dictionary, or None when no __dict__ attribute is found.
    PyObject dict = __findattr__("__dict__");
    if (dict == null) {
        dict = Py.None;
    }
    return new PyTuple(getType(), args, dict);
}
/** Cached name of the pickle encoding: null until {@link #getPickleEncoding()} is first called. */
private static PyUnicode PICKLE_ENCODING;
/**
 * Name the encoding effectively used in __reduce__() support for pickling: this choice is
 * hard-coded in CPython as "latin-1". The object is created on first use and then re-used.
 */
private static final PyUnicode getPickleEncoding() {
    PyUnicode encoding = PICKLE_ENCODING;
    if (encoding == null) {
        // First use: create the name and remember it for later calls.
        encoding = new PyUnicode("latin-1");
        PICKLE_ENCODING = encoding;
    }
    return encoding;
}
/*
* ============================================================================================
* Python API for find and replace operations
* ============================================================================================
*
* A large part of the CPython bytearray.c is devoted to replace( old, new [, count ] ). The
* special section here reproduces that in Java, but whereas CPython makes heavy use of the
* buffer API and C memcpy(), we use PyBuffer.copyTo. The logic is much the same, however, even
* down to variable names.
*/
/**
* The very simplest kind of find operation: return the index in the byte array of the first
* occurrence of the byte value b.
*
* @param b byte to search for
* @return index in the byte array (0..size-1) or -1 if not found
*/
protected int index(byte b) {
    // Scan the active part of storage; i is the index relative to the start of the byte array.
    for (int i = 0; i < size; i++) {
        if (storage[offset + i] == b) {
            return i;
        }
    }
    return -1; // b does not occur
}
/**
* This class implements the Boyer-Moore-Horspool Algorithm for finding a pattern in text, applied
* to byte arrays. The BMH algorithm uses a table of bad-character skips derived from the
* pattern. The bad-character skips table tells us how far from the end of the pattern is a byte
* that might match the text byte currently aligned with the end of the pattern. For example,
* suppose the pattern ("panama") is at position 6:
*
*
* 1 2 3
* 0123456789012345678901234567890
* Text: a man, a map, a panama canal
* Pattern: panama
*
*
* This puts the 'p' of 'map' against the last byte 'a' of the pattern. Rather than testing the
* pattern, we will look up 'p' in the skip table. There is a 'p' just 5 steps from the end of
* the pattern, so we will move the pattern 5 places to the right before trying to match it.
* This allows us to move in large strides through the text.
*/
protected static class Finder {
    /**
     * Construct a Finder object that may be used (repeatedly) to find matches with the pattern
     * in text (arrays of bytes).
     *
     * @param pattern A view that presents the pattern as an array of bytes
     */
    public Finder(PyBuffer pattern) {
        this.pattern = pattern;
    }

    /**
     * Mask defining how many of the bits of each byte are used when looking up the skip, used
     * like: skip = skipTable[MASK & currentByte].
     */
    private static final byte MASK = 0x1f;

    /**
     * Table for looking up the skip, used like: skip = skipTable[MASK & currentByte]. It remains
     * null until setText() is called with a pattern longer than one byte.
     */
    protected int[] skipTable = null;

    /**
     * This method creates a compressed table of bad-character skips from the pattern. The entry
     * for a given byte value tells us how far it is from the end of the pattern, being 0 for
     * the actual last byte, or is equal to the length of the pattern if the byte does not occur
     * in the pattern. The table is compressed in that only the least-significant bits of the
     * byte index are used. In the case where 5 bits are used, the table is only 32 elements
     * long, rather than (as it might be) 256 bytes, the number of distinct byte values.
     *
     * @return the skip table for the pattern
     */
    protected int[] calculateSkipTable() {
        int[] skipTable = new int[MASK + 1];
        int m = pattern.getLen();
        // Default skip is the pattern length: for bytes not in the pattern.
        Arrays.fill(skipTable, m);
        // For each byte in the pattern, make an entry for how far it is from the end.
        // The last occurrence of the byte value prevails.
        for (int i = 0; i < m; i++) {
            skipTable[MASK & pattern.byteAt(i)] = m - i - 1;
        }
        return skipTable;
    }

    /**
     * Set the text to be searched in successive calls to nextIndex(), where the text is the
     * entire array text[].
     *
     * @param text to search
     */
    public void setText(byte[] text) {
        setText(text, 0, text.length);
    }

    /**
     * Set the text to be searched in successive calls to nextIndex(), where the text is the
     * entire byte array text.
     *
     * @param text to search
     */
    public void setText(BaseBytes text) {
        setText(text.storage, text.offset, text.size);
    }

    /**
     * Set the text to be searched in successive calls to nextIndex(), where the text is
     * effectively only the bytes text[start] to text[start+size-1] inclusive.
     *
     * @param text to search
     * @param start first position to consider
     * @param size number of bytes within which to match
     */
    public void setText(byte[] text, int start, int size) {
        this.text = text;
        this.left = start;
        right = start + size - pattern.getLen() + 1; // Last pattern position + 1
        /*
         * We defer computing the table from construction to this point mostly because
         * calculateSkipTable() may be overridden, and we want to use the right one.
         */
        if (pattern.getLen() > 1 && skipTable == null) {
            skipTable = calculateSkipTable();
        }
    }

    protected final PyBuffer pattern;
    protected byte[] text = emptyStorage; // in case we forget to setText()
    protected int left = 0; // Leftmost pattern position to use
    protected int right = 0; // Rightmost pattern position + 1

    /**
     * Return the index in the text array where the preceding pattern match ends (one beyond the
     * last character matched), which may also be one beyond the effective end of the text.
     * Between a call to setText() and the first call to nextIndex() return the start position.
     *
     * f.setText(text);
     * int p = f.nextIndex();
     * int q = f.currIndex();
     * // The range text[p:q] is the matched segment.
     *
     * @return index beyond end of last match found, i.e. where search will resume
     */
    public int currIndex() {
        return left;
    }

    /**
     * Find the next index in the text array where the pattern starts. Successive calls to
     * nextIndex() return the successive (non-overlapping) occurrences of the pattern in the
     * text. An empty pattern matches once at each position from the start of the text up to and
     * including the position just after its last byte.
     *
     * @return matching index or -1 if no (further) occurrences found
     */
    public int nextIndex() {
        int m = pattern.getLen();
        if (skipTable != null) { // ... which it will be if m>1 and setText() was called
            /*
             * Boyer-Moore-Horspool Algorithm using a Bloom array. Based on CPython stringlib,
             * but without avoiding a proper bad character skip array.
             */
            for (int i = left; i < right; /* i incremented in loop */) {
                /*
                 * Unusually, start by looking up the skip. If text[i+m-1] matches, skip==0,
                 * although it will also be zero if only the least-significant bits match.
                 */
                int skip = skipTable[MASK & text[i + (m - 1)]];
                if (skip == 0) {
                    // Possible match, but we only used the least-significant bits: check all
                    int j, k = i;
                    for (j = 0; j < m; j++) { // k = i + j
                        if (text[k++] != pattern.byteAt(j)) {
                            break;
                        }
                    }
                    // If we tested all m bytes, that's a match.
                    if (j == m) {
                        left = k; // Start at text[i+m] next time we're called
                        return i;
                    }
                    // It wasn't a match: advance by one
                    i += 1;
                } else {
                    /*
                     * The last byte of the pattern does not match the corresponding text byte.
                     * Skip tells us how far back down the pattern is a potential match, so how
                     * far it is safe to advance before we do another last-byte test.
                     */
                    i += skip;
                }
            }
        } else if (m == 1) {
            // Special case of single byte search
            byte b = pattern.byteAt(0);
            for (int i = left; i < right; i++) {
                if (text[i] == b) {
                    left = i + 1; // Start at text[i+1] next time we're called
                    return i;
                }
            }
        } else {
            /*
             * Special case of search for empty (m==0) byte string. As in the other branches,
             * right is one beyond the last valid pattern position. With m==0 that last position
             * is the end of the text itself, so the test must be i < right: accepting i == right
             * would report a match one place beyond the end of the text.
             */
            int i = left;
            if (i < right) {
                // It is an honorary match - even when the remaining text is empty
                left = i + 1;
                return i;
            }
        }
        // All sections fall out here if they do not find a match (even m==0)
        return -1;
    }

    /**
     * Count the non-overlapping occurrences of the pattern in the text.
     *
     * @param text to search
     * @return number of occurrences
     */
    public int count(byte[] text) {
        return count(text, 0, text.length, Integer.MAX_VALUE);
    }

    /**
     * Count the non-overlapping occurrences of the pattern in the text, where the text is
     * effectively only the bytes text[start] to text[start+size-1] inclusive.
     *
     * @param text to search
     * @param start first position to consider
     * @param size number of bytes within which to match
     * @return number of occurrences
     */
    public int count(byte[] text, int start, int size) {
        return count(text, start, size, Integer.MAX_VALUE);
    }

    /**
     * Count the non-overlapping occurrences of the pattern in the text, where the text is
     * effectively only the bytes text[start] to text[start+size-1].
     *
     * @param text to search
     * @param start first position to consider
     * @param size number of bytes within which to match
     * @param maxcount limit to number of occurrences to find
     * @return number of occurrences
     */
    public int count(byte[] text, int start, int size, int maxcount) {
        setText(text, start, size);
        int count = 0;
        // Stop at the limit without searching again, or when the text is exhausted.
        while (count < maxcount && nextIndex() >= 0) {
            count++;
        }
        return count;
    }
}
/**
* This class is the complement of {@link Finder} and implements the Boyer-Moore-Horspool
* Algorithm adapted for right-to-left search for a pattern in byte arrays.
*/
protected static class ReverseFinder extends Finder {
    /**
     * Construct a ReverseFinder object that may be used (repeatedly) to find matches with the
     * pattern in text (arrays of bytes).
     *
     * @param pattern A view that presents the pattern as an array of bytes
     */
    public ReverseFinder(PyBuffer pattern) {
        super(pattern);
    }

    /**
     * Mask defining how many of the bits of each byte are used when looking up the skip, used
     * like: skip = skipTable[MASK & currentByte]. This constant is declared separately from the
     * private one in {@link Finder}, so calculateSkipTable() and nextIndex() are both overridden
     * here and consistently use this local definition.
     */
    private static final byte MASK = 0x1f;

    /**
     * This method creates a compressed table of bad-character skips from the pattern for
     * reverse-searching. The entry for a given byte value tells us how far it is from the start
     * of the pattern, being 0 for the actual first byte, or is equal to the length of the
     * pattern if the byte does not occur in the pattern. The table is compressed in that only
     * the least-significant bits of the byte index are used. In the case where 5 bits are used,
     * the table is only 32 elements long, rather than (as it might be) 256 bytes, the number of
     * distinct byte values.
     */
    @Override
    protected int[] calculateSkipTable() {
        final int m = pattern.getLen();
        final int[] table = new int[MASK + 1];
        // Bytes that do not occur in the pattern skip by the whole pattern length.
        Arrays.fill(table, m);
        // Work from the end of the pattern towards its start, so that for each table entry the
        // occurrence nearest the start is written last and therefore prevails.
        for (int i = m - 1; i >= 0; i--) {
            table[MASK & pattern.byteAt(i)] = i;
        }
        return table;
    }

    /**
     * Return the effective end of the text, as it stands after the preceding call to
     * nextIndex() (or after setText() if there has been none).
     *
     * @return the new effective end of the text
     */
    @Override
    public int currIndex() {
        return right + pattern.getLen() - 1;
    }

    /**
     * Find the next index in the text array where the pattern starts, but working backwards.
     * Successive calls to nextIndex() return the successive (non-overlapping) occurrences of the
     * pattern in the text.
     *
     * @return matching index or -1 if no (further) occurrences found
     */
    @Override
    public int nextIndex() {
        final int m = pattern.getLen();
        if (skipTable != null) {
            /*
             * Boyer-Moore-Horspool Algorithm using a Bloom array, run right to left. Based on
             * CPython stringlib, but without avoiding a proper bad character skip array.
             */
            int i = right - 1;
            while (i >= left) {
                // Look up the skip first: it is zero if text[i] matches the first pattern byte,
                // or merely agrees with it in the least-significant bits.
                final int skip = skipTable[MASK & text[i]];
                if (skip != 0) {
                    // First byte cannot match here: skip says how far left it is safe to move.
                    i -= skip;
                } else if (matchesAt(i, m)) {
                    right = i - m + 1; // Start at text[i-m] next time we're called
                    return i;
                } else {
                    // Only the low bits agreed, or a later byte differed: move left by one.
                    i--;
                }
            }
        } else if (m == 1) {
            // Special case of single byte search
            final byte b = pattern.byteAt(0);
            for (int i = right - 1; i >= left; i--) {
                if (text[i] == b) {
                    right = i; // Start at text[i-1] next time we're called
                    return i;
                }
            }
        } else {
            // Special case of search for empty (m==0) byte string: an honorary match at each
            // remaining position, working down from the one just before right.
            final int i = right - 1;
            if (i >= left) {
                right = i;
                return i;
            }
        }
        // All sections fall out here if they do not find a match (even m==0)
        return -1;
    }

    /**
     * Compare all m bytes of the pattern with the text starting at index i.
     *
     * @param i index in text at which the pattern is aligned
     * @param m length of the pattern
     * @return true if and only if text[i:i+m] equals the pattern
     */
    private boolean matchesAt(int i, int m) {
        for (int j = 0; j < m; j++) {
            if (text[i + j] != pattern.byteAt(j)) {
                return false;
            }
        }
        return true;
    }
}
/**
* Class for quickly determining whether a given byte is a member of a defined set. This class
* provides an efficient mechanism when a lot of bytes must be tested against the same set.
*/
protected static class ByteSet {
    /** Membership bitmap: bit (v & 63) of map[v >> 6] is set when byte value v is a member. */
    protected final long[] map = new long[4]; // 4 x 64 = 256 bits

    /**
     * Construct a set from a byte oriented view.
     *
     * @param bytes to be in the set.
     */
    public ByteSet(PyBuffer bytes) {
        final int count = bytes.getLen();
        for (int i = 0; i < count; i++) {
            // NOTE(review): assumes intAt() yields the unsigned value 0..255 - confirm; any
            // other value would index outside map.
            final int value = bytes.intAt(i);
            // Shifting a long uses only the low 6 bits of the shift distance (JLS).
            map[value >> 6] |= 1L << value;
        }
    }

    /**
     * Test to see if the byte is in the set.
     *
     * @param b value of the byte
     * @return true iff b is in the set
     */
    public boolean contains(byte b) {
        // Select the word by the unsigned value; the shift needs only the low 6 bits of b,
        // which are the same whether b is read as signed or unsigned.
        return (map[(b & 0xff) >> 6] & (1L << b)) != 0;
    }

    /**
     * Test to see if the byte (expressed as an integer) is in the set.
     *
     * @param b integer value of the byte
     * @return true iff b is in the set
     * @throws ArrayIndexOutOfBoundsException if b>255 or b<0
     */
    public boolean contains(int b) {
        return (map[b >> 6] & (1L << b)) != 0;
    }
}
/**
* Convenience routine producing a ValueError for "empty separator" if the PyBuffer is of an
* object with zero length, and returning the length otherwise.
*
* @param separator view to test
* @return the length of the separator
* @throws PyException if the PyBuffer is zero length
*/
protected final static int checkForEmptySeparator(PyBuffer separator) throws PyException {
    final int length = separator.getLen();
    if (length != 0) {
        return length;
    }
    // A zero-length separator is rejected.
    throw Py.ValueError("empty separator");
}
/**
* Return the index [0..size-1] of the leftmost byte not matching any in byteSet,
* or size if they are all strippable.
*
* @param byteSet set of byte values to skip over
* @return index of first unstrippable byte
*/
protected int lstripIndex(ByteSet byteSet) {
    // Advance over leading bytes that are in byteSet, stopping at the first that is not, or
    // at size if every byte can be stripped.
    int i = 0;
    while (i < size && byteSet.contains(storage[offset + i])) {
        i++;
    }
    return i;
}
/**
* Return the index [0..size-1] of the leftmost non-whitespace byte, or size if
* they are all whitespace.
*
* @return index of first non-whitespace byte
*/
protected int lstripIndex() {
    // Advance over leading whitespace, stopping at the first non-whitespace byte, or at size
    // if the whole array is whitespace. Bytes are tested by their unsigned value.
    int i = 0;
    while (i < size && Character.isWhitespace(storage[offset + i] & 0xff)) {
        i++;
    }
    return i;
}
/**
* Return the index [0..size-1] such that all bytes from here to the right match one in
* byteSet, that is, the index of the matching tail, or size if there
* is no matching tail byte.
*
* @param byteSet set of byte values to strip
* @return index of strippable tail
*/
protected int rstripIndex(ByteSet byteSet) {
    // Retreat over trailing bytes that are in byteSet: end is the index just beyond the last
    // byte to be kept, which is 0 if every byte can be stripped.
    int end = size;
    while (end > 0 && byteSet.contains(storage[offset + end - 1])) {
        end--;
    }
    return end;
}
/**
* Return the index [0..size-1] such that all bytes from here to the right are whitespace, that
* is, the index of the whitespace tail, or size if there is no whitespace tail.
*
* @return index of strippable tail
*/
protected int rstripIndex() {
    // Run down the storage until the next byte is non-whitespace (or we hit the start).
    // Bytes are tested by their unsigned value.
    for (int right = offset + size; right > offset; --right) {
        if (!Character.isWhitespace(storage[right - 1] & 0xff)) {
            // storage[right-1] is the last byte to keep: the whitespace tail begins after it.
            return right - offset;
        }
    }
    // We went through the whole array and every byte is whitespace, so the strippable tail
    // begins at index 0. (Returning size here would claim there is nothing to strip, and is
    // inconsistent with rstripIndex(ByteSet).)
    return 0;
}
/**
* Ready-to-expose implementation of Python count( sub [, start [, end ]] ). Return
* the number of non-overlapping occurrences of sub in the range [start, end].
* Optional arguments start and end (which may be null or
* Py.None ) are interpreted as in slice notation.
*
* @param sub bytes to find
* @param ostart of slice to search
* @param oend of slice to search
* @return count of occurrences of sub within this byte array
*/
final int basebytes_count(PyObject sub, PyObject ostart, PyObject oend) {
    // View sub as bytes; the view is released on leaving the try block.
    try (PyBuffer pattern = getViewOrError(sub)) {
        final Finder finder = new Finder(pattern);
        // Resolve [ostart:oend] against this array: [ start, end, 1, end-start ]
        final int[] bounds = indicesEx(ostart, oend);
        // Count within that slice, expressed as a position and length in storage.
        final int begin = offset + bounds[0];
        final int length = bounds[3];
        return finder.count(storage, begin, length);
    }
}
/**
* Ready-to-expose implementation of Python find( sub [, start [, end ]] ). Return
* the lowest index in the byte array where byte sequence sub is found, such that
* sub is contained in the slice [start:end]. Arguments
* start and end (which may be null or
* Py.None ) are interpreted as in slice notation. Return -1 if sub is
* not found.
*
* @param sub bytes to find
* @param ostart of slice to search
* @param oend of slice to search
* @return index of start of occurrence of sub within this byte array
*/
final int basebytes_find(PyObject sub, PyObject ostart, PyObject oend) {
    // View sub as bytes; the view is released on leaving the try block.
    try (PyBuffer pattern = getViewOrError(sub)) {
        // A plain (forward) Finder makes find() report the lowest matching index.
        return find(new Finder(pattern), ostart, oend);
    }
}
/**
* Almost ready-to-expose implementation of Python class method fromhex(string).
* This assigns a value to the passed byte array object from a string of two-digit hexadecimal
* numbers. Spaces (but not whitespace in general) are acceptable around the numbers, not
* within. Non-hexadecimal characters or un-paired hex digits raise a ValueError.
* Example:
*
*
* bytearray.fromhex('B9 01EF') -> bytearray(b'\xb9\x01\xef')
*
*
* @param result to receive the decoded values
* @param hex specification of the bytes
* @throws PyException (ValueError) if non-hex characters, or isolated ones, are encountered
*/
static void basebytes_fromhex(BaseBytes result, String hex) throws PyException {
    final int hexlen = hex.length();
    // Two characters make one byte, so this is enough (more than enough if hex has spaces).
    result.newStorage(hexlen / 2);
    // We might produce a ValueError with this message.
    final String fmt = "non-hexadecimal number found in fromhex() arg at position %d";
    final byte[] out = result.storage;
    int outPos = result.offset; // Next position to write in out[]
    int i = 0; // Next character to read from hex
    while (i < hexlen) {
        char c = hex.charAt(i++);
        if (c == ' ') {
            // Spaces are allowed between (but not within) two-digit numbers.
            continue;
        }
        /*
         * c ought to be the first of a pair of hex digits. The second is read without a bounds
         * check: running off the end of the string is detected by the exception it raises.
         */
        try {
            // hexDigit throws IllegalArgumentException if non-hexadecimal character found
            final int high = hexDigit(c);
            c = hex.charAt(i++); // Throws IndexOutOfBoundsException if no second digit
            final int low = hexDigit(c);
            out[outPos++] = (byte)((high << 4) + low);
        } catch (IllegalArgumentException e) {
            // i is one beyond the offending character.
            throw Py.ValueError(String.format(fmt, i - 1));
        } catch (IndexOutOfBoundsException e) {
            // i was advanced twice, so the unpaired digit is at i-2.
            throw Py.ValueError(String.format(fmt, i - 2));
        }
    }
    // The number of bytes actually produced becomes the size of the result.
    result.size = outPos - result.offset;
}
/**
* Translate one character to its hexadecimal value.
*
* @param c to translate
* @return value 0-15
* @throws IllegalArgumentException if c is not '0'-'9', 'A'-'F' or 'a'-'f'.
*/
private static int hexDigit(char c) throws IllegalArgumentException {
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    /*
     * Fold lower case onto upper case by clearing only bit 5 (0x20), the ASCII case bit. The
     * higher bits of the char must be kept: masking with 0xDF alone would also discard bits
     * 8-15, so that a non-ASCII character such as U+0141 would be mistaken for 'A'.
     */
    final int upper = c & ~0x20;
    if (upper >= 'A' && upper <= 'F') {
        return upper - 'A' + 10;
    }
    throw new IllegalArgumentException("not a hexadecimal digit");
}
/**
* Almost ready-to-expose implementation of Python join(iterable).
*
* @param iter iterable of objects capable of being regarded as byte arrays
* @return the byte array that is their join
*/
final synchronized PyByteArray basebytes_join(Iterable extends PyObject> iter) {
Listpartition(sep), returning a 3-tuple of byte arrays (of
* the same type as this).
*
* Split the string at the first occurrence of sep, and return a 3-tuple containing
* the part before the separator, the separator itself, and the part after the separator. If the
* separator is not found, return a 3-tuple containing the string itself, followed by two empty
* byte arrays.
* PyTuple returned by this method are instances of the same
* actual type as this.
*
* @param sep the separator on which to partition this byte array
* @return a tuple of (head, separator, tail)
*/
public PyTuple partition(PyObject sep) {
    // The ready-to-expose implementation does the work; its tuple is returned unchanged.
    final PyTuple parts = basebytes_partition(sep);
    return parts;
}
/**
* Ready-to-expose implementation of Python partition(sep).
* PyTuple returned by this method are instances of the same
* actual type as this.
*
* @param sep the separator on which to partition this byte array
* @return a tuple of (head, separator, tail)
*/
final synchronized PyTuple basebytes_partition(PyObject sep) {
    // View the separator as a byte array (or error if we can't)
    try (PyBuffer separator = getViewOrError(sep)) {
        // An empty separator is an error; otherwise we need its length.
        final int sepLength = checkForEmptySeparator(separator);
        // Search this byte array for the separator: only the first occurrence is of interest.
        final Finder finder = new Finder(separator);
        finder.setText(this);
        final int found = finder.nextIndex(); // An index in storage, or -1
        if (found < 0) {
            // Not found: choose values leading to ([0:size], '', '')
            return partition(size, size);
        }
        // Found: convert to an index p in this byte array and return ([0:p], [p:p+n], [p+n:])
        final int p = found - offset;
        return partition(p, p + sepLength);
    }
}
/**
* Construct return value for implementation of Python partition(sep) or
* rpartition(sep), returns [0:p], [p:q], [q:]
* PyTuple returned by this method are instances of the same
* actual type as this.
*
* @param p start of separator
* @param q start of tail
* @return ([0:p], [p:q], [q:])
*/
private PyTuple partition(int p, int q) {
    // Slice out head [0:p], separator [p:q] and tail [q:], in that order.
    return new PyTuple(this.getslice(0, p), this.getslice(p, q), this.getslice(q, size));
}
/**
* Ready-to-expose implementation of Python rfind( sub [, start [, end ]] ). Return
* the highest index in the byte array where byte sequence sub is found, such that
* sub is contained in the slice [start:end]. Arguments
* start and end (which may be null or
* Py.None) are interpreted as in slice notation. Return -1 if sub is
* not found.
*
* @param sub bytes to find
* @param ostart of slice to search
* @param oend of slice to search
* @return index of start of occurrence of sub within this byte array
*/
final int basebytes_rfind(PyObject sub, PyObject ostart, PyObject oend) {
    // View sub as bytes; the view is released on leaving the try block.
    try (PyBuffer pattern = getViewOrError(sub)) {
        // A ReverseFinder makes find() report the highest matching index.
        final Finder backwards = new ReverseFinder(pattern);
        return find(backwards, ostart, oend);
    }
}
/**
* Common code for Python find( sub [, start [, end ]] ) and
* rfind( sub [, start [, end ]] ). Return the lowest or highest index in the byte
* array where byte sequence used to construct finder is found. The particular type
* (plain Finder or ReverseFinder) determines the direction.
*
* @param finder for the bytes to find, sometime forwards, sometime backwards
* @param ostart of slice to search
* @param oend of slice to search
* @return index of start of occurrence of sub within this byte array
*/
private final int find(Finder finder, PyObject ostart, PyObject oend) {
    // Resolve [ostart:oend] against this array: [ start, end, 1, end-start ]
    final int[] bounds = indicesEx(ostart, oend);
    // Search that slice. The finder works with Java indices into storage, hence the offset.
    finder.setText(storage, offset + bounds[0], bounds[3]);
    final int found = finder.nextIndex();
    if (found < 0) {
        return -1;
    }
    // Convert the storage index back to an index in this byte array.
    return found - offset;
}
/**
* An almost ready-to-expose implementation of Python
* replace( old, new [, count ] ), returning a PyByteArray with all
* occurrences of sequence oldB replaced by newB. If the optional
* argument count is given, only the first count occurrences are
* replaced.
*
* @param oldB sequence to find
* @param newB replacement sequence
* @param maxcount maximum occurrences are replaced or < 0 for all
* @return result of replacement as a new PyByteArray
*/
final synchronized PyByteArray basebytes_replace(PyObject oldB, PyObject newB, int maxcount) {
    // View the to and from as byte arrays (or error if we can't)
    try (PyBuffer to = getViewOrError(newB); PyBuffer from = getViewOrError(oldB)) {
        /*
         * The logic of the first section follows CPython in order to get the same behaviour.
         * The "headline" description of replace is simple enough but the corner cases can be
         * surprising:
         */
        // >>> bytearray(b'hello').replace(b'',b'-')
        // bytearray(b'-h-e-l-l-o-')
        // >>> bytearray(b'hello').replace(b'',b'-',3)
        // bytearray(b'-h-e-llo')
        // >>> bytearray(b'hello').replace(b'',b'-',1)
        // bytearray(b'-hello')
        // >>> bytearray().replace(b'',b'-')
        // bytearray(b'-')
        // >>> bytearray().replace(b'',b'-',1) # ?
        // bytearray(b'')
        if (maxcount < 0) {
            // Negative means "no limit".
            maxcount = Integer.MAX_VALUE;
        } else if (maxcount == 0 || size == 0) {
            // nothing to do; return (a copy of) the original bytes
            return new PyByteArray(this);
        }
        // From here on maxcount >= 1.
        int from_len = from.getLen();
        int to_len = to.getLen();
        if (from_len == 0 && to_len == 0) {
            // Replacing nothing with nothing; return (a copy of) the original bytes
            return new PyByteArray(this);
        } else if (from_len == 0) {
            // insert the 'to' bytes everywhere.
            // >>> "Python".replace("", ".")
            // '.P.y.t.h.o.n.'
            return replace_interleave(to, maxcount);
        } else if (size == 0) {
            /*
             * The only way an empty byte array yields a non-empty result is
             * "".replace("", "A") == "A", which was dealt with above as an interleave. Here
             * 'from' is non-empty, so it cannot occur in empty bytes and the result is empty.
             * Returning the 'to' bytes would be wrong.
             */
            return new PyByteArray(this);
        } else if (to_len == 0) {
            // Delete occurrences of the 'from' bytes
            return replace_delete_substring(from, maxcount);
        } else if (from_len == to_len) {
            // Result is same size as this byte array, whatever the number of replacements.
            return replace_substring_in_place(from, to, maxcount);
        } else {
            // Otherwise use the generic algorithm
            return replace_substring(from, to, maxcount);
        }
    }
}
/*
* Algorithms for different cases of string replacement. CPython also has specialisations for
* when 'from' or 'to' or both are single bytes. This may also be worth doing in Java when the
* 'to' is a single byte. (The 'from' is turned into a Finder object which already makes a
* special case of single bytes.)
*/
/**
 * Helper for {@link #basebytes_replace(PyObject, PyObject, int)} implementing the general case
 * of byte-string replacement when the new and old strings have different lengths.
 * <p>
 * The number of replacements is counted first, so that the result can be allocated in a single
 * step. The result length is computed in <code>long</code> arithmetic and checked against the
 * largest possible array size, so that an over-long result raises an <code>OverflowError</code>
 * instead of silently wrapping round.
 *
 * @param from byte-string to find and replace
 * @param to replacement byte-string
 * @param maxcount maximum number of replacements to make
 * @return the result as a new PyByteArray
 */
private PyByteArray replace_substring(PyBuffer from, PyBuffer to, int maxcount) {
    // size>=1, len(from)>=1, len(to)>=1, maxcount>=1

    // Initialise a Finder for the 'from' pattern
    Finder finder = new Finder(from);

    int count = finder.count(storage, offset, size, maxcount);
    if (count == 0) {
        // no matches
        return new PyByteArray(this);
    }

    int from_len = from.getLen();
    int to_len = to.getLen();

    // Calculate length of result (in long arithmetic so it cannot wrap) and check for too big
    long result_len = size + ((long)count) * (to_len - from_len);
    if (result_len > Integer.MAX_VALUE) {
        throw Py.OverflowError("replace bytes is too long");
    }

    byte[] r; // Build result here
    try {
        // Good to go. As we know the ultimate size, we can do all our allocation in one
        r = new byte[(int)result_len];
    } catch (OutOfMemoryError e) {
        throw Py.OverflowError("replace bytes is too long");
    }

    int p = offset; // Copy-from index in this.storage
    int rp = 0; // Copy-to index in r

    // Reset the Finder on the (active part of) this.storage
    finder.setText(storage, p, size);

    while (count-- > 0) {
        // Next occurrence of 'from' bytes in storage
        int q = finder.nextIndex();
        if (q < 0) {
            // Never happens because we've got count right
            break;
        }

        // Output the stretch up to the discovered occurrence of 'from'
        int length = q - p;
        if (length > 0) {
            System.arraycopy(storage, p, r, rp, length);
            rp += length;
        }

        // Skip over the occurrence of the 'from' bytes
        p = q + from_len;

        // Output a copy of 'to'
        to.copyTo(r, rp);
        rp += to_len;
    }

    // Copy the rest of the original string
    int length = size + offset - p;
    if (length > 0) {
        System.arraycopy(storage, p, r, rp, length);
    }

    // Make r[] the storage of a new bytearray
    return new PyByteArray(r);
}
/**
 * Handle the interleaving case b'hello'.replace(b'', b'..') = b'..h..e..l..l..o..': one copy of
 * the replacement is laid down before the first byte and one after each byte, until
 * <code>maxcount</code> insertions have been made or the bytes are exhausted. At the call site
 * we are guaranteed: to.getLen()>=1, maxcount>=1.
 * <p>
 * The result length is computed as a <code>long</code> and checked against the largest possible
 * array size, so that an over-long result raises an <code>OverflowError</code> instead of being
 * truncated by the narrowing cast.
 *
 * @param to the replacement bytes as a byte-oriented view
 * @param maxcount upper limit on number of insertions
 * @return the result as a new PyByteArray
 */
private PyByteArray replace_interleave(PyBuffer to, int maxcount) {

    // Insert one at the beginning and one after every byte, or as many as allowed
    int count = size + 1;
    if (maxcount < count) {
        count = maxcount;
    }

    int to_len = to.getLen();

    // Calculate length of result and check for too big
    long result_len = ((long)count) * to_len + size;
    if (result_len > Integer.MAX_VALUE) {
        throw Py.OverflowError("replace bytes is too long");
    }

    byte[] r; // Build result here
    try {
        // Good to go. As we know the ultimate size, we can do all our allocation in one
        r = new byte[(int)result_len];
    } catch (OutOfMemoryError e) {
        throw Py.OverflowError("replace bytes is too long");
    }

    int p = offset; // Copy-from index in this.storage
    int rp = 0; // Copy-to index in r

    // Lay the first one down (guaranteed this will occur as count>=1)
    to.copyTo(r, rp);
    rp += to_len;

    // And the rest: one original byte followed by one copy of 'to'
    for (int i = 1; i < count; i++) {
        r[rp++] = storage[p++];
        to.copyTo(r, rp);
        rp += to_len;
    }

    // Copy the rest of the original string (non-empty only if maxcount limited us)
    int length = size + offset - p;
    if (length > 0) {
        System.arraycopy(storage, p, r, rp, length);
    }

    // Make r[] the storage of a new bytearray
    return new PyByteArray(r);
}
/**
 * Helper for {@link #basebytes_replace(PyObject, PyObject, int)} covering the deletion case,
 * that is, replacement by a zero-length byte-string. Occurrences of <code>from</code> are
 * counted first, so the (shorter) result can be allocated at exactly the right size, and then
 * the stretches between occurrences are copied across.
 *
 * @param from byte-string to find and delete
 * @param maxcount maximum number of deletions to make
 * @return the result as a new PyByteArray
 */
private PyByteArray replace_delete_substring(PyBuffer from, int maxcount) {
    // len(self)>=1, len(from)>=1, to="", maxcount>=1

    // A Finder for the pattern to be deleted
    Finder finder = new Finder(from);

    int count = finder.count(storage, offset, size, maxcount);
    if (count == 0) {
        // Nothing to delete: the result is a copy of this
        return new PyByteArray(this);
    }

    int from_len = from.getLen();
    long result_len = size - (count * from_len);
    assert (result_len >= 0);

    byte[] result; // Destination array
    try {
        // The final size is known, so one allocation suffices
        result = new byte[(int)result_len];
    } catch (OutOfMemoryError e) {
        throw Py.OverflowError("replace bytes is too long");
    }

    int src = offset; // Next byte to copy from this.storage
    int dst = 0; // Next free position in result

    // Point the Finder at the active part of this.storage
    finder.setText(storage, offset, size);

    for (int remaining = count; remaining > 0; remaining--) {
        // Locate the next occurrence of 'from'
        int found = finder.nextIndex();
        if (found < 0) {
            // Not expected, since the occurrences have already been counted
            break;
        }

        // Keep everything between the previous occurrence and this one
        int keep = found - src;
        if (keep > 0) {
            System.arraycopy(storage, src, result, dst, keep);
            dst += keep;
        }

        // Step over the occurrence itself, so it is not copied
        src = found + from_len;
    }

    // Whatever follows the last deleted occurrence is kept too
    int tail = size + offset - src;
    if (tail > 0) {
        System.arraycopy(storage, src, result, dst, tail);
    }

    // Wrap the result array as a new bytearray
    return new PyByteArray(result);
}
/**
 * Helper for {@link #basebytes_replace(PyObject, PyObject, int)} covering the case where the
 * old and new byte-strings have equal length. The result is then exactly as long as this byte
 * array, however many replacements are made, so the matches need not be counted in advance: a
 * copy is taken and each match is overwritten in that copy.
 *
 * @param from byte-string to find and replace
 * @param to replacement byte-string
 * @param maxcount maximum number of replacements to make
 * @return the result as a new PyByteArray
 */
private PyByteArray replace_substring_in_place(PyBuffer from, PyBuffer to, int maxcount) {
    // len(self)>=1, len(from)==len(to)>=1, maxcount>=1

    // A Finder for the pattern to be replaced
    Finder finder = new Finder(from);

    // The result has the same size as this
    byte[] result;
    try {
        result = new byte[this.size];
    } catch (OutOfMemoryError e) {
        throw Py.OverflowError("replace bytes is too long");
    }
    System.arraycopy(storage, offset, result, 0, size);

    // Search the copy rather than the original, so indexes apply directly to result[]
    finder.setText(result);

    for (int remaining = maxcount; remaining > 0; remaining--) {
        int found = finder.nextIndex();
        if (found < 0) {
            // No more matches: this is the normal way out when maxcount is not reached
            break;
        }
        // Overwrite the matched bytes with the replacement
        to.copyTo(result, found);
    }

    // Wrap the result array as a new bytearray
    return new PyByteArray(result);
}
/**
* Implementation of Python rpartition(sep), returning a 3-tuple of byte arrays (of
* the same type as this).
* <p>
* Split the string at the rightmost occurrence of sep, and return a 3-tuple
* containing the part before the separator, the separator itself, and the part after the
* separator. If the separator is not found, return a 3-tuple containing two empty byte arrays,
* followed by the byte array itself.
* <p>
* The elements of the PyTuple returned by this method are instances of the same
* actual type as this.
*
* @param sep the separator on which to partition this byte array
* @return a tuple of (head, separator, tail)
*/
public PyTuple rpartition(PyObject sep) {
// Thin public wrapper: the work is done by basebytes_rpartition
return basebytes_rpartition(sep);
}
/**
 * Ready-to-expose implementation of Python rpartition(sep): partition this byte array around
 * the rightmost occurrence of the separator.
 * <p>
 * The elements of the PyTuple returned by this method are instances of the same
 * actual type as this.
 *
 * @param sep the separator on which to partition this byte array
 * @return a tuple of (head, separator, tail)
 */
final synchronized PyTuple basebytes_rpartition(PyObject sep) {

    // The separator must be viewable as bytes (or we raise an error)
    try (PyBuffer separator = getViewOrError(sep)) {

        // Validate the separator, then search this byte array for it from the right
        int sepLength = checkForEmptySeparator(separator);
        Finder finder = new ReverseFinder(separator);
        finder.setText(this);

        // One search is enough: the first hit from the right, made relative to offset
        int start = finder.nextIndex() - offset;

        if (start < 0) {
            // Not found: these values lead to ('', '', [0:size])
            return partition(0, 0);
        }

        // Found at start, so return ([0:start], [start:start+n], [start+n:])
        return partition(start, start + sepLength);
    }
}
/**
* Implementation of Python rsplit(), that returns a list of the words in the byte
* array, using whitespace as the delimiter. See {@link #rsplit(PyObject, int)}.
* <p>
* The elements of the PyList returned by this method are instances of the same
* actual type as this.
*
* @return PyList of byte arrays that result from the split
*/
public PyList rsplit() {
// A maxsplit of -1 means there is no limit on the number of splits
return basebytes_rsplit_whitespace(-1);
}
/**
* Implementation of Python rsplit(sep), that returns a list of the words in the
* byte array, using sep as the delimiter. See {@link #rsplit(PyObject, int)} for
* the semantics of the separator.
* <p>
* The elements of the PyList returned by this method are instances of the same
* actual type as this.
*
* @param sep bytes, or object viewable as bytes, defining the separator
* @return PyList of byte arrays that result from the split
*/
public PyList rsplit(PyObject sep) {
// A maxsplit of -1 means there is no limit on the number of splits
return basebytes_rsplit(sep, -1);
}
/**
* Implementation of Python rsplit(sep, maxsplit), that returns a list of the words
* in the byte array, using sep as the delimiter. If maxsplit is
* given, at most maxsplit splits are done (thus, the list will have at most
* maxsplit+1 elements). If maxsplit is not specified, then there is
* no limit on the number of splits (all possible splits are made).
* <p>
* The semantics of sep and maxsplit are identical to those of
* split(sep, maxsplit), except that splits are generated from the right (and
* pushed onto the front of the result list). The result is only different from that of
* split if maxsplit limits the number of splits. For example,
* <ul>
* <li>bytearray(b' 1 2 3 ').rsplit() returns
* [bytearray(b'1'), bytearray(b'2'), bytearray(b'3')], and</li>
* <li>bytearray(b' 1 2 3 ').rsplit(None, 1) returns
* [bytearray(b' 1 2'), bytearray(b'3')].</li>
* </ul>
* <p>
* The elements of the PyList returned by this method are instances of the same
* actual type as this.
*
* @param sep bytes, or object viewable as bytes, defining the separator
* @param maxsplit maximum number of splits
* @return PyList of byte arrays that result from the split
*/
public PyList rsplit(PyObject sep, int maxsplit) {
// Thin public wrapper: the work is done by basebytes_rsplit
return basebytes_rsplit(sep, maxsplit);
}
/**
 * Ready-to-expose implementation of Python rsplit(sep, maxsplit), that returns a list of the
 * words in the byte array, using sep as the delimiter. The whitespace-splitting semantics are
 * used if sep is <code>null</code> or <code>None</code>.
 * <p>
 * The elements of the PyList returned by this method are instances of the same
 * actual type as this.
 *
 * @param sep bytes, or object viewable as bytes, defining the separator
 * @param maxsplit maximum number of splits
 * @return PyList of byte arrays that result from the split
 */
final PyList basebytes_rsplit(PyObject sep, int maxsplit) {
    // No separator given (Java null or Python None) selects splitting on whitespace
    boolean onWhitespace = (sep == null) || (sep == Py.None);
    return onWhitespace ? basebytes_rsplit_whitespace(maxsplit)
            : basebytes_rsplit_explicit(sep, maxsplit);
}
/**
 * Implementation of Python rsplit(sep, maxsplit), that returns a list of the words in the byte
 * array, using sep (which is not null) as the delimiter. If maxsplit&gt;=0, at most maxsplit
 * splits are done (thus, the list will have at most maxsplit+1 elements). If maxsplit&lt;0,
 * then there is no limit on the number of splits (all possible splits are made). Words are
 * found from the right and pushed onto the front of the result list.
 * <p>
 * The elements of the PyList returned by this method are instances of the same
 * actual type as this.
 *
 * @param sep bytes, or object viewable as bytes, defining the separator
 * @param maxsplit maximum number of splits
 * @return PyList of byte arrays that result from the split
 */
final synchronized PyList basebytes_rsplit_explicit(PyObject sep, int maxsplit) {

    // Anything viewable as bytes will do as the separator
    try (PyBuffer separator = getViewOrError(sep)) {

        int sepLength = checkForEmptySeparator(separator);
        PyList words = new PyList();

        // Search backwards through the storage of this byte array
        Finder finder = new ReverseFinder(separator);
        finder.setText(this);

        // 'end' starts at an "honorary separator" just beyond the last byte
        int end = offset + size;

        // storage[end-1] is the last byte of the rightmost unsplit word (if end > offset)
        while (end > offset && maxsplit-- != 0) {

            // The word we are about to delimit stops here
            int wordEnd = end;

            // Move back to the separator that precedes the word (or <0 if there is none)
            end = finder.nextIndex();

            // The word begins just after that separator, or at the very start
            int wordStart = (end < 0) ? offset : end + sepLength;

            BaseBytes word = getslice(wordStart - offset, wordEnd - offset);
            words.add(0, word);
        }

        // Whatever is left unsplit (possibly empty) goes at the front
        if (end >= offset) {
            BaseBytes word = getslice(0, end - offset);
            words.add(0, word);
        }

        return words;
    }
}
/**
 * Implementation of Python rsplit(None, maxsplit), that returns a list of the words in the byte
 * array, using whitespace as the delimiter. If maxsplit is given, at most maxsplit splits are
 * done (thus, the list will have at most maxsplit+1 elements). If maxsplit is not specified,
 * then there is no limit on the number of splits (all possible splits are made).
 * <p>
 * Runs of consecutive whitespace are regarded as a single separator, and trailing whitespace is
 * discarded. Consequently, splitting an empty string or a string consisting of just whitespace
 * with a None separator returns [].
 * <p>
 * The elements of the PyList returned by this method are instances of the same
 * actual type as this/self.
 *
 * @param maxsplit maximum number of splits
 * @return PyList of byte arrays that result from the split
 */
final synchronized PyList basebytes_rsplit_whitespace(int maxsplit) {

    PyList words = new PyList();

    // Step back over any trailing whitespace
    int end = offset + size;
    while (end > offset && isspace(storage[end - 1] & 0xff)) {
        --end;
    }

    // Note: bytearray().rsplit() = bytearray(b' ').rsplit() = []

    // storage[end-1] is now the rightmost non-space byte, unless end==offset
    while (end > offset && maxsplit-- != 0) {

        // Step back over the word that finishes at storage[end-1]
        int start = end;
        while (start > offset && !isspace(storage[start - 1] & 0xff)) {
            --start;
        }

        // The word is storage[start:end]; it goes in front of those found so far
        BaseBytes word = getslice(start - offset, end - offset);
        words.add(0, word);

        // Step back over the whitespace that precedes the word
        end = start;
        while (end > offset && isspace(storage[end - 1] & 0xff)) {
            --end;
        }
    }

    // If splits ran out first, the unsplit remainder goes at the front
    if (end > offset) {
        BaseBytes word = getslice(0, end - offset);
        words.add(0, word);
    }

    return words;
}
/**
* Implementation of Python split(), that returns a list of the words in the byte
* array, using whitespace as the delimiter. See {@link #split(PyObject, int)}.
* <p>
* The elements of the PyList returned by this method are instances of the same
* actual type as this.
*
* @return PyList of byte arrays that result from the split
*/
public PyList split() {
// A maxsplit of -1 means there is no limit on the number of splits
return basebytes_split_whitespace(-1);
}
/**
* Implementation of Python split(sep), that returns a list of the words in the
* byte array, using sep as the delimiter. See {@link #split(PyObject, int)} for
* the semantics of the separator.
* <p>
* The elements of the PyList returned by this method are instances of the same
* actual type as this.
*
* @param sep bytes, or object viewable as bytes, defining the separator
* @return PyList of byte arrays that result from the split
*/
public PyList split(PyObject sep) {
// A maxsplit of -1 means there is no limit on the number of splits
return basebytes_split(sep, -1);
}
/**
* Implementation of Python split(sep, maxsplit), that returns a list of the words
* in the byte array, using sep as the delimiter. If maxsplit is
* given, at most maxsplit splits are done. (Thus, the list will have at most
* maxsplit+1 elements). If maxsplit is not specified, then there is
* no limit on the number of splits (all possible splits are made).
* <p>
* If sep is given, consecutive delimiters are not grouped together and are deemed
* to delimit empty strings (for example, '1,,2'.split(',') returns
* ['1', '', '2']). The sep argument may consist of multiple
* characters (for example, '1&lt;&gt;2&lt;&gt;3'.split('&lt;&gt;') returns ['1',
* '2', '3']). Splitting an empty string with a specified separator returns [''].
* <p>
* If sep is not specified or is None, a different splitting algorithm
* is applied: runs of consecutive whitespace are regarded as a single separator, and the result
* will contain no empty strings at the start or end if the string has leading or trailing
* whitespace. Consequently, splitting an empty string or a string consisting of just whitespace
* with a None separator returns []. For example,
* <ul>
* <li>bytearray(b' 1 2 3 ').split() returns
* [bytearray(b'1'), bytearray(b'2'), bytearray(b'3')], and</li>
* <li>bytearray(b' 1 2 3 ').split(None, 1) returns
* [bytearray(b'1'), bytearray(b'2 3 ')].</li>
* </ul>
* <p>
* The elements of the PyList returned by this method are instances of the same
* actual type as this.
*
* @param sep bytes, or object viewable as bytes, defining the separator
* @param maxsplit maximum number of splits
* @return PyList of byte arrays that result from the split
*/
public PyList split(PyObject sep, int maxsplit) {
// Thin public wrapper: the work is done by basebytes_split
return basebytes_split(sep, maxsplit);
}
/**
 * Ready-to-expose implementation of Python split(sep, maxsplit), that returns a list of the
 * words in the byte array, using sep as the delimiter. The whitespace-splitting semantics are
 * used if sep is <code>null</code> or <code>None</code>.
 * <p>
 * The elements of the PyList returned by this method are instances of the same
 * actual type as this.
 *
 * @param sep bytes, or object viewable as bytes, defining the separator
 * @param maxsplit maximum number of splits
 * @return PyList of byte arrays that result from the split
 */
final PyList basebytes_split(PyObject sep, int maxsplit) {
    // No separator given (Java null or Python None) selects splitting on whitespace
    boolean onWhitespace = (sep == null) || (sep == Py.None);
    return onWhitespace ? basebytes_split_whitespace(maxsplit)
            : basebytes_split_explicit(sep, maxsplit);
}
/**
 * Implementation of Python split(sep, maxsplit), that returns a list of the words in the byte
 * array, using sep (which is not null) as the delimiter. If maxsplit&gt;=0, at most maxsplit
 * splits are done (thus, the list will have at most maxsplit+1 elements). If maxsplit&lt;0,
 * then there is no limit on the number of splits (all possible splits are made).
 * <p>
 * The elements of the PyList returned by this method are instances of the same
 * actual type as this.
 *
 * @param sep bytes, or object viewable as bytes, defining the separator
 * @param maxsplit maximum number of splits
 * @return PyList of byte arrays that result from the split
 */
final synchronized PyList basebytes_split_explicit(PyObject sep, int maxsplit) {

    // Anything viewable as bytes will do as the separator
    try (PyBuffer separator = getViewOrError(sep)) {

        checkForEmptySeparator(separator);
        PyList words = new PyList();

        // Search forwards through the storage of this byte array
        Finder finder = new Finder(separator);
        finder.setText(this);

        // Start of the unsplit text, as an index into this.storage (initially = offset)
        int start = finder.currIndex();

        // Note: bytearray().split(' ') == [bytearray(b'')]

        // Visit each separator in turn, for as long as splits remain (if maxsplit started>=0)
        for (int sepAt = finder.nextIndex(); sepAt >= 0 && maxsplit-- != 0;
                sepAt = finder.nextIndex()) {
            // The Finder deals in indexes into this.storage, so make them relative
            words.append(getslice(start - offset, sepAt - offset));
            // The unsplit text now starts where the Finder will resume
            start = finder.currIndex();
        }

        // The remaining unsplit text (possibly empty) is the last word
        words.append(getslice(start - offset, size));
        return words;
    }
}
/**
* Implementation of Python split(None, maxsplit), that returns a list of the words
* in the byte array, using whitespace as the delimiter. If maxsplit is given, at
* most maxsplit splits are done (thus, the list will have at most maxsplit+1
* elements). If maxsplit is not specified, then there is no limit on the number of
* splits (all possible splits are made).
* None separator returns [].
* PyList returned by this method are instances of the same
* actual type as this.
*
* @param maxsplit maximum number of splits
* @return PyList of byte arrays that result from the split
*/
final synchronized PyList basebytes_split_whitespace(int maxsplit) {
PyList result = new PyList();
int limit = offset + size;
int p, q; // Indexes of unsplit text and whitespace
// Scan over leading whitespace
for (p = offset; p < limit && isspace(storage[p] & 0xff); p++) {
; // continue
}
// Note: bytearray().split() = bytearray(b' ').split() = []
// At this point if psplitlines(), returning a list of the lines in the byte
* array, breaking at line boundaries. Line breaks are not included in the resulting segments.
* PyList returned by this method are instances of the same
* actual type as this.
*
* @return List of segments
*/
public PyList splitlines() {
// keepends=false: the line-break bytes are not included in the returned segments
return basebytes_splitlines(false);
}
/**
* Implementation of Python splitlines(keepends), returning a list of the lines in
* the string, breaking at line boundaries. Line breaks are not included in the resulting list
* unless keepends is true.
* <p>
* The elements of the PyList returned by this method are instances of the same
* actual type as this.
*
* @param keepends if true, include the end of line byte(s)
* @return PyList of segments
*/
public PyList splitlines(boolean keepends) {
// Thin public wrapper: the work is done by basebytes_splitlines
return basebytes_splitlines(keepends);
}
/**
 * Ready-to-expose implementation of Python splitlines(keepends), returning a list of the lines
 * in the array, breaking at line boundaries. A line boundary is one of <code>\n</code>,
 * <code>\r</code> or the pair <code>\r\n</code>. Line breaks are not included in the resulting
 * list unless keepends is given and true.
 * <p>
 * The elements of the PyList returned by this method are instances of the same
 * actual type as this.
 *
 * @param keepends if true, include the end of line byte(s)
 * @return List of segments
 */
protected final synchronized PyList basebytes_splitlines(boolean keepends) {

    PyList list = new PyList();
    int limit = offset + size;

    for (int p = offset; p < limit; /* p advanced in loop */) {
        int q, lenEOL = 0;

        // Scan q to the end of the line (or buffer), noting the length of the EOL bytes
        for (q = p; q < limit; q++) {
            byte b = storage[q];
            if (b == '\r') {
                /*
                 * Look for a following \n, but only inside the active region: a \r that is the
                 * last byte must not cause a read of storage[limit], which is either stale
                 * data or beyond the end of the array.
                 */
                lenEOL = (q + 1 < limit && storage[q + 1] == '\n') ? 2 : 1;
                break;
            } else if (b == '\n') {
                lenEOL = 1; // Just one EOL byte \n
                break;
            }
        }

        // lenEOL =2: the line ended \r\n, and q points at \r;
        // lenEOL =1: the line ended \n or \r (only), and q points at it;
        // lenEOL =0: the line ended with the end of the data (and q=limit)

        if (keepends) {
            list.append(getslice(p - offset, q + lenEOL - offset));
        } else {
            list.append(getslice(p - offset, q - offset));
        }

        // Start next line after what terminated it
        p = q + lenEOL;
    }

    return list;
}
//
// Padding, filling and centering
//
/**
 * Helper to check the fill byte for {@link #basebytes_rjust(int, String)},
 * {@link #basebytes_ljust(int, String)} and {@link #basebytes_center(int, String)}, which is
 * required to be a single character string, treated as a byte.
 *
 * @param function name of the calling Python method, used in the error message
 * @param fillchar one-character string, or null to request the default
 * @return the fill byte: a space if fillchar is null, otherwise the single character of
 *         fillchar cast to a byte
 */
protected static byte fillByteCheck(String function, String fillchar) {
    // No fill character supplied: pad with spaces
    if (fillchar == null) {
        return ' ';
    }
    // Anything other than exactly one character is an error
    if (fillchar.length() != 1) {
        throw Py.TypeError(function + "() argument 2 must be char, not str");
    }
    return (byte)fillchar.charAt(0);
}
/**
 * Helper function to construct the return value for {@link #basebytes_rjust(int, String)},
 * {@link #basebytes_ljust(int, String)} and {@link #basebytes_center(int, String)}. Clients
 * calculate the left and right fill values according to their nature, ignoring the possibility
 * that the desired width=left+size+right may be less than this.size. This method does all the
 * work, and deals with that exceptional case by returning self[:].
 *
 * @param pad byte to fill with
 * @param left padding requested
 * @param right padding requested
 * @return (possibly new) byte array containing the result
 */
private BaseBytes padHelper(byte pad, int left, int right) {

    if (left + right > 0) {
        // Padding is needed: assemble pad-bytes, content, pad-bytes in a Builder
        Builder builder = getBuilder(left + size + right);
        builder.repeat(pad, left);
        builder.append(this);
        builder.repeat(pad, right);
        return builder.getResult();
    }

    // Here width <= size, so no padding is necessary.
    // If this is immutable getslice may return this same object
    return getslice(0, size);
}
/**
 * A ready-to-expose implementation of Python center(width [, fillchar]): return the bytes
 * centered in an array of length width. Padding is done using the specified fillchar (default
 * is a space). A copy of the original byte array is returned if width is less than
 * this.size(). (Immutable subclasses may return exactly the original object.)
 *
 * @param width desired
 * @param fillchar one-byte String to fill with, or null implying space
 * @return (possibly new) byte array containing the result
 */
final BaseBytes basebytes_center(int width, String fillchar) {
    // Validate the fill character (or take the default)
    byte pad = fillByteCheck("center", fillchar);

    // Total padding wanted (may be zero or negative)
    int total = width - size;

    // CPython's formula: the extra term matters when width is odd and size even
    int before = total / 2 + (total & width & 1);
    int after = total - before;

    return padHelper(pad, before, after);
}
/**
 * A ready-to-expose implementation of Python ljust(width [, fillchar]): return the bytes
 * left-justified in an array of length width. Padding is done using the specified fillchar
 * (default is a space). A copy of the original byte array is returned if width is less than
 * this.size(). (Immutable subclasses may return exactly the original object.)
 *
 * @param width desired
 * @param fillchar one-byte String to fill with, or null implying space
 * @return (possibly new) byte array containing the result
 */
final BaseBytes basebytes_ljust(int width, String fillchar) {
    // Argument check and default. The name given here appears in any TypeError raised.
    byte pad = fillByteCheck("ljust", fillchar);
    // How many pads will I need?
    int fill = width - size;
    // Left-justified means all the padding goes on the right
    return padHelper(pad, 0, fill);
}
/**
 * A ready-to-expose implementation of Python rjust(width [, fillchar]): return the bytes
 * right-justified in an array of length width. Padding is done using the specified fillchar
 * (default is a space). A copy of the original byte array is returned if width is less than
 * this.size(). (Immutable subclasses may return exactly the original object.)
 *
 * @param width desired
 * @param fillchar one-byte String to fill with, or null implying space
 * @return (possibly new) byte array containing the result
 */
final BaseBytes basebytes_rjust(int width, String fillchar) {
    // Validate the fill character (or take the default)
    byte pad = fillByteCheck("rjust", fillchar);
    // Right-justified means all the padding (width - size of it) goes on the left
    return padHelper(pad, width - size, 0);
}
/**
* Ready-to-expose implementation of Python expandtabs([tabsize]): return a copy of
* the byte array where all tab characters are replaced by one or more spaces, depending on the
* current column and the given tab size. The column number is reset to zero after each newline
* occurring in the array. This treats other non-printing characters or escape sequences as
* regular characters.
* <p>
* Ready-to-expose implementation of Python zfill(width): return the numeric string
* left filled with zeros in a byte array of length width. A sign prefix is handled correctly if
* it is in the first byte. A copy of the original is returned if width is less than the current
* size of the array.
*
* @param width desired
* @return left-filled byte array
*/
final BaseBytes basebytes_zfill(int width) {
    // Number of zeros needed to reach the requested width
    int fill = width - size;

    if (fill <= 0) {
        // width <= size so the result is just a copy of this array
        Builder copy = getBuilder(size);
        copy.append(this);
        return copy.getResult();
    }

    // At least one zero must be added, making the result exactly width bytes long
    Builder builder = getBuilder(width);

    // A leading sign byte (if any) has to stay in front of the zeros
    int start = 0;
    if (size > 0) {
        byte first = storage[offset];
        if (first == '-' || first == '+') {
            builder.append(first);
            start = 1;
        }
    }

    // The zeros themselves
    builder.repeat((byte)'0', fill);

    // Then the rest of the text, if there is any (there is none for ''.zfill(6), say)
    if (size > start) {
        builder.append(this, start, size);
    }

    return builder.getResult();
}
//
// Character class operations
//
/**
* Java API equivalent of Python isalnum(). This method treats the bytes as Unicode
* point codes and is consistent with Java's {@link Character#isLetterOrDigit(char)}.
*
* @return true if all bytes in the array are point codes for alphanumerics and there is at
* least one byte, false otherwise.
*/
public boolean isalnum() {
// Thin public wrapper: the work is done by basebytes_isalnum
return basebytes_isalnum();
}
/**
 * Ready-to-expose implementation of Python isalnum(), classifying each byte with
 * {@link Character#isLetterOrDigit(char)}.
 *
 * @return true if all bytes in the array are point codes for alphanumerics and there is at
 *         least one byte, false otherwise.
 */
final boolean basebytes_isalnum() {
    // An empty array is never alphanumeric
    if (size <= 0) {
        return false;
    }
    // One failing byte is enough to decide
    for (int i = 0; i < size; i++) {
        if (!Character.isLetterOrDigit(charAt(i))) {
            return false;
        }
    }
    return true;
}
/**
* Java API equivalent of Python isalpha(). This method treats the bytes as Unicode
* point codes and is consistent with Java's {@link Character#isLetter(char)}.
*
* @return true if all bytes in the array are alphabetic and there is at least one byte, false
* otherwise
*/
public boolean isalpha() {
// Thin public wrapper: the work is done by basebytes_isalpha
return basebytes_isalpha();
}
/**
 * Ready-to-expose implementation of Python isalpha(), classifying each byte with
 * {@link Character#isLetter(char)}.
 *
 * @return true if all bytes in the array are alphabetic and there is at least one byte, false
 *         otherwise
 */
final boolean basebytes_isalpha() {
    // An empty array is never alphabetic
    if (size <= 0) {
        return false;
    }
    // One failing byte is enough to decide
    for (int i = 0; i < size; i++) {
        if (!Character.isLetter(charAt(i))) {
            return false;
        }
    }
    return true;
}
/**
* Java API equivalent of Python isdigit(). This method treats the bytes as Unicode
* point codes and is consistent with Java's {@link Character#isDigit(char)}.
*
* @return true if all bytes in the array are point codes for digits and there is at least one
* byte, false otherwise.
*/
public boolean isdigit() {
// Thin public wrapper: the work is done by basebytes_isdigit
return basebytes_isdigit();
}
/**
 * Ready-to-expose implementation of Python isdigit(), classifying each byte with
 * {@link Character#isDigit(char)}.
 *
 * @return true if all bytes in the array are point codes for digits and there is at least one
 *         byte, false otherwise.
 */
final boolean basebytes_isdigit() {
    // An empty array is never numeric
    if (size <= 0) {
        return false;
    }
    // One failing byte is enough to decide
    for (int i = 0; i < size; i++) {
        if (!Character.isDigit(charAt(i))) {
            return false;
        }
    }
    return true;
}
/**
* Java API equivalent of Python islower(). This method treats the bytes as Unicode
* point codes and is consistent with Java's {@link Character#isLowerCase(char)}.
*
* @return true if all cased bytes in the array are point codes for lowercase characters and
* there is at least one cased byte, false otherwise.
*/
public boolean islower() {
// Thin public wrapper: the work is done by basebytes_islower
return basebytes_islower();
}
/**
 * Ready-to-expose implementation of Python islower(), classifying each byte with
 * {@link Character#isUpperCase(char)} and {@link Character#isLowerCase(char)}.
 *
 * @return true if all cased bytes in the array are point codes for lowercase characters and
 *         there is at least one cased byte, false otherwise.
 */
final boolean basebytes_islower() {
    boolean seenLower = false;

    for (int i = 0; i < size; i++) {
        char c = charAt(i);
        // Any upper case byte settles the matter at once
        if (Character.isUpperCase(c)) {
            return false;
        }
        // Once a lower case byte has been seen there is no need to test for another
        seenLower = seenLower || Character.isLowerCase(c);
    }

    // No upper case bytes were found: the answer turns on having found a cased byte at all
    return seenLower;
}
/**
* Java API equivalent of Python isspace(). The bytes counted as whitespace are those
* accepted by the private helper isspace(int): 0x09 to 0x0D inclusive, and 0x20.
*
* @return true if all the bytes in the array are point codes for whitespace characters and
* there is at least one byte, false otherwise.
*/
public boolean isspace() {
// Thin public wrapper: the work is done by basebytes_isspace
return basebytes_isspace();
}
private boolean isspace(int c) {
    // Whitespace is the space character (0x20) or one of the controls 0x09 to 0x0D, that is
    // TAB, LF, VT, FF and CR
    return c == 0x20 || (c >= 0x09 && c <= 0x0D);
}
/**
 * Ready-to-expose implementation of Python isspace(), classifying each byte with the private
 * helper <code>isspace(int)</code>.
 *
 * @return true if all the bytes in the array are point codes for whitespace characters and
 *         there is at least one byte, false otherwise.
 */
final boolean basebytes_isspace() {
    // An empty array is never whitespace
    if (size <= 0) {
        return false;
    }
    // One failing byte is enough to decide
    for (int i = 0; i < size; i++) {
        if (!isspace(charAt(i))) {
            return false;
        }
    }
    return true;
}
/**
* Java API equivalent of Python istitle(). This method treats the bytes as Unicode
* point codes and is consistent with Java's {@link Character#isUpperCase(char)} and
* {@link Character#isLowerCase(char)}.
*
* @return true if the string is a titlecased string and there is at least one cased byte, for
* example uppercase characters may only follow uncased bytes and lowercase characters
* only cased ones. Return false otherwise.
*/
public boolean istitle() {
// Thin public wrapper: the work is done by basebytes_istitle
return basebytes_istitle();
}
/**
 * Ready-to-expose implementation of Python istitle(), classifying each byte with
 * {@link Character#isUpperCase(char)} and {@link Character#isLowerCase(char)}.
 *
 * @return true if the string is a titlecased string and there is at least one cased byte, for
 *         example uppercase characters may only follow uncased bytes and lowercase characters
 *         only cased ones. Return false otherwise.
 */
final boolean basebytes_istitle() {

    boolean seenCased = false; // Some cased character has been encountered
    boolean inWord = false; // The previous character was cased (implies seenCased)

    for (int i = 0; i < size; i++) {
        char c = charAt(i);

        if (Character.isUpperCase(c)) {
            if (inWord) {
                // Violation: can't continue a word in upper case
                return false;
            }
            // An upper case character validly starts a word
            inWord = true;
            seenCased = true;

        } else if (Character.isLowerCase(c)) {
            if (!inWord) {
                // Violation: can't start a word in lower case
                return false;
            }

        } else {
            // Uncased character: if we were in a word, that word has ended
            inWord = false;
        }
    }

    // There were no case violations: the answer turns on having found a cased byte at all
    return seenCased;
}
/**
* Java API equivalent of Python {@code isupper()}. This method treats the bytes as Unicode
* point codes and is consistent with Java's {@link Character#isUpperCase(char)}. It simply
* delegates to {@link #basebytes_isupper()}.
*
* @return true if all cased bytes in the array are point codes for uppercase characters and
* there is at least one cased byte, false otherwise.
*/
public boolean isupper() {
return basebytes_isupper();
}
/**
 * Ready-to-expose implementation of Python {@code isupper()}.
 *
 * @return true if all cased bytes in the array are point codes for uppercase characters and
 *         there is at least one cased byte, false otherwise.
 */
final boolean basebytes_isupper() {
    boolean foundUpper = false;
    for (int i = 0; i < size; i++) {
        final char ch = charAt(i);
        if (Character.isLowerCase(ch)) {
            // A single lower case byte settles the question
            return false;
        }
        // Once an upper case byte has been seen, the short-circuit skips further testing
        foundUpper = foundUpper || Character.isUpperCase(ch);
    }
    // No lower case bytes were found: true only if something was actually cased
    return foundUpper;
}
//
// Case transformations
//
/**
* Java API equivalent of Python {@code capitalize()}. This method treats the bytes as
* Unicode point codes and is consistent with Java's {@link Character#toUpperCase(char)} and
* {@link Character#toLowerCase(char)}. The {@code BaseBytes} returned by this method has
* the same actual type as {@code this/self}. It simply delegates to
* {@link #basebytes_capitalize()}.
*
* @return a copy of the array with its first character capitalized and the rest lowercased.
*/
public BaseBytes capitalize() {
return basebytes_capitalize();
}
/**
 * Ready-to-expose implementation of Python {@code capitalize()}. The {@code BaseBytes}
 * returned by this method has the same actual type as {@code this/self}.
 * <p>
 * Bytes are treated as point codes 0..255. A lower case byte whose upper case form does not
 * fit in a byte (0xFF maps to U+0178 and 0xB5 to U+039C) is copied unchanged, rather than being
 * truncated to an unrelated byte value.
 *
 * @return a copy of the array with its first character capitalized and the rest lowercased.
 */
final BaseBytes basebytes_capitalize() {
    Builder builder = getBuilder(size);
    if (size > 0) {
        // Treat first character
        char c = charAt(0);
        if (Character.isLowerCase(c)) {
            char upper = Character.toUpperCase(c);
            // Accept the mapping only if the result is still representable as one byte
            if (upper <= 0xff) {
                c = upper;
            }
        }
        // Put the adjusted character in the output as a byte
        builder.append((byte)c);
        // Treat the rest (lower-casing a point code in 0..255 always stays in that range)
        for (int i = 1; i < size; i++) {
            c = charAt(i);
            if (Character.isUpperCase(c)) {
                c = Character.toLowerCase(c);
            }
            // Put the adjusted character in the output as a byte
            builder.append((byte)c);
        }
    }
    return builder.getResult();
}
/**
* Java API equivalent of Python {@code lower()}. This method treats the bytes as Unicode
* point codes and is consistent with Java's {@link Character#toLowerCase(char)}. The
* {@code BaseBytes} returned by this method has the same actual type as
* {@code this/self}. It simply delegates to {@link #basebytes_lower()}.
*
* @return a copy of the array with all the cased characters converted to lowercase.
*/
public BaseBytes lower() {
return basebytes_lower();
}
/**
 * Ready-to-expose implementation of Python {@code lower()}. The {@code BaseBytes} returned by
 * this method has the same actual type as {@code this/self}.
 *
 * @return a copy of the array with all the cased characters converted to lowercase.
 */
final BaseBytes basebytes_lower() {
    final Builder result = getBuilder(size);
    for (int i = 0; i < size; i++) {
        final char ch = charAt(i);
        // Only upper case point codes are changed; everything else is copied as it is
        final char out = Character.isUpperCase(ch) ? Character.toLowerCase(ch) : ch;
        result.append((byte)out);
    }
    return result.getResult();
}
/**
* Java API equivalent of Python {@code swapcase()}. This method treats the bytes as
* Unicode point codes and is consistent with Java's {@link Character#toUpperCase(char)} and
* {@link Character#toLowerCase(char)}. The {@code BaseBytes} returned by this method has
* the same actual type as {@code this/self}. It simply delegates to
* {@link #basebytes_swapcase()}.
*
* @return a copy of the array with uppercase characters converted to lowercase and vice versa.
*/
public BaseBytes swapcase() {
return basebytes_swapcase();
}
/**
 * Ready-to-expose implementation of Python {@code swapcase()}. The {@code BaseBytes} returned
 * by this method has the same actual type as {@code this/self}.
 * <p>
 * Bytes are treated as point codes 0..255. A lower case byte whose upper case form does not
 * fit in a byte (0xFF maps to U+0178 and 0xB5 to U+039C) is copied unchanged, rather than being
 * truncated to an unrelated byte value.
 *
 * @return a copy of the array with uppercase characters converted to lowercase and vice versa.
 */
final BaseBytes basebytes_swapcase() {
    Builder builder = getBuilder(size);
    for (int i = 0; i < size; i++) {
        char c = charAt(i);
        if (Character.isUpperCase(c)) {
            // Lower-casing a point code in 0..255 always stays in that range
            c = Character.toLowerCase(c);
        } else if (Character.isLowerCase(c)) {
            char upper = Character.toUpperCase(c);
            // Accept the mapping only if the result is still representable as one byte
            if (upper <= 0xff) {
                c = upper;
            }
        }
        // Put the adjusted character in the output as a byte
        builder.append((byte)c);
    }
    return builder.getResult();
}
/**
* Java API equivalent of Python {@code title()}. The algorithm uses a simple
* language-independent definition of a word as groups of consecutive letters. The definition
* works in many contexts but it means that apostrophes in contractions and possessives form
* word boundaries, which may not be the desired result. The {@code BaseBytes} returned by
* this method has the same actual type as {@code this/self}. It simply delegates to
* {@link #basebytes_title()}.
*
* @return a titlecased version of the array where words start with an uppercase character and
* the remaining characters are lowercase.
*/
public BaseBytes title() {
return basebytes_title();
}
/**
 * Ready-to-expose implementation of Python {@code title()}. The {@code BaseBytes} returned by
 * this method has the same actual type as {@code this/self}.
 * <p>
 * Bytes are treated as point codes 0..255 and a word is a run of cased bytes. A lower case
 * byte whose upper case form does not fit in a byte (0xFF maps to U+0178 and 0xB5 to U+039C)
 * still starts a word but is copied unchanged, rather than being truncated to an unrelated
 * byte value.
 *
 * @return a titlecased version of the array where words start with an uppercase character and
 *         the remaining characters are lowercase.
 */
final BaseBytes basebytes_title() {
    Builder builder = getBuilder(size);
    boolean inWord = false; // We begin, not in a word (sequence of cased characters)
    for (int i = 0; i < size; i++) {
        char c = charAt(i);
        if (!inWord) {
            // When we are not in a word ...
            if (Character.isLowerCase(c)) {
                // ... a lowercase letter must be upcased (if the result is still a byte)
                char upper = Character.toUpperCase(c);
                if (upper <= 0xff) {
                    c = upper;
                }
                inWord = true; // and it starts a word.
            } else if (Character.isUpperCase(c)) {
                inWord = true; // ... an uppercase letter just starts the word
            }
        } else {
            // When we are in a word ...
            if (Character.isUpperCase(c)) {
                c = Character.toLowerCase(c); // ... an uppercase letter must be downcased
            } else if (!Character.isLowerCase(c)) {
                inWord = false; // ... and a non-letter ends the word
            }
        }
        // Put the adjusted character in the output as a byte
        builder.append((byte)c);
    }
    return builder.getResult();
}
/**
* Java API equivalent of Python {@code upper()}. Note that
* {@code x.upper().isupper()} might be false if the array contains uncased
* characters. The {@code BaseBytes} returned by this method has the same actual type as
* {@code this/self}. It simply delegates to {@link #basebytes_upper()}.
*
* @return a copy of the array with all the cased characters converted to uppercase.
*/
public BaseBytes upper() {
return basebytes_upper();
}
/**
 * Ready-to-expose implementation of Python {@code upper()}. The {@code BaseBytes} returned by
 * this method has the same actual type as {@code this/self}.
 * <p>
 * Bytes are treated as point codes 0..255. A lower case byte whose upper case form does not
 * fit in a byte (0xFF maps to U+0178 and 0xB5 to U+039C) is copied unchanged, rather than being
 * truncated to an unrelated byte value (0xFF used to come out as 0x78, the letter 'x').
 *
 * @return a copy of the array with all the cased characters converted to uppercase.
 */
final BaseBytes basebytes_upper() {
    Builder builder = getBuilder(size);
    for (int i = 0; i < size; i++) {
        char c = charAt(i);
        if (Character.isLowerCase(c)) {
            char upper = Character.toUpperCase(c);
            // Accept the mapping only if the result is still representable as one byte
            if (upper <= 0xff) {
                c = upper;
            }
        }
        // Put the adjusted character in the output as a byte
        builder.append((byte)c);
    }
    return builder.getResult();
}
/*
* ============================================================================================
* Java API for access as byte[]
* ============================================================================================
*
* Just the immutable case for now
*/
/**
* No range check access to byte[index]. The index is relative to the start of the data:
* {@code offset} is added to it to locate the byte within {@code storage}.
*
* @param index of the byte wanted, counted from {@code offset} in {@code storage}
* @return the byte at the given index
*/
private final synchronized byte byteAt(int index) {
return storage[index + offset];
}
/**
* Return the Python byte (in range 0 to 255 inclusive) at the given index. The index is
* validated by {@code indexCheck(index)} before the byte is read, and the signed Java byte is
* masked with 0xff to give the unsigned value.
*
* @param index of value in byte array
* @return the integer value at the index
* @throws PyException (IndexError) if the index is outside the array bounds
*/
public synchronized int intAt(int index) throws PyException {
indexCheck(index);
return 0xff & byteAt(index);
}
/**
* Return the Python byte (in range 0 to 255 inclusive) at the given index, interpreted as an
* unsigned point code, without checking the index. The byte is read from
* {@code storage[index + offset]} and masked with 0xff so that the resulting char is never
* sign-extended.
*
* @param index of value in byte array
* @return the char value at the index
* @throws IndexOutOfBoundsException if outside storage array
*/
private final char charAt(int index) throws IndexOutOfBoundsException {
return (char)(0xff & storage[index + offset]);
}
/**
 * Helper to implement {@link #repeat(int)}. Use something like:
 *
 * <pre>
 * &#064;Override
 * protected PyByteArray repeat(int count) {
 *     PyByteArray ret = new PyByteArray();
 *     ret.setStorage(repeatImpl(count));
 *     return ret;
 * }
 * </pre>
 *
 * The total length is computed in 64-bit arithmetic and checked against the largest possible
 * Java array before allocation, so that an over-large request is reported as a Python
 * MemoryError instead of wrapping around to a negative or too-small array size.
 *
 * @param count the number of times to repeat this.
 * @return this byte array repeated count times.
 * @throws PyException (MemoryError) if the result would be too large to allocate
 */
protected synchronized byte[] repeatImpl(int count) {
    if (count <= 0 || size == 0) {
        // The result is empty: no need to allocate, or to loop count times copying nothing
        return emptyStorage;
    }
    // Work out the size needed in a long so that the product cannot overflow
    long newSize = ((long)count) * size;
    if (newSize > Integer.MAX_VALUE) {
        // The (int) cast below would otherwise silently wrap to a wrong array size
        throw Py.MemoryError("repeated bytes are too long");
    }
    // Allocate new storage, in a guarded way
    byte[] dst;
    try {
        dst = new byte[(int)newSize];
    } catch (OutOfMemoryError e) {
        throw Py.MemoryError(e.getMessage());
    }
    // Now fill with the repetitions needed
    for (int i = 0, p = 0; i < count; i++, p += size) {
        System.arraycopy(storage, offset, dst, p, size);
    }
    return dst;
}
//
// str() and repr() have different behaviour (despite PEP 3137)
//
/**
 * Helper for {@code __repr__()}: append the four-character escape {@code \xhh} for the low
 * eight bits of a point code, using lower case hexadecimal digits.
 *
 * @param buf destination for characters
 * @param c current (maybe unprintable) character
 */
private static final void appendHexEscape(StringBuilder buf, int c) {
    final String digits = "0123456789abcdef";
    final char high = digits.charAt((c >>> 4) & 0xf);
    final char low = digits.charAt(c & 0xf);
    buf.append('\\').append('x').append(high).append(low);
}
/**
 * Almost ready-to-expose Python {@code __repr__()}, based on treating the bytes as point
 * codes. The value added by this method is conversion of non-printing point codes to
 * hexadecimal escapes in printable ASCII, and bracketing by the given before and after
 * strings. These are used to get the required presentation:
 *
 * <pre>
 * bytearray(b'Hello world!')
 * </pre>
 *
 * with the possibility that subclasses might choose something different.
 *
 * @param before String to insert before the quoted text (null is treated as empty)
 * @param after String to insert after the quoted text (null is treated as empty)
 * @return string representation: before + "'" + String(this) + "'" + after
 */
final synchronized String basebytes_repr(String before, String after) {
    // Safety: tolerate missing decoration
    final String prefix = (before != null) ? before : "";
    final String suffix = (after != null) ? after : "";

    // Guess how long the result might be
    final int capacity = size + (size >> 2) + prefix.length() + suffix.length() + 10;
    final StringBuilder out = new StringBuilder(capacity);
    out.append(prefix).append('\'');

    // Scan and translate the bytes of the array
    for (int k = 0; k < size; k++) {
        final int c = storage[offset + k] & 0xff;
        switch (c) {
            case '\t':
                out.append("\\t");
                break;
            case '\n':
                out.append("\\n");
                break;
            case '\r':
                out.append("\\r");
                break;
            case '\\':
            case '\'':
                // Printable, but must be escaped inside the single-quoted text
                out.append('\\').append((char)c);
                break;
            default:
                if (c < ' ' || c >= 0x7f) {
                    // Remaining controls, DEL and the high 128 are shown in hexadecimal
                    appendHexEscape(out, c);
                } else {
                    out.append((char)c);
                }
                break;
        }
    }

    out.append('\'').append(suffix);
    return out.toString();
}
/*
* ============================================================================================
* API for java.util.List<PyInteger>
* ============================================================================================
*/
/**
* Number of bytes in bytearray (or bytes) object.
*
* @see java.util.List#size()
* @return Number of bytes in byte array.
*/
@Override
public int size() {
return size;
}
/**
* Test whether this byte array holds no bytes at all.
*
* @see java.util.List#isEmpty()
* @return true if and only if the size is zero
*/
@Override
public boolean isEmpty() {
return size == 0;
}
/**
* Returns true if this list contains the specified value. More formally, returns true if and
* only if this list contains at least one integer e such that o.equals(PyInteger(e)). The test
* itself is carried out by the {@code listDelegate} view of this object.
*
* @see java.util.List#contains(java.lang.Object)
* @param o value to look for
* @return true if the delegate list reports that it contains {@code o}
*/
@Override
public boolean contains(Object o) {
return listDelegate.contains(o);
}
/*
* @see java.util.List#iterator()
*/
@Override
public Iteratorother. In
* the case where other is a PyObject, the comparison used is the
* standard Python == operation through PyObject. When
* other is not a PyObject, this object acts as a
* List<PyInteger>.
*
* @see java.util.List#equals(java.lang.Object)
*
* @param other object to compare this byte array to
* @return true if and only if this byte array is equal (in value) to
* other
*/
@Override
public boolean equals(Object other) {
    if (other instanceof PyObject) {
        // Python objects are compared by the standard route inherited from the superclass
        return super.equals(other);
    }
    // Anything else is compared as a List; null (never an instance of anything) is unequal
    return other != null && listDelegate.equals(other);
}
/**
* Hash code of this object, computed by the {@code listDelegate} view of it.
* NOTE(review): equals() uses the superclass comparison when the other object is a PyObject;
* confirm that this list-based hash is consistent with that comparison.
*
* @see java.util.List#hashCode()
* @return hash code supplied by the delegate list
*/
@Override
public int hashCode() {
return listDelegate.hashCode();
}
/**
* Return the element at the given position, as supplied by the {@code listDelegate} view of
* this object.
*
* @see java.util.List#get(int)
* @param index position of the element wanted
* @return the element at that position as a PyInteger
*/
@Override
public PyInteger get(int index) {
return listDelegate.get(index);
}
/**
* Replace the element at the given position, by delegation to the {@code listDelegate} view
* of this object.
*
* @see java.util.List#set(int, java.lang.Object)
* @param index position of the element to replace
* @param element value to store at that position
* @return whatever the delegate returns (by the List contract, the element previously there)
*/
@Override
public PyInteger set(int index, PyInteger element) {
return listDelegate.set(index, element);
}
/**
* Insert an element at the given position, by delegation to the {@code listDelegate} view of
* this object.
*
* @see java.util.List#add(int, java.lang.Object)
* @param index position at which to insert
* @param element value to insert
*/
@Override
public void add(int index, PyInteger element) {
listDelegate.add(index, element);
}
/**
* Remove the element at the given position, by delegation to the {@code listDelegate} view
* of this object.
*
* @see java.util.List#remove(int)
* @param index position of the element to remove
* @return whatever the delegate returns (by the List contract, the element removed)
*/
@Override
public PyInteger remove(int index) {
return listDelegate.remove(index);
}
/**
* Find the first position of a value, by delegation to the {@code listDelegate} view of this
* object.
*
* @see java.util.List#indexOf(java.lang.Object)
* @param o value to look for
* @return index reported by the delegate (by the List contract, -1 if not found)
*/
@Override
public int indexOf(Object o) {
return listDelegate.indexOf(o);
}
/**
* Find the last position of a value, by delegation to the {@code listDelegate} view of this
* object.
*
* @see java.util.List#lastIndexOf(java.lang.Object)
* @param o value to look for
* @return index reported by the delegate (by the List contract, -1 if not found)
*/
@Override
public int lastIndexOf(Object o) {
return listDelegate.lastIndexOf(o);
}
/*
* @see java.util.List#listIterator()
*/
@Override
public ListIteratorBuilder holds a buffer of bytes to which new bytes may be appended while
* constructing the value of byte array, even when the type ultimately constructed is immutable.
* The value it builds may be transferred (normally without copying) to a new instance of the
* type being built.
* Builder is an abstract class. The each sub-class of BaseBytes may
* define its own concrete implementation in which {@link Builder#getResult()} returns an object
* of its own type, taking its value from the Builder contents using
* {@link #getStorage()} and {@link #getSize()}. Methods in BaseBytes obtain a
* Builder by calling the abstract method {@link BaseBytes#getBuilder(int)}, which
* the sub-class also defines, to return an isnstance of its characteristic Builder
* sub-class. The subclass that uses a method from BaseBytes returning a
* BaseBytes has to cast a returned from a BaseBytes method to its proper type.
* which it can do without error, since it was responsible for its actual type.
* BaseBytes parameterised class.
*
*/
protected static abstract class Builder /* */{
/**
* Return an object of a type extending BaseBytes whose content is what we built. Each concrete
* Builder sub-class defines this to produce an object of its own characteristic type.
*
* @return the object built from the accumulated bytes
*/
abstract BaseBytes getResult();
// Internal state
private byte[] storage = emptyStorage;
private int size = 0;
/**
* Construct a builder with specified initial capacity. The builder starts with empty storage
* and zero size, and the capacity is requested by calling {@code makeRoomFor(capacity)}.
*
* @param capacity number of bytes for which room is requested at the outset
*/
Builder(int capacity) {
makeRoomFor(capacity);
}
/**
* Get an array of bytes containing the accumulated value, and clear the existing contents
* of the Builder. {@link #getCount()} returns the number of valid bytes in this array,
* which may be longer than the valid data.
* Builder. We'll be more generous than
* CPython for small array sizes to avoid needless reallocation.
*
* @param size of storage actually needed
* @return n >= size a recommended storage array size
*/
protected static final int roundUp(int size) {
    /*
     * Recommend an allocation n >= size for a byte array expected to grow: 0 for size <= 0, 16
     * for sizes 1..9, and otherwise the CPython recommendation rounded up to a multiple of 16.
     * The result is capped at Integer.MAX_VALUE so that it can never wrap around to a negative
     * number for very large sizes.
     *
     * The CPython formula is: size + (size >> 3) + (size < 9 ? 3 : 6). But when the array
     * grows, CPython can use a realloc(), which will often be able to extend the allocation
     * into memory already secretly allocated by the initial malloc(). Extension in Java means
     * that we have to allocate a new array of bytes and copy to it.
     */
    final int ALLOC = 16; // Must be power of two!
    final int SIZE2 = 10; // Smallest size leading to a return value of 2*ALLOC
    if (size >= SIZE2) { // Result > ALLOC, so work it out
        // Same recommendation as CPython, but rounded up to multiple of ALLOC. The sum is
        // formed in a long because size + size/8 exceeds the int range above about 1.9e9.
        long recommended = ((long)size + (size >> 3) + (6 + ALLOC - 1)) & ~(long)(ALLOC - 1);
        return (int)Math.min(recommended, (long)Integer.MAX_VALUE);
    } else if (size > 0) { // Easy: save arithmetic
        return ALLOC;
    } else { // Very easy
        return 0;
    }
}
/**
* Return a {@link Builder} with the given initial capacity. Every sub-class of BaseBytes
* overrides this method to return an instance of its own characteristic Builder sub-class, so
* that {@link Builder#getResult()} produces an instance of (normally) that sub-class's
* particular type from the contents. The declared types here are the plain Builder and
* BaseBytes, so callers cast the result to their own type.
*
* @param capacity of the Builder returned
* @return a Builder for the correct sub-class
*/
protected abstract Builder/* extends BaseBytes> */getBuilder(int capacity);
}