File: //usr/include/hphp/runtime/base/string-data.h
/*
+----------------------------------------------------------------------+
| HipHop for PHP |
+----------------------------------------------------------------------+
| Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
*/
#ifndef incl_HPHP_STRING_DATA_H_
#define incl_HPHP_STRING_DATA_H_
#include <folly/Range.h>
#include "hphp/util/alloc.h"
#include "hphp/util/bstring.h"
#include "hphp/util/hash.h"
#include "hphp/util/word-mem.h"
#include "hphp/runtime/base/cap-code.h"
#include "hphp/runtime/base/countable.h"
#include "hphp/runtime/base/datatype.h"
#include "hphp/runtime/base/exceptions.h"
#include "hphp/runtime/base/memory-manager.h"
#include "hphp/runtime/base/string-data-macros.h"
namespace HPHP {
//////////////////////////////////////////////////////////////////////
struct APCString;
struct Array;
struct String;
//////////////////////////////////////////////////////////////////////
// Copy the passed-in string and free the buffer immediately.
enum AttachStringMode { AttachString };
// const char* points to client-owned memory, StringData will copy it
// at construct-time using req::malloc. This is only ok when the StringData
// itself was request-allocated.
enum CopyStringMode { CopyString };
/*
* Runtime representation of PHP strings.
*
* StringData's have two different modes, not all of which we want to
* keep forever. The main mode is Flat, which means StringData is a
* header in a contiguous allocation with the character array for the
* string. The other (Proxy) is for APCString-backed StringDatas.
*
* StringDatas can also be allocated in multiple ways. Normally, they
* are created through one of the Make overloads, which drops them in
* the request-local heap. They can also be low-malloced (for static
* strings), or malloc'd (MakeMalloc) for APC shared or uncounted strings.
*
* Here's a breakdown of string modes, and which configurations are
* allowed in which allocation mode:
*
* | Static | Malloced | Normal (request local)
* +--------+----------+-----------------------
* Flat | X | X | X
* Proxy | | | X
*/
struct StringData final : MaybeCountable,
type_scan::MarkCountable<StringData> {
friend struct APCString;
friend StringData* allocFlatSmallImpl(size_t len);
friend StringData* allocFlatSlowImpl(size_t len);
/*
* Max length of a string, not counting the terminal 0.
*
* This is smaller than MAX_INT, and we want a CapCode to precisely encode it.
*/
static constexpr uint32_t MaxSize = 0x7ff00000; // 11 bits of 1's
/*
* Creates an empty request-local string with an unspecified amount of
* reserved space. Ref-count is pre-initialized to 1.
*/
static StringData* Make();
/*
* Constructors that copy the string memory into this StringData, for
* request-local strings. Ref-count is pre-initialized to 1.
*
* Most strings are created this way.
*/
static StringData* Make(folly::StringPiece);
static StringData* Make(const char* data, CopyStringMode);
static StringData* Make(const char* data, size_t len, CopyStringMode);
static StringData* Make(const StringData* s, CopyStringMode);
static StringData* Make(folly::StringPiece r1, CopyStringMode);
/*
* Attach constructors for request-local strings.
*
* These do the same thing as the above CopyStringMode constructors, except
* that it will also free `data'. Ref-count is pre-initialized to 1.
*/
static StringData* Make(char* data, AttachStringMode);
static StringData* Make(char* data, size_t len, AttachStringMode);
/*
* Create a new request-local string by concatenating two existing
* strings. Ref-count is pre-initialized to 1.
*/
static StringData* Make(const StringData* s1, const StringData* s2);
static StringData* Make(const StringData* s1, folly::StringPiece s2);
static StringData* Make(const StringData* s1, const char* lit2);
static StringData* Make(folly::StringPiece s1, const char* lit2);
static StringData* Make(folly::StringPiece s1, folly::StringPiece s2);
static StringData* Make(folly::StringPiece s1, folly::StringPiece s2,
folly::StringPiece s3);
static StringData* Make(folly::StringPiece s1, folly::StringPiece s2,
folly::StringPiece s3, folly::StringPiece s4);
/*
* Create a new request-local empty string big enough to hold strings of
* length `reserve' (not counting the \0 terminator). Ref-count is
* pre-initialized to 1.
*/
static StringData* Make(size_t reserve);
/*
* Create a request-local "Proxy" StringData that wraps an APCString.
* Ref-count is pre-initialized to 1.
*/
static StringData* MakeProxy(const APCString* apcstr);
/*
* Allocate a string with malloc, using the low-memory allocator if
* jemalloc is available, and setting it as a static string.
*
* This api is only for the static-string-table.cpp. The returned
* StringData is not expected to be reference counted, and must be
* deallocated using destructStatic.
*/
static StringData* MakeStatic(folly::StringPiece);
/*
* Same as MakeStatic but the string allocated will *not* be in the static
* string table, will not be in low-memory, and should be deleted using
* destructUncounted once the root goes out of scope.
*/
static StringData* MakeUncounted(folly::StringPiece);
/*
* Same as MakeStatic but initializes the empty string in aligned storage.
* This should be called by the static string table initialization code.
*/
static StringData* MakeEmpty();
/*
* Offset accessors for the JIT compiler.
*/
#ifndef NO_M_DATA
static constexpr ptrdiff_t dataOff() { return offsetof(StringData, m_data); }
#endif
static constexpr ptrdiff_t sizeOff() { return offsetof(StringData, m_len); }
static constexpr ptrdiff_t hashOff() { return offsetof(StringData, m_hash); }
/*
* Proxy StringData's have a sweep list running through them for
* decrefing the APCString they are fronting. This function
* must be called at request cleanup time to handle this.
*/
static unsigned sweepAll();
/*
* Called to return a StringData to the request allocator. This is
* normally called when the reference count goes to zero (e.g. with
* a helper like decRefStr).
*/
void release() noexcept;
size_t heapSize() const;
/*
* StringData objects allocated with MakeStatic should be freed
* using this function.
*/
void destructStatic();
/*
* StringData objects allocated with MakeUncounted should be freed
* using this function.
*/
void destructUncounted();
/*
* Reference-counting related.
*/
ALWAYS_INLINE void decRefAndRelease() {
assert(kindIsValid());
if (decReleaseCheck()) release();
}
bool kindIsValid() const { return m_kind == HeaderKind::String; }
/*
* Append the supplied range to this string. If there is not sufficient
* capacity in this string to contain the range, a new string may be
* returned. The new string's reference count will be pre-initialized to 1.
*
* Pre: !hasMultipleRefs()
* Pre: the string is request-local
*/
StringData* append(folly::StringPiece r);
StringData* append(folly::StringPiece r1, folly::StringPiece r2);
StringData* append(folly::StringPiece r1,
folly::StringPiece r2,
folly::StringPiece r3);
/*
* Reserve space for a string of length `maxLen' (not counting null
* terminator).
*
* May not be called for strings created with MakeUncounted or
* MakeStatic.
*
* Returns: possibly a new StringData, if we had to reallocate. The new
* string's reference count will be pre-initialized to 1.
*/
StringData* reserve(size_t maxLen);
/*
* Shrink a string down to length `len` (not counting null terminator).
*
* May not be called for strings created with MakeUncounted or
* MakeStatic.
*
* Returns: possibly a new StringData, if we decided to reallocate. The new
* string's reference count is be pre-initialized to 1. shrinkImpl
* always returns a new StringData.
*/
StringData* shrink(size_t len);
StringData* shrinkImpl(size_t len);
/*
* Returns a slice with extents sized to the *string* that this
* StringData wraps. This range does not include a null terminator.
*
* Note: please do not add new code that assumes the range does
* include a null-terminator if possible. (We would like to make
* this unnecessary eventually.)
*/
folly::StringPiece slice() const;
/*
* Returns a mutable slice with extents sized to the *buffer* this
* StringData wraps, not the string, minus space for an implicit
* null terminator.
*
* Note: please do not introduce new uses of this API that write
* nulls 1 byte past slice.len---we want to weed those out.
*/
folly::MutableStringPiece bufferSlice();
/*
* If external users of this object want to modify it (e.g. through
* bufferSlice or mutableData()), they are responsible for either
* calling setSize() if the mutation changed the size of the string,
* or invalidateHash() if not.
*
* Pre: !hasMultipleRefs()
*/
void invalidateHash();
void setSize(int len);
/*
* StringData should not generally be allocated on the stack,
* because references to it could escape. This function is for
* debugging: it asserts that the addres of this doesn't point into
* the C++ stack.
*/
void checkStack() const;
/*
* Access to the string's data as a character array.
*
* Please try to prefer slice() in new code, instead of assuming
* this is null terminated.
*/
const char* data() const;
/*
* Mutable version of data().
*/
char* mutableData() const;
/*
* Accessor for the length of a string.
*
* Note: size() returns a signed int for historical reasons. It is
* guaranteed to be in the range (0 <= size() <= MaxSize)
*/
int size() const;
/*
* Returns: size() == 0
*/
bool empty() const;
/*
* Return the capacity of this string's buffer, not including the space
* for the null terminator.
*/
uint32_t capacity() const;
/*
* Simultaneously query whether this string is numeric, and pull out
* the numeric value of the string (as either an int or a double).
*
* The allow_errors flag is a boolean that does something currently
* undocumented.
*
* If overflow is set its value is initialized to either zero to
* indicate that no overflow occurred or 1/-1 to inidicate the direction
* of overflow.
*
* Returns: KindOfNull, KindOfInt64 or KindOfDouble. The int64_t or
* double out reference params are populated in the latter two cases
* with the numeric value of the string. The KindOfNull case
* indicates the string is not numeric.
*/
DataType isNumericWithVal(int64_t&, double&, int allowErrors,
int* overflow = nullptr) const;
/*
* Returns true if this string is numeric.
*
* In effect: isNumericWithVal(i, d, false) != KindOfNull
*/
bool isNumeric() const;
/*
* Returns whether this string is numeric and an integer.
*
* In effect: isNumericWithVal(i, d, false) == KindOfInt64
*/
bool isInteger() const;
/*
* Returns true if this string is "strictly" an integer in the sense
* of is_strictly_integer from util/hash.h, and if so provides the
* integer value in res.
*/
bool isStrictlyInteger(int64_t& res) const;
/*
* Returns whether this string contains a single character '0'.
*/
bool isZero() const;
/*
* Change the character at offset `offset' to `c'.
*
* May return a reallocated StringData* if this string was a shared
* string. The new string's reference count is pre-initialized to 1.
*
* Pre: offset >= 0 && offset < size()
* !hasMultipleRefs()
* string must be request local
*/
StringData* modifyChar(int offset, char c);
/*
* Return a string containing the character at `offset', if it is in
* range. Otherwise raises a warning and returns an empty string.
*
* All return values are guaranteed to be static strings.
*/
StringData* getChar(int offset) const;
/*
* Increment this string in the manner of php's ++ operator. May return a new
* string if it had to resize. The new string's reference count is
* pre-initialized to 1.
*
* Pre: !isStatic() && !isEmpty()
* string must be request local
*/
StringData* increment();
/*
* Type conversion functions.
*/
bool toBoolean() const;
char toByte(int base = 10) const { return toInt64(base); }
short toInt16(int base = 10) const { return toInt64(base); }
int toInt32(int base = 10) const { return toInt64(base); }
int64_t toInt64(int base = 10) const;
double toDouble() const;
DataType toNumeric(int64_t& lval, double& dval) const;
std::string toCppString() const;
/*
* Returns: case insensitive hash value for this string.
*/
strhash_t hash() const;
NEVER_INLINE strhash_t hashHelper() const;
/*
* Equality comparison, in the sense of php's string == operator.
* (I.e. numeric strings are compared numerically.)
*/
bool equal(const StringData* s) const;
/*
* Exact comparison, in the sense of php's string === operator.
* (Exact, case-sensitive comparison.)
*/
bool same(const StringData* s) const;
/*
* Case-insensitive exact string comparison. (Numeric strings are
* not treated specially.)
*/
bool isame(const StringData* s) const;
/*
* Implements comparison in the sense of php's operator < on
* strings. (I.e. this compares numeric strings numerically, and
* other strings lexicographically.)
*
* Returns: a number less than zero if *this is less than *v2,
* greater than zero if *this is greater than *v2, or zero if
* this->equal(v2).
*/
int compare(const StringData* v2) const;
/*
* Debug dumping of a StringData to stdout.
*/
void dump() const;
static StringData* node2str(StringDataNode* node) {
return reinterpret_cast<StringData*>(
uintptr_t(node) - offsetof(Proxy, node)
- sizeof(StringData)
);
}
#ifdef NO_M_DATA
static constexpr bool isProxy() { return false; }
#else
bool isProxy() const;
#endif
bool isImmutable() const;
bool checkSane() const;
private:
struct Proxy {
StringDataNode node;
const APCString* apcstr;
};
private:
template<bool trueStatic>
static StringData* MakeShared(folly::StringPiece sl);
static StringData* MakeProxySlowPath(const APCString*);
StringData(const StringData&) = delete;
StringData& operator=(const StringData&) = delete;
~StringData() = delete;
private:
const void* payload() const;
void* payload();
const Proxy* proxy() const;
Proxy* proxy();
#ifdef NO_M_DATA
static constexpr bool isFlat() { return true; }
#else
bool isFlat() const;
#endif
void releaseDataSlowPath();
int numericCompare(const StringData *v2) const;
StringData* escalate(size_t cap);
void enlist();
void delist();
void incrementHelper();
void preCompute();
// We have the next fields blocked into qword-size unions so
// StringData initialization can do fewer stores to initialize the
// fields. (gcc does not combine the stores itself.)
private:
#ifndef NO_M_DATA
// TODO(5601154): Add KindOfApcString and remove StringData m_data field.
char* m_data;
#endif
union {
struct {
uint32_t m_len;
mutable int32_t m_hash; // precomputed for persistent strings
};
uint64_t m_lenAndHash;
};
};
//////////////////////////////////////////////////////////////////////
/*
* A reasonable length to reserve for small strings. This is the
* default reserve size for StringData::Make(), also.
*/
constexpr uint32_t SmallStringReserve = 64 - sizeof(StringData) - 1;
/*
* DecRef a string s, calling release if its reference count goes to
* zero.
*/
void decRefStr(StringData* s);
//////////////////////////////////////////////////////////////////////
/*
* Function objects the forward to the StringData member functions of
* the same name.
*/
struct string_data_hash;
struct string_data_same;
struct string_data_isame;
struct string_data_lt;
struct string_data_lti;
//////////////////////////////////////////////////////////////////////
extern std::aligned_storage<
sizeof(StringData) + 1,
alignof(StringData)
>::type s_theEmptyString;
/*
* Return the "static empty string". This is a singleton StaticString
* that can be used to return a StaticString for the empty string in
* as lightweight a manner as possible.
*/
ALWAYS_INLINE StringData* staticEmptyString() {
void* vp = &s_theEmptyString;
return static_cast<StringData*>(vp);
}
//////////////////////////////////////////////////////////////////////
}
namespace folly {
template<> class FormatValue<const HPHP::StringData*> {
public:
explicit FormatValue(const HPHP::StringData* str) : m_val(str) {}
template<typename Callback>
void format(FormatArg& arg, Callback& cb) const {
auto piece = folly::StringPiece(m_val->data(), m_val->size());
format_value::formatString(piece, arg, cb);
}
private:
const HPHP::StringData* m_val;
};
template<> class FormatValue<HPHP::StringData*> {
public:
explicit FormatValue(const HPHP::StringData* str) : m_val(str) {}
template<typename Callback>
void format(FormatArg& arg, Callback& cb) const {
FormatValue<const HPHP::StringData*>(m_val).format(arg, cb);
}
private:
const HPHP::StringData* m_val;
};
}
#include "hphp/runtime/base/string-data-inl.h"
#endif