added 2 changesets to branch 'refs/remotes/HaikuPM-github/package-management' old head: 0361a1da73a964be9d432cab0e123a51b37a3b34 new head: 0e9ec703dd88ade7b1adc8d28a3bec4484bcd63c overview: https://github.com/haiku/HaikuPM/compare/0361a1d...0e9ec70 ---------------------------------------------------------------------------- 276c321: BPackageInfo::Parser::_ParseFlags(): fix indentation 0e9ec70: package info parser: improve string parsing * Instead of two string token types (TOKEN_WORD, TOKEN_QUOTED_STRING), there's now only one (TOKEN_STRING). Whether the string meets the criteria is checked where needed. In most cases the check was already done or not necessary anyway. * Strings can now consist of an arbitrary sequence of quoted and unquoted strings and escaping is also supported in unquoted string segments. * Among other things this fixes incorrect restrictions for resolvable names and should also make quoting paths superfluous (unless they contain separator characters). [ Ingo Weinhold <ingo_weinhold@xxxxxx> ] ---------------------------------------------------------------------------- 2 files changed, 137 insertions(+), 122 deletions(-) src/kits/package/PackageInfoParser.cpp | 214 +++++++++++++++++------------ src/kits/package/PackageInfoParser.h | 45 ++---- ############################################################################ Commit: 276c321bcdd0a579d3584198b1b41d95afc8275b Author: Ingo Weinhold <ingo_weinhold@xxxxxx> Date: Tue Jul 9 18:24:57 2013 UTC BPackageInfo::Parser::_ParseFlags(): fix indentation ---------------------------------------------------------------------------- diff --git a/src/kits/package/PackageInfoParser.cpp b/src/kits/package/PackageInfoParser.cpp index 02ffe0e..7f15e8f 100644 --- a/src/kits/package/PackageInfoParser.cpp +++ b/src/kits/package/PackageInfoParser.cpp @@ -443,19 +443,19 @@ BPackageInfo::Parser::_ParseFlags() virtual void operator()(const Token& token) { - if (token.type != TOKEN_WORD) - throw ParseError("expected word (a flag)", token.pos); - - if (token.text.ICompare("approve_license") == 0) - flags |= B_PACKAGE_FLAG_APPROVE_LICENSE; - else if (token.text.ICompare("system_package") == 0) - flags |= B_PACKAGE_FLAG_SYSTEM_PACKAGE; - else { + if (token.type != TOKEN_WORD) + throw ParseError("expected word (a flag)", token.pos); + + if (token.text.ICompare("approve_license") == 0) + flags |= B_PACKAGE_FLAG_APPROVE_LICENSE; + else if (token.text.ICompare("system_package") == 0) + flags |= B_PACKAGE_FLAG_SYSTEM_PACKAGE; + else { throw ParseError( "expected 'approve_license' or 'system_package'", - token.pos); + token.pos); + } } - } } flagParser; _ParseList(flagParser, true); ############################################################################ Commit: 0e9ec703dd88ade7b1adc8d28a3bec4484bcd63c Author: Ingo Weinhold <ingo_weinhold@xxxxxx> Date: Tue Jul 9 19:41:45 2013 UTC package info parser: improve string parsing * Instead of two string token types (TOKEN_WORD, TOKEN_QUOTED_STRING), there's now only one (TOKEN_STRING). Whether the string meets the criteria is checked where needed. In most cases the check was already done or not necessary anyway. * Strings can now consist of an arbitrary sequence of quoted and unquoted strings and escaping is also supported in unquoted string segments. * Among other things this fixes incorrect restrictions for resolvable names and should also make quoting paths superfluous (unless they contain separator characters). ---------------------------------------------------------------------------- diff --git a/src/kits/package/PackageInfoParser.cpp b/src/kits/package/PackageInfoParser.cpp index 7f15e8f..377e8e8 100644 --- a/src/kits/package/PackageInfoParser.cpp +++ b/src/kits/package/PackageInfoParser.cpp @@ -10,6 +10,7 @@ #include <stdint.h> #include <algorithm> +#include <string> namespace BPackageKit { @@ -84,7 +85,7 @@ BPackageInfo::Parser::ParseVersion(const BString& versionString, fPos = versionString.String(); try { - Token token(TOKEN_WORD, fPos, versionString.Length()); + Token token(TOKEN_STRING, fPos, versionString.Length()); _ParseVersionValue(token, &_version, revisionIsOptional); } catch (const ParseError& error) { if (fListener != NULL) { @@ -175,38 +176,76 @@ BPackageInfo::Parser::_NextToken() } return Token(TOKEN_OPERATOR_GREATER, tokenPos, 1); - case '"': - case '\'': - { - char quoteChar = *fPos; - fPos++; - const char* start = fPos; - // anything until the next quote is part of the value - bool lastWasEscape = false; - while ((*fPos != quoteChar || lastWasEscape) && *fPos != '\0') { - if (lastWasEscape) - lastWasEscape = false; - else if (*fPos == '\\') - lastWasEscape = true; - fPos++; - } - if (*fPos != quoteChar) - throw ParseError("unterminated quoted-string", tokenPos); - const char* end = fPos++; - return Token(TOKEN_QUOTED_STRING, start, end - start); - } - default: { - const char* start = fPos; - while (isalnum(*fPos) || *fPos == '.' || *fPos == '-' - || *fPos == '_' || *fPos == ':' || *fPos == '+' - || *fPos == '~') { - fPos++; + std::string string; + char quoteChar = '\0'; + + for (; *fPos != '\0'; fPos++) { + char c = *fPos; + if (quoteChar != '\0') { + // within a quoted string segment + if (c == quoteChar) { + quoteChar = '\0'; + continue; + } + + if (c == '\\') { + // next char is escaped + c = *++fPos; + if (c == '\0') { + throw ParseError("unterminated quoted-string", + tokenPos); + } + + if (c == 'n') + c = '\n'; + else if (c == 't') + c = '\t'; + } + + string += c; + } else { + // unquoted string segment + switch (c) { + case '"': + case '\'': + // quoted string start + quoteChar = c; + continue; + + case '{': + case '}': + case '<': + case '=': + case '!': + case '>': + // a separator character -- this ends the string + break; + + case '\\': + // next char is escaped + c = *++fPos; + if (c == '\0') { + throw ParseError("'\\' at end of string", + tokenPos); + } + string += c; + continue; + + default: + if (isspace(c)) + break; + string += c; + continue; + } + + break; + } } - if (fPos == start) - break; - return Token(TOKEN_WORD, start, fPos - start); + + return Token(TOKEN_STRING, tokenPos, fPos - tokenPos, + string.c_str()); } } @@ -226,8 +265,8 @@ void BPackageInfo::Parser::_ParseStringValue(BString* value, const char** _tokenPos) { Token string = _NextToken(); - if (string.type != TOKEN_QUOTED_STRING && string.type != TOKEN_WORD) - throw ParseError("expected quoted-string or word", string.pos); + if (string.type != TOKEN_STRING) + throw ParseError("expected string", string.pos); *value = string.text; if (_tokenPos != NULL) @@ -239,7 +278,7 @@ void BPackageInfo::Parser::_ParseArchitectureValue(BPackageArchitecture* value) { Token arch = _NextToken(); - if (arch.type == TOKEN_WORD) { + if (arch.type == TOKEN_STRING) { for (int i = 0; i < B_PACKAGE_ARCHITECTURE_ENUM_COUNT; ++i) { if (arch.text.ICompare(BPackageInfo::kArchitectureNames[i]) == 0) { *value = (BPackageArchitecture)i; @@ -272,8 +311,8 @@ BPackageInfo::Parser::_ParseVersionValue(BPackageVersion* value, BPackageInfo::Parser::_ParseVersionValue(Token& word, BPackageVersion* value, bool revisionIsOptional) { - if (word.type != TOKEN_WORD) - throw ParseError("expected word (a version)", word.pos); + if (word.type != TOKEN_STRING) + throw ParseError("expected string (a version)", word.pos); // get the revision number uint32 revision = 0; @@ -388,33 +427,33 @@ BPackageInfo::Parser::_ParseList(ListElementParser& elementParser, void BPackageInfo::Parser::_ParseStringList(BStringList* value, - bool allowQuotedStrings, bool convertToLowerCase) + bool requireResolvableName, bool convertToLowerCase) { struct StringParser : public ListElementParser { BStringList* value; - bool allowQuotedStrings; + bool requireResolvableName; bool convertToLowerCase; - StringParser(BStringList* value, bool allowQuotedStrings, + StringParser(BStringList* value, bool requireResolvableName, bool convertToLowerCase) : value(value), - allowQuotedStrings(allowQuotedStrings), + requireResolvableName(requireResolvableName), convertToLowerCase(convertToLowerCase) { } virtual void operator()(const Token& token) { - if (allowQuotedStrings) { - if (token.type != TOKEN_QUOTED_STRING - && token.type != TOKEN_WORD) { - throw ParseError("expected quoted-string or word", - token.pos); + if (token.type != TOKEN_STRING) + throw ParseError("expected string", token.pos); + + if (requireResolvableName) { + int32 errorPos; + if (!_IsValidResolvableName(token.text, &errorPos)) { + throw ParseError("invalid character in resolvable name", + token.pos + errorPos); } - } else { - if (token.type != TOKEN_WORD) - throw ParseError("expected word", token.pos); } BString element(token.text); @@ -423,7 +462,7 @@ BPackageInfo::Parser::_ParseStringList(BStringList* value, value->Add(element); } - } stringParser(value, allowQuotedStrings, convertToLowerCase); + } stringParser(value, requireResolvableName, convertToLowerCase); _ParseList(stringParser, true); } @@ -443,7 +482,7 @@ BPackageInfo::Parser::_ParseFlags() virtual void operator()(const Token& token) { - if (token.type != TOKEN_WORD) + if (token.type != TOKEN_STRING) throw ParseError("expected word (a flag)", token.pos); if (token.text.ICompare("approve_license") == 0) @@ -482,7 +521,7 @@ BPackageInfo::Parser::_ParseResolvableList( virtual void operator()(const Token& token) { - if (token.type != TOKEN_WORD) { + if (token.type != TOKEN_STRING) { throw ParseError("expected word (a resolvable name)", token.pos); } @@ -507,7 +546,7 @@ BPackageInfo::Parser::_ParseResolvableList( // parse compatible version BPackageVersion compatibleVersion; Token compatible = parser._NextToken(); - if (compatible.type == TOKEN_WORD + if (compatible.type == TOKEN_STRING && (compatible.text == "compat" || compatible.text == "compatible")) { op = parser._NextToken(); @@ -548,7 +587,7 @@ BPackageInfo::Parser::_ParseResolvableExprList( virtual void operator()(const Token& token) { - if (token.type != TOKEN_WORD) { + if (token.type != TOKEN_STRING) { throw ParseError("expected word (a resolvable name)", token.pos); } @@ -571,7 +610,7 @@ BPackageInfo::Parser::_ParseResolvableExprList( if (basePackage != NULL) { Token base = parser._NextToken(); - if (base.type == TOKEN_WORD && base.text == "base") { + if (base.type == TOKEN_STRING && base.text == "base") { if (!basePackage->IsEmpty()) { throw ParseError( "multiple packages marked as base package", @@ -621,8 +660,8 @@ BPackageInfo::Parser::_ParseGlobalWritableFileInfos( virtual void operator()(const Token& token) { - if (token.type != TOKEN_WORD && token.type != TOKEN_QUOTED_STRING) { - throw ParseError("expected string (a settings file path)", + if (token.type != TOKEN_STRING) { + throw ParseError("expected string (a file path)", token.pos); } @@ -631,12 +670,13 @@ BPackageInfo::Parser::_ParseGlobalWritableFileInfos( bool isDirectory = false; Token nextToken = parser._NextToken(); - if (nextToken.type == TOKEN_WORD && nextToken.text == "directory") { + if (nextToken.type == TOKEN_STRING + && nextToken.text == "directory") { isDirectory = true; nextToken = parser._NextToken(); } - if (nextToken.type == TOKEN_WORD) { + if (nextToken.type == TOKEN_STRING) { const char* const* end = kWritableFileUpdateTypes + B_WRITABLE_FILE_UPDATE_TYPE_ENUM_COUNT; const char* const* found = std::find(kWritableFileUpdateTypes, @@ -685,7 +725,7 @@ BPackageInfo::Parser::_ParseUserSettingsFileInfos( virtual void operator()(const Token& token) { - if (token.type != TOKEN_WORD && token.type != TOKEN_QUOTED_STRING) { + if (token.type != TOKEN_STRING) { throw ParseError("expected string (a settings file path)", token.pos); } @@ -694,13 +734,13 @@ BPackageInfo::Parser::_ParseUserSettingsFileInfos( bool isDirectory = false; Token nextToken = parser._NextToken(); - if (nextToken.type == TOKEN_WORD && nextToken.text == "directory") { + if (nextToken.type == TOKEN_STRING + && nextToken.text == "directory") { isDirectory = true; - } else if (nextToken.type == TOKEN_WORD + } else if (nextToken.type == TOKEN_STRING && nextToken.text == "template") { nextToken = parser._NextToken(); - if (nextToken.type != TOKEN_WORD - && nextToken.type != TOKEN_QUOTED_STRING) { + if (nextToken.type != TOKEN_STRING) { throw ParseError( "expected string (a settings template file path)", nextToken.pos); @@ -744,9 +784,9 @@ BPackageInfo::Parser::_ParseUsers(UserList* users) virtual void operator()(const Token& token) { - if (token.type != TOKEN_WORD) { - throw ParseError("expected a user name", - token.pos); + if (token.type != TOKEN_STRING + || !BUser::IsValidUserName(token.text)) { + throw ParseError("expected a user name", token.pos); } BString realName; @@ -756,31 +796,28 @@ BPackageInfo::Parser::_ParseUsers(UserList* users) for (;;) { Token nextToken = parser._NextToken(); - if (nextToken.type != TOKEN_WORD) { + if (nextToken.type != TOKEN_STRING) { parser._RewindTo(nextToken); break; } if (nextToken.text == "real-name") { nextToken = parser._NextToken(); - if (nextToken.type != TOKEN_WORD - && nextToken.type != TOKEN_QUOTED_STRING) { + if (nextToken.type != TOKEN_STRING) { throw ParseError("expected string (a user real name)", nextToken.pos); } realName = nextToken.text; } else if (nextToken.text == "home") { nextToken = parser._NextToken(); - if (nextToken.type != TOKEN_WORD - && nextToken.type != TOKEN_QUOTED_STRING) { + if (nextToken.type != TOKEN_STRING) { throw ParseError("expected string (a home path)", nextToken.pos); } home = nextToken.text; } else if (nextToken.text == "shell") { nextToken = parser._NextToken(); - if (nextToken.type != TOKEN_WORD - && nextToken.type != TOKEN_QUOTED_STRING) { + if (nextToken.type != TOKEN_STRING) { throw ParseError("expected string (a shell path)", nextToken.pos); } @@ -788,7 +825,8 @@ BPackageInfo::Parser::_ParseUsers(UserList* users) } else if (nextToken.text == "groups") { for (;;) { nextToken = parser._NextToken(); - if (nextToken.type == TOKEN_WORD) { + if (nextToken.type == TOKEN_STRING + && BUser::IsValidUserName(nextToken.text)) { if (!groups.Add(nextToken.text)) throw std::bad_alloc(); } else if (nextToken.type == TOKEN_ITEM_SEPARATOR @@ -811,10 +849,10 @@ BPackageInfo::Parser::_ParseUsers(UserList* users) BString templatePath; Token nextToken = parser._NextToken(); - if (nextToken.type == TOKEN_WORD && nextToken.text == "template") { + if (nextToken.type == TOKEN_STRING + && nextToken.text == "template") { nextToken = parser._NextToken(); - if (nextToken.type != TOKEN_WORD - && nextToken.type != TOKEN_QUOTED_STRING) { + if (nextToken.type != TOKEN_STRING) { throw ParseError( "expected string (a settings template file path)", nextToken.pos); @@ -853,8 +891,8 @@ BPackageInfo::Parser::_Parse(BPackageInfo* packageInfo) if (t.type == TOKEN_ITEM_SEPARATOR) continue; - if (t.type != TOKEN_WORD) - throw ParseError("expected word (a variable name)", t.pos); + if (t.type != TOKEN_STRING) + throw ParseError("expected string (a variable name)", t.pos); BPackageInfoAttributeID attribute = B_PACKAGE_INFO_ENUM_COUNT; for (int i = 0; i < B_PACKAGE_INFO_ENUM_COUNT; i++) { @@ -986,7 +1024,7 @@ BPackageInfo::Parser::_Parse(BPackageInfo* packageInfo) break; case B_PACKAGE_INFO_REPLACES: - _ParseStringList(&packageInfo->fReplacesList, false, true); + _ParseStringList(&packageInfo->fReplacesList, true); break; case B_PACKAGE_INFO_FLAGS: diff --git a/src/kits/package/PackageInfoParser.h b/src/kits/package/PackageInfoParser.h index 3c94b00..613ec68 100644 --- a/src/kits/package/PackageInfoParser.h +++ b/src/kits/package/PackageInfoParser.h @@ -34,8 +34,7 @@ private: friend struct ListElementParser; enum TokenType { - TOKEN_WORD, - TOKEN_QUOTED_STRING, + TOKEN_STRING, TOKEN_OPERATOR_ASSIGN, TOKEN_OPERATOR_LESS, TOKEN_OPERATOR_LESS_EQUAL, @@ -67,7 +66,7 @@ private: void _ParseList(ListElementParser& elementParser, bool allowSingleNonListElement); void _ParseStringList(BStringList* value, - bool allowQuotedStrings = true, + bool requireResolvableName = false, bool convertToLowerCase = false); void _ParseResolvableList( BObjectList<BPackageResolvable>* value); @@ -118,38 +117,16 @@ struct BPackageInfo::Parser::Token { BString text; const char* pos; - Token(TokenType _type, const char* _pos, int length = 0) - : type(_type), pos(_pos) + Token(TokenType _type, const char* _pos, int length = 0, + const char* text = NULL) + : + type(_type), + pos(_pos) { - if (length != 0) { - text.SetTo(pos, length); - - if (type == TOKEN_QUOTED_STRING) { - // unescape value of quoted string - char* value = text.LockBuffer(length); - if (value == NULL) - return; - int index = 0; - int newIndex = 0; - bool lastWasEscape = false; - while (char c = value[index++]) { - if (lastWasEscape) { - lastWasEscape = false; - // map \n to newline and \t to tab - if (c == 'n') - c = '\n'; - else if (c == 't') - c = '\t'; - } else if (c == '\\') { - lastWasEscape = true; - continue; - } - value[newIndex++] = c; - } - value[newIndex] = '\0'; - text.UnlockBuffer(newIndex); - } - } + if (text != NULL) + this->text = text; + else if (length != 0) + this->text.SetTo(pos, length); } operator bool() const