Skip to content

Commit

Permalink
QUrl/QDir: revert all recent changes to path normalization
Browse files Browse the repository at this point in the history
This amends commit c023016 to revert
all of the following commits:
214fbc6
492c646
804b099
e457668
a1610c6

The reason for this is that they introduce a behavior change in QUrl,
causing the normalization/resolution of file:// directory paths not to
end in "/" and this behavior change revealed buggy code in QML that
concatenated strings expecting the slash to be there. Commit
c023016 attempted to do that too, but it left QUrl in a state
that introduced a regression from 6.7.

Discussion on whether we will accept this behavior change (and require
fixes to buggy code) or keep the old behavior to avoid breakages will
happen for 6.8.x.

This commit reintroduces bug QTBUG-120396 to 6.8.0.

Task-number: QTBUG-120396
Fixes: QTBUG-128940
Change-Id: Id78899df1cea4321d9e1fffdcc0f406bbfd534b8
Reviewed-by: Jani Heikkinen <[email protected]>
Reviewed-by: Ulf Hermann <[email protected]>
  • Loading branch information
thiagomacieira committed Sep 19, 2024
1 parent c023016 commit e8122a0
Show file tree
Hide file tree
Showing 5 changed files with 203 additions and 376 deletions.
279 changes: 137 additions & 142 deletions src/corelib/io/qdir.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2199,173 +2199,168 @@ bool QDir::match(const QString &filter, const QString &fileName)

/*!
\internal
Updates \a path with redundant directory separators removed, and "."s and
".."s resolved (as far as possible). It returns \c false if there were ".."
segments left over, attempt to go up past the root (only applies to
absolute paths), or \c true otherwise.
Returns \a path with redundant directory separators removed,
and "."s and ".."s resolved (as far as possible).
This method is shared with QUrl, so it doesn't deal with QDir::separator(),
nor does it remove the trailing slash, if any.
When dealing with URLs, we are following section 5.2.4 (Remove dot
segments) from http://www.ietf.org/rfc/rfc3986.txt. URL mode differs from
from local path mode in these ways:
1) it can set *path to empty ("." becomes "")
2) directory path outputs end in / ("a/.." becomes "a/" instead of "a")
3) a sequence of "//" is treated as multiple path levels ("a/b//.." becomes
"a/b/" and "a/b//../.." becomes "a/"), which matches the behavior
observed in web browsers.
*/
bool qt_normalizePathSegments(QString *path, QDirPrivate::PathNormalizations flags)
QString qt_normalizePathSegments(const QString &name, QDirPrivate::PathNormalizations flags, bool *ok)
{
const bool allowUncPaths = flags.testAnyFlag(QDirPrivate::AllowUncPaths);
const bool isRemote = flags.testAnyFlag(QDirPrivate::RemotePath);
const qsizetype prefixLength = rootLength(*path, allowUncPaths);

// RFC 3986 says: "The input buffer is initialized with the now-appended
// path components and the output buffer is initialized to the empty
// string."
const QChar *in = path->constBegin();

// Scan the input for a "." or ".." segment. If there isn't any, we may not
// need to modify this path at all. Also scan for "//" segments, which
// will be normalized if the path is local.
qsizetype i = prefixLength;
qsizetype n = path->size();
for (bool lastWasSlash = true; i < n; ++i) {
if (lastWasSlash && in[i] == u'.') {
if (i + 1 == n || in[i + 1] == u'/')
break;
if (in[i + 1] == u'.' && (i + 2 == n || in[i + 2] == u'/'))
break;
}
if (!isRemote && lastWasSlash && in[i] == u'/' && i > 0) {
// backtrack one, so the algorithm below gobbles up the remaining
// slashes
--i;
break;
}
lastWasSlash = in[i] == u'/';
const qsizetype len = name.size();

if (ok)
*ok = false;

if (len == 0)
return name;

qsizetype i = len - 1;
QVarLengthArray<char16_t> outVector(len);
qsizetype used = len;
char16_t *out = outVector.data();
const char16_t *p = reinterpret_cast<const char16_t *>(name.data());
const char16_t *prefix = p;
qsizetype up = 0;

const qsizetype prefixLength = rootLength(name, allowUncPaths);
p += prefixLength;
i -= prefixLength;

// replicate trailing slash (i > 0 checks for emptiness of input string p)
// except for remote paths because there can be /../ or /./ ending
if (i > 0 && p[i] == '/' && !isRemote) {
out[--used] = '/';
--i;
}
if (i == n)
return true;

QChar *out = path->data(); // detaches
const QChar *start = out + prefixLength;
const QChar *end = out + path->size();
out += i;
in = out;

// We implement a modified algorithm compared to RFC 3986, for efficiency.
bool ok = true;
do {
#if 0 // to see in the debugger
QString output = QStringView(path->constBegin(), out).toString();
QStringView input(in, end);
#endif
auto isDot = [](const char16_t *p, qsizetype i) {
return i > 1 && p[i - 1] == '.' && p[i - 2] == '/';
};
auto isDotDot = [](const char16_t *p, qsizetype i) {
return i > 2 && p[i - 1] == '.' && p[i - 2] == '.' && p[i - 3] == '/';
};

// First, copy the preceding slashes, so we can look at the segment's
// content. If the path is part of a URL, we copy all slashes, otherwise
// just one.
if (in[0] == u'/') {
*out++ = *in++;
while (in < end && in[0] == u'/') {
if (isRemote)
*out++ = *in++;
else
++in;

// Note: we may exit this loop with in == end, in which case we
// *shouldn't* dereference *in. But since we are pointing to a
// detached, non-empty QString, we know there's a u'\0' at the
// end, so dereferencing is safe.
while (i >= 0) {
// copy trailing slashes for remote urls
if (p[i] == '/') {
if (isRemote && !up) {
if (isDot(p, i)) {
i -= 2;
continue;
}
out[--used] = p[i];
}

--i;
continue;
}

// Is this path segment either "." or ".."?
enum { Nothing, Dot, DotDot } type = Nothing;
if (in[0] == u'.') {
if (in + 1 == end || in[1] == u'/')
type = Dot;
else if (in[1] == u'.' && (in + 2 == end || in[2] == u'/'))
type = DotDot;
// remove current directory
if (p[i] == '.' && (i == 0 || p[i-1] == '/')) {
--i;
continue;
}
if (type == Nothing) {
// If it is neither, then we copy this segment.
while (in < end && in[0] != u'/')
*out++ = *in++;

// detect up dir
if (i >= 1 && p[i] == '.' && p[i-1] == '.' && (i < 2 || p[i - 2] == '/')) {
++up;
i -= i >= 2 ? 3 : 2;

if (isRemote) {
// moving up should consider empty path segments too (/path//../ -> /path/)
while (i > 0 && up && p[i] == '/') {
--up;
--i;
}
}
continue;
}

// Otherwise, we skip it and remove preceding slashes (if
// any, exactly one if part of a URL, all otherwise) from the
// output. If it is "..", we remove the segment before that and
// preceding slashes too in a similar fashion, if they are there.
if (type == DotDot) {
if (Q_UNLIKELY(out == start)) {
// we can't go further up from here, so we "re-root"
// without cleaning this segment
ok = false;
if (!isRemote) {
*out++ = u'.';
*out++ = u'.';
if (in + 2 != end) {
Q_ASSERT(in[2] == u'/');
*out++ = u'/';
++in;
// prepend a slash before copying when not empty
if (!up && used != len && out[used] != '/')
out[--used] = '/';

// skip or copy
while (i >= 0) {
if (p[i] == '/') {
// copy all slashes as is for remote urls if they are not part of /./ or /../
if (isRemote && !up) {
while (i > 0 && p[i] == '/' && !isDotDot(p, i)) {

if (isDot(p, i)) {
i -= 2;
continue;
}

out[--used] = p[i];
--i;
}
start = out;
in += 2;
continue;

// in case of /./, jump over
if (isDot(p, i))
i -= 2;

break;
}

--i;
break;
}
while (out > start && *--out != u'/')
;
while (!isRemote && out > start && out[-1] == u'/')
--out;
while (out > start && out[-1] != u'/')
--out;
in += 2; // the two dots
} else {
++in; // the one dot
}

if (out > start) {
// backtrack one or all the slashes (so "/tmp///" -> "/tmp/")
if (out[-1] == u'/' && in != end)
--out;
while (!isRemote && out > start && out[-1] == u'/')
--out;
}
if (out == start) {
// We've reached the root. Make sure we don't turn a relative path
// to absolute or, in the case of local paths that are already
// absolute, into UNC.
// Note: this will turn ".//a" into "a" even for URLs!
if (in != end && in[0] == u'/')
++in;
while (prefixLength == 0 && in != end && in[0] == u'/')
++in;
// actual copy
if (!up)
out[--used] = p[i];
--i;
}
} while (in < end);

path->truncate(out - path->constBegin());
if (!isRemote && path->isEmpty())
*path = u"."_s;

// we return false only if the path was absolute
return ok || prefixLength == 0;
}
// decrement up after copying/skipping
if (up)
--up;
}

QString qt_normalizePathSegments(const QString &name, QDirPrivate::PathNormalizations flags, bool *ok)
{
// temporary compat
QString copy = name;
bool r = qt_normalizePathSegments(&copy, flags);
// Indicate failure when ".." are left over for an absolute path.
if (ok)
*ok = r;
return copy;
*ok = prefixLength == 0 || up == 0;

// add remaining '..'
while (up && !isRemote) {
if (used != len && out[used] != '/') // is not empty and there isn't already a '/'
out[--used] = '/';
out[--used] = '.';
out[--used] = '.';
--up;
}

bool isEmpty = used == len;

if (prefixLength) {
if (!isEmpty && out[used] == '/') {
// Even though there is a prefix the out string is a slash. This happens, if the input
// string only consists of a prefix followed by one or more slashes. Just skip the slash.
++used;
}
for (qsizetype i = prefixLength - 1; i >= 0; --i)
out[--used] = prefix[i];
} else {
if (isEmpty) {
// After resolving the input path, the resulting string is empty (e.g. "foo/.."). Return
// a dot in that case.
out[--used] = '.';
} else if (out[used] == '/') {
// After parsing the input string, out only contains a slash. That happens whenever all
// parts are resolved and there is a trailing slash ("./" or "foo/../" for example).
// Prepend a dot to have the correct return value.
out[--used] = '.';
}
}

// If path was not modified return the original value
if (used == 0)
return name;
return QStringView(out + used, len - used).toString();
}

static QString qt_cleanPath(const QString &path, bool *ok)
Expand Down
1 change: 0 additions & 1 deletion src/corelib/io/qdir_p.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ class QDirPrivate : public QSharedData
Q_DECLARE_OPERATORS_FOR_FLAGS(QDirPrivate::PathNormalizations)

Q_AUTOTEST_EXPORT QString qt_normalizePathSegments(const QString &name, QDirPrivate::PathNormalizations flags, bool *ok = nullptr);
bool qt_normalizePathSegments(QString *path, QDirPrivate::PathNormalizations flags);

QT_END_NAMESPACE

Expand Down
Loading

0 comments on commit e8122a0

Please sign in to comment.