/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* * This file is part of the LibreOffice project. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * * This file incorporates work covered by the following license notice: * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed * with this work for additional information regarding copyright * ownership. The ASF licenses this file to you under the Apache * License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
// Constructs of the form {reference <rule1> using rule2} stand for a rule that
// matches rule1 as specified in the given reference, encoded to URI syntax
// using rule2 (as specified in this URI grammar).
// Lookup table with one sal_uInt32 entry per 7-bit ASCII code (128 entries).
// The 32 leading entries (control characters) and the final entry (0x7F) are
// 0; every printable character from space to '~' carries a sum of the
// PA..PR constants, annotated with the character it belongs to.
// NOTE(review): PA..PR are defined outside this excerpt. Presumably each is a
// distinct bit flag for one URI part, and a set bit means the character may
// appear unencoded in that part -- confirm against their definitions and the
// code that reads this table.
sal_uInt32 const aMustEncodeMap[128]
= { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* space */ PP, /* ! */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* " */ PM+PN +PP, /* # */ PM, /* $ */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* % */ PM, /* & */ PA +PD+PE+PF+PG+PH+PI +PK+PL+PM+PN+PO +PQ+PR, /* ' */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* ( */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* ) */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* * */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* + */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO +PQ+PR, /* , */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN +PQ+PR, /* - */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* . */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* / */ +PD +PG+PH+PI+PJ+PK +PM+PN+PO, /* 0 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* 1 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* 2 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* 3 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* 4 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* 5 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* 6 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* 7 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* 8 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* 9 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* : */ +PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO +PQ+PR, /* ; */ PA +PE+PF+PG+PH+PI+PJ+PK +PM +PQ+PR, /* < */ +PI +PM+PN +PP, /* = */ PA +PD+PE+PF+PG+PH +PK+PL+PM+PN +PQ+PR, /* > */ +PI +PM+PN +PP, /* ?
*/ +PG +PM +PO +PQ, /* @ */ +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* A */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* B */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* C */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* D */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* E */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* F */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* G */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* H */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* I */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* J */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* K */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* L */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* M */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* N */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* O */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* P */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* Q */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* R */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* S */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* T */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* U */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* V */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* W */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* X */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* Y */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* Z */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* [ */ PG +PM+PN+PO, /* \ */ +PM+PN +PP, /* ] */ PG +PM+PN+PO, /* ^ */ PM+PN +PP, /* _ */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* ` */ PM+PN +PP, /* a */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* b */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* c */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* d */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* e */ PA 
+PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* f */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* g */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* h */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* i */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* j */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* k */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* l */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* m */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* n */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* o */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* p */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* q */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* r */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* s */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* t */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* u */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* v */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* w */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* x */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* y */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* z */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* { */ PM+PN +PP, /* | */ +PM+PN +PP, /* } */ PM+PN +PP, /* ~ */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ,
0 };
/**
 * Tries to read a generic URI scheme from the front of [*begin, end).
 *
 * A scheme is an ASCII letter followed by any number of ASCII letters,
 * digits, '+', '-' or '.'. It is accepted only if it is at least two
 * characters long, is directly followed by ':', and the ':' is followed by at
 * least one more character that is not the fragment delimiter.
 *
 * @param begin  In/out: start of the text to scan. On success it is advanced
 *               to the character right after the ':'; otherwise it is left
 *               unchanged.
 * @param end    One past the last character of the text to scan.
 * @param fragmentDelimiter  Character that must not directly follow the ':'.
 * @return The scheme converted to ASCII lower case, or an empty string if no
 *         acceptable scheme was found.
 */
OUString parseScheme(
    sal_Unicode const ** begin, sal_Unicode const * end,
    sal_uInt32 fragmentDelimiter)
{
    sal_Unicode const * const pStart = *begin;

    // A scheme has to start with a letter.
    if (pStart == end || !rtl::isAsciiAlpha(*pStart))
        return OUString();

    // Consume the remaining scheme characters.
    sal_Unicode const * pCur = pStart + 1;
    while (pCur != end
           && (rtl::isAsciiAlphanumeric(*pCur) || *pCur == '+'
               || *pCur == '-' || *pCur == '.'))
    {
        ++pCur;
    }

    // The scheme must be terminated by ':' with something other than the
    // fragment delimiter behind it (pCur[1] is only read once it is known to
    // lie inside the input).
    bool const bColonThenContent
        = end - pCur > 1 && pCur[0] == ':' && pCur[1] != fragmentDelimiter;

    // #i34835# To avoid problems with Windows file paths like "C:\foo",
    // do not accept generic schemes that are only one character long:
    bool const bLongEnough = pCur - pStart >= 2;

    if (!(bColonThenContent && bLongEnough))
        return OUString();

    OUString scheme(OUString(pStart, pCur - pStart).toAsciiLowerCase());
    *begin = pCur + 1; // continue after the ':'
    return scheme;
}
const OUString & rTemp = pPrefix->m_eKind >= PrefixInfo::Kind::External ?
pPrefix->m_aTranslatedPrefix :
pPrefix->m_aPrefix;
m_aAbsURIRef.append(rTemp);
m_aScheme = SubString( 0, rTemp.indexOf(':') );
} else
{ if (bSmart)
{ // For scheme detection, the first (if any) of the following // productions that matches the input string (and for which the // appropriate style bit is set in eStyle, if applicable) // determines the scheme. The productions use the auxiliary rules
// 1st Production (known scheme; handled by the "if (pPrefix)" branch above): // <one of the known schemes, ignoring case> ":" *UCS4 // 2nd Production (mailto): // domain "@" domain // 3rd Production (ftp): // "FTP" 2*("." label) ["/" *UCS4] // 4th Production (http): // label 2*("." label) ["/" *UCS4] // 5th Production (file): // "//" (domain / IPv6reference) ["/" *UCS4] // 6th Production (Unix file): // "/" *UCS4 // 7th Production (UNC file; FSysStyle::Dos only): // "\\" domain ["\" *UCS4] // 8th Production (Unix-like DOS file; FSysStyle::Dos only): // ALPHA ":" ["/" *UCS4] // 9th Production (DOS file; FSysStyle::Dos only): // ALPHA ":" ["\" *UCS4] // 10th Production (any scheme; handled by the "m_eScheme = INetProtocol::Generic;" code // after this else branch): // <any scheme> ":" *UCS4
// For the 'non URL' file productions 6--9, the interpretation of // the input as a (degenerate) URI is turned off, i.e., escape // sequences and fragments are never detected as such, but are // taken as literal characters.
OUString aSynScheme; if (m_eScheme == INetProtocol::NotValid) {
sal_Unicode const * p1 = pPos;
aSynScheme = parseScheme(&p1, pEnd, nFragmentDelimiter); if (!aSynScheme.isEmpty())
{ if (bSmart && m_eSmartScheme != m_eScheme && p1 != pEnd && rtl::isAsciiDigit(*p1))
{ // rTheAbsURIRef doesn't define a known scheme (handled by the "if (pPrefix)" // branch above); but a known scheme is defined in m_eSmartScheme. If this // scheme may have a port in authority component, then avoid misinterpreting // URLs like www.foo.bar:123/baz as using unknown "www.foo.bar" scheme with // 123/baz rootless path. For now, do not try to handle possible colons in // user information, require such ambiguous URLs to have explicit scheme part. // Also ignore possibility of empty port. const SchemeInfo& rInfo = getSchemeInfo(m_eSmartScheme); if (rInfo.m_bAuthority && rInfo.m_bPort)
{ // Make sure that all characters from colon to [/?#] or to EOL are digits. // Or maybe make it simple, and just assume that "xyz:1..." is more likely // to be host "xyz" and port "1...", than scheme "xyz" and path "1..."?
sal_Unicode const* p2 = p1 + 1; while (p2 != pEnd && rtl::isAsciiDigit(*p2))
++p2; if (p2 == pEnd || *p2 == '/' || *p2 == '?' || *p2 == '#')
m_eScheme = m_eSmartScheme;
}
}
case INetProtocol::File: if (bSmart)
{ // The first of the following seven productions that // matches the rest of the input string (and for which the // appropriate style bit is set in eStyle, if applicable) // determines the used notation. The productions use the // auxiliary rules
// 9th Production (any): // *path ["#" *UCS4] // becomes // "file:///" *path ["#" *UCS4] // replacing the delimiter by "/" within <*path>. The // delimiter is that character from the set { "/", "\"} // which appears most often in <*path> (if FSysStyle::Unix // is not among the style bits, "/" is removed from the // set; if FSysStyle::Dos is not among the style bits, "\" is // removed from the set). If two or // more characters appear the same number of times, the // character mentioned first in that set is chosen. If // the first character of <*path> is the delimiter, that // character is not copied if (eStyle & (FSysStyle::Unix | FSysStyle::Dos))
{
m_aAbsURIRef.append("//"); switch (guessFSysStyleByCounting(pPos, pEnd, eStyle))
{ case FSysStyle::Unix:
nSegmentDelimiter = '/'; break;
case FSysStyle::Dos:
nSegmentDelimiter = '\\'; break;
default:
OSL_FAIL( "INetURLObject::setAbsURIRef():" " Bad guessFSysStyleByCounting"); break;
}
bSkippedInitialSlash
= pPos != pEnd && *pPos != nSegmentDelimiter; break;
}
}
[[fallthrough]]; default:
{ // For INetProtocol::File, allow an empty authority ("//") to be // missing if the following path starts with an explicit "/" // (Java is notorious in generating such file URLs, so be // liberal here): if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
pPos += 2; elseif (!bSmart
&& !(m_eScheme == INetProtocol::File
&& pPos != pEnd && *pPos == '/'))
{
setInvalid(); returnfalse;
}
m_aAbsURIRef.append("//");
if (!hasScheme && bSmart)
{ // If the input matches any of the following productions (for which // the appropriate style bit is set in eStyle), it is assumed to be an // absolute file system path, rather than a relative URI reference. // (This is only a subset of the productions used for scheme detection // in INetURLObject::setAbsURIRef(), because most of those productions // interfere with the syntax of relative URI references.) The // productions use the auxiliary rules
--> --------------------
--> maximum size reached
--> --------------------
Messung V0.5
¤ Dauer der Verarbeitung: 0.30 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.