/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* * This file is part of the LibreOffice project. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * * This file incorporates work covered by the following license notice: * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed * with this work for additional information regarding copyright * ownership. The ASF licenses this file to you under the Apache * License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
Constructs of the form {reference <rule1> using rule2} stand for a rule matching the given rule1 specified in the given reference, encoded to URI syntax using rule2 (as specified in this URI grammar).
sal_uInt32 const aMustEncodeMap[128]
= { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* */ PP, /* ! */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* " */ PM+PN +PP, /* # */ PM, /* $ */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* % */ PM, /* & */ PA +PD+PE+PF+PG+PH+PI +PK+PL+PM+PN+PO +PQ+PR, /* ' */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* ( */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* ) */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* * */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* + */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO +PQ+PR, /* , */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN +PQ+PR, /* - */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* . */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* / */ +PD +PG+PH+PI+PJ+PK +PM+PN+PO, /* 0 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* 1 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* 2 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* 3 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* 4 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* 5 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* 6 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* 7 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* 8 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* 9 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* : */ +PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO +PQ+PR, /* ; */ PA +PE+PF+PG+PH+PI+PJ+PK +PM +PQ+PR, /* < */ +PI +PM+PN +PP, /* = */ PA +PD+PE+PF+PG+PH +PK+PL+PM+PN +PQ+PR, /* > */ +PI +PM+PN +PP, /* ? */ +PG +PM +PO +PQ, /* @ */ +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* A */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* B */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* C */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* D */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* E */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* F */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* G */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* H */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* I */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* J */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* K */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* L */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* M */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* N */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* O */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* P */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* Q */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* R */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* S */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* T */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* U */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* V */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* W */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* X */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* Y */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* Z */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* [ */ PG +PM+PN+PO, /* \ */ +PM+PN +PP, /* ] */ PG +PM+PN+PO, /* ^ */ PM+PN +PP, /* _ */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* ` */ PM+PN +PP, /* a */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* b */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* c */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* d */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* e */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* f */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* g */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* h */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* i */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* j */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* k */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* l */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* m */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* n */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* o */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* p */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* q */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* r */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* s */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* t */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* u */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* v */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* w */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* x */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* y */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* z */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR, /* { */ PM+PN +PP, /* | */ +PM+PN +PP, /* } */ PM+PN +PP, /* ~ */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ,
0 };
OUString parseScheme(
sal_Unicode const ** begin, sal_Unicode const * end,
sal_uInt32 fragmentDelimiter)
{
sal_Unicode const * p = *begin; if (p != end && rtl::isAsciiAlpha(*p)) { do {
++p;
} while (p != end
&& (rtl::isAsciiAlphanumeric(*p) || *p == '+' || *p == '-'
|| *p == '.')); // #i34835# To avoid problems with Windows file paths like "C:\foo", // do not accept generic schemes that are only one character long: if (end - p > 1 && p[0] == ':' && p[1] != fragmentDelimiter
&& p - *begin >= 2)
{
OUString scheme(
OUString(*begin, p - *begin).toAsciiLowerCase());
*begin = p + 1; return scheme;
}
} return OUString();
}
const OUString & rTemp = pPrefix->m_eKind >= PrefixInfo::Kind::External ?
pPrefix->m_aTranslatedPrefix :
pPrefix->m_aPrefix;
m_aAbsURIRef.append(rTemp);
m_aScheme = SubString( 0, rTemp.indexOf(':') );
} else
{ if (bSmart)
{ // For scheme detection, the first (if any) of the following // productions that matches the input string (and for which the // appropriate style bit is set in eStyle, if applicable) // determines the scheme. The productions use the auxiliary rules
// 1st Production (known scheme; handled by the "if (pPrefix)" branch above): // <one of the known schemes, ignoring case> ":" *UCS4 // 2nd Production (mailto): // domain "@" domain // 3rd Production (ftp): // "FTP" 2*("." label) ["/" *UCS4] // 4th Production (http): // label 2*("." label) ["/" *UCS4] // 5th Production (file): // "//" (domain / IPv6reference) ["/" *UCS4] // 6th Production (Unix file): // "/" *UCS4 // 7th Production (UNC file; FSysStyle::Dos only): // "\\" domain ["\" *UCS4] // 8th Production (Unix-like DOS file; FSysStyle::Dos only): // ALPHA ":" ["/" *UCS4] // 9th Production (DOS file; FSysStyle::Dos only): // ALPHA ":" ["\" *UCS4] // 10th Production (any scheme; handled by the "m_eScheme = INetProtocol::Generic;" code // after this else branch): // <any scheme> ":" *UCS4
// For the 'non URL' file productions 6--9, the interpretation of // the input as a (degenerate) URI is turned off, i.e., escape // sequences and fragments are never detected as such, but are // taken as literal characters.
OUString aSynScheme; if (m_eScheme == INetProtocol::NotValid) {
sal_Unicode const * p1 = pPos;
aSynScheme = parseScheme(&p1, pEnd, nFragmentDelimiter); if (!aSynScheme.isEmpty())
{ if (bSmart && m_eSmartScheme != m_eScheme && p1 != pEnd && rtl::isAsciiDigit(*p1))
{ // rTheAbsURIRef doesn't define a known scheme (handled by the "if (pPrefix)" // branch above); but a known scheme is defined in m_eSmartScheme. If this // scheme may have a port in authority component, then avoid misinterpreting // URLs like www.foo.bar:123/baz as using unknown "www.foo.bar" scheme with // 123/baz rootless path. For now, do not try to handle possible colons in // user information, require such ambiguous URLs to have explicit scheme part. // Also ignore possibility of empty port. const SchemeInfo& rInfo = getSchemeInfo(m_eSmartScheme); if (rInfo.m_bAuthority && rInfo.m_bPort)
{ // Make sure that all characters from colon to [/?#] or to EOL are digits. // Or maybe make it simple, and just assume that "xyz:1..." is more likely // to be host "xyz" and port "1...", than scheme "xyz" and path "1..."?
sal_Unicode const* p2 = p1 + 1; while (p2 != pEnd && rtl::isAsciiDigit(*p2))
++p2; if (p2 == pEnd || *p2 == '/' || *p2 == '?' || *p2 == '#')
m_eScheme = m_eSmartScheme;
}
}
case INetProtocol::File: if (bSmart)
{ // The first of the following seven productions that // matches the rest of the input string (and for which the // appropriate style bit is set in eStyle, if applicable) // determines the used notation. The productions use the // auxiliary rules
// 9th Production (any): // *path ["#" *UCS4] // becomes // "file:///" *path ["#" *UCS4] // replacing the delimiter by "/" within <*path>. The // delimiter is that character from the set { "/", "\"} // which appears most often in <*path> (if FSysStyle::Unix // is not among the style bits, "/" is removed from the // set; if FSysStyle::Dos is not among the style bits, "\" is // removed from the set). If two or // more characters appear the same number of times, the // character mentioned first in that set is chosen. If // the first character of <*path> is the delimiter, that // character is not copied if (eStyle & (FSysStyle::Unix | FSysStyle::Dos))
{
m_aAbsURIRef.append("//"); switch (guessFSysStyleByCounting(pPos, pEnd, eStyle))
{ case FSysStyle::Unix:
nSegmentDelimiter = '/'; break;
case FSysStyle::Dos:
nSegmentDelimiter = '\\'; break;
default:
OSL_FAIL( "INetURLObject::setAbsURIRef():" " Bad guessFSysStyleByCounting"); break;
}
bSkippedInitialSlash
= pPos != pEnd && *pPos != nSegmentDelimiter; break;
}
}
[[fallthrough]]; default:
{ // For INetProtocol::File, allow an empty authority ("//") to be // missing if the following path starts with an explicit "/" // (Java is notorious in generating such file URLs, so be // liberal here): if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
pPos += 2; elseif (!bSmart
&& !(m_eScheme == INetProtocol::File
&& pPos != pEnd && *pPos == '/'))
{
setInvalid(); returnfalse;
}
m_aAbsURIRef.append("//");
if (!hasScheme && bSmart)
{ // If the input matches any of the following productions (for which // the appropriate style bit is set in eStyle), it is assumed to be an // absolute file system path, rather than a relative URI reference. // (This is only a subset of the productions used for scheme detection // in INetURLObject::setAbsURIRef(), because most of those productions // interfere with the syntax of relative URI references.) The // productions use the auxiliary rules
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.