Quellcodebibliothek Statistik Leitseite products/Sources/formale Sprachen/C/LibreOffice/tools/source/fsys/   (Office von Apache Version 25.8.3.2©)  Datei vom 5.10.2025 mit Größe 178 kB image not shown  

Quelle  urlobj.cxx   Sprache: C

 
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */


#include <sal/config.h>

#include <tools/urlobj.hxx>
#include <tools/debug.hxx>
#include <tools/inetmime.hxx>
#include <tools/stream.hxx>
#include <com/sun/star/uno/Reference.hxx>
#include <com/sun/star/util/XStringWidth.hpp>
#include <o3tl/enumarray.hxx>
#include <osl/diagnose.h>
#include <osl/file.hxx>
#include <rtl/character.hxx>
#include <rtl/string.h>
#include <rtl/textenc.h>
#include <rtl/ustring.hxx>
#include <sal/log.hxx>
#include <sal/types.h>

#include <algorithm>
#include <cassert>
#include <limits>
#include <memory>
#include <string_view>

#include <string.h>

#include <com/sun/star/uno/Sequence.hxx>
#include <comphelper/base64.hxx>
#include <comphelper/string.hxx>

using namespace css;

//  INetURLObject

/* The URI grammar (using RFC 2234 conventions).

   Constructs of the form
       {reference <rule1> using rule2}
   stand for a rule matching the given rule1 specified in the given reference,
   encoded to URI syntax using rule2 (as specified in this URI grammar).


   ; RFC 1738, RFC 2396, RFC 2732, private
   login = [user [":" password] "@"] hostport
   user = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ";" / "=" / "_" / "~")
   password = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ";" / "=" / "_" / "~")
   hostport = host [":" port]
   host = incomplete-hostname / hostname / IPv4address / IPv6reference
   incomplete-hostname = *(domainlabel ".") domainlabel
   hostname = *(domainlabel ".") toplabel ["."]
   domainlabel = alphanum [*(alphanum / "-") alphanum]
   toplabel = ALPHA [*(alphanum / "-") alphanum]
   IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
   IPv6reference = "[" hexpart [":" IPv4address] "]"
   hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
   hexseq = hex4 *(":" hex4)
   hex4 = 1*4HEXDIG
   port = *DIGIT
   escaped = "%" HEXDIG HEXDIG
   reserved = "$" / "&" / "+" / "," / "/" / ":" / ";" / "=" / "?" / "@" / "[" / "]"
   mark = "!" / "'" / "(" / ")" / "*" / "-" / "." / "_" / "~"
   alphanum = ALPHA / DIGIT
   unreserved = alphanum / mark
   uric = escaped / reserved / unreserved
   pchar = escaped / unreserved / "$" / "&" / "+" / "," / ":" / "=" / "@"


   ; RFC 1738, RFC 2396
   ftp-url = "FTP://" login ["/" segment *("/" segment) [";TYPE=" ("A" / "D" / "I")]]
   segment = *pchar


   ; RFC 1738, RFC 2396
   http-url = "HTTP://" hostport ["/" segment *("/" segment) ["?" *uric]]
   segment = *(pchar / ";")


   ; RFC 1738, RFC 2396, <http://support.microsoft.com/default.aspx?scid=KB;EN-US;Q188997&&gt;
   file-url = "FILE://" [host / "LOCALHOST" / netbios-name] ["/" segment *("/" segment)]
   segment = *pchar
   netbios-name = 1*{<alphanum / "!" / "#" / "$" / "%" / "&" / "'" / "(" / ")" / "-" / "." / "@" / "^" / "_" / "{" / "}" / "~"> using (escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "-" / "." / "@" / "_" / "~")}


   ; RFC 2368, RFC 2396
   mailto-url = "MAILTO:" [to] [headers]
   to = {RFC 822 <#mailbox> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
   headers = "?" header *("&" header)
   header = hname "=" hvalue
   hname = {RFC 822 <field-name> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")} / "BODY"
   hvalue = {RFC 822 <field-body> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}


   ; private (see RFC 1738, RFC 2396)
   vnd-sun-star-webdav-url = "VND.SUN.STAR.WEBDAV://" hostport ["/" segment *("/" segment) ["?" *uric]]
   segment = *(pchar / ";")


   ; private
   private-url = "PRIVATE:" path ["?" *uric]
   path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")


   ; private
   vnd-sun-star-help-url = "VND.SUN.STAR.HELP://" name *("/" segment) ["?" *uric]
   name = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
   segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")


   ; private
   https-url = "HTTPS://" hostport ["/" segment *("/" segment) ["?" *uric]]
   segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")


   ; private
   slot-url = "SLOT:" path ["?" *uric]
   path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")


   ; private
   macro-url = "MACRO:" path ["?" *uric]
   path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")


   ; private
   javascript-url = "JAVASCRIPT:" *uric


   ; RFC 2397
   data-url = "DATA:" [mediatype] [";BASE64"] "," *uric
   mediatype = [type "/" subtype] *(";" attribute "=" value)
   type = {RFC 2045 <type> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
   subtype = {RFC 2045 <subtype> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
   attribute = {RFC 2045 <subtype> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
   value = {RFC 2045 <subtype> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}


   ; RFC 2392, RFC 2396
   cid-url = "CID:" {RFC 822 <addr-spec> using *uric}


   ; private
   vnd-sun-star-hier-url = "VND.SUN.STAR.HIER:" ["//"reg_name] *("/" *pchar)
   reg_name = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")


   ; private
   uno-url = ".UNO:" path ["?" *uric]
   path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")


   ; private
   component-url = ".COMPONENT:" path ["?" *uric]
   path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")


   ; private
   vnd-sun-star-pkg-url = "VND.SUN.STAR.PKG://" reg_name *("/" *pchar) ["?" *uric]
   reg_name = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")


   ; RFC 2255
   ldap-url = "LDAP://" [hostport] ["/" [dn ["?" [attrdesct *("," attrdesc)] ["?" ["base" / "one" / "sub"] ["?" [filter] ["?" extension *("," extension)]]]]]]
   dn = {RFC 2253 <distinguishedName> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
   attrdesc = {RFC 2251 <AttributeDescription> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
   filter = {RFC 2254 <filter> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
   extension = ["!"] ["X-"] extoken ["=" exvalue]
   extoken = {RFC 2252 <oid> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
   exvalue = {RFC 2251 <LDAPString> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}


   ; private
   db-url = "DB:" *uric


   ; private
   vnd-sun-star-cmd-url = "VND.SUN.STAR.CMD:" opaque_part
   opaque_part = uric_no_slash *uric
   uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / ","


   ; RFC 1738
   telnet-url = "TELNET://" login ["/"]


   ; private
   vnd-sun-star-expand-url = "VND.SUN.STAR.EXPAND:" opaque_part
   opaque_part = uric_no_slash *uric
   uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / ","


   ; private
   vnd-sun-star-tdoc-url = "VND.SUN.STAR.TDOC:/" segment *("/" segment)
   segment = *pchar


   ; private
   unknown-url = scheme ":" 1*uric
   scheme = ALPHA *(alphanum / "+" / "-" / ".")


   ; private (http://ubiqx.org/cifs/Appendix-D.html):
   smb-url = "SMB://" login ["/" segment *("/" segment) ["?" *uric]]
   segment = *(pchar / ";")
 */


sal_Int32 INetURLObject::SubString::clear()
{
    sal_Int32 nDelta = -m_nLength;
    m_nBegin = -1;
    m_nLength = 0;
    return nDelta;
}

sal_Int32 INetURLObject::SubString::set(OUStringBuffer & rString,
                                       std::u16string_view rSubString)
{
    sal_Int32 nDelta = rSubString.size() - m_nLength;

    rString.remove(m_nBegin, m_nLength);
    rString.insert(m_nBegin, rSubString);

    m_nLength = rSubString.size();
    return nDelta;
}

sal_Int32 INetURLObject::SubString::set(OUString & rString,
                                       std::u16string_view rSubString)
{
    sal_Int32 nDelta = rSubString.size() - m_nLength;

    rString = OUString::Concat(rString.subView(0, m_nBegin)) + 
             rSubString + rString.subView(m_nBegin + m_nLength);

    m_nLength = rSubString.size();
    return nDelta;
}

sal_Int32 INetURLObject::SubString::set(OUStringBuffer & rString,
                                        std::u16string_view rSubString,
                                        sal_Int32 nTheBegin)
{
    m_nBegin = nTheBegin;
    return set(rString, rSubString);
}

inline void INetURLObject::SubString::operator +=(sal_Int32 nDelta)
{
    if (isPresent())
        m_nBegin = m_nBegin + nDelta;
}

int INetURLObject::SubString::compare(SubString const & rOther,
                                      OUStringBuffer const & rThisString,
                                      OUStringBuffer const & rOtherString) const
{
    sal_Int32 len = std::min(m_nLength, rOther.m_nLength);
    sal_Unicode const * p1 = rThisString.getStr() + m_nBegin;
    sal_Unicode const * end = p1 + len;
    sal_Unicode const * p2 = rOtherString.getStr() + rOther.m_nBegin;
    while (p1 != end) {
        if (*p1 < *p2) {
            return -1;
        } else if (*p1 > *p2) {
            return 1;
        }
        ++p1;
        ++p2;
    }
    return m_nLength < rOther.m_nLength ? -1
        : m_nLength > rOther.m_nLength ? 1
        : 0;
}

struct INetURLObject::SchemeInfo
{
    OUString m_sScheme;
    OUString m_aPrefix;
    bool m_bAuthority;
    bool m_bUser;
    bool m_bAuth;
    bool m_bPassword;
    bool m_bHost;
    bool m_bPort;
    bool m_bHierarchical;
    bool m_bQuery;
};

struct INetURLObject::PrefixInfo
{
    enum class Kind { Official, Internal, External }; // order is important!

    OUString     m_aPrefix;
    OUString     m_aTranslatedPrefix;
    INetProtocol m_eScheme;
    Kind         m_eKind;
};

// static
inline INetURLObject::SchemeInfo const &
INetURLObject::getSchemeInfo(INetProtocol eTheScheme)
{
    static constexpr OUString EMPTY = u""_ustr;
    static constexpr OUString FTP = u"ftp"_ustr;
    static constexpr OUString HTTP = u"http"_ustr;
    static constexpr OUString FILE1 = u"file"_ustr; // because FILE is already defined
    static constexpr OUString MAILTO = u"mailto"_ustr;
    static constexpr OUString VND_WEBDAV = u"vnd.sun.star.webdav"_ustr;
    static constexpr OUString PRIVATE = u"private"_ustr;
    static constexpr OUString VND_HELP = u"vnd.sun.star.help"_ustr;
    static constexpr OUString HTTPS = u"https"_ustr;
    static constexpr OUString SLOT = u"slot"_ustr;
    static constexpr OUString MACRO = u"macro"_ustr;
    static constexpr OUString JAVASCRIPT = u"javascript"_ustr;
    static constexpr OUString DATA = u"data"_ustr;
    static constexpr OUString CID = u"cid"_ustr;
    static constexpr OUString VND_HIER = u"vnd.sun.star.hier"_ustr;
    static constexpr OUString UNO = u".uno"_ustr;
    static constexpr OUString COMPONENT = u".component"_ustr;
    static constexpr OUString VND_PKG = u"vnd.sun.star.pkg"_ustr;
    static constexpr OUString LDAP = u"ldap"_ustr;
    static constexpr OUString DB = u"db"_ustr;
    static constexpr OUString VND_CMD = u"vnd.sun.star.cmd"_ustr;
    static constexpr OUString TELNET = u"telnet"_ustr;
    static constexpr OUString VND_EXPAND = u"vnd.sun.star.expand"_ustr;
    static constexpr OUString VND_TDOC = u"vnd.sun.star.tdoc"_ustr;
    static constexpr OUString SMB = u"smb"_ustr;
    static constexpr OUString HID = u"hid"_ustr;
    static constexpr OUString SFTP = u"sftp"_ustr;
    static constexpr OUString VND_CMIS = u"vnd.libreoffice.cmis"_ustr;

    static o3tl::enumarray<INetProtocol, SchemeInfo> constexpr map = {
        // [-loplugin:redundantfcast]:
        SchemeInfo{
            EMPTY, u""_ustr, falsefalsefalsefalsefalsefalsefalsefalse},
        SchemeInfo{
            FTP, u"ftp://"_ustr, true, true, false, true, true, true, true,
            false},
        SchemeInfo{
            HTTP, u"http://"_ustr, true, false, false, false, true, true, true,
            true},
        SchemeInfo{
            FILE1, u"file://"_ustr, true, false, false, false, true, false, true,
            false},
        SchemeInfo{
            MAILTO, u"mailto:"_ustr, falsefalsefalsefalsefalsefalse,
            falsetrue},
        SchemeInfo{
            VND_WEBDAV, u"vnd.sun.star.webdav://"_ustr, true, false,
            falsefalsetruetruetruetrue},
        SchemeInfo{
            PRIVATE, u"private:"_ustr, falsefalsefalsefalsefalsefalse,
            falsetrue},
        SchemeInfo{
            VND_HELP, u"vnd.sun.star.help://"_ustr, true, false, false,
            falsefalsefalsetruetrue},
        SchemeInfo{
            HTTPS, u"https://"_ustr, true, false, false, false, true, true,
            truetrue},
        SchemeInfo{
            SLOT, u"slot:"_ustr, falsefalsefalsefalsefalsefalsefalse,
            true},
        SchemeInfo{
            MACRO, u"macro:"_ustr, falsefalsefalsefalsefalsefalse,
            falsetrue},
        SchemeInfo{
            JAVASCRIPT, u"javascript:"_ustr, falsefalsefalsefalsefalse,
            falsefalsefalse},
        SchemeInfo{
            DATA, u"data:"_ustr, falsefalsefalsefalsefalsefalsefalse,
            false},
        SchemeInfo{
            CID, u"cid:"_ustr, falsefalsefalsefalsefalsefalsefalse,
            false},
        SchemeInfo{
            VND_HIER, u"vnd.sun.star.hier:"_ustr, truefalsefalse,
            falsefalsefalsetruefalse},
        SchemeInfo{
            UNO, u".uno:"_ustr, falsefalsefalsefalsefalsefalsefalse,
            true},
        SchemeInfo{
            COMPONENT, u".component:"_ustr, falsefalsefalsefalsefalse,
            falsefalsetrue},
        SchemeInfo{
            VND_PKG, u"vnd.sun.star.pkg://"_ustr, true, false, false,
            falsefalsefalsetruetrue},
        SchemeInfo{
            LDAP, u"ldap://"_ustr, true, false, false, false, true, true,
            falsetrue},
        SchemeInfo{
            DB, u"db:"_ustr, falsefalsefalsefalsefalsefalsefalse,
            false},
        SchemeInfo{
            VND_CMD, u"vnd.sun.star.cmd:"_ustr, falsefalsefalse,
            falsefalsefalsefalsefalse},
        SchemeInfo{
            TELNET, u"telnet://"_ustr, true, true, false, true, true, true,
            truefalse},
        SchemeInfo{
            VND_EXPAND, u"vnd.sun.star.expand:"_ustr, falsefalse,
            falsefalsefalsefalsefalsefalse},
        SchemeInfo{
            VND_TDOC, u"vnd.sun.star.tdoc:"_ustr, falsefalsefalse,
            falsefalsefalsetruefalse},
        SchemeInfo{
            EMPTY, u""_ustr, falsefalsefalsefalsetruetruetruefalse },
        SchemeInfo{
            SMB, u"smb://"_ustr, true, true, false, true, true, true, true,
            true},
        SchemeInfo{
            HID, u"hid:"_ustr, falsefalsefalsefalsefalsefalsefalse,
            true},
        SchemeInfo{
            SFTP, u"sftp://"_ustr, true, true, false, true, true, true, true,
            true},
        SchemeInfo{
            VND_CMIS, u"vnd.libreoffice.cmis://"_ustr, true, true,
            falsefalsetruefalsetruetrue} };
    return map[eTheScheme];
};

inline INetURLObject::SchemeInfo const & INetURLObject::getSchemeInfo() const
{
    return getSchemeInfo(m_eScheme);
}

namespace {

sal_Unicode getHexDigit(sal_uInt32 nWeight)
{
    assert(nWeight < 16);
    static const sal_Unicode aDigits[16]
        = { '0''1''2''3''4''5''6''7''8''9''A''B''C',
            'D''E''F' };
    return aDigits[nWeight];
}

}

// static
inline void INetURLObject::appendEscape(OUStringBuffer & rTheText,
                                        sal_uInt32 nOctet)
{
    rTheText.append( '%' );
    rTheText.append( getHexDigit(nOctet >> 4) );
    rTheText.append( getHexDigit(nOctet & 15) );
}

namespace {

enum
{
    PA = INetURLObject::PART_USER_PASSWORD,
    PD = INetURLObject::PART_FPATH,
    PE = INetURLObject::PART_AUTHORITY,
    PF = INetURLObject::PART_REL_SEGMENT_EXTRA,
    PG = INetURLObject::PART_URIC,
    PH = INetURLObject::PART_HTTP_PATH,
    PI = INetURLObject::PART_MESSAGE_ID_PATH,
    PJ = INetURLObject::PART_MAILTO,
    PK = INetURLObject::PART_PATH_BEFORE_QUERY,
    PL = INetURLObject::PART_PCHAR,
    PM = INetURLObject::PART_VISIBLE,
    PN = INetURLObject::PART_VISIBLE_NONSPECIAL,
    PO = INetURLObject::PART_UNO_PARAM_VALUE,
    PP = INetURLObject::PART_UNAMBIGUOUS,
    PQ = INetURLObject::PART_URIC_NO_SLASH,
    PR = INetURLObject::PART_HTTP_QUERY,
};

sal_uInt32 const aMustEncodeMap[128]
    = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*   */                                              PP,
/* ! */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* " */                                  PM+PN   +PP,
/* # */                                  PM,
/* $ */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* % */                                  PM,
/* & */ PA   +PD+PE+PF+PG+PH+PI   +PK+PL+PM+PN+PO   +PQ+PR,
/* ' */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* ( */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* ) */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* * */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* + */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO   +PQ+PR,
/* , */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN      +PQ+PR,
/* - */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* . */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* / */      +PD      +PG+PH+PI+PJ+PK   +PM+PN+PO,
/* 0 */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* 1 */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* 2 */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* 3 */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* 4 */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* 5 */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* 6 */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* 7 */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* 8 */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* 9 */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* : */      +PD+PE   +PG+PH+PI+PJ+PK+PL+PM+PN+PO   +PQ+PR,
/* ; */ PA      +PE+PF+PG+PH+PI+PJ+PK   +PM         +PQ+PR,
/* < */                     +PI         +PM+PN   +PP,
/* = */ PA   +PD+PE+PF+PG+PH      +PK+PL+PM+PN      +PQ+PR,
/* > */                     +PI         +PM+PN   +PP,
/* ? */               +PG               +PM   +PO   +PQ,
/* @ */      +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* A */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* B */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* C */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* D */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* E */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* F */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* G */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* H */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* I */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* J */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* K */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* L */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* M */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* N */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* O */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* P */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* Q */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* R */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* S */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* T */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* U */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* V */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* W */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* X */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* Y */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* Z */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* [ */                PG               +PM+PN+PO,
/* \ */                                 +PM+PN   +PP,
/* ] */                PG               +PM+PN+PO,
/* ^ */                                  PM+PN   +PP,
/* _ */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* ` */                                  PM+PN   +PP,
/* a */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* b */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* c */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* d */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* e */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* f */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* g */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* h */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* i */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* j */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* k */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* l */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* m */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* n */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* o */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* p */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* q */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* r */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* s */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* t */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* u */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* v */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* w */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* x */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* y */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* z */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
/* { */                                  PM+PN   +PP,
/* | */                                 +PM+PN   +PP,
/* } */                                  PM+PN   +PP,
/* ~ */ PA   +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ,
        0 };

bool mustEncode(sal_uInt32 nUTF32, INetURLObject::Part ePart)
{
    return !rtl::isAscii(nUTF32) || !(aMustEncodeMap[nUTF32] & ePart);
}

}

void INetURLObject::setInvalid()
{
    m_aAbsURIRef.setLength(0);
    m_eScheme = INetProtocol::NotValid;
    m_aScheme.clear();
    m_aUser.clear();
    m_aAuth.clear();
    m_aHost.clear();
    m_aPort.clear();
    m_aPath.clear();
    m_aQuery.clear();
    m_aFragment.clear();
}

namespace {

std::unique_ptr<SvMemoryStream> memoryStream(
        void const * data, sal_Int32 length)
{
    std::unique_ptr<char[]> b(
        new char[length]);
    memcpy(b.get(), data, length);
    std::unique_ptr<SvMemoryStream> s(
        new SvMemoryStream(b.get(), length, StreamMode::READ));
    s->ObjectOwnsMemory(true);
    // coverity[leaked_storage : FALSE] - belongs to SvMemoryStream s at this point
    b.release();
    return s;
}

}

std::unique_ptr<SvMemoryStream> INetURLObject::getData() const
{
    if( GetProtocol() != INetProtocol::Data )
    {
        return nullptr;
    }

    OUString sURLPath = GetURLPath( DecodeMechanism::WithCharset, RTL_TEXTENCODING_ISO_8859_1 );
    sal_Unicode const * pSkippedMediatype = INetMIME::scanContentType( sURLPath );
    sal_Int32 nCharactersSkipped = pSkippedMediatype == nullptr
        ? 0 : pSkippedMediatype-sURLPath.getStr();
    if (sURLPath.match(",", nCharactersSkipped))
    {
        nCharactersSkipped += strlen(",");
        OString sURLEncodedData(
            sURLPath.getStr() + nCharactersSkipped,
            sURLPath.getLength() - nCharactersSkipped,
            RTL_TEXTENCODING_ISO_8859_1, OUSTRING_TO_OSTRING_CVTFLAGS);
        return memoryStream(
            sURLEncodedData.getStr(), sURLEncodedData.getLength());
    }
    else if (sURLPath.matchIgnoreAsciiCase(";base64,", nCharactersSkipped))
    {
        nCharactersSkipped += strlen(";base64,");
        std::u16string_view sBase64Data = sURLPath.subView( nCharactersSkipped );
        css::uno::Sequence< sal_Int8 > aDecodedData;
        if (comphelper::Base64::decodeSomeChars(aDecodedData, sBase64Data)
            == sBase64Data.size())
        {
            return memoryStream(
                aDecodedData.getArray(), aDecodedData.getLength());
        }
    }
    return nullptr;
}

namespace {

FSysStyle guessFSysStyleByCounting(sal_Unicode const * pBegin,
                                                  sal_Unicode const * pEnd,
                                                  FSysStyle eStyle)
{
    DBG_ASSERT(eStyle
                   & (FSysStyle::Unix
                          | FSysStyle::Dos),
               "guessFSysStyleByCounting(): Bad style");
    DBG_ASSERT(std::numeric_limits< sal_Int32 >::min() < pBegin - pEnd
               && pEnd - pBegin <= std::numeric_limits< sal_Int32 >::max(),
               "guessFSysStyleByCounting(): Too big");
    sal_Int32 nSlashCount
        = (eStyle & FSysStyle::Unix) ?
              0 : std::numeric_limits< sal_Int32 >::min();
    sal_Int32 nBackslashCount
        = (eStyle & FSysStyle::Dos) ?
              0 : std::numeric_limits< sal_Int32 >::min();
    while (pBegin != pEnd)
        switch (*pBegin++)
        {
            case '/':
                ++nSlashCount;
                break;

            case '\\':
                ++nBackslashCount;
                break;
        }
    return nSlashCount >= nBackslashCount ?
                   FSysStyle::Unix : FSysStyle::Dos;
}

OUString parseScheme(
    sal_Unicode const ** begin, sal_Unicode const * end,
    sal_uInt32 fragmentDelimiter)
{
    sal_Unicode const * p = *begin;
    if (p != end && rtl::isAsciiAlpha(*p)) {
        do {
            ++p;
        } while (p != end
                 && (rtl::isAsciiAlphanumeric(*p) || *p == '+' || *p == '-'
                     || *p == '.'));
        // #i34835# To avoid problems with Windows file paths like "C:\foo",
        // do not accept generic schemes that are only one character long:
        if (end - p > 1 && p[0] == ':' && p[1] != fragmentDelimiter
            && p - *begin >= 2)
        {
            OUString scheme(
                OUString(*begin, p - *begin).toAsciiLowerCase());
            *begin = p + 1;
            return scheme;
        }
    }
    return OUString();
}

}

bool INetURLObject::setAbsURIRef(std::u16string_view rTheAbsURIRef,
                                 EncodeMechanism eMechanism,
                                 rtl_TextEncoding eCharset,
                                 bool bSmart,
                                 FSysStyle eStyle)
{
    sal_Unicode const * pPos = rTheAbsURIRef.data();
    sal_Unicode const * pEnd = pPos + rTheAbsURIRef.size();

    setInvalid();

    sal_uInt32 nFragmentDelimiter = '#';

    m_aAbsURIRef.setLength(0);

    // Parse <scheme>:
    sal_Unicode const * p = pPos;
    PrefixInfo const * pPrefix = getPrefix(p, pEnd);
    if (pPrefix)
    {
        pPos = p;
        m_eScheme = pPrefix->m_eScheme;

        const OUString & rTemp = pPrefix->m_eKind >= PrefixInfo::Kind::External ?
                                             pPrefix->m_aTranslatedPrefix :
                                             pPrefix->m_aPrefix;
        m_aAbsURIRef.append(rTemp);
        m_aScheme = SubString( 0, rTemp.indexOf(':') );
    }
    else
    {
        if (bSmart)
        {
            // For scheme detection, the first (if any) of the following
            // productions that matches the input string (and for which the
            // appropriate style bit is set in eStyle, if applicable)
            // determines the scheme. The productions use the auxiliary rules

            //    domain = label *("." label)
            //    label = alphanum [*(alphanum / "-") alphanum]
            //    alphanum = ALPHA / DIGIT
            //    IPv6reference = "[" IPv6address "]"
            //    IPv6address = hexpart [":" IPv4address]
            //    IPv4address = 1*3DIGIT 3("." 1*3DIGIT)
            //    hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
            //    hexseq = hex4 *(":" hex4)
            //    hex4 = 1*4HEXDIG
            //    UCS4 = <any UCS4 character>

            // 1st Production (known scheme; handled by the "if (pPrefix)" branch above):
            //    <one of the known schemes, ignoring case> ":" *UCS4
            // 2nd Production (mailto):
            //    domain "@" domain
            // 3rd Production (ftp):
            //    "FTP" 2*("." label) ["/" *UCS4]
            // 4th Production (http):
            //    label 2*("." label) ["/" *UCS4]
            // 5th Production (file):
            //    "//" (domain / IPv6reference) ["/" *UCS4]
            // 6th Production (Unix file):
            //    "/" *UCS4
            // 7th Production (UNC file; FSysStyle::Dos only):
            //    "\\" domain ["\" *UCS4]
            // 8th Production (Unix-like DOS file; FSysStyle::Dos only):
            //    ALPHA ":" ["/" *UCS4]
            // 9th Production (DOS file; FSysStyle::Dos only):
            //    ALPHA ":" ["\" *UCS4]
            // 10th Production (any scheme; handled by the "m_eScheme = INetProtocol::Generic;" code
            // after this else branch):
            //    <any scheme> ":" *UCS4

            // For the 'non URL' file productions 6--9, the interpretation of
            // the input as a (degenerate) URI is turned off, i.e., escape
            // sequences and fragments are never detected as such, but are
            // taken as literal characters.

            sal_Unicode const * p1 = pPos;
            if (eStyle & FSysStyle::Dos
                && pEnd - p1 >= 2
                && rtl::isAsciiAlpha(p1[0])
                && p1[1] == ':'
                && (pEnd - p1 == 2 || p1[2] == '/' || p1[2] == '\\'))
            {
                m_eScheme = INetProtocol::File; // 8th, 9th
                eMechanism = EncodeMechanism::All;
                nFragmentDelimiter = 0x80000000;
            }
            else if (eStyle & FSysStyle::Dos
                && pEnd - p1 >= 6
                && p1[0] == '\\' && p1[1] == '\\' && p1[2] == '?' && p1[3] == '\\'
                && rtl::isAsciiAlpha(p1[4])
                && p1[5] == ':'
                && (pEnd - p1 == 6 || p1[6] == '/' || p1[6] == '\\'))
            {
                m_eScheme = INetProtocol::File; // 8th, 9th
                eMechanism = EncodeMechanism::All;
                nFragmentDelimiter = 0x80000000;
            }
            else if (pEnd - p1 >= 2 && p1[0] == '/' && p1[1] == '/')
            {
                p1 += 2;
                if ((scanDomain(p1, pEnd) > 0 || scanIPv6reference(p1, pEnd))
                    && (p1 == pEnd || *p1 == '/'))
                    m_eScheme = INetProtocol::File; // 5th
            }
            else if (p1 != pEnd && *p1 == '/')
            {
                m_eScheme = INetProtocol::File; // 6th
                eMechanism = EncodeMechanism::All;
                nFragmentDelimiter = 0x80000000;
            }
            else if (eStyle & FSysStyle::Dos
                     && pEnd - p1 >= 2
                     && p1[0] == '\\'
                     && p1[1] == '\\')
            {
                p1 += 2;
                if (pEnd - p1 >= 6 && p1[0] == '?' && p1[1] == '\\' && p1[5] == '\\'
                    && rtl::toAsciiLowerCase(p1[2]) == 'u'
                    && rtl::toAsciiLowerCase(p1[3]) == 'n'
                    && rtl::toAsciiLowerCase(p1[4]) == 'c')
                {
                    p1 += 6; // "\\?\UNC\Servername\..."
                }

                sal_Int32 n = rtl_ustr_indexOfChar_WithLength(
                    p1, pEnd - p1, '\\');
                sal_Unicode const * pe = n == -1 ? pEnd : p1 + n;
                if (
                    parseHostOrNetBiosName(
                        p1, pe, EncodeMechanism::All, RTL_TEXTENCODING_DONTKNOW,
                        true, nullptr) ||
                    (scanDomain(p1, pe) > 0 && p1 == pe)
                   )
                {
                    m_eScheme = INetProtocol::File; // 7th
                    eMechanism = EncodeMechanism::All;
                    nFragmentDelimiter = 0x80000000;
                }
            }
            else
            {
                sal_Unicode const * pDomainEnd = p1;
                sal_uInt32 nLabels = scanDomain(pDomainEnd, pEnd);
                if (nLabels > 0 && pDomainEnd != pEnd && *pDomainEnd == '@')
                {
                    ++pDomainEnd;
                    if (scanDomain(pDomainEnd, pEnd) > 0
                        && pDomainEnd == pEnd)
                        m_eScheme = INetProtocol::Mailto; // 2nd
                }
                else if (nLabels >= 3
                         && (pDomainEnd == pEnd || *pDomainEnd == '/'))
                    m_eScheme
                        = pDomainEnd - p1 >= 4
                          && (p1[0] == 'f' || p1[0] == 'F')
                          && (p1[1] == 't' || p1[1] == 'T')
                          && (p1[2] == 'p' || p1[2] == 'P')
                          && p1[3] == '.' ?
                              INetProtocol::Ftp : INetProtocol::Http; // 3rd, 4th
            }
        }

        OUString aSynScheme;
        if (m_eScheme == INetProtocol::NotValid) {
            sal_Unicode const * p1 = pPos;
            aSynScheme = parseScheme(&p1, pEnd, nFragmentDelimiter);
            if (!aSynScheme.isEmpty())
            {
                if (bSmart && m_eSmartScheme != m_eScheme && p1 != pEnd && rtl::isAsciiDigit(*p1))
                {
                    // rTheAbsURIRef doesn't define a known scheme (handled by the "if (pPrefix)"
                    // branch above); but a known scheme is defined in m_eSmartScheme. If this
                    // scheme may have a port in authority component, then avoid misinterpreting
                    // URLs like www.foo.bar:123/baz as using unknown "www.foo.bar" scheme with
                    // 123/baz rootless path. For now, do not try to handle possible colons in
                    // user information, require such ambiguous URLs to have explicit scheme part.
                    // Also ignore possibility of empty port.
                    const SchemeInfo& rInfo = getSchemeInfo(m_eSmartScheme);
                    if (rInfo.m_bAuthority && rInfo.m_bPort)
                    {
                        // Make sure that all characters from colon to [/?#] or to EOL are digits.
                        // Or maybe make it simple, and just assume that "xyz:1..." is more likely
                        // to be host "xyz" and port "1...", than scheme "xyz" and path "1..."?
                        sal_Unicode const* p2 = p1 + 1;
                        while (p2 != pEnd && rtl::isAsciiDigit(*p2))
                            ++p2;
                        if (p2 == pEnd || *p2 == '/' || *p2 == '?' || *p2 == '#')
                            m_eScheme = m_eSmartScheme;
                    }
                }

                if (m_eScheme == INetProtocol::NotValid)
                {
                    m_eScheme = INetProtocol::Generic;
                    pPos = p1;
                }
            }
        }

        if (bSmart && m_eScheme == INetProtocol::NotValid && pPos != pEnd
            && *pPos != nFragmentDelimiter)
        {
            m_eScheme = m_eSmartScheme;
        }

        if (m_eScheme == INetProtocol::NotValid)
        {
            setInvalid();
            return false;
        }

        if (m_eScheme != INetProtocol::Generic) {
            aSynScheme = getSchemeInfo().m_sScheme;
        }
        m_aScheme.set(m_aAbsURIRef, aSynScheme, m_aAbsURIRef.getLength());
        m_aAbsURIRef.append(':');
    }

    sal_uInt32 nSegmentDelimiter = '/';
    sal_uInt32 nAltSegmentDelimiter = 0x80000000;
    bool bSkippedInitialSlash = false;

    // Parse //<user>;AUTH=<auth>@<host>:<port> or
    // //<user>:<password>@<host>:<port> or
    // //<reg_name>
    if (getSchemeInfo().m_bAuthority)
    {
        sal_Unicode const * pUserInfoBegin = nullptr;
        sal_Unicode const * pUserInfoEnd = nullptr;
        sal_Unicode const * pHostPortBegin = nullptr;
        sal_Unicode const * pHostPortEnd = nullptr;

        switch (m_eScheme)
        {
            case INetProtocol::VndSunStarHelp:
            {
                if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '/')
                {
                    setInvalid();
                    return false;
                }
                m_aAbsURIRef.append("//");
                OUStringBuffer aSynAuthority;
                while (pPos < pEnd
                       && *pPos != '/' && *pPos != '?'
                       && *pPos != nFragmentDelimiter)
                {
                    EscapeType eEscapeType;
                    sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
                                                 eMechanism,
                                                 eCharset, eEscapeType);
                    appendUCS4(aSynAuthority, nUTF32, eEscapeType,
                               PART_AUTHORITY, eCharset, false);
                }
                m_aHost.set(m_aAbsURIRef,
                            aSynAuthority,
                            m_aAbsURIRef.getLength());
                    // misusing m_aHost to store the authority
                break;
            }

            case INetProtocol::VndSunStarHier:
            {
                if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
                {
                    pPos += 2;
                    m_aAbsURIRef.append("//");
                    OUStringBuffer aSynAuthority;
                    while (pPos < pEnd
                           && *pPos != '/' && *pPos != '?'
                           && *pPos != nFragmentDelimiter)
                    {
                        EscapeType eEscapeType;
                        sal_uInt32 nUTF32 = getUTF32(pPos,
                                                     pEnd,
                                                     eMechanism,
                                                     eCharset,
                                                     eEscapeType);
                        appendUCS4(aSynAuthority,
                                   nUTF32,
                                   eEscapeType,
                                   PART_AUTHORITY,
                                   eCharset,
                                   false);
                    }
                    if (aSynAuthority.isEmpty())
                    {
                        setInvalid();
                        return false;
                    }
                    m_aHost.set(m_aAbsURIRef,
                                aSynAuthority,
                                m_aAbsURIRef.getLength());
                        // misusing m_aHost to store the authority
                }
                break;
            }

            case INetProtocol::VndSunStarPkg:
            case INetProtocol::Cmis:
            {
                if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '/')
                {
                    setInvalid();
                    return false;
                }
                m_aAbsURIRef.append("//");
                OUStringBuffer aSynUser(128);

                bool bHasUser = false;
                while (pPos < pEnd && *pPos != '@'
                       && *pPos != '/' && *pPos != '?'
                       && *pPos != nFragmentDelimiter)
                {
                    EscapeType eEscapeType;
                    sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
                                                 eMechanism,
                                                 eCharset, eEscapeType);
                    appendUCS4(aSynUser, nUTF32, eEscapeType,
                               PART_USER_PASSWORD, eCharset, false);

                    bHasUser = *pPos == '@';
                }

                OUStringBuffer aSynAuthority(64);
                if ( !bHasUser )
                {
                    aSynAuthority = std::move(aSynUser);
                }
                else
                {
                    m_aUser.set(m_aAbsURIRef,
                            aSynUser,
                            m_aAbsURIRef.getLength());
                    m_aAbsURIRef.append("@");
                    ++pPos;

                    while (pPos < pEnd
                           && *pPos != '/' && *pPos != '?'
                           && *pPos != nFragmentDelimiter)
                    {
                        EscapeType eEscapeType;
                        sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
                                                     eMechanism,
                                                     eCharset, eEscapeType);
                        appendUCS4(aSynAuthority, nUTF32, eEscapeType,
                                   PART_AUTHORITY, eCharset, false);
                    }
                }
                if (aSynAuthority.isEmpty())
                {
                    setInvalid();
                    return false;
                }
                m_aHost.set(m_aAbsURIRef,
                            aSynAuthority,
                            m_aAbsURIRef.getLength());
                    // misusing m_aHost to store the authority
                break;
            }

            case INetProtocol::File:
                if (bSmart)
                {
                    // The first of the following seven productions that
                    // matches the rest of the input string (and for which the
                    // appropriate style bit is set in eStyle, if applicable)
                    // determines the used notation.  The productions use the
                    // auxiliary rules

                    //    domain = label *("." label)
                    //    label = alphanum [*(alphanum / "-") alphanum]
                    //    alphanum = ALPHA / DIGIT
                    //    IPv6reference = "[" IPv6address "]"
                    //    IPv6address = hexpart [":" IPv4address]
                    //    IPv4address = 1*3DIGIT 3("." 1*3DIGIT)
                    //    hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
                    //    hexseq = hex4 *(":" hex4)
                    //    hex4 = 1*4HEXDIG
                    //    path = <any UCS4 character except "#">
                    //    UCS4 = <any UCS4 character>

                    // 1st Production (URL):
                    //    "//" [domain / IPv6reference] ["/" *path]
                    //        ["#" *UCS4]
                    //  becomes
                    //    "file://" domain "/" *path ["#" *UCS4]
                    if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
                    {
                        sal_Unicode const * p1 = pPos + 2;
                        while (p1 != pEnd && *p1 != '/' &&
                               *p1 != nFragmentDelimiter)
                        {
                            ++p1;
                        }
                        if (parseHostOrNetBiosName(
                                pPos + 2, p1, EncodeMechanism::All,
                                RTL_TEXTENCODING_DONTKNOW, true, nullptr))
                        {
                            m_aAbsURIRef.append("//");
                            pHostPortBegin = pPos + 2;
                            pHostPortEnd = p1;
                            pPos = p1;
                            break;
                        }
                    }

                    // 2nd Production (MS IE generated 1; FSysStyle::Dos only):
                    //    "//" ALPHA ":" ["/" *path] ["#" *UCS4]
                    //  becomes
                    //    "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
                    //  replacing "\" by "/" within <*path>
                    // 3rd Production (MS IE generated 2; FSysStyle::Dos only):
                    //    "//" ALPHA ":" ["\" *path] ["#" *UCS4]
                    //  becomes
                    //    "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
                    //  replacing "\" by "/" within <*path>
                    // 4th Production (miscounted slashes):
                    //    "//" *path ["#" *UCS4]
                    //  becomes
                    //    "file:///" *path ["#" *UCS4]
                    if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
                    {
                        m_aAbsURIRef.append("//");
                        pPos += 2;
                        bSkippedInitialSlash = true;
                        if ((eStyle & FSysStyle::Dos)
                            && pEnd - pPos >= 2
                            && rtl::isAsciiAlpha(pPos[0])
                            && pPos[1] == ':'
                            && (pEnd - pPos == 2
                                || pPos[2] == '/' || pPos[2] == '\\'))
                            nAltSegmentDelimiter = '\\';
                        break;
                    }

                    // 5th Production (Unix):
                    //    "/" *path ["#" *UCS4]
                    //  becomes
                    //    "file:///" *path ["#" *UCS4]
                    if (pPos < pEnd && *pPos == '/')
                    {
                        m_aAbsURIRef.append("//");
                        break;
                    }

                    // 6th Production (UNC; FSysStyle::Dos only):
                    //    "\\" domain ["\" *path] ["#" *UCS4]
                    //  becomes
                    //    "file://" domain "/" *path ["#" *UCS4]
                    //  replacing "\" by "/" within <*path>
                    if (eStyle & FSysStyle::Dos
                        && pEnd - pPos >= 2
                        && pPos[0] == '\\'
                        && pPos[1] == '\\')
                    {
                        sal_Unicode const * p1 = pPos + 2;
                        sal_Unicode const * pHostPortTentativeBegin = p1;
                        if (pEnd - p1 >= 6 && p1[0] == '?' && p1[1] == '\\' && p1[5] == '\\'
                            && rtl::toAsciiLowerCase(p1[2]) == 'u'
                            && rtl::toAsciiLowerCase(p1[3]) == 'n'
                            && rtl::toAsciiLowerCase(p1[4]) == 'c')
                        {
                            p1 += 6; // "\\?\UNC\Servername\..."
                            pHostPortTentativeBegin = p1;
                        }

                        sal_Unicode const * pe = p1;
                        while (pe < pEnd && *pe != '\\' &&
                               *pe != nFragmentDelimiter)
                        {
                            ++pe;
                        }
                        if (
                             parseHostOrNetBiosName(
                                p1, pe, EncodeMechanism::All,
                                RTL_TEXTENCODING_DONTKNOW, true, nullptr) ||
                             (scanDomain(p1, pe) > 0 && p1 == pe)
                           )
                        {
                            m_aAbsURIRef.append("//");
                            pHostPortBegin = pHostPortTentativeBegin;
                            pHostPortEnd = pe;
                            pPos = pe;
                            nSegmentDelimiter = '\\';
                            break;
                        }
                    }

                    // 7th Production (Unix-like DOS; FSysStyle::Dos only):
                    //    ALPHA ":" ["/" *path] ["#" *UCS4]
                    //  becomes
                    //    "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
                    //  replacing "\" by "/" within <*path>
                    // 8th Production (DOS; FSysStyle::Dos only):
                    //    ALPHA ":" ["\" *path] ["#" *UCS4]
                    //  becomes
                    //    "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
                    //  replacing "\" by "/" within <*path>
                    if (eStyle & FSysStyle::Dos)
                    {
                        sal_Unicode const* p1 = pPos;
                        if (pEnd - p1 >= 4 && p1[0] == '\\' && p1[1] == '\\' && p1[2] == '?'
                            && p1[3] == '\\')
                            p1 += 4; // "\\?\c:\..."

                        if (pEnd - p1 >= 2
                            && rtl::isAsciiAlpha(p1[0])
                            && p1[1] == ':'
                            && (pEnd - p1 == 2
                                || p1[2] == '/'
                                || p1[2] == '\\'))
                        {
                            pPos = p1;
                            m_aAbsURIRef.append("//");
                            nAltSegmentDelimiter = '\\';
                            bSkippedInitialSlash = true;
                            break;
                        }
                    }

                    // 9th Production (any):
                    //    *path ["#" *UCS4]
                    //  becomes
                    //    "file:///" *path ["#" *UCS4]
                    //  replacing the delimiter by "/" within <*path>.  The
                    //  delimiter is that character from the set { "/", "\"}
                    // which appears most often in <*path> (if FSysStyle::Unix
                    //  is not among the style bits, "/" is removed from the
                    //  set; if FSysStyle::Dos is not among the style bits, "\" is
                    //  removed from the set).  If two or
                    //  more characters appear the same number of times, the
                    //  character mentioned first in that set is chosen.  If
                    //  the first character of <*path> is the delimiter, that
                    //  character is not copied
                    if (eStyle & (FSysStyle::Unix | FSysStyle::Dos))
                    {
                        m_aAbsURIRef.append("//");
                        switch (guessFSysStyleByCounting(pPos, pEnd, eStyle))
                        {
                            case FSysStyle::Unix:
                                nSegmentDelimiter = '/';
                                break;

                            case FSysStyle::Dos:
                                nSegmentDelimiter = '\\';
                                break;

                            default:
                                OSL_FAIL(
                                    "INetURLObject::setAbsURIRef():"
                                        " Bad guessFSysStyleByCounting");
                                break;
                        }
                        bSkippedInitialSlash
                            = pPos != pEnd && *pPos != nSegmentDelimiter;
                        break;
                    }
                }
                [[fallthrough]];
            default:
            {
                // For INetProtocol::File, allow an empty authority ("//") to be
                // missing if the following path starts with an explicit "/"
                // (Java is notorious in generating such file URLs, so be
                // liberal here):
                if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
                    pPos += 2;
                else if (!bSmart
                         && !(m_eScheme == INetProtocol::File
                              && pPos != pEnd && *pPos == '/'))
                {
                    setInvalid();
                    return false;
                }
                m_aAbsURIRef.append("//");

                sal_Unicode const * pAuthority = pPos;
                sal_uInt32 c = getSchemeInfo().m_bQuery ? '?' : 0x80000000;
                while (pPos < pEnd && *pPos != '/' && *pPos != c
                       && *pPos != nFragmentDelimiter)
                    ++pPos;
                if (getSchemeInfo().m_bUser)
                    if (getSchemeInfo().m_bHost)
                    {
                        sal_Unicode const * p1 = pAuthority;
                        while (p1 < pPos && *p1 != '@')
                            ++p1;
                        if (p1 == pPos)
                        {
                            pHostPortBegin = pAuthority;
                            pHostPortEnd = pPos;
                        }
                        else
                        {
                            pUserInfoBegin = pAuthority;
                            pUserInfoEnd = p1;
                            pHostPortBegin = p1 + 1;
                            pHostPortEnd = pPos;
                        }
                    }
                    else
                    {
                        pUserInfoBegin = pAuthority;
                        pUserInfoEnd = pPos;
                    }
                else if (getSchemeInfo().m_bHost)
                {
                    pHostPortBegin = pAuthority;
                    pHostPortEnd = pPos;
                }
                else if (pPos != pAuthority)
                {
                    setInvalid();
                    return false;
                }
                break;
            }
        }

        if (pUserInfoBegin)
        {
            Part ePart = PART_USER_PASSWORD;
            bool bSupportsPassword = getSchemeInfo().m_bPassword;
            bool bSupportsAuth
                = !bSupportsPassword && getSchemeInfo().m_bAuth;
            bool bHasAuth = false;
            OUStringBuffer aSynUser;
            sal_Unicode const * p1 = pUserInfoBegin;
            while (p1 < pUserInfoEnd)
            {
                EscapeType eEscapeType;
                sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd,
                                             eMechanism, eCharset, eEscapeType);
                if (eEscapeType == EscapeType::NONE)
                {
                    if (nUTF32 == ':' && bSupportsPassword)
                    {
                        bHasAuth = true;
                        break;
                    }
                    else if (nUTF32 == ';' && bSupportsAuth
                             && pUserInfoEnd - p1
                                    > RTL_CONSTASCII_LENGTH("auth=")
                             && INetMIME::equalIgnoreCase(
                                    p1,
                                    p1 + RTL_CONSTASCII_LENGTH("auth="),
                                    "auth="))
                    {
                        p1 += RTL_CONSTASCII_LENGTH("auth=");
                        bHasAuth = true;
                        break;
                    }
                }
                appendUCS4(aSynUser, nUTF32, eEscapeType, ePart,
                           eCharset, false);
            }
            m_aUser.set(m_aAbsURIRef, aSynUser, m_aAbsURIRef.getLength());
            if (bHasAuth)
            {
                if (bSupportsPassword)
                {
                    m_aAbsURIRef.append(':');
                    OUStringBuffer aSynAuth;
                    while (p1 < pUserInfoEnd)
                    {
                        EscapeType eEscapeType;
                        sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd,
                                                     eMechanism, eCharset,
                                                     eEscapeType);
                        appendUCS4(aSynAuth, nUTF32, eEscapeType,
                                   ePart, eCharset, false);
                    }
                    m_aAuth.set(m_aAbsURIRef, aSynAuth, m_aAbsURIRef.getLength());
                }
                else
                {
                    m_aAbsURIRef.append(";AUTH=");
                    OUStringBuffer aSynAuth;
                    while (p1 < pUserInfoEnd)
                    {
                        EscapeType eEscapeType;
                        sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd,
                                                     eMechanism, eCharset,
                                                     eEscapeType);
                        if (!INetMIME::isIMAPAtomChar(nUTF32))
                        {
                            setInvalid();
                            return false;
                        }
                        appendUCS4(aSynAuth, nUTF32, eEscapeType,
                                   ePart, eCharset, false);
                    }
                    m_aAuth.set(m_aAbsURIRef, aSynAuth, m_aAbsURIRef.getLength());
                }
            }
            if (pHostPortBegin)
                m_aAbsURIRef.append('@');
        }

        if (pHostPortBegin)
        {
            sal_Unicode const * pPort = pHostPortEnd;
            if ( getSchemeInfo().m_bPort && pHostPortBegin < pHostPortEnd )
            {
                sal_Unicode const * p1 = pHostPortEnd - 1;
                while (p1 > pHostPortBegin && rtl::isAsciiDigit(*p1))
                    --p1;
                if (*p1 == ':')
                    pPort = p1;
            }
            bool bNetBiosName = false;
            switch (m_eScheme)
            {
                case INetProtocol::File:
                    // If the host equals "LOCALHOST" (unencoded and ignoring
                    // case), turn it into an empty host:
                    if (INetMIME::equalIgnoreCase(pHostPortBegin, pPort,
                                                  "localhost"))
                        pHostPortBegin = pPort;
                    bNetBiosName = true;
                    break;

                case INetProtocol::Ldap:
                case INetProtocol::Smb:
                    if (pHostPortBegin == pPort && pPort != pHostPortEnd)
                    {
                        setInvalid();
                        return false;
                    }
                    break;
                default:
                    if (pHostPortBegin == pPort)
                    {
                        setInvalid();
                        return false;
                    }
                    break;
            }
            sal_Int32 nLenBeforeHost = m_aAbsURIRef.getLength();
            if (!parseHostOrNetBiosName(
                    pHostPortBegin, pPort, eMechanism, eCharset,
                    bNetBiosName, &m_aAbsURIRef))
            {
                setInvalid();
                return false;
            }
            m_aHost = SubString(nLenBeforeHost, m_aAbsURIRef.getLength() - nLenBeforeHost);
            if (pPort != pHostPortEnd)
            {
                m_aAbsURIRef.append(':');
                m_aPort.set(m_aAbsURIRef,
                    std::u16string_view{pPort + 1, static_cast<size_t>(pHostPortEnd - (pPort + 1))},
                    m_aAbsURIRef.getLength());
            }
        }
    }

    // Parse <path>
    sal_Int32 nBeforePathLength = m_aAbsURIRef.getLength();
    if (!parsePath(m_eScheme, &pPos, pEnd, eMechanism, eCharset,
                   bSkippedInitialSlash, nSegmentDelimiter,
                   nAltSegmentDelimiter,
                   getSchemeInfo().m_bQuery ? '?' : 0x80000000,
                   nFragmentDelimiter, m_aAbsURIRef))
    {
        setInvalid();
        return false;
    }
    m_aPath = SubString(nBeforePathLength, m_aAbsURIRef.getLength() - nBeforePathLength);

    // Parse ?<query>
    if (getSchemeInfo().m_bQuery && pPos < pEnd && *pPos == '?')
    {
        m_aAbsURIRef.append('?');
        OUStringBuffer aSynQuery;
        for (++pPos; pPos < pEnd && *pPos != nFragmentDelimiter;)
        {
            EscapeType eEscapeType;
            sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
                                         eMechanism, eCharset, eEscapeType);
            appendUCS4(aSynQuery, nUTF32, eEscapeType,
                       PART_URIC, eCharset, true);
        }
        m_aQuery.set(m_aAbsURIRef, aSynQuery, m_aAbsURIRef.getLength());
    }

    // Parse #<fragment>
    if (pPos < pEnd && *pPos == nFragmentDelimiter)
    {
        m_aAbsURIRef.append(sal_Unicode(nFragmentDelimiter));
        OUStringBuffer aSynFragment;
        for (++pPos; pPos < pEnd;)
        {
            EscapeType eEscapeType;
            sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
                                         eMechanism, eCharset, eEscapeType);
            appendUCS4(aSynFragment, nUTF32, eEscapeType, PART_URIC,
                       eCharset, true);
        }
        m_aFragment.set(m_aAbsURIRef, aSynFragment, m_aAbsURIRef.getLength());
    }

    if (pPos != pEnd)
    {
        setInvalid();
        return false;
    }

    return true;
}

void INetURLObject::changeScheme(INetProtocol eTargetScheme) {
    sal_Int32 oldSchemeLen = 0;
    const OUString& rOldSchemeName = getSchemeInfo().m_sScheme;
    if (m_eScheme == INetProtocol::Generic)
        oldSchemeLen = m_aScheme.getLength();
    else
        oldSchemeLen = rOldSchemeName.getLength();
    m_eScheme=eTargetScheme;
    const OUString& rNewSchemeName = getSchemeInfo().m_sScheme;
    sal_Int32 newSchemeLen = rNewSchemeName.getLength();
    m_aAbsURIRef.remove(0, oldSchemeLen);
    m_aAbsURIRef.insert(0, rNewSchemeName);
    sal_Int32 delta=newSchemeLen-oldSchemeLen;
    m_aUser+=delta;
    m_aAuth+=delta;
    m_aHost+=delta;
    m_aPort+=delta;
    m_aPath+=delta;
    m_aQuery+=delta;
    m_aFragment+=delta;
}

bool INetURLObject::convertRelToAbs(OUString const & rTheRelURIRef,
                                    INetURLObject & rTheAbsURIRef,
                                    bool & rWasAbsolute,
                                    EncodeMechanism eMechanism,
                                    rtl_TextEncoding eCharset,
                                    bool bIgnoreFragment, bool bSmart,
                                    bool bRelativeNonURIs, FSysStyle eStyle)
    const
{
    sal_Unicode const * p = rTheRelURIRef.getStr();
    sal_Unicode const * pEnd = p + rTheRelURIRef.getLength();

    sal_Unicode const * pPrefixBegin = p;
    PrefixInfo const * pPrefix = getPrefix(pPrefixBegin, pEnd);
    bool hasScheme = pPrefix != nullptr;
    if (!hasScheme) {
        pPrefixBegin = p;
        hasScheme = !parseScheme(&pPrefixBegin, pEnd, '#').isEmpty();
    }

    sal_uInt32 nSegmentDelimiter = '/';
    sal_uInt32 nQueryDelimiter
        = !bSmart || getSchemeInfo().m_bQuery ? '?' : 0x80000000;
    sal_uInt32 nFragmentDelimiter = '#';
    Part ePart = PART_VISIBLE;

    if (!hasScheme && bSmart)
    {
        // If the input matches any of the following productions (for which
        // the appropriate style bit is set in eStyle), it is assumed to be an
        // absolute file system path, rather than a relative URI reference.
        // (This is only a subset of the productions used for scheme detection
        // in INetURLObject::setAbsURIRef(), because most of those productions
        // interfere with the syntax of relative URI references.)  The
        // productions use the auxiliary rules

--> --------------------

--> maximum size reached

--> --------------------

Messung V0.5
C=90 H=94 G=91

¤ Dauer der Verarbeitung: 0.21 Sekunden  ¤

*© Formatika GbR, Deutschland






Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.