/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This file incorporates work covered by the following license notice:
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed
* with this work for additional information regarding copyright
* ownership. The ASF licenses this file to you under the Apache
* License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
#include <regexp.hxx>
#include <cstddef>
#include <osl/diagnose.h>
#include <com/sun/star/lang/IllegalArgumentException.hpp>
#include <rtl/character.hxx>
#include <rtl/ustrbuf.hxx>
#include <rtl/ustring.hxx>
#include <utility>
using namespace com::sun::star;
using namespace ucb_impl;
// Regexp
inline Regexp::Regexp(Kind eTheKind, OUString aThePrefix,
bool bTheEmptyDomain, OUString aTheInfix,
bool bTheTranslation,
OUString aTheReversePrefix):
m_eKind(eTheKind),
m_aPrefix(std::move(aThePrefix)),
m_aInfix(std::move(aTheInfix)),
m_aReversePrefix(std::move(aTheReversePrefix)),
m_bEmptyDomain(bTheEmptyDomain),
m_bTranslation(bTheTranslation)
{
OSL_ASSERT(m_eKind == KIND_DOMAIN
|| (!m_bEmptyDomain && m_aInfix.isEmpty()));
OSL_ASSERT(m_bTranslation || m_aReversePrefix.isEmpty());
}
namespace {
bool matchStringIgnoreCase(sal_Unicode
const ** pBegin,
sal_Unicode
const * pEnd,
OUString
const & rString)
{
sal_Unicode
const * p = *pBegin;
sal_Unicode
const * q = rString.getStr();
sal_Unicode
const * qEnd = q + rString.getLength();
if (pEnd - p < qEnd - q)
return false;
while (q != qEnd)
{
if (rtl::compareIgnoreAsciiCase(*p++, *q++) != 0)
return false;
}
*pBegin = p;
return true;
}
}
bool Regexp::matches(OUString
const & rString)
const
{
sal_Unicode
const * pBegin = rString.getStr();
sal_Unicode
const * pEnd = pBegin + rString.getLength();
bool bMatches =
false;
sal_Unicode
const * p = pBegin;
if (matchStringIgnoreCase(&p, pEnd, m_aPrefix))
{
switch (m_eKind)
{
case KIND_PREFIX:
bMatches =
true;
break;
case KIND_AUTHORITY:
bMatches = p == pEnd || *p ==
'/' || *p ==
'?' || *p ==
'#';
break;
case KIND_DOMAIN:
if (!m_bEmptyDomain)
{
if (p == pEnd || *p ==
'/' || *p ==
'?' || *p ==
'#')
break;
++p;
}
for (;;)
{
sal_Unicode
const * q = p;
if (matchStringIgnoreCase(&q, pEnd, m_aInfix)
&& (q == pEnd || *q ==
'/' || *q ==
'?' || *q ==
'#'))
{
bMatches =
true;
break;
}
if (p == pEnd)
break;
sal_Unicode c = *p++;
if (c ==
'/' || c ==
'?' || c ==
'#')
break;
}
break;
}
}
return bMatches;
}
namespace {
bool isScheme(OUString
const & rString,
bool bColon)
{
// Return true if rString matches <scheme> (plus a trailing ":" if bColon
// is true) from RFC 2396:
sal_Unicode
const * p = rString.getStr();
sal_Unicode
const * pEnd = p + rString.getLength();
if (p != pEnd && rtl::isAsciiAlpha(*p))
for (++p;;)
{
if (p == pEnd)
return !bColon;
sal_Unicode c = *p++;
if (!(rtl::isAsciiAlphanumeric(c)
|| c ==
'+' || c ==
'-' || c ==
'.'))
return bColon && c ==
':' && p == pEnd;
}
return false;
}
void appendStringLiteral(OUStringBuffer * pBuffer,
OUString
const & rString)
{
OSL_ASSERT(pBuffer);
pBuffer->append(
'"');
sal_Unicode
const * p = rString.getStr();
sal_Unicode
const * pEnd = p + rString.getLength();
while (p != pEnd)
{
sal_Unicode c = *p++;
if (c ==
'"' || c ==
'\\')
pBuffer->append(
'\\');
pBuffer->append(c);
}
pBuffer->append(
'"');
}
}
OUString Regexp::getRegexp()
const
{
if (m_bTranslation)
{
OUStringBuffer aBuffer;
if (!m_aPrefix.isEmpty())
appendStringLiteral(&aBuffer, m_aPrefix);
switch (m_eKind)
{
case KIND_PREFIX:
aBuffer.append(
"(.*)");
break;
case KIND_AUTHORITY:
aBuffer.append(
"(([/?#].*)?)");
break;
case KIND_DOMAIN:
aBuffer.append(
"([^/?#]" + OUStringChar(sal_Unicode(m_bEmptyDomain ?
'*' :
'+')));
if (!m_aInfix.isEmpty())
appendStringLiteral(&aBuffer, m_aInfix);
aBuffer.append(
"([/?#].*)?)");
break;
}
aBuffer.append(
"->");
if (!m_aReversePrefix.isEmpty())
appendStringLiteral(&aBuffer, m_aReversePrefix);
aBuffer.append(
"\\1");
return aBuffer.makeStringAndClear();
}
else if (m_eKind == KIND_PREFIX && isScheme(m_aPrefix,
true))
return m_aPrefix.copy(0, m_aPrefix.getLength() - 1);
else
{
OUStringBuffer aBuffer;
if (!m_aPrefix.isEmpty())
appendStringLiteral(&aBuffer, m_aPrefix);
switch (m_eKind)
{
case KIND_PREFIX:
aBuffer.append(
".*");
break;
case KIND_AUTHORITY:
aBuffer.append(
"([/?#].*)?");
break;
case KIND_DOMAIN:
aBuffer.append(
"[^/?#]" + OUStringChar( m_bEmptyDomain ?
'*' :
'+' ));
if (!m_aInfix.isEmpty())
appendStringLiteral(&aBuffer, m_aInfix);
aBuffer.append(
"([/?#].*)?");
break;
}
return aBuffer.makeStringAndClear();
}
}
namespace {
bool matchString(sal_Unicode
const ** pBegin, sal_Unicode
const * pEnd,
char const * pString, size_t nStringLength)
{
sal_Unicode
const * p = *pBegin;
unsigned char const * q =
reinterpret_cast<
unsigned char const * >(pString);
unsigned char const * qEnd = q + nStringLength;
if (pEnd - p < qEnd - q)
return false;
while (q != qEnd)
{
sal_Unicode c1 = *p++;
sal_Unicode c2 = *q++;
if (c1 != c2)
return false;
}
*pBegin = p;
return true;
}
bool scanStringLiteral(sal_Unicode
const ** pBegin, sal_Unicode
const * pEnd,
OUString * pString)
{
sal_Unicode
const * p = *pBegin;
if (p == pEnd || *p++ !=
'"')
return false;
OUStringBuffer aBuffer;
for (;;)
{
if (p == pEnd)
return false;
sal_Unicode c = *p++;
if (c ==
'"')
break;
if (c ==
'\\')
{
if (p == pEnd)
return false;
c = *p++;
if (c !=
'"' && c !=
'\\')
return false;
}
aBuffer.append(c);
}
*pBegin = p;
*pString = aBuffer.makeStringAndClear();
return true;
}
}
Regexp Regexp::parse(OUString
const & rRegexp)
{
// Detect an input of '<scheme>' as an abbreviation of '"<scheme>:".*'
// where <scheme> is as defined in RFC 2396:
if (isScheme(rRegexp,
false))
return Regexp(Regexp::KIND_PREFIX,
rRegexp +
":",
false,
OUString(),
false,
OUString());
sal_Unicode
const * p = rRegexp.getStr();
sal_Unicode
const * pEnd = p + rRegexp.getLength();
OUString aPrefix;
scanStringLiteral(&p, pEnd, &aPrefix);
if (p == pEnd)
throw lang::IllegalArgumentException();
// This and the matchString() calls below are some of the few places where
// RTL_CONSTASCII_STRINGPARAM() should NOT be removed.
// (c.f. https://gerrit.libreoffice.org/3117)
if (matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM(
".*")))
{
if (p != pEnd)
throw lang::IllegalArgumentException();
return Regexp(Regexp::KIND_PREFIX, aPrefix,
false, OUString(),
false, OUString());
}
else if (matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM(
"(.*)->")))
{
OUString aReversePrefix;
scanStringLiteral(&p, pEnd, &aReversePrefix);
if (!matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM(
"\\1"))
|| p != pEnd)
throw lang::IllegalArgumentException();
return Regexp(Regexp::KIND_PREFIX, aPrefix,
false, OUString(),
true, aReversePrefix);
}
else if (matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM(
"([/?#].*)?")))
{
if (p != pEnd)
throw lang::IllegalArgumentException();
return Regexp(Regexp::KIND_AUTHORITY, aPrefix,
false, OUString(),
false, OUString());
}
else if (matchString(&p, pEnd,
RTL_CONSTASCII_STRINGPARAM(
"(([/?#].*)?)->")))
{
OUString aReversePrefix;
if (!(scanStringLiteral(&p, pEnd, &aReversePrefix)
&& matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM(
"\\1"))
&& p == pEnd))
throw lang::IllegalArgumentException();
return Regexp(Regexp::KIND_AUTHORITY, aPrefix,
false, OUString(),
true, aReversePrefix);
}
else
{
bool bOpen =
false;
if (p != pEnd && *p ==
'(')
{
++p;
bOpen =
true;
}
if (!matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM(
"[^/?#]")))
throw lang::IllegalArgumentException();
if (p == pEnd || (*p !=
'*' && *p !=
'+'))
throw lang::IllegalArgumentException();
bool bEmptyDomain = *p++ ==
'*';
OUString aInfix;
scanStringLiteral(&p, pEnd, &aInfix);
if (!matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM(
"([/?#].*)?")))
throw lang::IllegalArgumentException();
OUString aReversePrefix;
if (bOpen
&& !(matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM(
")->"))
&& scanStringLiteral(&p, pEnd, &aReversePrefix)
&& matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM(
"\\1"))))
throw lang::IllegalArgumentException();
if (p != pEnd)
throw lang::IllegalArgumentException();
return Regexp(Regexp::KIND_DOMAIN, aPrefix, bEmptyDomain, aInfix,
bOpen, aReversePrefix);
}
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */