unit UniRtf2HtmlUnit1;
{------------------------------------------------------------------------------
This RTF to HTML conversion program supports the following RTF statements:
- Bold, Italic, Underline
- Alignment
- Fonts + font size + font color
- Bullet
It doesn't support itemnumbers, instead it writes bullets.
RTF code generated by Microsoft Word may not work because it contains
an incredible amount of crap which may confuse the conversion routines.
RTF code generated by TRichEdit and WordPad seem to work perfectly.
Updated: 10-jun-2002
Comments may be sent to: [email protected]
-------------------------------------------------------------------------------}
interface
uses
Windows, Messages, SysUtils, Classes, Graphics, Controls, Forms,
StdCtrls, Dialogs, StrUtils;
//------------------------------------------------------------------------------
//
//------------------------------------------------------------------------------
type
TSwitch = (Activate, Deactivate, Neutral);
TAlignment = (Left, Right, Center);
TFontTable = record
List: TStringList;
Status: TSwitch;
GroupCount: integer;
end;
TColorTable = record
List: TStringList;
GroupCount: integer;
end;
TTextFormat = record
FontTable: TFontTable;
ColorTable: TColorTable;
Bold: TSwitch;
Italic: TSwitch;
Underline: TSwitch;
NewFont: integer;
CurrentFont: integer;
NewSize: integer;
CurrentSize: integer;
NewColor: integer;
CurrentColor: integer;
IsOpen: boolean;
IsUpdated: boolean;
SizeFactor: integer;
RftDefaultSize: integer;
end;
TItemize = record
IsItemized: boolean;
IndentStatus: TSwitch;
BulletStatus: TSwitch;
BulletIsOpen: boolean;
//NumberedBullet: boolean; ItemNumbers is not supported because RTF is a piece of crap
//NumberingChars: string;
IndentCount: integer;
IndentDist: integer;
TextToBulletDist: integer;
end;
TParagraphFormat = record
CurrentAlign: TAlignment;
NewAlign: TAlignment;
NewLine: boolean;
IsOpen: boolean;
Itemize: TItemize;
end;
TUniRtf2Html = class(TObject)
function ConvertRtfToHtml(strRTFText: string): string;
procedure SpecialChars(var Text:String;tohtml:boolean);
private
{ private declaration }
TextFormat: TTextFormat;
ParagraphFormat: TParagraphFormat;
function QueueControlWords(var strRTFText: string;
var iPos, iGroupCount, iLength: integer): boolean;
function GetControlWord(var strRtfText, strControlWord, strParam : string;
var iPos, iGroupCount, iLength: integer): boolean;
function ConvertSpecialChars(c: char): string;
function WriteHeaderFooter(strHtmlText: string): string;
function WriteHtmlControlChar: string;
public
{ public declaration }
end;
//------------------------------------------------------------------------------
//
//------------------------------------------------------------------------------
var
UniRtf2Html: TUniRtf2Html;
//------------------------------------------------------------------------------
//
//------------------------------------------------------------------------------
implementation
procedure TUniRtf2Html.SpecialChars(var Text:String;tohtml:boolean);
const
strs:array[1..11,1..2] of String =
((' ', ' '),
('"', '„'),
('-', '–'),
('ß', 'ß'),
('ü', 'ü'),
('ä', 'ä'),
('ö', 'ö'),
('Ü', 'Ü'),
('Ä', 'Ä'),
('Ö', 'Ö'),
('', '€'));
var i:integer;
begin
for i:=2 to Length(strs) do begin
if tohtml then Text := StringReplace(Text, strs[i,2], strs[i,1], [rfReplaceAll])
else Text := StringReplace(Text, strs[i,1], strs[i,2], [rfReplaceAll]);
end;
if tohtml then
Text := StringReplace(Text, strs[1,2], strs[1,1], [rfReplaceAll])
end;
function TUniRtf2Html.ConvertRtfToHtml(strRTFText: string): string;
var
strHtmlText,Buf: string;
iPos,ll,hex: integer;
iLength: integer;
iGroupCount: integer;
function hexchar(c:char):integer;
begin
if c>'a' then hexchar:=ord(c)-ord('a')+10
else hexchar:=ord(c)-ord('0');
end;
begin
strRTFText := StringReplace(strRTFText, Chr(13), '', [rfReplaceAll]);
strRTFText := StringReplace(strRTFText, Chr(10), '', [rfReplaceAll]);
// convert octals like
ll:=1;
while ll<Length(strRTFText)-3 do begin
if (strRTFText[ll]='\') and (strRTFText[ll+1]='''')
and (strRTFText[ll+2] in ['0'..'9','a'..'z']) then begin
Buf:=MidStr(strRTFText,ll,4);
hex:=hexchar(Buf[3])*16+hexchar(Buf[4]);
strRTFText := StringReplace(strRTFText, Buf, chr(hex), [rfReplaceAll]);
ll:=ll+3;
end;
ll:=ll+1;
end;
// special chars
SpecialChars(strRtftext,false);
strRTFText := Trim(strRTFText);
//RVe: laatste 5 chars zijn rubbish.
iLength := Length(strRTFText) - 5;
iPos := 1;
iGroupCount := 0;
TextFormat.FontTable.Status := Neutral;
TextFormat.FontTable.List := TStringList.Create;
TextFormat.ColorTable.List := TStringList.Create;
TextFormat.Bold := Neutral;
TextFormat.Italic := Neutral;
TextFormat.Underline := Neutral;
TextFormat.CurrentFont := -1;
TextFormat.NewFont := -1;
TextFormat.CurrentSize := 20;
TextFormat.CurrentColor := 0;
TextFormat.NewColor := 0;
TextFormat.NewSize := -1;
TextFormat.IsOpen := False;
TextFormat.IsUpdated := False;
ParagraphFormat.Itemize.IsItemized := False;
ParagraphFormat.Itemize.IndentStatus := Neutral;
ParagraphFormat.Itemize.BulletStatus := Neutral;
ParagraphFormat.Itemize.BulletIsOpen := False;
//ParagraphFormat.Itemize.NumberedBullet := False;
ParagraphFormat.Itemize.IndentCount := 0;
ParagraphFormat.Itemize.IndentDist := 0;
ParagraphFormat.Itemize.TextToBulletDist := 0;
ParagraphFormat.CurrentAlign := Left;
ParagraphFormat.NewAlign := Left;
ParagraphFormat.NewLine := False;
ParagraphFormat.IsOpen := False;
strHtmlText := '';
while iPos <= iLength do
begin
case strRtfText[iPos] of
'{': inc(iGroupCount);
'}': dec(iGroupCount);
'\':
begin
if QueueControlWords(strRtfText, iPos, iGroupCount, iLength) then
strHtmlText := strHtmlText + WriteHtmlControlChar;
end;
else
begin
if iGroupCount > 0 then
strHtmlText := strHtmlText + WriteHtmlControlChar +
ConvertSpecialChars(strRtfText[iPos]);
end;
end;
inc(iPos);
end;
if iGroupCount > 0 then
strHtmlText := strHtmlText + WriteHtmlControlChar;
Result := WriteHeaderFooter(strHtmlText);
TextFormat.FontTable.List.Free;
TextFormat.ColorTable.List.Free;
end;
//------------------------------------------------------------------------------
//
//------------------------------------------------------------------------------
function TUniRtf2Html.GetControlWord(var strRtfText, strControlWord,
strParam : string; var iPos, iGroupCount, iLength: integer): boolean;
var
iTempGroupCount: integer;
begin
Result := False;
case strRtfText[iPos + 1] of
'*': // = Unknown controlword
begin
iTempGroupCount := iGroupCount;
// {/*...} ignore entire group when it contains: \*:
while (iGroupCount > iTempGroupCount - 1) and (iPos < iLength) do
begin
inc(iPos);
case strRtfText[iPos] of
'{': inc(iGroupCount);
'}': dec(iGroupCount);
end;
end;
end;
'\', '{', '}': inc(iPos); // these char must just be added the HTML string
else
begin
Result := True;
// Capture the controlword:
strControlWord := '';
repeat
strControlWord := strControlWord + strRtfText[iPos];
inc(iPos);
until (strRtfText[iPos] in ['{', '}', '\', ' ', ';', '-', '0'..'9'])
or(iPos > iLength);
// Capture parameter of controlword:
strParam := '';
while (strRtfText[iPos] in ['a'..'z', '-', '0'..'9']) and (iPos <= iLength) do
begin
strParam := strParam + strRtfText[iPos];
inc(iPos);
end;
if strRtfText[iPos] = ' ' then inc(iPos);
dec(iPos);
end;
end;
end;
//------------------------------------------------------------------------------
//
//------------------------------------------------------------------------------
function TUniRtf2Html.QueueControlWords(var strRtfText: string;
var iPos, iGroupCount, iLength: integer): boolean;
var
strControlWord: string;
strParam: string;
strFont, strColor, strRGB: string;
iRGB: integer;
begin
// strRtfText[i] = '\': start of a controlword
Result := False; // True = Forced propagation to HTML string
if GetControlWord(strRtfText, strControlWord, strParam,
iPos, iGroupCount, iLength) then
begin
if strControlWord = '\b' then
begin
if (strParam = '') or (StrToInt(strParam) > 0) then
TextFormat.Bold := Activate
else
TextFormat.Bold := Deactivate;
Exit;
end;
if strControlWord = '\i' then
begin
if (strParam = '') or (StrToInt(strParam) > 0) then
TextFormat.Italic := Activate
else
TextFormat.Italic := Deactivate;
Exit;
end;
if strControlWord = '\ul' then
begin
TextFormat.Underline := Activate;
Exit;
end;
if strControlWord = '\ulnone' then
begin
TextFormat.Underline := Deactivate;
Exit;
end;
if strControlWord = '\qc' then
begin
ParagraphFormat.NewAlign := Center;
Exit;
end;
if strControlWord = '\qr' then
begin
ParagraphFormat.NewAlign := Right;
Exit;
end;
if strControlWord = '\fi' then
begin
// \fi: starts an indent which is closed by: \pard
ParagraphFormat.Itemize.IndentStatus := Activate;
ParagraphFormat.Itemize.TextToBulletDist := Abs(StrToInt(strParam));
Exit;
end;
if strControlWord = '\li' then
begin
// \li: display a bullet which is closed by: \par
ParagraphFormat.Itemize.BulletStatus := Activate;
ParagraphFormat.Itemize.IndentDist := strToInt(strParam);
// Assuming that an indent of 400 in RTF is 1 indent in HTML
ParagraphFormat.Itemize.IndentCount :=
Round((ParagraphFormat.Itemize.IndentDist -
ParagraphFormat.Itemize.TextToBulletDist) / 400);
Exit;
end;
{ ---------------------------------------------------------------
Itemnumbers is not supported because RTF is piece of crap.
TRichEdit doesn't support it either so I don't need it anyway.
A bullet will be written instead and the itemnumber appears
as part of the text after the bullet.
---------------------------------------------------------------
if strControlWord = '\tx' then
begin
// \tx: display an itemnumber instead of a bullet
ParagraphFormat.Itemize.NumberedBullet := True;
// A space marks the beginning the number:
while (strRtfText[iPos] <> ' ') and (iPos <= iLength) do
begin
inc(iPos);
end;
inc(iPos); // Skip the space char.
// A backslash marks the end of a fontname:
ParagraphFormat.Itemize.NumberingChars := '';
while (strRtfText[iPos] <> '\') and (iPos <= iLength) do
begin
ParagraphFormat.Itemize.NumberingChars :=
ParagraphFormat.Itemize.NumberingChars + strRtfText[iPos];
inc(iPos);
end;
dec(iPos); // 1 step back so the \ isn't skipped in the next run.
Exit;
end;
--------
This code works for the first item but the following don't because
in rft the following itemnumbers are written without a \tx or any
other controlword.
--------
}
if strControlWord = '\pard' then
begin
Result := True; // Force propagation of to HTML string
// Restore paragraph defaults:
// Re-align to the left:
ParagraphFormat.NewAlign := Left;
// Close itemized section:
if ParagraphFormat.Itemize.IsItemized then
ParagraphFormat.Itemize.IndentStatus := Deactivate;
Exit;
end;
if strControlWord = '\par' then
begin
Result := True; // Force propagation of to HTML string
// Close bullet:
if ParagraphFormat.Itemize.BulletIsOpen then
ParagraphFormat.Itemize.BulletStatus := Deactivate;
// New line <BR>, paragraphs <p> are not used because of the line distance:
ParagraphFormat.NewLine := True;
Exit;
end;
if strControlWord = '\line' then
begin
// New line:
ParagraphFormat.NewLine := True;
Exit;
end;
if strControlWord = '\fonttbl' then
begin
// Create a fonttable.
TextFormat.FontTable.Status := Activate;
// The fonttable section ends when the current group ends.
TextFormat.FontTable.GroupCount := iGroupCount;
Exit;
end;
if (TextFormat.FontTable.Status = Activate) and
(TextFormat.FontTable.GroupCount > iGroupCount) then
TextFormat.FontTable.Status := Deactivate; // Fonttable group has ended, so: no more fonttable.
if strControlWord = '\f' then
begin
case TextFormat.FontTable.Status of
Activate:
begin
// Add a font to the list
strFont := '';
// A space marks the beginning a fontname:
while (strRtfText[iPos] <> ' ') and (iPos <= iLength) do
begin
inc(iPos);
end;
inc(iPos); // Skip the space char.
// A semicolon marks the end of a fontname:
while (strRtfText[iPos] <> ';') and (iPos <= iLength) do
begin
strFont := strFont + strRtfText[iPos];
inc(iPos);
end;
// Add font to the list
TextFormat.FontTable.List.Add(strFont);
end;
Deactivate:
begin
TextFormat.NewFont := StrToInt(strParam);
TextFormat.IsUpdated := True;
end;
end;
Exit;
end;
if strControlWord = '\fs' then
begin
TextFormat.NewSize := StrToInt(strParam);
TextFormat.IsUpdated := True;
end;
if strControlWord = '\colortbl' then
begin
// Create a colortable.
// The colortable section ends when the current group ends.
TextFormat.ColorTable.GroupCount := iGroupCount;
// Extract RGB values
// A semicolon marks the end of a color:
strColor := '000000'; // black
while TextFormat.ColorTable.GroupCount <= iGroupCount do
begin
inc(iPos);
case strRtfText[iPos] of
'{': inc(iGroupCount);
'}': dec(iGroupCount);
'\':
begin
if GetControlWord(strRtfText, strControlWord, strParam,
iPos, iGroupCount, iLength) then
begin
if strControlWord = '\red' then
begin
iRGB := StrToInt(strParam);
strRGB := IntToHex(iRGB, 2);
Insert(strRGB, strColor, 1);
Delete(strColor, 3, 2);
end;
if strControlWord = '\green' then
begin
iRGB := StrToInt(strParam);
strRGB := IntToHex(iRGB, 2);
Insert(strRGB, strColor, 3);
Delete(strColor, 5, 2);
end;
if strControlWord = '\blue' then
begin
iRGB := StrToInt(strParam);
strRGB := IntToHex(iRGB, 2);
Insert(strRGB, strColor, 5);
Delete(strColor, 7, 2);
end;
end;
end;
end;
if strRtfText[iPos] = ';' then
TextFormat.ColorTable.List.Add(strColor);
end;
Exit;
end;
if strControlWord = '\cf' then
begin
TextFormat.NewColor := StrToInt(strParam);
TextFormat.IsUpdated := True;
Exit;
end;
end;
end;
//------------------------------------------------------------------------------
//
//------------------------------------------------------------------------------
function TUniRtf2Html.WriteHtmlControlChar: string;
var
j: integer;
begin
Result := '';
with ParagraphFormat do
begin
if Itemize.IndentStatus = Activate then
begin
Itemize.IsItemized := True;
j := 0;
while Itemize.IndentCount - j > 0 do
begin
inc(j);
{if Itemize.NumberedBullet then
Result := Result + ''
else}
Result := Result + ';
end;
Itemize.IndentStatus := Neutral;
NewLine := False;
end;
if Itemize.BulletStatus = Activate then
begin
Result := Result + '';
Itemize.BulletStatus := Neutral;
Itemize.BulletIsOpen := True;
NewLine := False;
end;
end;
with TextFormat do
begin
if IsUpdated then
begin
if IsOpen then
begin
Result := Result + '';
TextFormat.CurrentFont := -1;
TextFormat.CurrentSize := 20;
TextFormat.CurrentColor := 0;
IsOpen := False;
end;
{ ignore fonts
if CurrentFont <> NewFont then
begin
// Symbol font may be activated due to a bullet but will be ignored:
if FontTable.List.Strings[NewFont] <> 'Symbol' then
begin
Result := Result + ';
CurrentFont := NewFont;
IsOpen := True;
end;
end;
}
SizeFactor := 6; // Whatever value looks good.
RftDefaultSize := 20; // 20 is equivalent to screenfont height 10.
if Trunc(RftDefaultSize / SizeFactor) <> Trunc(NewSize / SizeFactor) then
begin
if Trunc((NewSize / SizeFactor) - (RftDefaultSize / SizeFactor)) > 0 then
begin
if not IsOpen then
Result := Result + ';
Result := Result + ' size="+' +
IntToStr(Trunc((NewSize / SizeFactor) - (RftDefaultSize / SizeFactor))) + '"';
CurrentSize := NewSize;
IsOpen := True;
end;
if Trunc((NewSize / SizeFactor) - (RftDefaultSize / SizeFactor)) < 0 then
begin
if not IsOpen then
Result := Result + ';
Result := Result + ' size="' +
IntToStr(Trunc((NewSize / SizeFactor) - (RftDefaultSize / SizeFactor))) + '"';
CurrentSize := NewSize;
IsOpen := True;
end;
end;
if CurrentColor <> NewColor then
begin
if not IsOpen then
Result := Result + ';
Result := Result + ' color="#' +
ColorTable.List.Strings[NewColor] + '"';
CurrentColor := NewColor;
IsOpen := True;
end;
if IsOpen then
begin
Result := Result + '>';
end;
IsUpdated := False;
end;
if Bold = Activate then
begin
Result := Result + '';
Bold := Neutral;
end;
if Bold = Deactivate then
begin
Result := Result + '';
Bold := Neutral;
end;
if Italic = Activate then
begin
Result := Result + '';
Italic := Neutral;
end;
if Italic = Deactivate then
begin
Result := Result + '';
Italic := Neutral;
end;
if Underline = Activate then
begin
Result := Result + '';
Underline := Neutral;
end;
if Underline = Deactivate then
begin
Result := Result + '';
Underline := Neutral;
end;
end;
with ParagraphFormat do
begin
if Itemize.BulletStatus = Deactivate then
begin
Result := Result + '';
Itemize.BulletIsOpen := False;
if (Itemize.IndentStatus = Neutral) and Itemize.IsItemized then
begin
Result := Result + '';
Itemize.BulletIsOpen := True;
end;
Itemize.BulletStatus := Neutral;
NewLine := False;
end;
if Itemize.IndentStatus = Deactivate then
begin
j := 0;
while Itemize.IndentCount - j > 0 do
begin
inc(j);
{if Itemize.NumberedBullet then
Result := Result + ''
else}
Result := Result + '';
end;
Itemize.IndentStatus := Neutral;
Itemize.IsItemized := False;
//Itemize.NumberedBullet := False;
NewLine := False;
end;
if CurrentAlign <> NewAlign then
begin
if IsOpen then
Result := Result + '';
case NewAlign of
Left: Result := Result + '' ;
Center: Result := Result + '' ;
Right: Result := Result + '' ;
end;
IsOpen := True;
CurrentAlign := NewAlign;
NewLine := False;
end;
if NewLine = True then
begin
Result := Result + ' ';
NewLine := False;
end;
end;
end;
//------------------------------------------------------------------------------
//
//------------------------------------------------------------------------------
function TUniRtf2Html.ConvertSpecialChars(c: char): string;
begin
Result := '';
case c of
#0 :
Result := Result; // Writes pending codes only
#9 :
Result:= Result + #9; // Writes tab char
'>' :
Result:= Result + '>'; // Writes "greater than"
'<' :
Result:= Result + '<'; // Writes "less than"
else
Result:= Result + c; // Writes a character
end;
end;
//------------------------------------------------------------------------------
//
//------------------------------------------------------------------------------
function TUniRtf2Html.WriteHeaderFooter(strHtmlText: string): string;
begin
Result := //'<html><head><title></title></head><body>' +
strHtmlText;
if TextFormat.IsOpen then
Result := Result + '';
if ParagraphFormat.IsOpen then
Result := Result + '';
//Result := Result + '</body></html>';
end;
//------------------------------------------------------------------------------
// Ende der Quelle
//------------------------------------------------------------------------------
end.
¤ Dauer der Verarbeitung: 0.25 Sekunden
(vorverarbeitet)
¤
|
Haftungshinweis
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.
|