unit UniRtf2HtmlUnit1;
{------------------------------------------------------------------------------
This RTF to HTML conversion program supports the following RTF statements:
- Bold, Italic, Underline
- Alignment
- Fonts + font size + font color
- Bullet
It doesn't support itemnumbers, instead it writes bullets.
RTF code generated by Microsoft Word may not work because it contains
an incredible amount of crap which may confuse the conversion routines.
RTF code generated by TRichEdit and WordPad seem to work perfectly.
Updated: 10-jun-2002
Comments may be sent to: ruthjes@chello.nl
-------------------------------------------------------------------------------} interface uses
Windows, Messages, SysUtils, Classes, Graphics, Controls, Forms,
StdCtrls, Dialogs, StrUtils; //------------------------------------------------------------------------------ // //------------------------------------------------------------------------------ type
TSwitch = (Activate, Deactivate, Neutral);
TAlignment = (Left, Right, Center);
TFontTable = record
List: TStringList;
Status: TSwitch;
GroupCount: integer; end;
TColorTable = record
List: TStringList;
GroupCount: integer; end;
TTextFormat = record
FontTable: TFontTable;
ColorTable: TColorTable;
Bold: TSwitch;
Italic: TSwitch;
Underline: TSwitch;
NewFont: integer;
CurrentFont: integer;
NewSize: integer;
CurrentSize: integer;
NewColor: integer;
CurrentColor: integer;
IsOpen: boolean;
IsUpdated: boolean;
SizeFactor: integer;
RftDefaultSize: integer; end;
TItemize = record
IsItemized: boolean;
IndentStatus: TSwitch;
BulletStatus: TSwitch;
BulletIsOpen: boolean; //NumberedBullet: boolean; ItemNumbers is not supported because RTF is a piece of crap //NumberingChars: string;
IndentCount: integer;
IndentDist: integer;
TextToBulletDist: integer; end;
TParagraphFormat = record
CurrentAlign: TAlignment;
NewAlign: TAlignment;
NewLine: boolean;
IsOpen: boolean;
Itemize: TItemize; end;
function QueueControlWords(var strRTFText: string; var iPos, iGroupCount, iLength: integer): boolean; function GetControlWord(var strRtfText, strControlWord, strParam : string; var iPos, iGroupCount, iLength: integer): boolean; function ConvertSpecialChars(c: char): string; function WriteHeaderFooter(strHtmlText: string): string; function WriteHtmlControlChar: string; public
{ public declaration } end; //------------------------------------------------------------------------------ // //------------------------------------------------------------------------------ var
UniRtf2Html: TUniRtf2Html; //------------------------------------------------------------------------------ // //------------------------------------------------------------------------------ implementation procedure TUniRtf2Html.SpecialChars(var Text:String;tohtml:boolean); const
strs:array[1..11,1..2] ofString =
((' ', ' '),
('"', '„'),
('-', '–'),
('ß', 'ß'),
('ü', 'ü'),
('ä', 'ä'),
('ö', 'ö'),
('Ü', 'Ü'),
('Ä', 'Ä'),
('Ö', 'Ö'),
('', '€')); var i:integer; begin for i:=2 to Length(strs) dobegin if tohtml then Text := StringReplace(Text, strs[i,2], strs[i,1], [rfReplaceAll]) else Text := StringReplace(Text, strs[i,1], strs[i,2], [rfReplaceAll]); end; if tohtml then
Text := StringReplace(Text, strs[1,2], strs[1,1], [rfReplaceAll]) end; function TUniRtf2Html.ConvertRtfToHtml(strRTFText: string): string; var
strHtmlText,Buf: string;
iPos,ll,hex: integer;
iLength: integer;
iGroupCount: integer; function hexchar(c:char):integer; begin if c>'a'then hexchar:=ord(c)-ord('a')+10 else hexchar:=ord(c)-ord('0'); end; begin
strRTFText := StringReplace(strRTFText, Chr(13), '', [rfReplaceAll]);
strRTFText := StringReplace(strRTFText, Chr(10), '', [rfReplaceAll]);
// convert octals like
ll:=1; while ll<Length(strRTFText)-3 dobegin if (strRTFText[ll]='\') and (strRTFText[ll+1]='''') and (strRTFText[ll+2] in ['0'..'9','a'..'z']) thenbegin
Buf:=MidStr(strRTFText,ll,4);
hex:=hexchar(Buf[3])*16+hexchar(Buf[4]);
strRTFText := StringReplace(strRTFText, Buf, chr(hex), [rfReplaceAll]);
ll:=ll+3; end;
ll:=ll+1; end;
// special chars
SpecialChars(strRtftext,false);
strRTFText := Trim(strRTFText); //RVe: laatste 5 chars zijn rubbish.
iLength := Length(strRTFText) - 5;
strHtmlText := ''; while iPos <= iLength do begin case strRtfText[iPos] of '{': inc(iGroupCount); '}': dec(iGroupCount); '\': begin if QueueControlWords(strRtfText, iPos, iGroupCount, iLength) then
strHtmlText := strHtmlText + WriteHtmlControlChar; end; else begin if iGroupCount > 0 then
strHtmlText := strHtmlText + WriteHtmlControlChar +
ConvertSpecialChars(strRtfText[iPos]); end; end;
inc(iPos); end; if iGroupCount > 0 then
strHtmlText := strHtmlText + WriteHtmlControlChar;
Result := WriteHeaderFooter(strHtmlText);
TextFormat.FontTable.List.Free;
TextFormat.ColorTable.List.Free; end; //------------------------------------------------------------------------------ // //------------------------------------------------------------------------------ function TUniRtf2Html.GetControlWord(var strRtfText, strControlWord,
strParam : string; var iPos, iGroupCount, iLength: integer): boolean; var
iTempGroupCount: integer;
begin
Result := False; case strRtfText[iPos + 1] of '*': // = Unknown controlword begin
iTempGroupCount := iGroupCount; // {/*...} ignore entire group when it contains: \*: while (iGroupCount > iTempGroupCount - 1) and (iPos < iLength) do begin
inc(iPos); case strRtfText[iPos] of '{': inc(iGroupCount); '}': dec(iGroupCount); end; end; end; '\', '{', '}': inc(iPos); // these char must just be added the HTML string else begin
Result := True; // Capture the controlword:
strControlWord := ''; repeat
strControlWord := strControlWord + strRtfText[iPos];
inc(iPos); until (strRtfText[iPos] in ['{', '}', '\', ' ', ';', '-', '0'..'9']) or(iPos > iLength);
// Capture parameter of controlword:
strParam := ''; while (strRtfText[iPos] in ['a'..'z', '-', '0'..'9']) and (iPos <= iLength) do begin
strParam := strParam + strRtfText[iPos];
inc(iPos); end; if strRtfText[iPos] = ' 'then inc(iPos);
dec(iPos); end; end; end; //------------------------------------------------------------------------------ // //------------------------------------------------------------------------------ function TUniRtf2Html.QueueControlWords(var strRtfText: string; var iPos, iGroupCount, iLength: integer): boolean; var
strControlWord: string;
strParam: string;
strFont, strColor, strRGB: string;
iRGB: integer; begin // strRtfText[i] = '\': start of a controlword
Result := False; // True = Forced propagation to HTML string
if GetControlWord(strRtfText, strControlWord, strParam,
iPos, iGroupCount, iLength) then begin if strControlWord = '\b'then begin if (strParam = '') or (StrToInt(strParam) > 0) then
TextFormat.Bold := Activate else
TextFormat.Bold := Deactivate;
Exit; end;
if strControlWord = '\i'then begin if (strParam = '') or (StrToInt(strParam) > 0) then
TextFormat.Italic := Activate else
TextFormat.Italic := Deactivate;
Exit; end;
if strControlWord = '\ul'then begin
TextFormat.Underline := Activate;
Exit; end;
if strControlWord = '\ulnone'then begin
TextFormat.Underline := Deactivate;
Exit; end;
if strControlWord = '\qc'then begin
ParagraphFormat.NewAlign := Center;
Exit; end;
if strControlWord = '\qr'then begin
ParagraphFormat.NewAlign := Right;
Exit; end;
if strControlWord = '\fi'then begin // \fi: starts an indent which is closed by: \pard
ParagraphFormat.Itemize.IndentStatus := Activate;
ParagraphFormat.Itemize.TextToBulletDist := Abs(StrToInt(strParam));
Exit; end;
if strControlWord = '\li'then begin // \li: display a bullet which is closed by: \par
ParagraphFormat.Itemize.BulletStatus := Activate;
ParagraphFormat.Itemize.IndentDist := strToInt(strParam); // Assuming that an indent of 400 in RTF is 1 indent in HTML
ParagraphFormat.Itemize.IndentCount :=
Round((ParagraphFormat.Itemize.IndentDist -
ParagraphFormat.Itemize.TextToBulletDist) / 400);
Exit; end;
{ ---------------------------------------------------------------
Itemnumbers isnot supported because RTF is piece of crap.
TRichEdit doesn't support it either so I don't need it anyway.
A bullet will be written instead and the itemnumber appears as part of the text after the bullet.
--------------------------------------------------------------- if strControlWord = '\tx'then begin // \tx: display an itemnumber instead of a bullet
ParagraphFormat.Itemize.NumberedBullet := True; // A space marks the beginning the number: while (strRtfText[iPos] <> ' ') and (iPos <= iLength) do begin
inc(iPos); end;
inc(iPos); // Skip the space char. // A backslash marks the end of a fontname:
ParagraphFormat.Itemize.NumberingChars := ''; while (strRtfText[iPos] <> '\') and (iPos <= iLength) do begin
ParagraphFormat.Itemize.NumberingChars :=
ParagraphFormat.Itemize.NumberingChars + strRtfText[iPos];
inc(iPos); end;
dec(iPos); // 1 step back so the \ isn't skipped in the next run.
Exit; end;
--------
This code works for the first item but the following don't because in rft the following itemnumbers are written without a \tx or any
other controlword.
--------
}
if strControlWord = '\pard'then begin
Result := True; // Force propagation of to HTML string // Restore paragraph defaults: // Re-align to the left:
ParagraphFormat.NewAlign := Left; // Close itemized section: if ParagraphFormat.Itemize.IsItemized then
ParagraphFormat.Itemize.IndentStatus := Deactivate;
Exit; end;
if strControlWord = '\par'then begin
Result := True; // Force propagation of to HTML string // Close bullet: if ParagraphFormat.Itemize.BulletIsOpen then
ParagraphFormat.Itemize.BulletStatus := Deactivate; // New line <BR>, paragraphs <p> are not used because of the line distance:
ParagraphFormat.NewLine := True;
Exit; end;
if strControlWord = '\line'then begin // New line:
ParagraphFormat.NewLine := True;
Exit; end; if strControlWord = '\fonttbl'then begin // Create a fonttable.
TextFormat.FontTable.Status := Activate; // The fonttable section ends when the current group ends.
TextFormat.FontTable.GroupCount := iGroupCount;
Exit; end;
if (TextFormat.FontTable.Status = Activate) and
(TextFormat.FontTable.GroupCount > iGroupCount) then
TextFormat.FontTable.Status := Deactivate; // Fonttable group has ended, so: no more fonttable.
if strControlWord = '\f'then begin case TextFormat.FontTable.Status of
Activate: begin // Add a font to the list
strFont := ''; // A space marks the beginning a fontname: while (strRtfText[iPos] <> ' ') and (iPos <= iLength) do begin
inc(iPos); end;
inc(iPos); // Skip the space char. // A semicolon marks the end of a fontname: while (strRtfText[iPos] <> ';') and (iPos <= iLength) do begin
strFont := strFont + strRtfText[iPos];
inc(iPos); end; // Add font to the list
TextFormat.FontTable.List.Add(strFont); end;
Deactivate: begin
TextFormat.NewFont := StrToInt(strParam);
TextFormat.IsUpdated := True; end; end;
Exit; end; if strControlWord = '\fs'then begin
TextFormat.NewSize := StrToInt(strParam);
TextFormat.IsUpdated := True; end;
if strControlWord = '\colortbl'then begin // Create a colortable. // The colortable section ends when the current group ends.
TextFormat.ColorTable.GroupCount := iGroupCount; // Extract RGB values // A semicolon marks the end of a color:
strColor := '000000'; // black while TextFormat.ColorTable.GroupCount <= iGroupCount do begin
inc(iPos); case strRtfText[iPos] of '{': inc(iGroupCount); '}': dec(iGroupCount); '\': begin if GetControlWord(strRtfText, strControlWord, strParam,
iPos, iGroupCount, iLength) then begin if strControlWord = '\red'then begin
iRGB := StrToInt(strParam);
strRGB := IntToHex(iRGB, 2);
Insert(strRGB, strColor, 1);
Delete(strColor, 3, 2); end; if strControlWord = '\green'then begin
iRGB := StrToInt(strParam);
strRGB := IntToHex(iRGB, 2);
Insert(strRGB, strColor, 3);
Delete(strColor, 5, 2); end; if strControlWord = '\blue'then begin
iRGB := StrToInt(strParam);
strRGB := IntToHex(iRGB, 2);
Insert(strRGB, strColor, 5);
Delete(strColor, 7, 2); end; end; end; end; if strRtfText[iPos] = ';'then
TextFormat.ColorTable.List.Add(strColor); end;
Exit; end;
if strControlWord = '\cf'then begin
TextFormat.NewColor := StrToInt(strParam);
TextFormat.IsUpdated := True;
Exit; end; end; end; //------------------------------------------------------------------------------ // //------------------------------------------------------------------------------ function TUniRtf2Html.WriteHtmlControlChar: string; var
j: integer; begin
Result := '';
with ParagraphFormat do begin if Itemize.IndentStatus = Activate then begin
Itemize.IsItemized := True;
j := 0; while Itemize.IndentCount - j > 0 do begin
inc(j);
{if Itemize.NumberedBullet then
Result := Result + '' else}
Result := Result + '
with TextFormat do begin if IsUpdated then begin if IsOpen then begin
Result := Result + '';
TextFormat.CurrentFont := -1;
TextFormat.CurrentSize := 20;
TextFormat.CurrentColor := 0;
IsOpen := False; end;
{ ignore fonts if CurrentFont <> NewFont then begin // Symbol font may be activated due to a bullet but will be ignored: if FontTable.List.Strings[NewFont] <> 'Symbol'then begin
Result := Result + ';
CurrentFont := NewFont;
IsOpen := True; end; end;
}
SizeFactor := 6; // Whatever value looks good.
RftDefaultSize := 20; // 20 is equivalent to screenfont height 10. if Trunc(RftDefaultSize / SizeFactor) <> Trunc(NewSize / SizeFactor) then begin if Trunc((NewSize / SizeFactor) - (RftDefaultSize / SizeFactor)) > 0 then begin ifnot IsOpen then
Result := Result + ';
Result := Result + ' size="+' +
IntToStr(Trunc((NewSize / SizeFactor) - (RftDefaultSize / SizeFactor))) + '"';
CurrentSize := NewSize;
IsOpen := True; end; if Trunc((NewSize / SizeFactor) - (RftDefaultSize / SizeFactor)) < 0 then begin ifnot IsOpen then
Result := Result + ';
Result := Result + ' size="' +
IntToStr(Trunc((NewSize / SizeFactor) - (RftDefaultSize / SizeFactor))) + '"';
CurrentSize := NewSize;
IsOpen := True; end; end; if CurrentColor <> NewColor then begin ifnot IsOpen then
Result := Result + ';
Result := Result + ' color="#' +
ColorTable.List.Strings[NewColor] + '"';
CurrentColor := NewColor;
IsOpen := True; end; if IsOpen then begin
Result := Result + '>'; end;
IsUpdated := False; end;
if Bold = Activate then begin
Result := Result + '';
Bold := Neutral; end; if Bold = Deactivate then begin
Result := Result + '';
Bold := Neutral; end;
if Italic = Activate then begin
Result := Result + '';
Italic := Neutral; end; if Italic = Deactivate then begin
Result := Result + '';
Italic := Neutral; end;
if Underline = Activate then begin
Result := Result + '';
Underline := Neutral; end; if Underline = Deactivate then begin
Result := Result + '';
Underline := Neutral; end; end; with ParagraphFormat do begin if Itemize.BulletStatus = Deactivate then begin
Result := Result + '';
Itemize.BulletIsOpen := False; if (Itemize.IndentStatus = Neutral) and Itemize.IsItemized then begin
Result := Result + '
if Itemize.IndentStatus = Deactivate then begin
j := 0; while Itemize.IndentCount - j > 0 do begin
inc(j);
{if Itemize.NumberedBullet then
Result := Result + '' else}
Result := Result + ''; end;
Itemize.IndentStatus := Neutral;
Itemize.IsItemized := False; //Itemize.NumberedBullet := False;
NewLine := False; end;
if CurrentAlign <> NewAlign then begin if IsOpen then
Result := Result + ''; case NewAlign of
Left: Result := Result + '
'
;
Center: Result := Result + '
'
;
Right: Result := Result + '
'
; end;
IsOpen := True;
CurrentAlign := NewAlign;
NewLine := False; end; if NewLine = True then begin
Result := Result + ' ';
NewLine := False; end; end; end; //------------------------------------------------------------------------------ // //------------------------------------------------------------------------------ function TUniRtf2Html.ConvertSpecialChars(c: char): string; begin
Result := ''; case c of
#0 :
Result := Result; // Writes pending codes only
#9 :
Result:= Result + #9; // Writes tab char '>' :
Result:= Result + '>'; // Writes "greater than" '<' :
Result:= Result + '<'; // Writes "less than" else
Result:= Result + c; // Writes a character end; end; //------------------------------------------------------------------------------ // //------------------------------------------------------------------------------ function TUniRtf2Html.WriteHeaderFooter(strHtmlText: string): string; begin
Result := //'<html><head><title></title></head><body>' +
strHtmlText; if TextFormat.IsOpen then
Result := Result + ''; if ParagraphFormat.IsOpen then
Result := Result + ''; //Result := Result + '</body></html>'; end; //------------------------------------------------------------------------------ // Ende der Quelle //------------------------------------------------------------------------------ end.
¤ Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.0.16Bemerkung:
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.