[yunqa.de] Re: DIHtmlParser

  • From: randy <randydom@xxxxxxxxx>
  • To: yunqa@xxxxxxxxxxxxx
  • Date: Wed, 6 Jan 2010 18:59:12 +0200

But I want to save it along with its script tags:
<script>test test tes </script>
not only the script contents: test test tes

On Wed, Jan 6, 2010 at 5:46 PM, Delphi Inspiration <delphi@xxxxxxxx> wrote:

> At 14:57 06.01.2010, randy wrote:
>
> >Hi, what I want is to extract the scripts using your DIHtmlParser, then
> save the extracted script into a new HTML file named Ext_Script.html
>
> Very simple. See the demo project below and in the attachment for an example
> implementation.
>
> Ralf
>
> { DIHtmlParser example showing how to extract <SCRIPT> contents.
>
>  Visit the DIHtmlParser homepage for latest information and updates:
>
>    http://www.yunqa.de/delphi/
>
>  Copyright (c) 2002-2010 Ralf Junker, The Delphi Inspiration <
> delphi@xxxxxxxx>
>
> ------------------------------------------------------------------------------
> }
>
> program DIHtmlParser_Extract_Scripts;
>
> {$APPTYPE Console}
> {$I DI.inc}
>
> uses
>  {$IFDEF FastMM}FastMM4, {$ENDIF}DIUtils, DIHtmlMisc, DIHtmlParser;
>
> const
>  { Sample HTML document containing a single <script> element. It is
>    echoed to the console and then fed to the parser below. }
>  HTML_DATA: AnsiString =
>    '<html>' + CRLF +
>    '<head>' + CRLF +
>    '<title>' + CRLF +
>    '  my web project' + CRLF +
>    '</title>' + CRLF +
>    '</head>' + CRLF +
>    '<body>' + CRLF +
>    '<script type="text/javascript">test test tes te t</script>  ' + CRLF +
>    '</body>' + CRLF +
>    '</html>';
>
> var
>  HtmlParser: TDIHtmlParser;
> begin
>  { Register HTML tags and attributes. }
>  RegisterHtmlTags;
>  RegisterHtmlAttribs;
>
>  WriteLn('Original HTML:');
>  WriteLn;
>  WriteLn(HTML_DATA);
>  WriteLn;
>
>  WriteLn('List of all script tags and contents:');
>
>  HtmlParser := TDIHtmlParser.Create(nil);
>  try
>    HtmlParser.SourceBufferAsStrA := HTML_DATA;
>
>    // Make sure HTML start tags and script contents are reported to the
>    // application.
>    // NOTE(review): only StartTags is set to fiShow here; no end-tag filter
>    // is changed, so the closing </SCRIPT> tag is not written by this demo.
>    // Presumably an end-tag filter must be enabled as well to receive it --
>    // confirm against the DIHtmlParser documentation.
>    HtmlParser.FilterHtmlTags.StartTags := fiShow;
>    HtmlParser.FilterScripts := fiShow;
>
>    // Fetch one piece at a time until ParseNextPiece returns False.
>    while HtmlParser.ParseNextPiece do
>      case HtmlParser.PieceType of
>
>        ptHtmlTag:
>          case HtmlParser.HtmlTag.TagID of
>
>            TAG_SCRIPT_ID: // Here comes a <SCRIPT> HTML tag.
>              begin
>                { Is it a start tag? }
>                if HtmlParser.HtmlTag.tagtype = ttstarttag then
>                  begin
>                    { Blank separator line, then the tag's code as
>                      returned by HtmlTag.Code. }
>                    WriteLn;
>                    WriteLn(string(HtmlParser.HtmlTag.Code));
>                  end;
>              end;
>          end;
>
>        ptScript: // Here comes the script's contents.
>          begin
>            { The <SCRIPT> ... </SCRIPT> contents is now stored in the
>              TDIHtmlParser's data. Here we retrieve it as a string and
>              write it to the console.
>
>              Alternatively you may save it as UTF-16LE to file or stream:
>
>              * HtmlParser.SaveDataToFile('FileName.txt');
>              * HtmlParser.SaveDataToStream(StreamInstance); }
>            WriteLn(string(HtmlParser.DataAsStrW));
>          end;
>
>      else
>        // Process other HTML pieces.
>      end;
>
>  finally
>    // Release the parser even if parsing raised an exception.
>    HtmlParser.Free;
>  end;
>
>  WriteLn;
>  WriteLn('Done - Press ENTER to exit.');
>  ReadLn;
> end.




-- 
Many thanks

-----

Yours Randy

Other related posts: