simple_page.html   [plain text]


<html>
<!--
    This page is used to test that WebPageSerializer::retrieveAllResources correctly
    retrieves the expected resources from various HTML nodes.
-->

<head>
<!-- Style sheet links (rel="stylesheet") should be retrieved; this one uses an absolute http URL. -->
<!-- NOTE(review): the "beautifull.css" spelling is part of the fixture URL; presumably the
     test's expected resource list matches it verbatim. Confirm before changing it. -->
<link rel="stylesheet" type="text/css" href="http://www.example.com/beautifull.css"/>
<!-- Links that are not style sheets (here rel="copyright") should NOT be retrieved -->
<link rel="copyright" type="text/plain" href="http://www.example.com/copyright"/>
<!-- External scripts should be retrieved; this one is referenced by a relative URL -->
<script src="awesome.js"></script>
</head>

<!-- Images are always retrieved. The body element itself references one through its
     background attribute. -->
<body background="bodyBackground.jpg">

<!-- The same image is referenced twice to make sure each resource is only reported once -->
<img src="awesome.png"/>
<img src="awesome.png"/>

<!-- A form input of type "image" references its picture through the src attribute -->
<form>
  <input type="image" src="imageButton.png"/>
</form>

<!-- Background images on table, tr and td elements. The tr and td URLs are repeated in
     both rows, so this section also contains duplicate references. -->
<table background="tableBackground.png">
  <tr background="trBackground.png">
    <td background="tdBackground.png"></td>
  </tr>  
  <tr background="trBackground.png">
    <td background="tdBackground.png"></td>
  </tr>
</table>

<!-- Some more obscure tags: cite attributes on blockquote, q, del and ins elements, using
     both absolute and relative URLs. -->
<!-- NOTE(review): the q element spells the attribute in upper case (CITE); presumably this
     exercises case-insensitive attribute handling. This file does not state whether cite
     URLs are expected in the result; check the test's expected list before editing. -->
<blockquote cite="http://www.evene.fr/citations/auteur.php?ida=46"></blockquote>
<q CITE="http://www.brainyquote.com/quotes/authors/c/charles_darwin.html"></q>
<p>My favorite color is <del cite="why_deleted.html">blue</del> <ins>red</ins>!</p>
<p>My favorite color is <del>blue</del> <ins cite="why_inserted.html">red</ins>!</p>

<!-- Make sure we only retrieve URLs with the right schemes. Each image below uses a
     different scheme; the trailing comment on each line states the expected outcome. -->
<img src="https://www.secure.com/https.gif"/>  <!-- HTTPS is OK -->
<img src="file://c/my_folder/file.gif"/>  <!-- file is OK -->
<img src="ftp://ftp.com/ftp.gif"/>  <!-- FTP is not OK -->
<img src="unknown://unkown.com/unknown.gif"/>  <!-- Unknown schemes are not OK -->

</body>

</html>