<?php
// Load the markup of source.html; the extraction code further down scans this string.
$sourceData = OpenFile("source.html");
/**
 * Read a whole file into a string.
 *
 * @param string $file Path of the file to read.
 * @return string The file contents, or an empty string when the file is
 *                missing, is not a regular file, or cannot be read.
 */
function OpenFile($file){
    // Always return a string: previously a missing file left the result
    // variable undefined, so the caller received null plus a warning.
    if (!is_file($file) || !is_readable($file)) {
        return '';
    }
    // Read in a single call. The earlier `while ($line = fgets(...))` loop
    // stopped on any falsy line, so a final line consisting of "0" was lost.
    $contents = file_get_contents($file);
    return $contents === false ? '' : $contents;
}
// Use a regular expression to pull the URLs out of <a href>, <img src> and
// <source srcset> attributes.
$urlRegex = '/(?:<a[^>]*href=[\'"]([^\'"]+)[\'"][^>]*>|<img[^>]*src=[\'"]([^\'"]+)[\'"][^>]*>|<source[^>]*srcset=[\'"]([^\'"]+)[\'"][^>]*>)/i';
$urls = [];
$matches = [];
// Never hand a non-string subject to preg_match_all (e.g. the file could not be read).
$subject = (isset($sourceData) && is_string($sourceData)) ? $sourceData : '';
if (preg_match_all($urlRegex, $subject, $matches)) {
    // Groups 1, 2 and 3 hold the href, src and srcset values respectively.
    // Every match fills one group and leaves the other two as empty strings,
    // so collect all three columns and drop the empty placeholders.
    foreach (array_merge($matches[1], $matches[2], $matches[3]) as $match) {
        // Compare with '' instead of empty(): empty('0') is true and would
        // discard a relative URL that is exactly "0".
        if ($match !== '') {
            $urls[] = $match;
        }
    }
}
// Sort the collected URLs.
sort($urls);
// Print the URLs.
echo "Found URLs:<br>\n";
foreach ($urls as $url) {
    // The values come straight from the parsed markup, so escape them before
    // writing HTML. double_encode is off so entities already present in the
    // attribute value (e.g. &amp;) are not encoded a second time.
    echo htmlspecialchars($url, ENT_QUOTES, 'UTF-8', false) . "<br>\n";
}
?>
<?php
#ini_set( 'display_errors', 1 );
// Load the markup of source.html; the extraction code further down scans this string.
$sourceData = OpenFile("source.html");
// A function named OpenFile is already declared earlier in this file.
// Declaring it a second time unconditionally is a fatal "Cannot redeclare"
// error, so this copy is only defined when no earlier definition exists.
if (!function_exists('OpenFile')) {
    /**
     * Read a whole file into a string.
     *
     * @param string $file Path of the file to read.
     * @return string The file contents, or an empty string when the file is
     *                missing, is not a regular file, or cannot be read.
     */
    function OpenFile($file){
        // Always return a string: previously a missing file left the result
        // variable undefined, so the caller received null plus a warning.
        if (!is_file($file) || !is_readable($file)) {
            return '';
        }
        // Read in a single call. The earlier `while ($line = fgets(...))` loop
        // stopped on any falsy line, so a final line consisting of "0" was lost.
        $contents = file_get_contents($file);
        return $contents === false ? '' : $contents;
    }
}
// Use a regular expression to pull the URLs out of <a>, <img> and <source> tags.
// Group 1 is the tag name, group 2 the attribute name (href, src or srcset)
// and group 3 the quoted attribute value.
$urlRegex = '/(?:<(a|img|source)[^>]*(href|src|srcset)=[\'"]([^\'"]+)[\'"][^>]*>)/i';
$urls = [];
$matches = [];
// Never hand a non-string subject to preg_match_all (e.g. the file could not be read).
$subject = (isset($sourceData) && is_string($sourceData)) ? $sourceData : '';
if (preg_match_all($urlRegex, $subject, $matches)) {
    // Group 3 is mandatory and needs at least one character, so every entry is
    // a non-empty value. Taking the column as-is also keeps a URL that is
    // exactly "0", which the former empty() filter threw away.
    $urls = $matches[3];
}
// Sort the collected URLs.
sort($urls);
// Print the URLs.
echo "Found URLs:<br>\n";
foreach ($urls as $url) {
    // The values come straight from the parsed markup, so escape them before
    // writing HTML. double_encode is off so entities already present in the
    // attribute value (e.g. &amp;) are not encoded a second time.
    echo htmlspecialchars($url, ENT_QUOTES, 'UTF-8', false) . "<br>\n";
}
?>