im/vendor/tcwei/imgsrc/imgSrc/GetImgSrc.php

213 lines
7.5 KiB
PHP
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
namespace tcwei\smallTools;
class GetImgSrc
{
    /**
     * Extract the src of a single image from an HTML fragment.
     *
     * Images are counted by <img> tag, starting at $num. When a blacklist is
     * given and the selected image matches it, the next image in the chosen
     * direction is tried, until one passes or the images run out.
     *
     * @param string       $data      HTML or article text.
     * @param int          $num       1-based index of the image to return (default: the first one).
     * @param string       $order     'asc' counts from the start; any other value counts from the end.
     * @param string|array $blacklist Substring(s); a src containing any of them is skipped
     *                                (e.g. 'baidu.com' skips src="http://www.baidu.com/img/a.png").
     *                                false disables the filter.
     * @param string       $model     'string' (default) uses string scanning, 'preg' uses regular expressions.
     * @return string|null|false false when $data is null, null when no matching src exists,
     *                           otherwise the src value.
     */
    public static function src($data, $num = 1, $order = 'asc', $blacklist = false, $model = 'string')
    {
        if (!isset($data)) {
            return false;
        }
        // Iterative replacement for the former self-recursion: advance one
        // image at a time while the current one is blacklisted. The loop ends
        // because the lookup returns null once the index runs past the images.
        for ($n = $num; ; $n++) {
            if ($model === 'preg') {
                // pregModel() takes a 0-based index.
                $imgSrc = self::pregModel($data, $n - 1, $order);
            } else {
                $imgSrc = self::strModel($data, $n, $order);
            }
            if ($imgSrc === null || !self::isBlacklisted($imgSrc, $blacklist)) {
                return $imgSrc;
            }
        }
    }

    /**
     * Extract a list of image src values from an HTML fragment.
     *
     * Note on counting: in preg mode the blacklist is applied first and
     * $startNum/$length then slice the filtered list; in string mode
     * $startNum counts raw <img> tags and blacklisted images found after
     * that point are skipped without counting towards $length.
     *
     * @param string       $data      HTML or article text.
     * @param int          $startNum  1-based index of the first image to take (default 1).
     * @param int          $length    Number of images to take; 0 (default) means "up to the last one".
     * @param string       $order     'asc' counts from the start; any other value counts from the end.
     * @param string|array $blacklist Substring(s); a src containing any of them is excluded.
     *                                false disables the filter.
     * @param string       $model     'preg' (default) or 'string'. Preg mode is forced when
     *                                $length > 3 or $length === 0.
     * @return array List of src strings; empty when nothing matches.
     */
    public static function srcList($data, $startNum = 1, $length = 0, $order = 'asc', $blacklist = false, $model = 'preg')
    {
        if ($model === 'preg' || $length > 3 || $length === 0) {
            return self::pregModel($data, [$startNum - 1, $length, $blacklist], $order);
        }

        $imgSrcArr = [];
        for ($i = max(1, (int)$startNum); count($imgSrcArr) < $length; $i++) {
            $tag = self::findImgTag($data, $i, $order);
            if ($tag === null) {
                // No more images: stop instead of padding the result with nulls.
                break;
            }
            $imgSrc = self::srcFromTag($tag);
            if ($imgSrc !== null && !self::isBlacklisted($imgSrc, $blacklist)) {
                $imgSrcArr[] = $imgSrc;
            }
        }
        return $imgSrcArr;
    }

    /**
     * String-scanning lookup of the src of the $num-th <img> tag.
     *
     * @param string $str   HTML or article text.
     * @param int    $num   1-based index of the image.
     * @param string $order 'asc' counts from the start; any other value counts from the end.
     * @return string|null The src value, or null when the image does not exist
     *                     or has no quoted src attribute.
     */
    public static function strModel($str, $num, $order)
    {
        $tag = self::findImgTag($str, $num, $order);
        if ($tag === null) {
            return null;
        }
        return self::srcFromTag($tag);
    }

    /**
     * Regular-expression lookup of image src values.
     *
     * @param string    $str   HTML or article text.
     * @param int|array $num   Either a 0-based image index (single mode), or
     *                         [offset, length, blacklist] (list mode), where offset is 0-based,
     *                         length <= 0 means "to the end" and blacklist is false, a string
     *                         or an array of strings.
     * @param string    $order 'asc' keeps document order; any other value reverses it.
     * @return string|array|null Single mode: the src or null. List mode: array of src strings.
     */
    public static function pregModel($str, $num, $order)
    {
        preg_match_all('/<img.*>/isU', (string)$str, $matches);
        $tags = isset($matches[0]) ? $matches[0] : [];
        if ($order !== 'asc') {
            $tags = array_reverse($tags);
        }

        if (is_array($num)) {
            $startNum = $num[0];
            $length = $num[1];
            $blacklist = $num[2];

            $imgSrcArr = [];
            foreach ($tags as $tag) {
                $imgSrc = self::srcFromTag($tag);
                // Images without a quoted src are skipped rather than stored as null.
                if ($imgSrc === null || self::isBlacklisted($imgSrc, $blacklist)) {
                    continue;
                }
                $imgSrcArr[] = $imgSrc;
            }

            if ($length > 0) {
                return array_slice($imgSrcArr, $startNum, $length);
            }
            return array_slice($imgSrcArr, $startNum);
        }

        if (!isset($tags[$num])) {
            return null;
        }
        return self::srcFromTag($tags[$num]);
    }

    /**
     * Locate the $num-th <img tag by string scanning and return its text,
     * from "<img" up to and including the next ">" (or to the end of the
     * input when no ">" follows).
     *
     * @param string $str   HTML or article text.
     * @param int    $num   1-based index of the image.
     * @param string $order 'asc' counts from the start; any other value counts from the end.
     * @return string|null The tag text, or null when there is no such image.
     */
    private static function findImgTag($str, $num, $order)
    {
        $str = (string)$str;
        $num = (int)$num;
        if ($num < 1) {
            return null;
        }

        $pos = false;
        if ($order !== 'asc') {
            // Search backwards: after each hit, cut the haystack just before
            // it so the next search finds the previous tag. Because only the
            // tail is removed, $pos stays valid as an offset into $str.
            $haystack = $str;
            for ($i = 1; $i <= $num; $i++) {
                $pos = strripos($haystack, '<img');
                if ($pos === false) {
                    return null;
                }
                $haystack = (string)substr($haystack, 0, $pos);
            }
        } else {
            $offset = 0;
            for ($i = 1; $i <= $num; $i++) {
                $pos = stripos($str, '<img', $offset);
                if ($pos === false) {
                    return null;
                }
                $offset = $pos + 4; // continue after this "<img"
            }
        }

        // Bound the tag so the src lookup cannot pick up an attribute that
        // belongs to a different element further along in the document.
        $end = strpos($str, '>', $pos);
        if ($end === false) {
            return substr($str, $pos);
        }
        return substr($str, $pos, $end - $pos + 1);
    }

    /**
     * Extract the quoted src value from the text of one tag.
     *
     * The pattern matches the first "src=" substring that is followed by a
     * quoted value, so attributes whose name ends in "src" (e.g. data-src)
     * match as well.
     *
     * @param string $tag Text of a single tag.
     * @return string|null The value between the quotes, or null when there is none.
     */
    private static function srcFromTag($tag)
    {
        if (preg_match('/src=(\'|")(.*)(?:\1)/isU', $tag, $match) === 1) {
            return $match[2];
        }
        return null;
    }

    /**
     * Tell whether a src contains any of the blacklisted substrings.
     *
     * @param string|null        $src       The src to test; null never matches.
     * @param string|array|false $blacklist One substring, a list of substrings, or false for "no filter".
     * @return bool True when $src contains at least one non-empty blacklist entry.
     */
    private static function isBlacklisted($src, $blacklist)
    {
        if ($blacklist === false || $src === null) {
            return false;
        }
        $needles = is_array($blacklist) ? $blacklist : [$blacklist];
        foreach ($needles as $needle) {
            $needle = (string)$needle;
            // An empty needle is ignored: on PHP 8 strpos() would report it
            // at offset 0 and so blacklist every image.
            if ($needle !== '' && strpos($src, $needle) !== false) {
                return true;
            }
        }
        return false;
    }
}
/*
$str = '<div>
<p>这里是普通文字</p>
<p>这里是干扰元素测试\'\'"""</p>
<img src="src1.png"/>
<img src=\'src2.png\'/>
<img src="src3.jpg"/>
<img src="src4.jpg"/>
<img src="src5.jpg"/>
</div>';
$src = GetImgSrc::srcList($str, 2, 3, 'asc', false, 'string');
$src2 = GetImgSrc::src($str, 1, 'asc', false, 'string');
var_dump($src);
var_dump($src2);
*/
?>