PHP 字符串截取自动修复(仅限 UTF-8 字符串):去除首尾不足一个完整字符的字节

180it 2021-11-05 PM 1282℃ 0条
/**
 * Repair a UTF-8 string that was cut at arbitrary byte offsets.
 *
 * Bytes at the head and at the tail that do not form a complete character
 * are dropped. When $replace is a non-empty string, every dropped byte is
 * replaced by one copy of $replace instead of being removed.
 *
 *   $str = substr("我的中国心", 1, -2);
 *   $str = utf8Repair($str);        // "的中国"
 *
 * Byte layout used for validation (decimal values):
 *   1 byte : 0-127
 *   2 bytes: 192-223, 128-191
 *   3 bytes: 224-239, 128-191, 128-191
 *   4 bytes: 240-247, 128-191, 128-191, 128-191
 *
 * If anything after the leading fragment does not follow this table (an
 * unexpected lead byte or a missing continuation byte), the input is
 * returned unchanged.
 *
 * @param string $str     Byte string, expected to be (a fragment of) UTF-8.
 * @param string $replace Text inserted once per dropped byte; '' drops the bytes.
 * @return string The repaired string, or the input unchanged when no repair
 *                is needed or the content is not well-formed.
 */
function utf8Repair($str,$replace=''){
    $str    = (string) $str;
    $length = strlen($str);
    if ($length === 0) {
        // Nothing to inspect; avoids reading offset 0 of an empty string.
        return $str;
    }

    // Skip orphaned continuation bytes (128-191) at the head. At most six
    // bytes are skipped, and the scan never runs past the end of the string.
    $start = 0;
    while ($start < $length && $start <= 5) {
        $byte = ord($str[$start]);
        if ($byte < 128 || $byte > 191) {
            break;
        }
        $start++;
    }

    // Walk whole characters; $end marks where an incomplete trailing
    // character begins (or stays at $length when the tail is complete).
    $end = $length;
    for ($i = $start; $i < $length; $i++) {
        $byte = ord($str[$i]);
        if ($byte < 128) {
            continue; // single-byte character
        }
        if ($byte < 192 || $byte > 247) {
            return $str; // not a valid lead byte: leave the input untouched
        }
        if ($byte > 239) {
            $charBytes = 4;
        } elseif ($byte > 223) {
            $charBytes = 3;
        } else {
            $charBytes = 2;
        }
        if ($i + $charBytes > $length) {
            // The last character is cut off; everything from $i on is dropped.
            $end = $i;
            break;
        }
        for ($k = 1; $k < $charBytes; $k++) {
            $i++;
            $byte = ord($str[$i]);
            if ($byte < 128 || $byte > 191) {
                return $str; // missing continuation byte
            }
        }
    }

    if ($start === 0 && $end === $length) {
        return $str;
    }

    // Compare against '' explicitly so that a replacement of '0' is honoured.
    $replace = (string) $replace;
    $head = '';
    $tail = '';
    if ($replace !== '') {
        $head = str_repeat($replace, $start);
        $tail = str_repeat($replace, $length - $end);
    }

    return $head . substr($str, $start, $end - $start) . $tail;
}

/**
 * Character-aware substring for multi-byte text, with an optional ellipsis.
 *
 * Uses mbstring when available, then iconv, and finally a byte-pattern
 * fallback that understands utf-8, gb2312, gbk and big5. All three paths
 * follow the same suffix rule: '...' is appended only when $suffix is truthy
 * and characters remain after the returned slice.
 *
 * $start has no default value: it precedes the required $length, so a
 * default there could never be used and is deprecated since PHP 8.0.
 *
 * @param string $str     Text to cut.
 * @param int    $start   Character offset; negative values count from the end.
 * @param int    $length  Number of characters to keep.
 * @param string $charset Encoding of $str (the fallback matches it case-insensitively).
 * @param bool   $suffix  Whether to append '...' when the tail was cut off.
 * @return string The slice, followed by '...' when applicable.
 * @throws \InvalidArgumentException From the pattern fallback when $charset is not supported.
 */
function msubstr($str, $start, $length, $charset = "utf-8", $suffix = true){
    $str = (string) $str;

    if (function_exists('mb_substr')) {
        // Measure with the caller's charset, not the internal encoding.
        $total    = mb_strlen($str, $charset);
        $slice    = mb_substr($str, $start, $length, $charset);
        $sliceLen = mb_strlen($slice, $charset);
    } elseif (function_exists('iconv_substr')) {
        $total = iconv_strlen($str, $charset);
        $slice = iconv_substr($str, $start, $length, $charset);
        if ($slice === false) {
            $slice = ''; // older PHP versions return false for an out-of-range start
        }
        $sliceLen = iconv_strlen($slice, $charset);
    } else {
        $re['utf-8'] = "/[\x01-\x7f]|[\xc2-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xff][\x80-\xbf]{3}/";
        $re['gb2312'] = "/[\x01-\x7f]|[\xb0-\xf7][\xa0-\xfe]/";
        $re['gbk'] = "/[\x01-\x7f]|[\x81-\xfe][\x40-\xfe]/";
        // The second trail-byte range needs its own character class.
        $re['big5'] = "/[\x01-\x7f]|[\x81-\xfe]([\x40-\x7e]|[\xa1-\xfe])/";

        $key = strtolower($charset);
        if (!isset($re[$key])) {
            throw new \InvalidArgumentException('Unsupported charset: ' . $charset);
        }
        preg_match_all($re[$key], $str, $match);
        $picked   = array_slice($match[0], $start, $length);
        $total    = count($match[0]);
        $slice    = join("", $picked);
        $sliceLen = count($picked);
    }

    if (!$suffix) {
        return $slice;
    }

    // Resolve $start to an absolute offset so we can tell whether any
    // characters are left after the slice.
    $offset = $start < 0 ? max($total + $start, 0) : min($start, $total);
    if ($offset + $sliceLen < $total) {
        return $slice . '...';
    }
    return $slice;
}

支付宝打赏 微信打赏

如果文章或资源对您有帮助,欢迎打赏作者。一路走来,感谢有您!

标签: none

PHP 字符串截取自动修复(仅限 UTF-8 字符串):去除首尾不足一个完整字符的字节