英文分句/分段落
php环境,指出一个原本分句的时候的误区,分句不用考虑小数点,不用考虑域名,因为标准的句子是句号后面加空格的,唯一要考虑的就是Mr. Li 这种。先采用分段落的方式是考虑到有些
php环境,指出一个原本分句的时候的误区,分句不用考虑小数点,不用考虑域名,因为标准的句子是句号后面加空格的,唯一要考虑的就是Mr. Li 这种。
先采用分段落的方式是考虑到有些引用采用冒号结尾。
先采用分段落的方式是考虑到有些引用采用冒号结尾。
<?php
/*TWWY'S ART*/
function break_passage($text){ //分割段落
return preg_split("/(\r|\n|\r\n)/", $text, -1, PREG_SPLIT_NO_EMPTY);
}
function break_sentence($text){ //分割句子 英文的句号后面必须有空格
$re = '/# Split sentences on whitespace between them.
(?<= # Begin positive lookbehind.
[.!?] # Either an end of sentence punct,
| [.!?][\'"] # or end of sentence punct and quote.
) # End positive lookbehind.
(?<! # Begin negative lookbehind.
Mr\. # Skip either "Mr."
| Mrs\. # or "Mrs.",
| Ms\. # or "Ms.",
| Jr\. # or "Jr.",
| Dr\. # or "Dr.",
| Prof\. # or "Prof.",
| Sr\. # or "Sr.",
# or... (you get the idea).
) # End negative lookbehind.
\s+ # Split on whitespace between sentences.
/ix';
$sentences = preg_split($re, $text, -1, PREG_SPLIT_NO_EMPTY);
return $sentences;
}
function get_sentence($text){ //先分割段落再分割句子 [推荐]
$passage = break_passage($text);
$return = array();
foreach ($passage as $key => $value) $return = array_merge($return, break_sentence($value));
return $return;
}
?>
精彩图集
精彩文章






