Browse Source

regex微调

dev
zxc 3 years ago
parent
commit
c8cf712178
  1. 40
      epmet-module/gov-voice/gov-voice-server/src/main/java/com/epmet/service/impl/ArticleServiceImpl.java

40
epmet-module/gov-voice/gov-voice-server/src/main/java/com/epmet/service/impl/ArticleServiceImpl.java

@@ -1653,18 +1653,34 @@ public class ArticleServiceImpl extends BaseServiceImpl<ArticleDao, ArticleEntit
* @author zxc * @author zxc
* @date 2022/7/1 10:32 * @date 2022/7/1 10:32
*/ */
// NOTE(review): this span appears to be a flattened side-by-side diff — on each line the removed
// (old, static) version comes first and the added (new, instance) version follows.
// Old version: returns "" for blank input, otherwise the concatenation of every character that
// matches [\u4e00-\u9fa5], in order of appearance.
// New version: removes <script>/<style> elements, then any remaining HTML tags, then all
// whitespace, then literal "&nbsp;" entities, and returns the trimmed result.
// NOTE(review): the new version has no blank/null guard; a null content would presumably throw
// at p_script.matcher(content) — confirm callers never pass null.
public static String getPreviewContent(String content) { public String getPreviewContent(String content) {
if (StringUtils.isBlank(content)){ // Regular expression for <script> elements
return ""; String regEx_script = "<script[^>]*?>[\\s\\S]*?<\\/script>";
} // Regular expression for <style> elements
String regex = "[\\u4e00-\\u9fa5]"; String regEx_style = "<style[^>]*?>[\\s\\S]*?<\\/style>";
Pattern p = Pattern.compile(regex); // Regular expression for any HTML tag
Matcher m = p.matcher(content); String regEx_html = "<[^>]+>";
StringBuffer sb = new StringBuffer(); // Regular expression for spaces, carriage returns and line feeds
while (m.find()) { String regEx_space = "\\s*|\t|\r|\n";
sb.append(m.group()); Pattern p_script = Pattern.compile(regEx_script, Pattern.CASE_INSENSITIVE);
} Matcher m_script = p_script.matcher(content);
return sb.toString(); // Strip script elements
content = m_script.replaceAll("");
Pattern p_style = Pattern.compile(regEx_style, Pattern.CASE_INSENSITIVE);
Matcher m_style = p_style.matcher(content);
// Strip style elements
content = m_style.replaceAll("");
Pattern p_html = Pattern.compile(regEx_html, Pattern.CASE_INSENSITIVE);
Matcher m_html = p_html.matcher(content);
// Strip remaining HTML tags
content = m_html.replaceAll("");
Pattern p_space = Pattern.compile(regEx_space, Pattern.CASE_INSENSITIVE);
Matcher m_space = p_space.matcher(content);
// Strip spaces, tabs and line breaks
content = m_space.replaceAll("");
// Remove literal "&nbsp;" entities
content = content.replaceAll("&nbsp;", "");
return content.trim();
} }
@Override @Override

Loading…
Cancel
Save