Java: HTML 태그 제거 / HTML 코드에서 첫 번째 이미지만 가져오기
HTML 코드에서 첫 번째 이미지만 가져오기 (MySQL)
-- First <img ...> tag found in content: the lower-case spelling is used when present,
-- otherwise the upper-case spelling is looked up instead.
-- MID(str, pos, len): pos is where the tag opens; len is the offset of the first '>'
-- counted from that opening, so the extracted text ends on the tag's closing '>'.
IF(
    INSTR(content, '<img') <> 0,
    MID(content,
        INSTR(content, '<img'),
        INSTR(SUBSTRING(content, INSTR(content, '<img')), '>')),
    MID(content,
        INSTR(content, '<IMG'),
        INSTR(SUBSTRING(content, INSTR(content, '<IMG')), '>'))
) AS firstImg
<%@ page import="java.util.regex.Matcher" %>
<%@ page import="java.util.regex.Pattern" %>
<%
// Builds a short plain-text preview of an HTML fragment: script/style blocks, tags and
// entity references are stripped, whitespace runs are collapsed, and the result is cut
// to at most 80 characters. `content` (the HTML source) must be defined earlier in the page.
String content_clean = (content == null) ? "" : content; // null-safe: no body -> empty preview

// HTML tag names are case-insensitive, so <SCRIPT>/<STYLE> must match as well.
// The reluctant ".*?" stops at the FIRST closing tag; a greedy ".*" would delete all
// visible text lying between two separate blocks. The element body is matched verbatim
// (no quote tracking), so an unbalanced quote inside a script cannot defeat the match.
Pattern SCRIPTS = Pattern.compile("<script\\b[^>]*>.*?</script\\s*>", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
Pattern STYLE = Pattern.compile("<style\\b[^>]*>.*?</style\\s*>", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
// Opening/closing/self-closing tags whose name consists of letters only.
Pattern TAGS = Pattern.compile("<(/)?([a-zA-Z]*)(\\s[a-zA-Z]*=[^>]*)?(\\s)*(/)?>");
// NOTE(review): nTAGS is never applied in this scriptlet; the declaration is kept only
// because scriptlet locals are visible to later page code that is not shown here.
Pattern nTAGS = Pattern.compile("<\\w+\\s+[^<]*\\s*>");
// Only well-formed references (&name; &#123; &#x1F;) are removed. The looser "&[^;]+;"
// also swallowed ordinary text such as "& Jerry went home;".
Pattern ENTITY_REFS = Pattern.compile("&(?:#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*);");
// Two or more consecutive whitespace characters.
Pattern WHITESPACE = Pattern.compile("\\s\\s+");

Matcher m;
m = SCRIPTS.matcher(content_clean);
content_clean = m.replaceAll("");
m = STYLE.matcher(content_clean);
content_clean = m.replaceAll("");
m = TAGS.matcher(content_clean);
content_clean = m.replaceAll("");
m = ENTITY_REFS.matcher(content_clean);
content_clean = m.replaceAll("");
m = WHITESPACE.matcher(content_clean);
content_clean = m.replaceAll(" ");

// Catch-all for anything TAGS does not cover (comments, <h1>, hyphenated names, ...).
content_clean = content_clean.replaceAll("<[^>]*>", "");

// Show only the beginning of the text. Trimming replaces the old fixed start offset of 1,
// which cut off the first visible character, and the length check avoids the
// StringIndexOutOfBoundsException that substring(1, 80) raised for shorter text.
int previewMaxLen = 80;
content_clean = content_clean.trim();
if (content_clean.length() > previewMaxLen) {
    // Do not cut between the two halves of a surrogate pair.
    int cut = Character.isHighSurrogate(content_clean.charAt(previewMaxLen - 1))
            ? previewMaxLen - 1
            : previewMaxLen;
    content_clean = content_clean.substring(0, cut);
}

// The preview is plain text, so it is HTML-escaped on output; an unterminated "<..."
// left over from the source markup must not be interpreted by the browser.
%>
<%= content_clean.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace("\"", "&quot;") %>