차근차근/JAVA JSP

Java: HTML 태그 제거 / HTML 코드에서 첫 번째 이미지만 가져오기

예쁜꽃이피었으면 2018. 3. 13. 16:50



HTML 코드에서 첫 번째 이미지만 가져오기 (MySQL)

-- Select-list expression: extracts the first <img ...> tag of the `content` column as firstImg.
-- Branch 1 (taken when INSTR finds no lowercase '<img', i.e. returns 0): slice from the '<IMG' position.
-- Branch 2 (otherwise): slice from the '<img' position.
-- In both branches MID(content, start, len) uses
--   start = position of the tag opener in content
--   len   = position of the first '>' counted from that start, so the slice ends on that '>'.
-- NOTE(review): if neither spelling is present, start is 0; presumably the result is an empty string - confirm.
-- NOTE(review): a '>' inside an attribute value would end the slice early.
-- NOTE(review): whether INSTR distinguishes '<img' from '<IMG' depends on the column collation - confirm.
if(instr(content,'<img') = 0  , mid(content,  instr(content,'<IMG') 

   ,   instr(substring(content,instr(content,'<IMG')),'>') )  ,mid(content,  instr(content,'<img')  

   ,   instr(substring(content,instr(content,'<img')),'>') )  ) AS firstImg







<%@ page import="java.util.regex.Matcher" %>

<%@ page import="java.util.regex.Pattern" %>


<%
// Builds a short plain-text preview of an HTML fragment.
//
// Input : content       - HTML source (declared earlier in the page); null is treated as empty.
// Output: content_clean - at most 80 characters of text with scripts, styles, comments,
//                         tags and entity references removed, HTML-escaped for safe output.

final int maxPreviewChars = 80;

String content_clean = (content == null) ? "" : content;

// Script and style elements are removed together with their bodies. The match is lazy so that
// several elements on one page are removed one by one instead of swallowing the text between
// them, and case-insensitive so that upper-case tags are covered as well.
Pattern SCRIPTS = Pattern.compile("<script\\b[^>]*>.*?</script\\s*>",
        Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
Pattern STYLE = Pattern.compile("<style\\b[^>]*>.*?</style\\s*>",
        Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
// Comments are removed as a unit because they may legally contain a '>' character.
Pattern COMMENTS = Pattern.compile("<!--.*?-->", Pattern.DOTALL);
// Any remaining tag, whatever its attributes look like.
Pattern TAGS = Pattern.compile("<[^>]*>");
// Only well-formed references (named, decimal, hex) are removed, so that ordinary text between
// a literal ampersand and a later semicolon is kept.
Pattern ENTITY_REFS = Pattern.compile("&(#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*);");
// Runs of two or more whitespace characters collapse to a single space.
Pattern WHITESPACE = Pattern.compile("\\s\\s+");

content_clean = SCRIPTS.matcher(content_clean).replaceAll("");
content_clean = STYLE.matcher(content_clean).replaceAll("");
content_clean = COMMENTS.matcher(content_clean).replaceAll("");
content_clean = TAGS.matcher(content_clean).replaceAll("");
content_clean = ENTITY_REFS.matcher(content_clean).replaceAll("");
content_clean = WHITESPACE.matcher(content_clean).replaceAll(" ").trim();

// Keep only the leading part. The end index is clamped so that short text does not throw
// StringIndexOutOfBoundsException, and the cut never splits a surrogate pair.
int previewEnd = Math.min(maxPreviewChars, content_clean.length());
if (previewEnd > 0 && previewEnd < content_clean.length()
        && Character.isHighSurrogate(content_clean.charAt(previewEnd - 1))) {
    previewEnd--;
}
content_clean = content_clean.substring(0, previewEnd);

// Escape after truncating so that an escape sequence is never cut in half. A stray '<' left
// over from an unterminated tag must not reach the page as markup.
content_clean = content_clean
        .replace("&", "&amp;")
        .replace("<", "&lt;")
        .replace(">", "&gt;")
        .replace("\"", "&quot;");
%>

<%= content_clean %>

반응형