본문 바로가기
컴퓨터관련

Spring xml 파일을 읽어들여 JSoup으로 Parsing하기

by 기록이답이다 2016. 3. 22.
반응형

xml 파일을 설정파일로 읽어와 manager에서 관리하는 모듈을 작성해야 할 일이 생겼다.

그래서 아래와 같이 처리했다.

설정파일로 사용할 xml 파일을(정확히는 파일명) property로 받은 후, 생성되는 bean에서 그 xml 파일을 읽어들여 jsoup으로 parsing 하는 예제이다. 먼저 bean 설정용 xml 파일을 새로 만들어 주는데, 이때 만드는 파일은 web.xml의 contextConfigLocation 에 지정한 경로 패턴으로 읽어들일 수 있도록 한다.

1
2
3
4
5
6
7
8
- web.xml
 
 <context-param>
  <param-name>contextConfigLocation</param-name>
  <param-value>
   classpath*:/spring/context-*.xml
  </param-value>
 </context-param>
xml을 읽어들이도록 bean 설정하는 xml은 아래와 같다.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
- context-myloader.xml
 
 
<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xsi:schemaLocation="http://www.springframework.org/schema/beans
                           http://www.springframework.org/schema/beans/spring-beans.xsd">

    <!-- One SearchCollection bean per collection; xmlName is the classpath-relative
         settings file that the bean parses when the property is injected. -->
    <bean id="webCollection" class="bugnote.search.SearchCollection">
        <property name="xmlName">
            <value>search/web_collection.xml</value>
        </property>
    </bean>
    <bean id="boardCollection" class="bugnote.search.SearchCollection">
        <property name="xmlName">
            <value>search/board_collection.xml</value>
        </property>
    </bean>

    <!-- The manager receives the connection settings and the list of collections above. -->
    <bean id="searchManager" class="bugnote.search.SearchManager">
        <property name="connectServerIp"><value>127.0.0.1</value></property>
        <property name="connectServerPort"><value>7000</value></property>
        <property name="connectServerTimeout"><value>10000</value></property>
        <property name="searchCollectionList">
            <list>
                <ref bean="webCollection"/>
                <ref bean="boardCollection"/>
            </list>
        </property>
    </bean>
</beans>
이런 식으로 context-myloader.xml 파일을 생성해준다. 설명을 해보자면 Application이 start될 때 bugnote.search.SearchManager가 bean으로 생성되는데, SearchManager.setSearchCollectionList의 argument로는 List<NSSearchCollection>을 넣어준다. 이때 argument로 들어가는 SearchCollection들은 보다시피 webCollection과 boardCollection이다.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
- SearchManager
 
package bugnote.search;
 
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
...
 
@Service("searchManager")
public class SearchManager {
    protected final Log logger = LogFactory.getLog(getClass());
 
    private List<string> collectionKeyList;
 
    public void setSearchCollectionList(List<nssearchcollection> searchCollectionList) {
        for(NSSearchCollection searchCollection : searchCollectionList) {
            addCollection(searchCollection);
        }
    }
 
    private void addCollection(List<nssearchcollection> searchCollectionList) {
        if(searchCollectionMap == null) searchCollectionMap = new HashMap<string, nssearchcollection="">();
        if(collectionKeyList == null) collectionKeyList = new ArrayList<string>();
        for(NSSearchCollection searchCollection : searchCollectionList) {
            this.searchCollectionMap.put(searchCollection.getName(), searchCollection);
            this.collectionKeyList.add(searchCollection.getName());
 
            if (logger.isInfoEnabled()) {
                logger.info("SearchCollection loading ... " + searchCollection.getName());
            }
        }
    }
 
                                              ...
                                              ...
     
}
</string></string,></nssearchcollection></nssearchcollection></string>
SearchManager의 setSearchCollectionList 부분이다. 이어서 살펴볼 부분은 이 문서의 제목처럼 SearchCollection에서 argument로 넘겨받은 xml 파일을 jsoup으로 parsing하는 부분이다.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
- SearchCollection.java
 
package bugnote.search;
 
import java.io.IOException;
import java.io.InputStream;
 
import kr.ictee.nsic.model.NSSearchCollection;
import kr.ictee.nsic.model.NSSearchField;
 
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.core.io.FileSystemResourceLoader;
import org.springframework.core.io.Resource;
 
public class SearchCollection extends NSSearchCollection {
    protected final Log logger = LogFactory.getLog(getClass());
     
    private Document getDocument(String xmlName) throws IOException {
        FileSystemResourceLoader fileSystemResourceLoader = new FileSystemResourceLoader();
        Resource xmlResource = fileSystemResourceLoader.getResource("classpath:"+xmlName);     
        InputStream stream = xmlResource.getInputStream();
        Document doc = Jsoup.parse(stream, "UTF-8", xmlResource.getURI().toString());
         
        return doc;
    }
     
    public void setXmlName(String xmlName) throws IOException {
        makeCollection(xmlName);
    }
 
    private void makeCollection(String xmlName) throws IOException {
        Document doc = getDocument(xmlName);
         
        String collectionName = doc.select("collection").attr("name").toString();
        String displayName = doc.select("displayName").text();
        setName(collectionName);
        setDisplayName(displayName);
 
        System.out.println(doc.select("queryAnalyzer useLa").text());
        // queryAnalyzer
        if(doc.select("queryAnalyzer useLa") != null) setQueryAnalyzerUseLa(doc.select("queryAnalyzer useLa").text());
        if(doc.select("queryAnalyzer useIgnoreCase") != null) setQueryAnalyzerIgnoreCase(doc.select("queryAnalyzer useIgnoreCase").text());
        if(doc.select("queryAnalyzer useToken") != null) setQueryAnalyzerUseToken(doc.select("queryAnalyzer useToken").text());
        if(doc.select("queryAnalyzer useSynonym") != null) setQueryAnalyzerUseSynonym(doc.select("queryAnalyzer useSynonym").text());
         
        // rank
        if(doc.select("rank rankMode") != null) setRankMode(doc.select("rank rankMode").text());
        if(doc.select("rank rankMethod") != null) setRankMethod(doc.select("rank rankMethod").text());
        if(doc.select("rank rankMaxCount") != null) {
            try {
                setRankMaxCount(Integer.parseInt(doc.select("rank rankMaxCount").text()));
            } catch(NumberFormatException ne) {
                logger.error("rank.rankMaxCount is must be integer");
            }
        }
         
        // highlight
        if(doc.select("highlight useHighlight") != null) setUseHighlight(doc.select("highlight useHighlight").text()); // highlight 사용
        if(doc.select("highlight highlightSnippet") != null) setHighlightSnippet(doc.select("highlight highlightSnippet").text());
         
        // pageInfo
        if(doc.select("pageInfo startPage") != null && !doc.select("pageInfo startPage").text().equals("")) {
            try {
                info("pageInfo.startPage : " + doc.select("pageInfo startPage").text());
                setPageInfoStartPage(Integer.parseInt(doc.select("pageInfo startPage").text()));
            } catch(NumberFormatException ne) {
                logger.error("pageInfo.startPage is must be integer");
            }          
        }
         
        if(doc.select("pageInfo listCount") != null && !doc.select("pageInfo listCount").text().equals("")) {
            try {
                info("pageInfo.listCount : " + doc.select("pageInfo listCount").text());
                setPageInfoListCount(Integer.parseInt(doc.select("pageInfo listCount").text()));
            } catch(NumberFormatException ne) {
                logger.error("pageInfo.listCount is must be integer");
            }          
        }
         
        if(doc.select("pageInfo integrateListCount") != null && !doc.select("pageInfo integrateListCount").text().equals("")) {
            try {
                info("pageInfo.integrateListCount : " + doc.select("pageInfo integrateListCount").text());
                setPageInfoIntegrateListCount(Integer.parseInt(doc.select("pageInfo integrateListCount").text()));
            } catch(NumberFormatException ne) {
                logger.error("pageInfo.integrateListCount is must be integer");
            }          
        }
         
         
        // fields
        Elements fields= doc.select("field");
        for(Element elementField : fields) {
            NSSearchField field = new NSSearchField();
            String fieldType = elementField.parent().attr("type").toString();
            if(fieldType == null || fieldType.equals("")) {
                logger.error("fieldset.fields.field type don't empty.");
                continue;
            }
            field.setFieldType(fieldType);
            if(elementField.select("name") != null) field.setFieldName(elementField.select("name").text());
            if(elementField.select("score") != null && !elementField.select("score").text().equals("")) {
                try {
                    info(elementField.select("score").text());
                    field.setFieldScore(Integer.parseInt(elementField.select("score").text()));
                } catch(NumberFormatException ne) {
                    logger.error("field.score is must be integer");
                }  
            }
            if(elementField.select("displayLength") != null && !elementField.select("displayLength").text().equals("")) {
                try {
                    info(elementField.select("displayLength").text());
                    field.setFieldDisplayLength(Integer.parseInt(elementField.select("displayLength").text()));
                } catch(NumberFormatException ne) {
                    logger.error("field.displayLength is must be integer");
                }                      
            }
         
            addField(field);
        }
    }
 
    private void info(String message) {
        if(logger.isInfoEnabled()) logger.info(message);
    }
 
}
xml 파일을 FileSystemResourceLoader로 읽어들인 후 InputStream을 가져온 다음, jsoup으로 parsing할 수 있는 Document 객체를 생성한다(getDocument 메소드). 이후에는 jsoup에서 처리하듯이 doc.select(...) 하면서 처리하면 된다. 참고로 여기서 설정파일로 사용한 xml 파일의 내용은 아래와 같다.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
-- board_collection.xml
 
 
<?xml version="1.0" encoding="UTF-8"?>
<!-- Settings of the "board" search collection, read by SearchCollection. -->
<collection name="board">
    <displayName>게시판</displayName>
    <queryAnalyzer>
        <useLa>true</useLa>
        <useToken>true</useToken>
        <useIgnoreCase>true</useIgnoreCase>
        <useSynonym>false</useSynonym>
    </queryAnalyzer>
    <rank>
        <rankMode>basic</rankMode>
        <rankMethod>rfp</rankMethod>
        <rankMaxCount>10000</rankMaxCount>
    </rank>
    <highlight>
        <useHighlight>true</useHighlight>
        <highlightSnippet>true</highlightSnippet>
    </highlight>
    <pageInfo>
        <startPage>0</startPage>
        <listCount>5</listCount>
        <integrateListCount>3</integrateListCount>
    </pageInfo>
    <fieldset>
        <!-- The type attribute of each <fields> group becomes the field type of its <field> children. -->
        <fields type="search">
            <field>
                <name>TITLE</name>
                <score>100</score>
            </field>
            <field>
                <name>CONTENT</name>
                <score>20</score>
            </field>
            <field>
                <name>FILECONT</name>
            </field>
            <field>
                <name>USERNAME</name>
            </field>
            <field>
                <name>USERID</name>
            </field>
            <field>
                <name>alias</name>
            </field>
        </fields>
        <fields type="document">
            <field>
                <name>DOCID</name>
            </field>
            <field>
                <name>TITLE</name>
                <displayLength>100</displayLength>
            </field>
            <field>
                <name>CONTENT</name>
                <displayLength>100</displayLength>
            </field>
            <field>
                <name>Date</name>
            </field>
            <field>
                <name>USERID</name>
            </field>
            <field>
                <name>USERNAME</name>
            </field>
            <field>
                <name>FILEPATH</name>
            </field>
            <field>
                <name>FILECONT</name>
            </field>
            <field>
                <name>SEQ</name>
            </field>
            <field>
                <name>MENU_SEQ</name>
            </field>
            <field>
                <name>alias</name>
            </field>
        </fields>
        <fields type="sort">
            <field>
                <name>RANK/DESC</name>
            </field>
            <field>
                <name>DATE/DESC</name>
            </field>
        </fields>
    </fieldset>
</collection>
끝.
반응형