Sal
Administrator
Seorang penulis yang suka belajar hal baru dan dunia fotografi
2 min read

Mengindex File XML dengan Menggunakan Lucene

Posted in Lucene

Post ini adalah lanjutan dari tutorial membuat search engine dengan menggunakan Lucene, khususnya pada bagian tombol Index. Tombol Index digunakan untuk mengindeks file XML untuk pertama kalinya dan menyimpan hasil indeksnya ke dalam sebuah direktori "index".

Source Code

Berikut ini adalah source code dari file Index.java:
/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package luceneproject;

import java.io.File;
import java.io.IOException;
import javax.swing.JTextArea;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
 * Builds a Lucene index from the student records stored in {@code DataSiswa.xml}.
 *
 * <p>Every {@code SISWA} element of the XML file becomes one Lucene document with the
 * stored, analyzed fields {@code NIS}, {@code NAMA}, {@code KELAS} and {@code ALAMAT}.
 * The index is written to the {@code index} directory. Both paths are resolved by
 * {@link File}, i.e. relative to the process working directory.
 *
 * <p>Failures are not thrown to the caller; the exception message is placed in
 * {@link #txtSearch} instead.
 *
 * @author Azhar
 */
class Index {
    /** Names of the child tags of a SISWA element; each one becomes an index field. */
    private static final String[] FIELD_NAMES = {"NIS", "NAMA", "KELAS", "ALAMAT"};

    /** Open writer while a run is in progress, {@code null} otherwise. */
    private IndexWriter writer;
    /** Directory backing {@link #writer}; kept so it can be closed after the writer. */
    private Directory directory;
    /** Text area that receives error messages and the final "Finished" line. */
    JTextArea txtSearch= new JTextArea( );

    /**
     * Parses the XML file, indexes every SISWA record and closes the index.
     *
     * <p>If the XML cannot be parsed or the index cannot be opened, the error has
     * already been written to {@link #txtSearch} and the run stops without indexing.
     */
    void run() {
        Document doc = getXmlDoc();
        if (doc == null) {
            // Parsing failed and was reported; do not open (or create) the index.
            return;
        }
        prepareIndexDir();
        if (writer == null) {
            // Opening the index failed and was reported.
            return;
        }
        try {
            index(doc);
        } finally {
            // Always release the writer, otherwise it stays open after an
            // unexpected runtime failure while indexing.
            closeIndex();
        }
    }

    /**
     * Parses {@code DataSiswa.xml} into a DOM document.
     *
     * @return the parsed document, or {@code null} if parsing failed (the failure
     *         message is placed in {@link #txtSearch})
     */
    private Document getXmlDoc() {
        File file = new File("DataSiswa.xml");
        Document doc = null;
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        try {
            DocumentBuilder db = dbf.newDocumentBuilder();
            doc = db.parse(file);
        } catch (ParserConfigurationException ex) {
            report(ex);
        } catch (SAXException ex) {
            report(ex);
        } catch (IOException ex) {
            report(ex);
        }
        return doc;
    }

    /**
     * Opens the {@code index} directory and creates {@link #writer} on it, using a
     * {@link StandardAnalyzer}. On failure the error is reported and {@link #writer}
     * is left {@code null}.
     */
    private void prepareIndexDir() {
        try {
            directory = FSDirectory.open(new File("index"));
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_34,
                new StandardAnalyzer(Version.LUCENE_34));
            writer = new IndexWriter(directory, config);
        } catch (IOException ex) {
            // Release whatever was opened first, then report the primary cause so it
            // is the message left in the text area.
            closeIndex();
            report(ex);
        }
    }

    /**
     * Closes the writer and then its directory, if they are open, and resets both
     * fields to {@code null}. Close failures are reported, not thrown.
     */
    private void closeIndex() {
        if (writer != null) {
            try {
                writer.close();
            } catch (IOException ex) {
                // CorruptIndexException is an IOException, so this covers both.
                report(ex);
            } finally {
                writer = null;
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException ex) {
                report(ex);
            } finally {
                directory = null;
            }
        }
    }

    /**
     * Adds one Lucene document per {@code SISWA} element of {@code doc} and then
     * appends "Finished" to {@link #txtSearch}. A record that fails to be written is
     * reported and skipped; the remaining records are still processed.
     *
     * @param doc parsed XML document; must not be {@code null}
     */
    private void index(Document doc) {
        NodeList nodeLst = doc.getElementsByTagName("SISWA");
        for (int i = 0; i < nodeLst.getLength(); i++) {
            Node nNode = nodeLst.item(i);
            if (nNode.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            try {
                writer.addDocument(createDocument((Element) nNode));
            } catch (IOException ex) {
                // Covers CorruptIndexException as well (it extends IOException).
                report(ex);
            }
        }
        txtSearch.append("Finished\n");
    }

    /**
     * Converts one SISWA element into a Lucene document with one stored, analyzed
     * field per entry of {@link #FIELD_NAMES}.
     *
     * @param e the SISWA element
     * @return the Lucene document for that record
     */
    private org.apache.lucene.document.Document createDocument(Element e) {
        org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document();
        for (int i = 0; i < FIELD_NAMES.length; i++) {
            String name = FIELD_NAMES[i];
            doc.add(new Field(name, getTagValue(name, e),
                Field.Store.YES, Field.Index.ANALYZED));
        }
        return doc;
    }

    /**
     * Returns the value of the first child node of the first {@code tag} element
     * found below {@code e}.
     *
     * @param tag tag name to look up
     * @param e   element to search in
     * @return the node value, or an empty string when the tag is absent, the element
     *         has no children, or its first child carries no value
     */
    private String getTagValue(String tag, Element e) {
        Node tagNode = e.getElementsByTagName(tag).item(0);
        if (tagNode == null) {
            // Tag missing from this record.
            return "";
        }
        Node first = tagNode.getFirstChild();
        if (first == null) {
            // Empty element such as <ALAMAT/>.
            return "";
        }
        String value = first.getNodeValue();
        // getNodeValue() is null for non-text nodes (e.g. a nested element).
        return value == null ? "" : value;
    }

    /**
     * Shows the message of {@code ex} in {@link #txtSearch}, replacing its content.
     *
     * @param ex the failure to report
     */
    private void report(Exception ex) {
        txtSearch.setText(ex.getMessage());
    }

}
Semoga bermanfaat.