001    /*
002     * JNI_SVM-light - A Java Native Interface for SVM-light
003     * 
004     * Copyright (C) 2005 
005     * Tom Crecelius & Martin Theobald 
006     * Max-Planck Institute for Computer Science
007     * 
008     * This program is free software; you can redistribute it and/or modify it under
009     * the terms of the GNU General Public License as published by the Free Software
010     * Foundation.
011     * 
012     * This program is distributed in the hope that it will be useful, but WITHOUT
013     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
014     * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
015     * details.
016     * 
017     * You should have received a copy of the GNU General Public License along with
018     * this program; if not, write to the Free Software Foundation, Inc., 51
019     * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
020     */
021    
022    package jnisvmlight;
023    
024    import java.io.BufferedReader;
025    import java.io.BufferedWriter;
026    import java.io.File;
027    import java.io.FileWriter;
028    import java.io.IOException;
029    import java.io.InputStreamReader;
030    import java.io.Serializable;
031    import java.net.URL;
032    import java.text.ParseException;
033    
034    /**
035     * SVM classifier model returned by SVM-light.
036     * 
037     * @author Tom Crecelius & Martin Theobald
038     */
039    public class SVMLightModel implements Serializable {
040    
041      /**
042       * Reads an SVM-light model form a URL and creates an SVMLightModel object in Java. The format is compatible to the
043       * SVM-light model files.
044       */
045      public static SVMLightModel readSVMLightModelFromURL(URL file)
046          throws ParseException {
047    
048        LabeledFeatureVector[] lfv;
049    
050        String format;
051        long kType;
052        long dParam;
053        double gParam;
054        double sParam;
055        double rParam;
056        String uParam;
057        long highFeatIdx;
058        long trainDocs;
059        long numSupVecs;
060        double threshold;
061    
062        int linecnt = 0;
063        try {
064    
065          BufferedReader bi = new BufferedReader(new InputStreamReader(file
066              .openStream()));
067          String line = null;
068    
069          line = bi.readLine();
070          format = line.toString();
071          linecnt++;
072          line = bi.readLine();
073          kType = new Long((line.substring(0, line.indexOf('#'))).trim())
074              .longValue();
075          linecnt++;
076          line = bi.readLine();
077          dParam = new Long((line.substring(0, line.indexOf('#'))).trim())
078              .longValue();
079          linecnt++;
080          line = bi.readLine();
081          gParam = new Double((line.substring(0, line.indexOf('#'))).trim())
082              .doubleValue();
083          linecnt++;
084          line = bi.readLine();
085          sParam = new Double((line.substring(0, line.indexOf('#'))).trim())
086              .doubleValue();
087          linecnt++;
088          line = bi.readLine();
089          rParam = new Double((line.substring(0, line.indexOf('#'))).trim())
090              .doubleValue();
091          linecnt++;
092          line = bi.readLine();
093          uParam = (line.substring(0, line.indexOf('#'))).trim();
094          linecnt++;
095          line = bi.readLine();
096          highFeatIdx = new Long((line.substring(0, line.indexOf('#'))).trim())
097              .longValue();
098          linecnt++;
099          line = bi.readLine();
100          trainDocs = new Long((line.substring(0, line.indexOf('#'))).trim())
101              .longValue();
102          linecnt++;
103          line = bi.readLine();
104          numSupVecs = new Long((line.substring(0, line.indexOf('#'))).trim())
105              .longValue();
106          linecnt++;
107          line = bi.readLine();
108          threshold = new Double((line.substring(0, line.indexOf('#'))).trim())
109              .doubleValue();
110    
111          bi.close();
112    
113        } catch (IOException ioe) {
114          ioe.printStackTrace();
115          throw new ParseException("Parse error in header at line " + linecnt
116              + ": " + ioe.getMessage() + " of File: '" + file.toString()
117              + "'. Not a svmlight-model file ?!", 0);
118        } catch (NullPointerException npe) {
119          throw new ParseException("Parse error in header at line " + linecnt
120              + ": " + npe.getMessage() + " of File: '" + file.toString()
121              + "'. Not a svmlight-model file ?!", 0);
122        } catch (IndexOutOfBoundsException ibe) {
123          throw new ParseException("Parse error in header at line " + linecnt
124              + ": " + ibe.getMessage() + " of File: '" + file.toString()
125              + "'. Not a svmlight-model file ?!", 0);
126        } catch (NumberFormatException nfe) {
127          throw new ParseException("Parse error in header at line " + linecnt
128              + ": " + nfe.getMessage() + " of File: '" + file.toString()
129              + "'. Not a svmlight-model file ?!", 0);
130        }
131    
132        lfv = SVMLightTrainer.getLabeledFeatureVectorsFromURL(file, linecnt);
133    
134        return new SVMLightModel(format, kType, dParam, gParam, sParam, rParam,
135            uParam, highFeatIdx, trainDocs, numSupVecs, threshold, lfv);
136      }
137    
138      private LabeledFeatureVector[] m_docs;
139    
140      private long m_dParam;
141    
142      private String m_format;
143    
144      private double m_gParam;
145    
146      private long m_highFeatIdx;
147    
148      private Kernel m_kernel;
149    
150      private long m_kType;
151    
152      private long m_numSupVecs;
153    
154      private double m_rParam;
155    
156      private double m_sParam;
157    
158      private double m_threshold;
159    
160      private long m_trainDocs;
161    
162      private String m_uParam;
163    
164      protected SVMLightModel(String format, long ktype, long dparm, double gparm,
165          double sparm, double rparm, String uparm, long hfi, long tdocs, long nsv,
166          double threshold, LabeledFeatureVector[] docs) {
167        m_format = format;
168        m_kType = ktype;
169        m_dParam = dparm;
170        m_gParam = gparm;
171        m_sParam = sparm;
172        m_rParam = rparm;
173        m_uParam = uparm;
174        m_highFeatIdx = hfi;
175        m_trainDocs = tdocs;
176        m_numSupVecs = nsv;
177        m_threshold = threshold;
178        m_docs = docs;
179    
180        switch ((int) m_kType) {
181          case 0:
182            m_kernel = new LinearKernel();
183            break;
184          case 1:
185            m_kernel = new PolynomialKernel(new LinearKernel(), m_dParam, m_sParam,
186                m_rParam);
187            break;
188          case 2:
189            m_kernel = new RadialBaseKernel(new LinearKernel(), m_gParam);
190            break;
191          case 3:
192            m_kernel = new SigmoidKernel(new LinearKernel(), m_sParam, m_rParam);
193          default:
194            throw new RuntimeException("This type of kernel is not supported!");
195        }
196      }
197    
198      public double classify(FeatureVector v) {
199        double delta = 0;
200        for (int i = 0; i < m_docs.length; i++) {
201          double alpha = m_docs[i].getLabel();
202          if (alpha != 0)
203            delta += alpha * m_kernel.evaluate(m_docs[i], v);
204        }
205        return delta - m_threshold;
206      }
207    
208      public void setThreshold(double threshold) {
209        m_threshold = threshold;
210      }
211    
212      public String toString() {
213        String s = "\n(| Format     : "
214            + m_format
215            + "|\n"
216            + "| KType      : "
217            + m_kType
218            + "\n"
219            + "| dParam     : "
220            + m_dParam
221            + "\n"
222            + "| gParam     : "
223            + m_gParam
224            + "\n"
225            + "| sParam     : "
226            + m_sParam
227            + "\n"
228            + "| rParam     : "
229            + m_rParam
230            + "\n"
231            + "| uParam     : "
232            + m_uParam
233            + "\n"
234            + "| HighFeatIdx   : "
235            + m_highFeatIdx
236            + "\n"
237            + "| trainDocs  : "
238            + m_trainDocs
239            + "\n"
240            + "| numSupVecs : "
241            + m_numSupVecs
242            + "\n"
243            + "| threshold  : "
244            + m_threshold
245            + "\n"
246            + (m_docs != null ? ("#SampleData:" + m_docs.length + "\n|[0]"
247                + m_docs[0] + "|\n" + (0 < m_docs.length ? ("...\n" + "|["
248                + (m_docs.length - 1) + "]" + m_docs[m_docs.length - 1] + "|\n")
249                : "")) : "");
250        return s;
251      }
252    
253      /**
254       * Writes this SVMLightModel to a file. The format is compatible to the
255       * SVM-light model files.
256       */
257      public void writeModelTofile(String path) {
258        File dump = new File(path);
259        if (m_docs != null) {
260          String header = m_format
261              + "\n"
262              + m_kType
263              + " # kernel type\n"
264              + m_dParam
265              + " # kernel parameter -d\n"
266              + m_gParam
267              + " # kernel parameter -g\n"
268              + m_sParam
269              + " # kernel parameter -s\n"
270              + m_rParam
271              + " # kernel parameter -r\n"
272              + m_uParam
273              + "# kernel parameter -u\n"
274              + m_highFeatIdx
275              + " # highest feature index\n"
276              + m_trainDocs
277              + " # number of training documents\n"
278              + m_numSupVecs
279              + " # number of support vectors plus 1\n"
280              + m_threshold
281              + " # threshold b, each following line is a SV (starting with alpha*y)\n";
282    
283          try {
284            BufferedWriter bd = new BufferedWriter(new FileWriter(dump));
285            bd.write(header);
286            for (int i = 0; i < m_docs.length; i++) {
287              bd.write(m_docs[i].toString());
288            }
289            bd.close();
290          } catch (IOException ioe) {
291            ioe.printStackTrace();
292          }
293    
294        } else {
295          System.out.println("Cannot write model file..");
296        }
297      }
298    }