001 /*
002 * JNI_SVM-light - A Java Native Interface for SVM-light
003 *
004 * Copyright (C) 2005
005 * Tom Crecelius & Martin Theobald
006 * Max-Planck Institute for Computer Science
007 *
008 * This program is free software; you can redistribute it and/or modify it under
009 * the terms of the GNU General Public License as published by the Free Software
010 * Foundation.
011 *
012 * This program is distributed in the hope that it will be useful, but WITHOUT
013 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
014 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
015 * details.
016 *
017 * You should have received a copy of the GNU General Public License along with
018 * this program; if not, write to the Free Software Foundation, Inc., 51
019 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
020 */
021
022 package jnisvmlight;
023
024 import java.io.BufferedReader;
025 import java.io.BufferedWriter;
026 import java.io.File;
027 import java.io.FileWriter;
028 import java.io.IOException;
029 import java.io.InputStreamReader;
030 import java.io.Serializable;
031 import java.net.URL;
032 import java.text.ParseException;
033
034 /**
035 * SVM classifier model returned by SVM-light.
036 *
037 * @author Tom Crecelius & Martin Theobald
038 */
039 public class SVMLightModel implements Serializable {
040
041 /**
042 * Reads an SVM-light model form a URL and creates an SVMLightModel object in Java. The format is compatible to the
043 * SVM-light model files.
044 */
045 public static SVMLightModel readSVMLightModelFromURL(URL file)
046 throws ParseException {
047
048 LabeledFeatureVector[] lfv;
049
050 String format;
051 long kType;
052 long dParam;
053 double gParam;
054 double sParam;
055 double rParam;
056 String uParam;
057 long highFeatIdx;
058 long trainDocs;
059 long numSupVecs;
060 double threshold;
061
062 int linecnt = 0;
063 try {
064
065 BufferedReader bi = new BufferedReader(new InputStreamReader(file
066 .openStream()));
067 String line = null;
068
069 line = bi.readLine();
070 format = line.toString();
071 linecnt++;
072 line = bi.readLine();
073 kType = new Long((line.substring(0, line.indexOf('#'))).trim())
074 .longValue();
075 linecnt++;
076 line = bi.readLine();
077 dParam = new Long((line.substring(0, line.indexOf('#'))).trim())
078 .longValue();
079 linecnt++;
080 line = bi.readLine();
081 gParam = new Double((line.substring(0, line.indexOf('#'))).trim())
082 .doubleValue();
083 linecnt++;
084 line = bi.readLine();
085 sParam = new Double((line.substring(0, line.indexOf('#'))).trim())
086 .doubleValue();
087 linecnt++;
088 line = bi.readLine();
089 rParam = new Double((line.substring(0, line.indexOf('#'))).trim())
090 .doubleValue();
091 linecnt++;
092 line = bi.readLine();
093 uParam = (line.substring(0, line.indexOf('#'))).trim();
094 linecnt++;
095 line = bi.readLine();
096 highFeatIdx = new Long((line.substring(0, line.indexOf('#'))).trim())
097 .longValue();
098 linecnt++;
099 line = bi.readLine();
100 trainDocs = new Long((line.substring(0, line.indexOf('#'))).trim())
101 .longValue();
102 linecnt++;
103 line = bi.readLine();
104 numSupVecs = new Long((line.substring(0, line.indexOf('#'))).trim())
105 .longValue();
106 linecnt++;
107 line = bi.readLine();
108 threshold = new Double((line.substring(0, line.indexOf('#'))).trim())
109 .doubleValue();
110
111 bi.close();
112
113 } catch (IOException ioe) {
114 ioe.printStackTrace();
115 throw new ParseException("Parse error in header at line " + linecnt
116 + ": " + ioe.getMessage() + " of File: '" + file.toString()
117 + "'. Not a svmlight-model file ?!", 0);
118 } catch (NullPointerException npe) {
119 throw new ParseException("Parse error in header at line " + linecnt
120 + ": " + npe.getMessage() + " of File: '" + file.toString()
121 + "'. Not a svmlight-model file ?!", 0);
122 } catch (IndexOutOfBoundsException ibe) {
123 throw new ParseException("Parse error in header at line " + linecnt
124 + ": " + ibe.getMessage() + " of File: '" + file.toString()
125 + "'. Not a svmlight-model file ?!", 0);
126 } catch (NumberFormatException nfe) {
127 throw new ParseException("Parse error in header at line " + linecnt
128 + ": " + nfe.getMessage() + " of File: '" + file.toString()
129 + "'. Not a svmlight-model file ?!", 0);
130 }
131
132 lfv = SVMLightTrainer.getLabeledFeatureVectorsFromURL(file, linecnt);
133
134 return new SVMLightModel(format, kType, dParam, gParam, sParam, rParam,
135 uParam, highFeatIdx, trainDocs, numSupVecs, threshold, lfv);
136 }
137
138 private LabeledFeatureVector[] m_docs;
139
140 private long m_dParam;
141
142 private String m_format;
143
144 private double m_gParam;
145
146 private long m_highFeatIdx;
147
148 private Kernel m_kernel;
149
150 private long m_kType;
151
152 private long m_numSupVecs;
153
154 private double m_rParam;
155
156 private double m_sParam;
157
158 private double m_threshold;
159
160 private long m_trainDocs;
161
162 private String m_uParam;
163
164 protected SVMLightModel(String format, long ktype, long dparm, double gparm,
165 double sparm, double rparm, String uparm, long hfi, long tdocs, long nsv,
166 double threshold, LabeledFeatureVector[] docs) {
167 m_format = format;
168 m_kType = ktype;
169 m_dParam = dparm;
170 m_gParam = gparm;
171 m_sParam = sparm;
172 m_rParam = rparm;
173 m_uParam = uparm;
174 m_highFeatIdx = hfi;
175 m_trainDocs = tdocs;
176 m_numSupVecs = nsv;
177 m_threshold = threshold;
178 m_docs = docs;
179
180 switch ((int) m_kType) {
181 case 0:
182 m_kernel = new LinearKernel();
183 break;
184 case 1:
185 m_kernel = new PolynomialKernel(new LinearKernel(), m_dParam, m_sParam,
186 m_rParam);
187 break;
188 case 2:
189 m_kernel = new RadialBaseKernel(new LinearKernel(), m_gParam);
190 break;
191 case 3:
192 m_kernel = new SigmoidKernel(new LinearKernel(), m_sParam, m_rParam);
193 default:
194 throw new RuntimeException("This type of kernel is not supported!");
195 }
196 }
197
198 public double classify(FeatureVector v) {
199 double delta = 0;
200 for (int i = 0; i < m_docs.length; i++) {
201 double alpha = m_docs[i].getLabel();
202 if (alpha != 0)
203 delta += alpha * m_kernel.evaluate(m_docs[i], v);
204 }
205 return delta - m_threshold;
206 }
207
208 public void setThreshold(double threshold) {
209 m_threshold = threshold;
210 }
211
212 public String toString() {
213 String s = "\n(| Format : "
214 + m_format
215 + "|\n"
216 + "| KType : "
217 + m_kType
218 + "\n"
219 + "| dParam : "
220 + m_dParam
221 + "\n"
222 + "| gParam : "
223 + m_gParam
224 + "\n"
225 + "| sParam : "
226 + m_sParam
227 + "\n"
228 + "| rParam : "
229 + m_rParam
230 + "\n"
231 + "| uParam : "
232 + m_uParam
233 + "\n"
234 + "| HighFeatIdx : "
235 + m_highFeatIdx
236 + "\n"
237 + "| trainDocs : "
238 + m_trainDocs
239 + "\n"
240 + "| numSupVecs : "
241 + m_numSupVecs
242 + "\n"
243 + "| threshold : "
244 + m_threshold
245 + "\n"
246 + (m_docs != null ? ("#SampleData:" + m_docs.length + "\n|[0]"
247 + m_docs[0] + "|\n" + (0 < m_docs.length ? ("...\n" + "|["
248 + (m_docs.length - 1) + "]" + m_docs[m_docs.length - 1] + "|\n")
249 : "")) : "");
250 return s;
251 }
252
253 /**
254 * Writes this SVMLightModel to a file. The format is compatible to the
255 * SVM-light model files.
256 */
257 public void writeModelTofile(String path) {
258 File dump = new File(path);
259 if (m_docs != null) {
260 String header = m_format
261 + "\n"
262 + m_kType
263 + " # kernel type\n"
264 + m_dParam
265 + " # kernel parameter -d\n"
266 + m_gParam
267 + " # kernel parameter -g\n"
268 + m_sParam
269 + " # kernel parameter -s\n"
270 + m_rParam
271 + " # kernel parameter -r\n"
272 + m_uParam
273 + "# kernel parameter -u\n"
274 + m_highFeatIdx
275 + " # highest feature index\n"
276 + m_trainDocs
277 + " # number of training documents\n"
278 + m_numSupVecs
279 + " # number of support vectors plus 1\n"
280 + m_threshold
281 + " # threshold b, each following line is a SV (starting with alpha*y)\n";
282
283 try {
284 BufferedWriter bd = new BufferedWriter(new FileWriter(dump));
285 bd.write(header);
286 for (int i = 0; i < m_docs.length; i++) {
287 bd.write(m_docs[i].toString());
288 }
289 bd.close();
290 } catch (IOException ioe) {
291 ioe.printStackTrace();
292 }
293
294 } else {
295 System.out.println("Cannot write model file..");
296 }
297 }
298 }