001    /*

002     * JNI_SVM-light - A Java Native Interface for SVM-light

003     * 

004     * Copyright (C) 2005 

005     * Tom Crecelius & Martin Theobald 

006     * Max-Planck Institute for Computer Science

007     * 

008     * This program is free software; you can redistribute it and/or modify it under

009     * the terms of the GNU General Public License as published by the Free Software

010     * Foundation.

011     * 

012     * This program is distributed in the hope that it will be useful, but WITHOUT

013     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS

014     * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more

015     * details.

016     * 

017     * You should have received a copy of the GNU General Public License along with

018     * this program; if not, write to the Free Software Foundation, Inc., 51

019     * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

020     */

021    

022    package jnisvmlight;

023    

024    import java.io.BufferedReader;

025    import java.io.BufferedWriter;

026    import java.io.File;

027    import java.io.FileWriter;

028    import java.io.IOException;

029    import java.io.InputStreamReader;

030    import java.io.Serializable;

031    import java.net.URL;

032    import java.text.ParseException;

033    

034    /**

035     * SVM classifier model returned by SVM-light.

036     * 

037     * @author Tom Crecelius & Martin Theobald

038     */

039    public class SVMLightModel implements Serializable {

040    

041      /**

042       * Reads an SVM-light model form a URL and creates an SVMLightModel object in Java. The format is compatible to the

043       * SVM-light model files.

044       */

045      public static SVMLightModel readSVMLightModelFromURL(URL file)

046          throws ParseException {

047    

048        LabeledFeatureVector[] lfv;

049    

050        String format;

051        long kType;

052        long dParam;

053        double gParam;

054        double sParam;

055        double rParam;

056        String uParam;

057        long highFeatIdx;

058        long trainDocs;

059        long numSupVecs;

060        double threshold;

061    

062        int linecnt = 0;

063        try {

064    

065          BufferedReader bi = new BufferedReader(new InputStreamReader(file

066              .openStream()));

067          String line = null;

068    

069          line = bi.readLine();

070          format = line.toString();

071          linecnt++;

072          line = bi.readLine();

073          kType = new Long((line.substring(0, line.indexOf('#'))).trim())

074              .longValue();

075          linecnt++;

076          line = bi.readLine();

077          dParam = new Long((line.substring(0, line.indexOf('#'))).trim())

078              .longValue();

079          linecnt++;

080          line = bi.readLine();

081          gParam = new Double((line.substring(0, line.indexOf('#'))).trim())

082              .doubleValue();

083          linecnt++;

084          line = bi.readLine();

085          sParam = new Double((line.substring(0, line.indexOf('#'))).trim())

086              .doubleValue();

087          linecnt++;

088          line = bi.readLine();

089          rParam = new Double((line.substring(0, line.indexOf('#'))).trim())

090              .doubleValue();

091          linecnt++;

092          line = bi.readLine();

093          uParam = (line.substring(0, line.indexOf('#'))).trim();

094          linecnt++;

095          line = bi.readLine();

096          highFeatIdx = new Long((line.substring(0, line.indexOf('#'))).trim())

097              .longValue();

098          linecnt++;

099          line = bi.readLine();

100          trainDocs = new Long((line.substring(0, line.indexOf('#'))).trim())

101              .longValue();

102          linecnt++;

103          line = bi.readLine();

104          numSupVecs = new Long((line.substring(0, line.indexOf('#'))).trim())

105              .longValue();

106          linecnt++;

107          line = bi.readLine();

108          threshold = new Double((line.substring(0, line.indexOf('#'))).trim())

109              .doubleValue();

110    

111          bi.close();

112    

113        } catch (IOException ioe) {

114          ioe.printStackTrace();

115          throw new ParseException("Parse error in header at line " + linecnt

116              + ": " + ioe.getMessage() + " of File: '" + file.toString()

117              + "'. Not a svmlight-model file ?!", 0);

118        } catch (NullPointerException npe) {

119          throw new ParseException("Parse error in header at line " + linecnt

120              + ": " + npe.getMessage() + " of File: '" + file.toString()

121              + "'. Not a svmlight-model file ?!", 0);

122        } catch (IndexOutOfBoundsException ibe) {

123          throw new ParseException("Parse error in header at line " + linecnt

124              + ": " + ibe.getMessage() + " of File: '" + file.toString()

125              + "'. Not a svmlight-model file ?!", 0);

126        } catch (NumberFormatException nfe) {

127          throw new ParseException("Parse error in header at line " + linecnt

128              + ": " + nfe.getMessage() + " of File: '" + file.toString()

129              + "'. Not a svmlight-model file ?!", 0);

130        }

131    

132        lfv = SVMLightTrainer.getLabeledFeatureVectorsFromURL(file, linecnt);

133    

134        return new SVMLightModel(format, kType, dParam, gParam, sParam, rParam,

135            uParam, highFeatIdx, trainDocs, numSupVecs, threshold, lfv);

136      }

137    

138      private LabeledFeatureVector[] m_docs;

139    

140      private long m_dParam;

141    

142      private String m_format;

143    

144      private double m_gParam;

145    

146      private long m_highFeatIdx;

147    

148      private Kernel m_kernel;

149    

150      private long m_kType;

151    

152      private long m_numSupVecs;

153    

154      private double m_rParam;

155    

156      private double m_sParam;

157    

158      private double m_threshold;

159    

160      private long m_trainDocs;

161    

162      private String m_uParam;

163    

164      protected SVMLightModel(String format, long ktype, long dparm, double gparm,

165          double sparm, double rparm, String uparm, long hfi, long tdocs, long nsv,

166          double threshold, LabeledFeatureVector[] docs) {

167        m_format = format;

168        m_kType = ktype;

169        m_dParam = dparm;

170        m_gParam = gparm;

171        m_sParam = sparm;

172        m_rParam = rparm;

173        m_uParam = uparm;

174        m_highFeatIdx = hfi;

175        m_trainDocs = tdocs;

176        m_numSupVecs = nsv;

177        m_threshold = threshold;

178        m_docs = docs;

179    

180        switch ((int) m_kType) {

181          case 0:

182            m_kernel = new LinearKernel();

183            break;

184          case 1:

185            m_kernel = new PolynomialKernel(new LinearKernel(), m_dParam, m_sParam,

186                m_rParam);

187            break;

188          case 2:

189            m_kernel = new RadialBaseKernel(new LinearKernel(), m_gParam);

190            break;

191          case 3:

192            m_kernel = new SigmoidKernel(new LinearKernel(), m_sParam, m_rParam);

193          default:

194            throw new RuntimeException("This type of kernel is not supported!");

195        }

196      }

197    

198      public double classify(FeatureVector v) {

199        double delta = 0;

200        for (int i = 0; i < m_docs.length; i++) {

201          double alpha = m_docs[i].getLabel();

202          if (alpha != 0)

203            delta += alpha * m_kernel.evaluate(m_docs[i], v);

204        }

205        return delta - m_threshold;

206      }

207    

208      public void setThreshold(double threshold) {

209        m_threshold = threshold;

210      }

211    

212      public String toString() {

213        String s = "\n(| Format     : "

214            + m_format

215            + "|\n"

216            + "| KType      : "

217            + m_kType

218            + "\n"

219            + "| dParam     : "

220            + m_dParam

221            + "\n"

222            + "| gParam     : "

223            + m_gParam

224            + "\n"

225            + "| sParam     : "

226            + m_sParam

227            + "\n"

228            + "| rParam     : "

229            + m_rParam

230            + "\n"

231            + "| uParam     : "

232            + m_uParam

233            + "\n"

234            + "| HighFeatIdx   : "

235            + m_highFeatIdx

236            + "\n"

237            + "| trainDocs  : "

238            + m_trainDocs

239            + "\n"

240            + "| numSupVecs : "

241            + m_numSupVecs

242            + "\n"

243            + "| threshold  : "

244            + m_threshold

245            + "\n"

246            + (m_docs != null ? ("#SampleData:" + m_docs.length + "\n|[0]"

247                + m_docs[0] + "|\n" + (0 < m_docs.length ? ("...\n" + "|["

248                + (m_docs.length - 1) + "]" + m_docs[m_docs.length - 1] + "|\n")

249                : "")) : "");

250        return s;

251      }

252    

253      /**

254       * Writes this SVMLightModel to a file. The format is compatible to the

255       * SVM-light model files.

256       */

257      public void writeModelTofile(String path) {

258        File dump = new File(path);

259        if (m_docs != null) {

260          String header = m_format

261              + "\n"

262              + m_kType

263              + " # kernel type\n"

264              + m_dParam

265              + " # kernel parameter -d\n"

266              + m_gParam

267              + " # kernel parameter -g\n"

268              + m_sParam

269              + " # kernel parameter -s\n"

270              + m_rParam

271              + " # kernel parameter -r\n"

272              + m_uParam

273              + "# kernel parameter -u\n"

274              + m_highFeatIdx

275              + " # highest feature index\n"

276              + m_trainDocs

277              + " # number of training documents\n"

278              + m_numSupVecs

279              + " # number of support vectors plus 1\n"

280              + m_threshold

281              + " # threshold b, each following line is a SV (starting with alpha*y)\n";

282    

283          try {

284            BufferedWriter bd = new BufferedWriter(new FileWriter(dump));

285            bd.write(header);

286            for (int i = 0; i < m_docs.length; i++) {

287              bd.write(m_docs[i].toString());

288            }

289            bd.close();

290          } catch (IOException ioe) {

291            ioe.printStackTrace();

292          }

293    

294        } else {

295          System.out.println("Cannot write model file..");

296        }

297      }

298    }