001    /*

002     * JNI_SVM-light - A Java Native Interface for SVM-light

003     * 

004     * Copyright (C) 2005 

005     * Tom Crecelius & Martin Theobald 

006     * Max-Planck Institute for Computer Science

007     * 

008     * This program is free software; you can redistribute it and/or modify it under

009     * the terms of the GNU General Public License as published by the Free Software

010     * Foundation.

011     * 

012     * This program is distributed in the hope that it will be useful, but WITHOUT

013     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS

014     * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more

015     * details.

016     * 

017     * You should have received a copy of the GNU General Public License along with

018     * this program; if not, write to the Free Software Foundation, Inc., 51

019     * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

020     */

021    

022    package jnisvmlight;

023    

024    /**

025     * A feature vector. Features are dimension-value pairs. This class implements a simple

026     * dictionary data structure to map dimensions onto their values. Note that for

027     * convenience, features do not have be sorted according to their dimensions at

028     * this point. The SVMLightTrainer class has an option for sorting input vectors

029     * prior to training.

030     * 

031     * @author Tom Crecelius & Martin Theobald

032     */

033    public class FeatureVector implements java.io.Serializable {

034    

035      protected int[] m_dims;

036    

037      protected double[] m_vals;

038    

039      public FeatureVector(int size) {

040        this.m_dims = new int[size];

041        this.m_vals = new double[size];

042      }

043    

044      public FeatureVector(int[] dims, double[] vals) {

045        this.m_dims = dims;

046        this.m_vals = vals;

047      }

048    

049      /**

050       * Returns the cosine similarity between two feature vectors.

051       */

052      public double getCosine(FeatureVector v) {

053        double cosine = 0.0;

054        int dim;

055        double q_i, d_i;

056        for (int i = 0; i < Math.min(this.size(), v.size()); i++) {

057          dim = v.getDimAt(i);

058          q_i = v.getValueAt(dim);

059          d_i = this.getValueAt(dim);

060          cosine += q_i * d_i;

061        }

062        return cosine / (this.getL2Norm() * v.getL2Norm());

063      }

064    

065      public int getDimAt(int index) {

066        return m_dims[index];

067      }

068    

069      /**

070       * Returns the linear norm factor of this vector's values (i.e., the sum of

071       * it's values).

072       */

073      public double getL1Norm() {

074        double sum = 0.0;

075        for (int i = 0; i < m_vals.length; i++) {

076          sum += m_vals[i];

077        }

078        return sum;

079      }

080    

081      /**

082       * Returns the L2 norm factor of this vector's values.

083       */

084      public double getL2Norm() {

085        double square_sum = 0.0;

086        for (int i = 0; i < m_vals.length; i++) {

087          square_sum += (m_vals[i] * m_vals[i]);

088        }

089        return Math.sqrt(square_sum);

090      }

091    

092      public double getValueAt(int index) {

093        return m_vals[index];

094      }

095    

096      /**

097       * Performs a linear normalization to the value 1.

098       */

099      public void normalizeL1() {

100        normalizeL1(getL1Norm());

101      }

102    

103      /**

104       * Performs a linear normalization to the given norm value.

105       */

106      public void normalizeL1(double norm) {

107        for (int i = 0; i < m_vals.length; i++) {

108          if (m_vals[i] > 0) {

109            m_vals[i] /= norm;

110          }

111        }

112      }

113    

114      /**

115       * Performs an L2 normalization to the value 1.

116       */

117      public void normalizeL2() {

118        double norm = Math.pow(getL2Norm(), 2);

119        for (int i = 0; i < m_vals.length; i++) {

120          m_vals[i] = Math.pow(m_vals[i], 2) / norm;

121        }

122      }

123    

124      public void setFeatures(int[] dims, double[] vals) {

125        this.m_dims = dims;

126        this.m_vals = vals;

127      }

128    

129      public int size() {

130        return m_dims.length;

131      }

132    

133      public String toString() {

134        String s = "";

135        for (int i = 0; i < m_vals.length; i++) {

136          s += "" + m_dims[i] + ":" + m_vals[i] + ""

137              + (i < m_vals.length - 1 ? " " : "");

138        }

139        s += "";

140        return s;

141      }

142    }