001    /*
002     * JNI_SVM-light - A Java Native Interface for SVM-light
003     * 
004     * Copyright (C) 2005 
005     * Tom Crecelius & Martin Theobald 
006     * Max-Planck Institute for Computer Science
007     * 
008     * This program is free software; you can redistribute it and/or modify it under
009     * the terms of the GNU General Public License as published by the Free Software
010     * Foundation.
011     * 
012     * This program is distributed in the hope that it will be useful, but WITHOUT
013     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
014     * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
015     * details.
016     * 
017     * You should have received a copy of the GNU General Public License along with
018     * this program; if not, write to the Free Software Foundation, Inc., 51
019     * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
020     */
021    
022    package jnisvmlight;
023    
/**
 * A feature vector. Features are dimension-value pairs held in two parallel
 * arrays: position {@code i} of the dimension array names the dimension whose
 * value is stored at position {@code i} of the value array. Note that for
 * convenience, features do not have to be sorted according to their dimensions
 * at this point. The SVMLightTrainer class has an option for sorting input
 * vectors prior to training.
 *
 * <p>Arrays handed to the constructor or to {@link #setFeatures(int[], double[])}
 * are stored by reference, not copied, and the normalization methods modify the
 * value array in place.
 *
 * @author Tom Crecelius & Martin Theobald
 */
public class FeatureVector implements java.io.Serializable {

  // NOTE(review): this class is part of a JNI binding, so native code may look
  // the two fields below up by name - confirm before renaming them or
  // narrowing their visibility.

  /** Dimension of each feature; parallel to {@link #m_vals}. */
  protected int[] m_dims;

  /** Value of each feature; parallel to {@link #m_dims}. */
  protected double[] m_vals;

  /**
   * Creates a vector with room for {@code size} features, all dimensions and
   * values initially zero.
   *
   * @param size the number of features
   * @throws NegativeArraySizeException if {@code size} is negative
   */
  public FeatureVector(int size) {
    this.m_dims = new int[size];
    this.m_vals = new double[size];
  }

  /**
   * Creates a vector backed by the given arrays. The arrays are not copied
   * and are expected to have the same length.
   *
   * @param dims the dimension of each feature
   * @param vals the value of each feature, parallel to {@code dims}
   */
  public FeatureVector(int[] dims, double[] vals) {
    this.m_dims = dims;
    this.m_vals = vals;
  }

  /**
   * Returns the cosine similarity between this vector and {@code v}: the dot
   * product over the dimensions both vectors contain, divided by the product
   * of their L2 norms. Features are matched by dimension, so neither vector
   * needs to be sorted and the two may differ in size. Each dimension is
   * assumed to occur at most once per vector.
   *
   * @param v the vector to compare with
   * @return the cosine similarity, or {@code NaN} if either vector has an L2
   *         norm of zero
   */
  public double getCosine(FeatureVector v) {
    // Index this vector's values by dimension so the features of v can be
    // matched no matter in which order either vector stores them.
    java.util.Map<Integer, Double> ownValues =
        new java.util.HashMap<Integer, Double>();
    for (int i = 0; i < this.size(); i++) {
      Integer dim = Integer.valueOf(m_dims[i]);
      Double seen = ownValues.get(dim);
      ownValues.put(dim, seen == null ? m_vals[i] : seen + m_vals[i]);
    }

    double dotProduct = 0.0;
    for (int i = 0; i < v.size(); i++) {
      Double ownValue = ownValues.get(Integer.valueOf(v.getDimAt(i)));
      if (ownValue != null) {
        dotProduct += v.getValueAt(i) * ownValue;
      }
    }
    return dotProduct / (this.getL2Norm() * v.getL2Norm());
  }

  /**
   * Returns the dimension of the feature stored at the given array position.
   *
   * @param index the position in the feature arrays (not a dimension)
   * @return the dimension stored at that position
   */
  public int getDimAt(int index) {
    return m_dims[index];
  }

  /**
   * Returns the linear norm factor of this vector's values, i.e. the sum of
   * its values. No absolute values are taken, so this equals the L1 norm only
   * when no value is negative.
   *
   * @return the sum of all values
   */
  public double getL1Norm() {
    double sum = 0.0;
    for (double value : m_vals) {
      sum += value;
    }
    return sum;
  }

  /**
   * Returns the L2 norm factor of this vector's values, i.e. the square root
   * of the sum of the squared values.
   *
   * @return the L2 norm
   */
  public double getL2Norm() {
    double squareSum = 0.0;
    for (double value : m_vals) {
      squareSum += value * value;
    }
    return Math.sqrt(squareSum);
  }

  /**
   * Returns the value of the feature stored at the given array position.
   *
   * @param index the position in the feature arrays (not a dimension)
   * @return the value stored at that position
   */
  public double getValueAt(int index) {
    return m_vals[index];
  }

  /**
   * Performs a linear normalization to the value 1 by dividing the values by
   * their sum (see {@link #getL1Norm()}).
   */
  public void normalizeL1() {
    normalizeL1(getL1Norm());
  }

  /**
   * Performs a linear normalization by dividing every non-zero value by the
   * given norm, in place.
   *
   * @param norm the divisor applied to the values
   */
  public void normalizeL1(double norm) {
    for (int i = 0; i < m_vals.length; i++) {
      // Zero entries are skipped so that an all-zero vector (whose sum is 0)
      // is not turned into NaNs; negative entries are scaled like positive
      // ones so the vector is scaled uniformly.
      if (m_vals[i] != 0.0) {
        m_vals[i] /= norm;
      }
    }
  }

  /**
   * Performs an L2 normalization to the value 1: every value is divided by
   * the vector's L2 norm, in place, so that the resulting L2 norm is 1 and
   * the signs of the values are preserved. A vector whose L2 norm is zero is
   * left unchanged.
   */
  public void normalizeL2() {
    double norm = getL2Norm();
    if (norm == 0.0) {
      // Nothing to scale; dividing would only produce NaNs.
      return;
    }
    for (int i = 0; i < m_vals.length; i++) {
      m_vals[i] /= norm;
    }
  }

  /**
   * Replaces the features of this vector. The arrays are not copied and are
   * expected to have the same length.
   *
   * @param dims the dimension of each feature
   * @param vals the value of each feature, parallel to {@code dims}
   */
  public void setFeatures(int[] dims, double[] vals) {
    this.m_dims = dims;
    this.m_vals = vals;
  }

  /**
   * Returns the number of features, taken from the dimension array.
   *
   * @return the number of features
   */
  public int size() {
    return m_dims.length;
  }

  /**
   * Returns the features as space-separated {@code dimension:value} pairs in
   * storage order, e.g. {@code "1:0.5 7:2.0"}.
   *
   * @return the textual form of this vector; empty if it has no features
   */
  @Override
  public String toString() {
    StringBuilder text = new StringBuilder();
    for (int i = 0; i < m_vals.length; i++) {
      if (i > 0) {
        text.append(' ');
      }
      text.append(m_dims[i]).append(':').append(m_vals[i]);
    }
    return text.toString();
  }
}