001 /*
002 * JNI_SVM-light - A Java Native Interface for SVM-light
003 *
004 * Copyright (C) 2005
005 * Tom Crecelius & Martin Theobald
006 * Max-Planck Institute for Computer Science
007 *
008 * This program is free software; you can redistribute it and/or modify it under
009 * the terms of the GNU General Public License as published by the Free Software
010 * Foundation.
011 *
012 * This program is distributed in the hope that it will be useful, but WITHOUT
013 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
014 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
015 * details.
016 *
017 * You should have received a copy of the GNU General Public License along with
018 * this program; if not, write to the Free Software Foundation, Inc., 51
019 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
020 */
021
022 package jnisvmlight;
023
024 /**
025 * A feature vector. Features are dimension-value pairs. This class implements a simple
026 * dictionary data structure to map dimensions onto their values. Note that for
027 * convenience, features do not have to be sorted according to their dimensions at
028 * this point. The SVMLightTrainer class has an option for sorting input vectors
029 * prior to training.
030 *
031 * @author Tom Crecelius & Martin Theobald
032 */
033 public class FeatureVector implements java.io.Serializable {
034
035 protected int[] m_dims;
036
037 protected double[] m_vals;
038
039 public FeatureVector(int size) {
040 this.m_dims = new int[size];
041 this.m_vals = new double[size];
042 }
043
044 public FeatureVector(int[] dims, double[] vals) {
045 this.m_dims = dims;
046 this.m_vals = vals;
047 }
048
049 /**
050 * Returns the cosine similarity between two feature vectors.
051 */
052 public double getCosine(FeatureVector v) {
053 double cosine = 0.0;
054 int dim;
055 double q_i, d_i;
056 for (int i = 0; i < Math.min(this.size(), v.size()); i++) {
057 dim = v.getDimAt(i);
058 q_i = v.getValueAt(dim);
059 d_i = this.getValueAt(dim);
060 cosine += q_i * d_i;
061 }
062 return cosine / (this.getL2Norm() * v.getL2Norm());
063 }
064
065 public int getDimAt(int index) {
066 return m_dims[index];
067 }
068
069 /**
070 * Returns the linear norm factor of this vector's values (i.e., the sum of
071 * it's values).
072 */
073 public double getL1Norm() {
074 double sum = 0.0;
075 for (int i = 0; i < m_vals.length; i++) {
076 sum += m_vals[i];
077 }
078 return sum;
079 }
080
081 /**
082 * Returns the L2 norm factor of this vector's values.
083 */
084 public double getL2Norm() {
085 double square_sum = 0.0;
086 for (int i = 0; i < m_vals.length; i++) {
087 square_sum += (m_vals[i] * m_vals[i]);
088 }
089 return Math.sqrt(square_sum);
090 }
091
092 public double getValueAt(int index) {
093 return m_vals[index];
094 }
095
096 /**
097 * Performs a linear normalization to the value 1.
098 */
099 public void normalizeL1() {
100 normalizeL1(getL1Norm());
101 }
102
103 /**
104 * Performs a linear normalization to the given norm value.
105 */
106 public void normalizeL1(double norm) {
107 for (int i = 0; i < m_vals.length; i++) {
108 if (m_vals[i] > 0) {
109 m_vals[i] /= norm;
110 }
111 }
112 }
113
114 /**
115 * Performs an L2 normalization to the value 1.
116 */
117 public void normalizeL2() {
118 double norm = Math.pow(getL2Norm(), 2);
119 for (int i = 0; i < m_vals.length; i++) {
120 m_vals[i] = Math.pow(m_vals[i], 2) / norm;
121 }
122 }
123
124 public void setFeatures(int[] dims, double[] vals) {
125 this.m_dims = dims;
126 this.m_vals = vals;
127 }
128
129 public int size() {
130 return m_dims.length;
131 }
132
133 public String toString() {
134 String s = "";
135 for (int i = 0; i < m_vals.length; i++) {
136 s += "" + m_dims[i] + ":" + m_vals[i] + ""
137 + (i < m_vals.length - 1 ? " " : "");
138 }
139 s += "";
140 return s;
141 }
142 }