View Javadoc

1   package org.kit.furia.fragment;
2   
3   import java.io.StringReader;
4   import java.util.HashMap;
5   import java.util.Iterator;
6   import java.util.LinkedList;
7   import java.util.List;
8   import java.util.ListIterator;
9   import java.util.Map;
10  
11  import org.ajmm.obsearch.exception.OBException;
12  import org.ajmm.obsearch.ob.OBShort;
13  import org.kit.furia.misc.IntegerHolder;
14  
15  import com.sleepycat.bind.tuple.TupleInput;
16  import com.sleepycat.bind.tuple.TupleOutput;
17  
18  /*
19   Furia-chan: An Open Source software license violation detector.    
20   Copyright (C) 2008 Kyushu Institute of Technology
21  
22   This program is free software: you can redistribute it and/or modify
23   it under the terms of the GNU General Public License as published by
24   the Free Software Foundation, either version 3 of the License, or
25   (at your option) any later version.
26  
27   This program is distributed in the hope that it will be useful,
28   but WITHOUT ANY WARRANTY; without even the implied warranty of
29   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
30   GNU General Public License for more details.
31  
32   You should have received a copy of the GNU General Public License
33   along with this program.  If not, see <http://www.gnu.org/licenses/>.
34   */
35  
36  /**
37   * Implementation of the mtd algorithm. This algorithm exploits hash codes from
38   * string representation of the tree and its complete subtrees. Additionally, it
39   * exploits the fact that once two complete subtrees j and m that belong
40   * respectively to trees T1 and T2, the intersection of j and m and all their
41   * complete subtrees can be calculated in linear time. This requires to have a
42   * value of multiplicity in each node of T1 and T2.
43   * So the expected running time is around O(|T1|) (where |T1| the size of the smallest tree)
44   * @author Arnoldo Jose Muller Molina
45   */
46  
47  public final class OBFragment implements OBShort {
48  
49      private Map < MTDFragmentAST, Tuple > mapS;
50  
51      private Map < String, IntegerHolder > mapN;
52  
53      private int maxId;
54  
55      private MTDFragmentAST tree;
56      
57      public OBFragment(){
58          
59      }
60  
61      public OBFragment(String x) throws OBException {
62          updateTree(x);
63      }
64  
65      /**
66       * Internal method that updates the Tree from the String
67       * @throws OBException
68       */
69      protected final void updateTree(String x) throws OBException {
70          tree = parseTree(x);
71  
72          mapS = new HashMap < MTDFragmentAST, Tuple >();
73          mapN = new HashMap < String, IntegerHolder >();
74          decorate(tree, new IntegerHolder(0));
75      }
76  
77      public static final  MTDFragmentAST parseTree(String x)
78              throws FragmentParseException {
79          try {
80              FragmentLexer lexer = new FragmentLexer(new StringReader(x));
81              FragmentParser parser = new FragmentParser(lexer);
82              parser.setASTNodeClass("org.kit.furia.fragment.MTDFragmentAST");
83              parser.fragment();
84              MTDFragmentAST t = (MTDFragmentAST) parser.getAST();
85              t.update();
86              return t;
87          } catch (Exception e) {
88              throw new FragmentParseException(x, e);
89          }
90      }
91  
92      /**
93       * Returns the size (in nodes) of the tree.
94       * @return The size of the tree.
95       * @throws OBException
96       *                 If something goes wrong.
97       */
98      public final int size() throws OBException {
99          return tree.getSize();
100     }
101 
102     /**
103      * @return A String representation of the tree.
104      */
105     public final String toString() {
106         String res = ":(";
107         try {
108             res = tree.toFuriaChanTree() + "|" + tree.getSize() + "|";
109         } catch (Exception e) {
110             assert false;
111         }
112         return res;
113     }
114 
115     /**
116      * Re-creates this object from the given byte stream
117      * @param in
118      *                A byte stream with the data that must be loaded.
119      * @see org.ajmm.obsearch.Storable#load(com.sleepycat.bind.tuple.TupleInput)
120      */
121     public final void load(TupleInput in) throws OBException {
122         short size = in.readShort();
123         updateTree(in.readBytes(size));
124     }
125 
126     /**
127      * Stores this object into the given byte stream.
128      * @param out
129      *                The byte stream to be used
130      * @see org.ajmm.obsearch.Storable#store(com.sleepycat.bind.tuple.TupleOutput)
131      */
132     public final void store(TupleOutput out) {
133         String str = tree.toFuriaChanTree();
134         out.writeShort(str.length());
135         out.writeBytes(str);
136     }
137 
138     private void decorate(MTDFragmentAST t, IntegerHolder id) {
139         if (t == null) {
140             return;
141         }
142         t.update();
143         Tuple tMapped = mapS.get(t);
144         if (tMapped != null) {
145             assert tMapped.tree.hashCode() == t.hashCode();
146             tMapped.repetitions.inc();
147 
148         } else {
149             tMapped = new Tuple();
150             tMapped.repetitions = new IntegerHolder(1);
151             tMapped.id = id.getValue();
152             tMapped.tree = t;
153             mapS.put(t, tMapped);
154             id.inc();
155             maxId = id.getValue();
156         }
157         t.id = tMapped.id;
158         t.repetitions = tMapped.repetitions;
159         assert mapS.get(t).tree.equals(t) : mapS.get(t).tree.prettyPrint()
160                 + " != " + t.prettyPrint();
161         assert t.equals(tMapped.tree);
162         IntegerHolder node = mapN.get(t.getText());
163         if (node != null) {
164             node.inc();
165         } else {
166             mapN.put(t.getText(), new IntegerHolder(1));
167         }
168         decorate(t.getLeft(), id);
169         decorate(t.getSibbling(), id);
170     }
171 
172     public final short distance(OBShort other) {
173         OBFragment b = (OBFragment) other;
174         // use the smallest tree for the match!
175         if (this.tree.getSize() < b.tree.getSize()) {
176             return this.mtd( b);
177         } else {
178             return b.mtd( this);
179         }
180         /*if (this.tree.getSize() < b.tree.getSize()) {
181             return (short)this.ds( b);
182         } else {
183             return (short)b.ds( this);
184         }*/
185     }
186 
187     public final short mtd(OBFragment other) {
188 
189         int res =  (ds(other) + dn(other) ) / 2;
190         //assert res == distance(this.tree, other.tree);
191         return (short)res;
192     }
193 
194     private int dnAux(OBFragment other) {
195         int res = 0;
196         Iterator < Map.Entry < String, IntegerHolder >> keysIt = mapN
197                 .entrySet().iterator();
198         while (keysIt.hasNext()) {
199             Map.Entry < String, IntegerHolder > entry = keysIt.next();
200             IntegerHolder n2 = other.mapN.get(entry.getKey());
201             if (n2 != null) {
202                 IntegerHolder n1 = entry.getValue();
203                 res += Math.min(n1.getValue(), n2.getValue());
204             }
205         }
206         return res;
207     }
208 
209     public int ds(OBFragment other) {
210         boolean[] visited = new boolean[maxId];
211         int res = ds(tree, other, visited);
212         return (tree.getSize() + other.tree.getSize()) - (2 * res);
213     }
214 
215     public int dn(OBFragment other) {
216         int res = dnAux(other);
217         return (tree.getSize() + other.tree.getSize()) - (2 * res);
218     }
219 
220     /*
221      * public int ftedAux(MTDFragmentASTHolder other) { boolean[] visited = new
222      * boolean[maxId]; int res = ds(tree, other, visited); return
223      * (tree.getSize() + other.tree.getSize()) - (2 * res); }
224      */
225 
226     private int ds(MTDFragmentAST current, OBFragment other, boolean[] visited) {
227         if (current == null) {
228             return 0;
229         }
230         int res = 0;
231         if (!visited[current.id]) {
232 
233             Tuple m2 = other.mapS.get(current);
234 
235             if (m2 != null) {
236                 assert current.equals(m2.tree) : " Tree A:"
237                         + current.prettyPrint() + " Tree B: "
238                         + m2.tree.prettyPrint() + " hash A "
239                         + current.hashCode() + " hash B " + m2.tree.hashCode();
240                 res = update(current, m2.tree, visited);
241             } else {
242                 res = ds(current.getLeft(), other, visited);
243             }
244         }
245         return res + ds(current.getSibbling(), other, visited);
246     }
247 
248     private int update(MTDFragmentAST current, MTDFragmentAST current2,
249             boolean[] visited) {
250         assert current != null;
251         if (visited[current.id]) {
252             return 0;
253         }
254         visited[current.id] = true;
255         int res = Math.min(current.repetitions.getValue(), current2.repetitions
256                 .getValue());
257         return res + updateAux(current.getLeft(), current2.getLeft(), visited);
258     }
259 
260     private int updateAux(MTDFragmentAST current, MTDFragmentAST current2,
261             boolean[] visited) {
262         if (current == null) {
263             return 0;
264         }
265         assert current != null && current2 != null;
266         int res = 0, res1 = 0, res2 = 0;
267 
268         if (!visited[current.id]) {
269 
270             visited[current.id] = true;
271             res = Math.min(current.repetitions.getValue(), current2.repetitions
272                     .getValue());
273             res1 = updateAux(current.getLeft(), current2.getLeft(), visited);
274 
275         }
276         res2 = updateAux(current.getSibbling(), current2.getSibbling(), visited);
277         return res + res1 + res2;
278 
279     }
280 
281     private class Tuple {
282         public MTDFragmentAST tree;
283 
284         public int id;
285 
286         public IntegerHolder repetitions;
287 
288         public String toString() {
289             return tree.toString() + " " + id + " " + repetitions.getValue();
290         }
291     }
292 
293     public MTDFragmentAST getTree() {
294         return tree;
295     }
296 
297     public void setTree(MTDFragmentAST tree) {
298         this.tree = tree;
299     }
300 
301     /**
302      * Returns true of this.tree.equals(obj.tree). For this distance function
303      * this.distance(obj) == 0 implies that this.equals(obj) == true
304      * @param obj
305      *                Object to compare.
306      * @return true if this == obj
307      */
308     public final boolean equals(final Object obj) {
309         return tree.equals(obj);
310     }
311 
312     /**
313      * A hashCode based on the string representation of the tree.
314      * @return a hash code of the string representation of this tree.
315      */
316     public final int hashCode() {
317         return this.tree.hashCode();
318     }
319     
320     /**
321      * Calculates the distance between trees a and b. Used to validate the
322      * distance implementation. Only to be executed in assert mode.
323      * @param a
324      *                The first tree (should be smaller than b)
325      * @param b
326      *                The second tree
327      * @return The distance of the trees.
328      */
329     private final short distance(FragmentAST a, FragmentAST b) {
330 
331         List < FragmentAST > aExpanded = a.depthFirst();
332         List < FragmentAST > bExpanded = b.depthFirst();
333         List < FragmentAST > bExpanded2 = new LinkedList < FragmentAST >();
334         bExpanded2.addAll(bExpanded);
335         int Na = aExpanded.size() * 2;
336         int Nb = bExpanded.size() * 2;
337 
338         ListIterator < FragmentAST > ait = aExpanded.listIterator();
339         int res = 0;
340         while (ait.hasNext()) {
341             FragmentAST aTree = ait.next();
342             ListIterator < FragmentAST > bit = bExpanded.listIterator();
343             while (bit.hasNext()) {
344                 FragmentAST bTree = bit.next();
345                 if (aTree.equalsTree(bTree)) {
346                     res++;
347                     bit.remove();
348                     break;
349                 }
350             }
351             // do the same for the nodes without children
352             bit = bExpanded2.listIterator();
353             while (bit.hasNext()) {
354                 FragmentAST bTree = bit.next();
355                 if (aTree.getText().equals(bTree.getText())) {
356                     res++;
357                     bit.remove();
358                     break;
359                 }
360             }
361         }
362         // return Na - res + Nb - res;
363         // return (Na + Nb) - ( 2 * res);
364         short r = (short) (((Na + Nb) - (2 * res)) / 2);
365         assert r >= 0;
366         return r;
367     }
368 
369 }