View Javadoc

1   package org.kit.furia.io;
2   
3   import java.io.BufferedReader;
4   import java.io.File;
5   import java.io.FileReader;
6   import java.io.FilenameFilter;
7   import java.io.IOException;
8   import java.util.Iterator;
9   import java.util.NoSuchElementException;
10  
11  import org.ajmm.obsearch.OB;
12  import org.ajmm.obsearch.exception.OBException;
13  import org.apache.log4j.Logger;
14  import org.kit.furia.Document;
15  
16  /*
17   Furia-chan: An Open Source software license violation detector.    
18   Copyright (C) 2007 Kyushu Institute of Technology
19  
20   This program is free software: you can redistribute it and/or modify
21   it under the terms of the GNU General Public License as published by
22   the Free Software Foundation, either version 3 of the License, or
23   (at your option) any later version.
24  
25   This program is distributed in the hope that it will be useful,
26   but WITHOUT ANY WARRANTY; without even the implied warranty of
27   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
28   GNU General Public License for more details.
29  
30   You should have received a copy of the GNU General Public License
31   along with this program.  If not, see <http://www.gnu.org/licenses/>.
32   */
33  
34  /**
35   * AbstractFuriaInput is in charge of reading fragment files and creating
36   * documents out of them.
37   * @author Arnoldo Jose Muller Molina
38   * @since 0
39   */
40  
41  public abstract class AbstractFuriaInput < O extends OB > {
42  
43      private static final Logger logger = Logger.getLogger(AbstractFuriaInput.class);
44  
45      private File directory;
46      
47      /**
48       * Creates a new fragment file reader based on the given directory.
49       * @param directory
50       */
51      public AbstractFuriaInput(File directory){
52          this.directory = directory;
53      }
54      
55      /**
56       * Reads and creates an O object from the given string.
57       * @param data
58       *                The string to be parsed
59       * @return an O object that was created from data
60       * @throws OBException if something goes wrong when parsing the data.
61       */
62      protected abstract O readObjectFromStringLine(String data) throws OBException;
63  
64      /**
65       * The name of the file that holds the fragments (words) inside a directory.
66       */
67      public static final String fragmentFileName = "fragments";
68  
69      /**
70       * This method receives a directory and returns an iterator that will lazily
71       * create documents from the given directory. The directory is composed of
72       * directories in which a file called "fragments" was previously created.
73       * @param directory
74       *                that will be processed
75       * @throws IOException
76       *                 If the given directory does not exist.
77       * @return An iterator that will return one by one all the documents found
78       *         in the given directory.
79       */
80      public Iterator < Document < O >> getDocumentsFromDirectory()
81              throws IOException {
82          if (!directory.exists()) {
83              throw new IOException("Directory does not exist: " + directory);
84          }
85          return new FuriaInputIterator(directory);
86      }
87  
88      /**
89       * Parses a file that is in the furia-chan fragment file format: "#" starts
90       * a comment and it is ignored. Every object is a string separated by a
91       * newline. The subclass knows how to interpret this line, and an
92       * appropriate O object will be generated from this line.
93       * @param fragments
94       *                A file in which fragment files can be found.
95       * @param id
96       *                The id that the document will hold.
97       * @return A document of O objects created from the given file.
98       * @throws IOException
99       *                 If fragments does not exist, or any other error occurs.
100      */
101     public Document < O > getDocument(String id, File fragments)
102             throws IOException, OBException {
103         if (!fragments.exists()) {
104             throw new IOException("File does not exist: " + fragments);
105         }
106         Document < O > doc = new Document < O >(id);
107         BufferedReader r = new BufferedReader(new FileReader(fragments));
108         String re = r.readLine();
109         while (re != null) {
110             if (isParsableLine(re)) {
111                 String[] tuple = re.split("\t");
112                 int multiplicity = Integer.parseInt(tuple[0]);
113                 O word = readObjectFromStringLine(tuple[1]);
114                 doc.setWord(word,multiplicity);
115             }
116             re = r.readLine();
117         }
118         r.close();
119         return doc;
120     }
121 
122     /**
123      * Returns true if the given line is not null or if it is not a comment.
124      * @return true if the given line can be parsed.
125      */
126     public boolean isParsableLine(final String line) {
127         return  !("".equals(line.trim()) || (line.startsWith("#")));
128     }
129 
130     /**
131      * Iterator class that creates Documents from the given directory.
132      * @author Arnoldo Jose Muller Molina
133      *
134      */
135     private class FuriaInputIterator implements Iterator < Document < O >> {
136         
137         private class FragmentsFileFilter implements FilenameFilter{
138 
139             public boolean accept(File dir, String name) {
140                 return name.equals(fragmentFileName);
141             }            
142         }
143         
144         private FragmentsFileFilter  fileFilter = new FragmentsFileFilter();
145         
146         /**
147          * The documents that will be lazily processed.
148          */
149         private File[] documents;
150 
151         /**
152          * The current index that will be processed.
153          */
154         private int i;
155 
156         /**
157          * Builds an iterator of applications. If the given directory
158          * has a "fragments" file, then the program works in Single app mode.
159          * Otherwise, we run in directory of applications mode.
160          * @param directory
161          */
162         FuriaInputIterator(File directory) {
163             if(directoryOfDirectoriesMode(directory)){
164                 documents = directory.listFiles();
165             }else{
166                 documents = new File[1];
167                 documents[0] = directory;
168             }
169            
170             i = 0;
171             moveTapeToNextValidDocument();
172         }
173         
174         /**
175          * Returns true if the given directory does not have a fragments file.
176          * This means that we will operate on a directory of directories.
177          * @param directory
178          * @return
179          */
180         private boolean directoryOfDirectoriesMode(File directory){
181             File [] all = directory.listFiles(fileFilter);            
182             return all.length !=1;
183         }
184 
185         /**
186          * Moves to the next valid document.
187          */
188         private void moveTapeToNextValidDocument() {
189             while (i < documents.length) {
190                 if (documents[i].isDirectory()) {
191                     File data = new File(documents[i], fragmentFileName);
192                     if (data.exists()) {
193                         break;
194                     }
195                 }
196                 i++;
197             }
198         }
199 
200         /**
201          * Returns the next document.
202          * @return The next document.
203          * @throws NoSuchElementException
204          *                 if {@link #hasNext()} == false or if the current
205          *                 element could not be processed.
206          */
207         public Document < O > next() {
208             if (!hasNext()) {
209                 throw new NoSuchElementException("No more elements!");
210             }
211             File data = new File(documents[i], fragmentFileName);
212             String name = documents[i].getName();
213             Document < O > res = null;
214             try {
215                 res = getDocument(name, data);
216                 i++;
217                 moveTapeToNextValidDocument();
218             } catch (Exception e) {
219                 if(logger.isDebugEnabled()){
220                     logger.debug(e);
221                 }
222                 throw new NoSuchElementException(res.toString());
223             }
224             return res;
225         }
226 
227         /**
228          * The remove operation does not make sense in this Iterator. This
229          * method does not do anything.
230          */
231         public void remove() {
232             assert false;
233         }
234 
235         public boolean hasNext() {
236             return i < documents.length;
237         }
238     }
239 }