View Javadoc

1   /**
2    * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3    */
4   package net.sourceforge.pmd.cpd;
5   
6   import java.io.File;
7   import java.io.FileNotFoundException;
8   import java.io.IOException;
9   import java.util.HashMap;
10  import java.util.HashSet;
11  import java.util.Iterator;
12  import java.util.List;
13  import java.util.Map;
14  import java.util.Set;
15  
16  import net.sourceforge.pmd.util.FileFinder;
17  
18  public class CPD {
19  
20      private Map<String, SourceCode> source = new HashMap<String, SourceCode>();
21      private CPDListener listener = new CPDNullListener();
22      private Tokens tokens = new Tokens();
23      private int minimumTileSize;
24      private MatchAlgorithm matchAlgorithm;
25      private Language language;
26      private boolean skipDuplicates;
27      public static boolean debugEnable = false;
28      private String encoding = System.getProperty("file.encoding");
29  
30  
31      public CPD(int minimumTileSize, Language language) {
32          this.minimumTileSize = minimumTileSize;
33          this.language = language;
34      }
35  
36      public void skipDuplicates() {
37          this.skipDuplicates = true;
38      }
39  
40      public void setCpdListener(CPDListener cpdListener) {
41          this.listener = cpdListener;
42      }
43  
44      public void setEncoding(String encoding) {
45          this.encoding = encoding;
46      }
47  
48      public void go() {
49          TokenEntry.clearImages();
50          matchAlgorithm = new MatchAlgorithm(source, tokens, minimumTileSize, listener);
51          matchAlgorithm.findMatches();
52      }
53  
54      public Iterator<Match> getMatches() {
55          return matchAlgorithm.matches();
56      }
57  
58      public void add(File file) throws IOException {
59          add(1, file);
60      }
61  
62      public void addAllInDirectory(String dir) throws IOException {
63          addDirectory(dir, false);
64      }
65  
66      public void addRecursively(String dir) throws IOException {
67          addDirectory(dir, true);
68      }
69  
70      public void add(List<File> files) throws IOException {
71          for (File f: files) {
72              add(files.size(), f);
73          }
74      }
75  
76      private void addDirectory(String dir, boolean recurse) throws IOException {
77          if (!(new File(dir)).exists()) {
78              throw new FileNotFoundException("Couldn't find directory " + dir);
79          }
80          FileFinder finder = new FileFinder();
81          // TODO - could use SourceFileSelector here
82          add(finder.findFilesFrom(dir, language.getFileFilter(), recurse));
83      }
84  
85      private Set<String> current = new HashSet<String>();
86  
87      private void add(int fileCount, File file) throws IOException {
88  
89          if (skipDuplicates) {
90              // TODO refactor this thing into a separate class
91              String signature = file.getName() + '_' + file.length();
92              if (current.contains(signature)) {
93                  System.err.println("Skipping " + file.getAbsolutePath() + " since it appears to be a duplicate file and --skip-duplicate-files is set");
94                  return;
95              }
96              current.add(signature);
97          }
98  
99          if (!file.getCanonicalPath().equals(new File(file.getAbsolutePath()).getCanonicalPath())) {
100             System.err.println("Skipping " + file + " since it appears to be a symlink");
101             return;
102         }
103 
104         listener.addedFile(fileCount, file);
105         SourceCode sourceCode = new SourceCode(new SourceCode.FileCodeLoader(file, encoding));
106         language.getTokenizer().tokenize(sourceCode, tokens);
107         source.put(sourceCode.getFileName(), sourceCode);
108     }
109 
110     public static Renderer getRendererFromString(String name, String encoding) {
111         if (name.equalsIgnoreCase("text") || name.equals("")) {
112             return new SimpleRenderer();
113         } else if ("xml".equals(name)) {
114             return new XMLRenderer(encoding);
115         }  else if ("csv".equals(name)) {
116             return new CSVRenderer();
117         }  else if ("vs".equals(name)) {
118             return new VSRenderer();
119         }
120         try {
121             return (Renderer) Class.forName(name).newInstance();
122         } catch (Exception e) {
123             System.out.println("Can't find class '" + name + "', defaulting to SimpleRenderer.");
124         }
125         return new SimpleRenderer();
126     }
127 
128     private static boolean findBooleanSwitch(String[] args, String name) {
129         for (int i = 0; i < args.length; i++) {
130             if (args[i].equals(name)) {
131                 return true;
132             }
133         }
134         return false;
135     }
136 
137     private static String findRequiredStringValue(String[] args, String name) {
138         for (int i = 0; i < args.length; i++) {
139             if (args[i].equals(name)) {
140                 return args[i + 1];
141             }
142         }
143         System.out.println("No " + name + " value passed in");
144         usage();
145         throw new RuntimeException();
146     }
147 
148     private static String findOptionalStringValue(String[] args, String name, String defaultValue) {
149         for (int i = 0; i < args.length; i++) {
150             if (args[i].equals(name)) {
151                 return args[i + 1];
152             }
153         }
154         return defaultValue;
155     }
156 
157     public static void main(String[] args) {
158         if (args.length == 0) {
159             usage();
160         }
161 
162         try {
163             boolean skipDuplicateFiles = findBooleanSwitch(args, "--skip-duplicate-files");
164             String languageString = findOptionalStringValue(args, "--language", "java");
165             String formatString = findOptionalStringValue(args, "--format", "text");
166             String encodingString = findOptionalStringValue(args, "--encoding", System.getProperty("file.encoding"));
167             int minimumTokens = Integer.parseInt(findRequiredStringValue(args, "--minimum-tokens"));
168             LanguageFactory f = new LanguageFactory();
169             Language language = f.createLanguage(languageString);
170             Renderer renderer = CPD.getRendererFromString(formatString, encodingString);
171             CPD cpd = new CPD(minimumTokens, language);
172             cpd.setEncoding(encodingString);
173             if (skipDuplicateFiles) {
174                 cpd.skipDuplicates();
175             }
176             /* FIXME: Improve this !!!	*/
177             boolean missingFiles = true;
178             for (int position = 0; position < args.length; position++) {
179                 if (args[position].equals("--files")) {
180                 	cpd.addRecursively(args[position + 1]);
181                 	if ( missingFiles ) {
182                         missingFiles = false;
183                     }
184                 }
185             }
186 
187             if ( missingFiles ) {
188 	            System.out.println("No " + "--files" + " value passed in");
189 	            usage();
190 	            throw new RuntimeException();
191             }
192 
193             cpd.go();
194             System.out.println(renderer.render(cpd.getMatches()));
195         } catch (Exception e) {
196             e.printStackTrace();
197         }
198     }
199 
200     private static void usage() {
201         System.out.println("Usage:");
202         System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens xxx --files xxx [--language xxx] [--encoding xxx] [--format (xml|text|csv|vs)] [--skip-duplicate-files] ");
203         System.out.println("i.e: ");
204         System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens 100 --files c:\\jdk14\\src\\java ");
205         System.out.println("or: ");
206         System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens 100 --files /path/to/c/code --language c ");
207         System.out.println("or: ");
208         System.out.println(" java net.sourceforge.pmd.cpd.CPD --minimum-tokens 100 --encoding UTF-16LE --files /path/to/java/code --format xml");
209     }
210 
211 }