forked from karar-hayder/Replicheck
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcli.py
More file actions
130 lines (109 loc) · 3.34 KB
/
cli.py
File metadata and controls
130 lines (109 loc) · 3.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#!/usr/bin/env python3
import argparse
import sys
from pathlib import Path
from tqdm import tqdm
from replicheck import CodeParser, DuplicateDetector, Reporter
from replicheck.utils import find_files
def _similarity_threshold(value):
    """Convert *value* to a float and require it to lie within [0.0, 1.0].

    Intended as the ``type=`` callable for ``--min-sim`` so that argparse
    reports non-numeric or out-of-range input as a normal usage error.

    Raises:
        argparse.ArgumentTypeError: if *value* is not a float or falls
            outside the closed interval [0.0, 1.0].
    """
    try:
        threshold = float(value)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"invalid float value: {value!r}"
        ) from None
    # The chained comparison is also False for NaN, so NaN is rejected too.
    if not 0.0 <= threshold <= 1.0:
        raise argparse.ArgumentTypeError(
            f"must be between 0.0 and 1.0, got {value!r}"
        )
    return threshold


def parse_args(argv=None):
    """Build the Replicheck argument parser and parse the command line.

    Args:
        argv: Optional sequence of argument strings. When ``None`` (the
            default) argparse falls back to ``sys.argv[1:]``, so existing
            zero-argument calls behave as before.

    Returns:
        argparse.Namespace with the attributes ``path``, ``min_sim``,
        ``min_size``, ``extensions`` (still the raw comma-separated string),
        ``output_format``, ``output_file`` and ``ignore_dirs``.

    Raises:
        SystemExit: raised by argparse on invalid or missing arguments
            (including a ``--min-sim`` value outside 0.0-1.0) and on
            ``--help``.
    """
    parser = argparse.ArgumentParser(
        description="Replicheck - Code Duplication Detection Tool"
    )
    parser.add_argument(
        "--path",
        required=True,
        help="Directory to analyze",
    )
    parser.add_argument(
        "--min-sim",
        # Validate the range the help text promises instead of accepting
        # any float.
        type=_similarity_threshold,
        default=0.8,
        help="Minimum similarity threshold (0.0 to 1.0)",
    )
    parser.add_argument(
        "--min-size",
        type=int,
        default=50,
        help="Minimum size of code blocks to compare (in tokens)",
    )
    parser.add_argument(
        "--extensions",
        default=".py,.js,.jsx",
        help="Comma-separated file extensions to analyze",
    )
    parser.add_argument(
        "--output-format",
        choices=["text", "json"],
        default="text",
        help="Output format (text or json)",
    )
    parser.add_argument(
        "--output-file",
        help="Path to save the report (if not specified, prints to console)",
    )
    parser.add_argument(
        "--ignore-dirs",
        nargs="+",
        default=[
            ".git",
            ".venv",
            "venv",
            "env",
            "ENV",
            "build",
            "dist",
            "node_modules",
        ],
        help="Directories to ignore",
    )
    return parser.parse_args(argv)
def _parse_extensions(raw):
    """Split a comma-separated extension string into a clean set.

    Whitespace around each entry is stripped and empty entries (produced by
    trailing or doubled commas) are dropped, so ``".py, .js,"`` yields
    ``{".py", ".js"}``.
    """
    stripped = (piece.strip() for piece in raw.split(","))
    return {ext for ext in stripped if ext}


def main():
    """Run the duplicate-detection pipeline for the command-line arguments.

    The steps are: parse arguments, collect candidate files under the given
    path, parse each file into code blocks, search the blocks for
    duplicates, and hand the result to the reporter.

    Returns:
        int: 0 on success, including the cases where no files or no code
        blocks were found; 1 when the path does not exist or an exception
        escapes the pipeline.
    """
    args = parse_args()
    try:
        path = Path(args.path)
        if not path.exists():
            print(f"Error: Path '{path}' does not exist")
            return 1

        parser = CodeParser()
        detector = DuplicateDetector(
            min_similarity=args.min_sim, min_size=args.min_size
        )
        reporter = Reporter(output_format=args.output_format)

        print("\nFinding files...")
        files = find_files(
            path,
            # Normalise user input: a plain split(",") would pass entries
            # such as " .js" or "" straight through to find_files.
            extensions=_parse_extensions(args.extensions),
            ignore_dirs=args.ignore_dirs,
        )
        print(f"Found {len(files)} files to analyze")
        if not files:
            print("No files found to analyze. Check your path and extensions.")
            return 0

        print("\nParsing files...")
        all_blocks = []
        for file_path in tqdm(files, desc="Parsing"):
            # A failure in one file is reported and skipped so that the
            # remaining files are still analyzed.
            try:
                blocks = parser.parse_file(file_path)
                all_blocks.extend(blocks)
            except Exception as e:
                print(f"Warning: Error parsing {file_path}: {e}")
        print(f"\nFound {len(all_blocks)} code blocks to analyze")
        if not all_blocks:
            print("No code blocks found to analyze.")
            return 0

        print("\nAnalyzing code blocks...")
        duplicates = detector.find_duplicates(all_blocks)

        # The reporter receives None when no output file was requested.
        output_path = Path(args.output_file) if args.output_file else None
        reporter.generate_report(duplicates, output_path)

        if duplicates:
            print(f"\nFound {len(duplicates)} duplicate code blocks")
            if output_path:
                print(f"Report written to: {output_path}")
        else:
            print("\nNo duplicate code blocks found")
        return 0
    except Exception as e:
        # Top-level boundary: turn any unexpected failure into a message
        # and a non-zero exit status instead of a traceback.
        print(f"Error: {e}")
        return 1
# Script entry point: run the CLI and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())