@@ -95,22 +95,49 @@ def main():
95
95
print (f"Input file: { input_file } " )
96
96
print (f"Output file: { output_file } " )
97
97
98
- # Read names from input file and generate CSV
99
- with open (input_file , 'r' ) as infile , open (output_file , 'w' , newline = '' ) as outfile :
98
+ # Read names from input file and collect unique IDs
99
+ seen_ids = set ()
100
+ unique_entries = []
101
+ duplicate_names = []
102
+
103
+ with open (input_file , 'r' ) as infile :
104
+ for line_num , line in enumerate (infile , 1 ):
105
+ name = line .strip ()
106
+ if name : # Skip empty lines
107
+ name_id = keccak256_to_uint (name )
108
+ if name_id not in seen_ids :
109
+ seen_ids .add (name_id )
110
+ unique_entries .append ((name_id , duration_seconds , name ))
111
+ else :
112
+ duplicate_names .append ((line_num , name ))
113
+
114
+ # Write CSV with unique entries only
115
+ with open (output_file , 'w' , newline = '' ) as outfile :
100
116
csv_writer = csv .writer (outfile )
101
117
102
118
# Write header
103
119
csv_writer .writerow (['id' , 'duration' ])
104
120
105
- # Process each name
106
- for line in infile :
107
- name = line .strip ()
108
- if name : # Skip empty lines
109
- name_id = keccak256_to_uint (name )
110
- csv_writer .writerow ([name_id , duration_seconds ])
121
+ # Write unique entries
122
+ for name_id , duration , _ in unique_entries :
123
+ csv_writer .writerow ([name_id , duration ])
124
+
125
+ # Report results
126
+ total_names = sum (1 for line in open (input_file ) if line .strip ())
127
+ unique_count = len (unique_entries )
128
+ duplicate_count = len (duplicate_names )
111
129
112
130
print (f"CSV file generated: { output_file } " )
113
- print (f"Processed { sum (1 for line in open (input_file ) if line .strip ())} names" )
131
+ print (f"Total names processed: { total_names } " )
132
+ print (f"Unique entries written: { unique_count } " )
133
+ print (f"Duplicates found and skipped: { duplicate_count } " )
134
+
135
+ if duplicate_names :
136
+ print ("\n Duplicate names found:" )
137
+ for line_num , name in duplicate_names :
138
+ print (f" Line { line_num } : { name } " )
139
+ else :
140
+ print ("No duplicates found." )
114
141
return 0
115
142
116
143
if __name__ == "__main__" :
0 commit comments