Download: Smile.zip (3.16 MB)

# 4. CSV inspection (per-file shape, column names, missing-value percentages)
# Builds `csv_summaries`: one entry per *.csv found recursively under ROOT.
# Successful reads are keyed by the path relative to ROOT; failures are keyed
# by the path as yielded by rglob and hold only an 'error' message.
# NOTE(review): each file is read in full, not just its first rows.
csv_summaries = {}
for p in ROOT.rglob('*.csv'):
    try:
        df = pd.read_csv(p)
        csv_summaries[str(p.relative_to(ROOT))] = {
            'rows': len(df),
            'cols': len(df.columns),
            'col_names': list(df.columns),
            # Fraction of NaN cells per column, scaled to a percentage.
            'missing_perc': (df.isna().mean() * 100).to_dict(),
        }
    except Exception as e:
        # Best-effort scan: record why this file failed and keep going.
        csv_summaries[str(p)] = {'error': str(e)}

# 2. SHA256 hashes (detect duplicates)
# Builds `hashes`: hex digest -> list of ROOT-relative paths whose contents
# produce that digest. A digest listing more than one path marks files with
# identical contents.
hashes = {}
for p in ROOT.rglob('*'):
    if p.is_file():
        h = hashlib.sha256()
        with p.open('rb') as f:
            # Feed the file in 8192-byte chunks instead of reading it whole.
            while chunk := f.read(8192):
                h.update(chunk)
        dig = h.hexdigest()
        hashes.setdefault(dig, []).append(p.relative_to(ROOT))
# Stray page text that was fused onto this line: Download- smile.zip -3.16 MB-

# Store the per-CSV summaries under the 'csv_summaries' key of `out`.
# NOTE(review): `out` is defined outside this excerpt — presumably the
# overall report mapping; confirm against the rest of the script.
out['csv_summaries'] = csv_summaries