#!/usr/bin/env python3
"""
Employee Comparison Report Generator

Generates a detailed side-by-side HTML report for 2-3 employees.

Usage:
    python3 compare_employees.py --users "Name1" "Name2" "Name3" --months 3
    python3 compare_employees.py --users "Name1" "Name2" --start 2025-11-01 --end 2026-02-18
"""

import argparse
import csv
import json
from collections import defaultdict
from datetime import date, datetime, timedelta
from html import escape
from pathlib import Path
from typing import Dict, List, Optional, Tuple

RAW_DATA_DIR = Path("data/raw")
OUTPUT_DIR = Path("reports")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

USER_COLORS = [
    {'bg': 'rgba(102, 126, 234, 0.8)', 'border': '#667eea', 'light': '#eef0fd'},
    {'bg': 'rgba(249, 115, 22, 0.8)', 'border': '#f97316', 'light': '#fff4ee'},
    {'bg': 'rgba(16, 185, 129, 0.8)', 'border': '#10b981', 'light': '#ecfdf5'},
]

DATE_FMT = '%Y-%m-%d'

# Weekday badge thresholds (hours): below LOW gets a "▼", above HIGH a "▲".
LOW_HOURS_THRESHOLD = 6
HIGH_HOURS_THRESHOLD = 9


# ---------------------------------------------------------------------------
# Data loading & analysis (mirrors generate_employee_report.py conventions)
# ---------------------------------------------------------------------------

def _parse_day(text: str) -> date:
    """Parse a ``YYYY-MM-DD`` string into a ``date`` (raises ValueError)."""
    return datetime.strptime(text, DATE_FMT).date()


def get_date_range(months: int = 3, start_date: Optional[str] = None,
                   end_date: Optional[str] = None) -> Tuple[date, date]:
    """Resolve the reporting period as a ``(start, end)`` pair of dates.

    Args:
        months: Look-back length, counted as 30-day blocks; only used when
            ``start_date`` is not given. A falsy value falls back to 3.
        start_date: Optional ``YYYY-MM-DD`` start of the period.
        end_date: Optional ``YYYY-MM-DD`` end of the period; defaults to today.

    Returns:
        ``(start, end)``. Either bound may be supplied on its own: a lone
        ``start_date`` runs until today, a lone ``end_date`` looks back
        ``months`` from that date.

    Raises:
        ValueError: If a supplied date string is not ``YYYY-MM-DD``.
    """
    end = _parse_day(end_date) if end_date else datetime.now().date()
    if start_date:
        start = _parse_day(start_date)
    else:
        start = end - timedelta(days=(months or 3) * 30)
    return start, end


def _extract_hhmm(timestamp: str) -> Optional[str]:
    """Return the ``HH:MM`` part of a timestamp string, or None if absent.

    The time portion is whatever follows a ``T`` or the first space (or the
    whole string when neither is present). Fractional seconds and any
    ``+hh:mm`` / ``-hh:mm`` / ``Z`` suffix are cut off, so the clock time is
    taken as written and is not converted between time zones.
    """
    if 'T' in timestamp:
        time_part = timestamp.split('T')[1]
    elif ' ' in timestamp:
        time_part = timestamp.split(' ')[1]
    else:
        time_part = timestamp
    for marker in ('+', '-', '.', 'Z'):
        time_part = time_part.split(marker)[0]
    if ':' not in time_part:
        return None
    hours, minutes = time_part.split(':')[:2]
    return f"{hours.zfill(2)}:{minutes.zfill(2)}"


def load_user_data(user_name: str, start_date, end_date) -> List[Dict]:
    """Collect the raw CSV rows of one user within a date range.

    Every ``*.csv`` in ``RAW_DATA_DIR`` whose stem ends in ``_YYYY-MM-DD`` and
    whose date lies in ``[start_date, end_date]`` is scanned. A row is kept
    when its ``user_name`` or ``user_id`` column equals ``user_name``.

    Each kept row gains ``date`` (ISO string taken from the file name),
    ``file_date`` (the same as a ``date``) and, when its ``timestamp`` column
    contains a clock time, ``time`` (``HH:MM``).

    Files that cannot be read are reported and skipped; rows read before the
    failure are kept.
    """
    all_data: List[Dict] = []
    for csv_file in sorted(RAW_DATA_DIR.glob("*.csv")):
        stem = csv_file.stem
        if '_' not in stem:
            continue
        try:
            file_date = _parse_day(stem.split('_')[-1])
        except ValueError:
            continue  # file name does not end in a date
        if not (start_date <= file_date <= end_date):
            continue
        # Normalised (zero-padded) form: downstream code sorts these strings
        # and slices the first 7 characters to get the month.
        date_str = file_date.isoformat()
        try:
            # newline='' is what the csv module requires; utf-8-sig also
            # tolerates a BOM. NOTE(review): assumes the exports are UTF-8.
            with open(csv_file, 'r', encoding='utf-8-sig', newline='') as f:
                for row in csv.DictReader(f):
                    if user_name not in (row.get('user_name'), row.get('user_id')):
                        continue
                    row['date'] = date_str
                    row['file_date'] = file_date
                    timestamp = row.get('timestamp') or ''
                    if timestamp:
                        hhmm = _extract_hhmm(timestamp)
                        if hhmm is not None:
                            row['time'] = hhmm
                    all_data.append(row)
        except (OSError, UnicodeError, csv.Error) as exc:
            print(f"  WARNING: could not read {csv_file.name}: {exc}")
    return all_data


def analyze_daily_activity(data: List[Dict]) -> List[Dict]:
    """Aggregate raw rows into one stats dict per calendar day.

    Every row adds 1 to ``total_minutes``; rows whose ``presence`` is
    ``'active'`` also add 1 to ``active_minutes`` and widen the
    ``first_active`` / ``last_active`` window with their ``time`` value.

    Returns:
        Day dicts sorted by date, each with ``date``, ``day_of_week``,
        ``total_minutes``, ``active_minutes``, ``first_active``,
        ``last_active``, ``active_hours`` and ``activity_rate`` (percent).
    """
    daily: Dict[str, Dict] = {}
    for row in data:
        day_key = row.get('date')
        if not day_key:
            continue
        stats = daily.get(day_key)
        if stats is None:
            # The weekday is resolved once per day rather than once per row.
            stats = daily[day_key] = {
                'date': day_key,
                'day_of_week': datetime.strptime(day_key, DATE_FMT).strftime('%A'),
                'total_minutes': 0,
                'active_minutes': 0,
                'first_active': None,
                'last_active': None,
            }
        stats['total_minutes'] += 1
        if row.get('presence') != 'active':
            continue
        stats['active_minutes'] += 1
        stamp = row.get('time', '')
        if stamp:
            # Zero-padded HH:MM strings order correctly as plain strings.
            if not stats['first_active'] or stamp < stats['first_active']:
                stats['first_active'] = stamp
            if not stats['last_active'] or stamp > stats['last_active']:
                stats['last_active'] = stamp

    result = []
    for _, stats in sorted(daily.items()):
        stats['active_hours'] = round(stats['active_minutes'] / 60, 2)
        stats['activity_rate'] = (
            round(stats['active_minutes'] / stats['total_minutes'] * 100, 1)
            if stats['total_minutes'] else 0
        )
        result.append(stats)
    return result


def group_by_week(daily_stats: List[Dict]) -> List[Dict]:
    """Split date-ordered day dicts into runs that share an ISO week.

    Runs are keyed on (ISO year, ISO week), so two days that carry the same
    week number in different years are never merged into one week.
    """
    if not daily_stats:
        return []
    weeks: List[Dict] = []
    current: List[Dict] = []
    current_key = None
    for day in daily_stats:
        iso = datetime.strptime(day['date'], DATE_FMT).date().isocalendar()
        key = (iso[0], iso[1])
        if current and key != current_key:
            weeks.append(_make_week(current_key[1], current))
            current = []
        current_key = key
        current.append(day)
    if current:
        weeks.append(_make_week(current_key[1], current))
    return weeks


def _make_week(week_num, days):
    """Build the summary dict for one week from its (non-empty) day list."""
    return {
        'week_num': week_num,
        'start_date': days[0]['date'],
        'end_date': days[-1]['date'],
        'days': days,
        'total_hours': round(sum(d['active_hours'] for d in days), 2),
        'working_days': len([d for d in days if d['active_hours'] > 0]),
    }


def group_by_month(daily_stats: List[Dict]) -> List[Dict]:
    """Group day dicts by ``YYYY-MM`` and summarise each month.

    Each month dict carries its days, the weeks found *within that month*
    (a week crossing a month boundary appears in both months as a partial
    week), total and average hours, and working/total day counts.
    """
    if not daily_stats:
        return []
    months: Dict[str, list] = defaultdict(list)
    for day in daily_stats:
        months[day['date'][:7]].append(day)

    result = []
    for month_key, days in sorted(months.items()):
        total_hours = sum(d['active_hours'] for d in days)
        result.append({
            'month': month_key,
            'month_name': datetime.strptime(month_key, '%Y-%m').strftime('%B %Y'),
            'days': days,
            'weeks': group_by_week(days),
            'total_hours': round(total_hours, 2),
            'avg_hours_per_day': round(total_hours / len(days), 2) if days else 0,
            'working_days': len([d for d in days if d['active_hours'] > 0]),
            'total_days': len(days),
        })
    return result


def calculate_time_patterns(daily_stats: List[Dict]) -> Dict:
    """Derive typical working hours from the per-day activity windows.

    Returns:
        ``typical_start`` / ``typical_end`` (upper median of the daily
        first/last active times), ``earliest_ever``, ``latest_ever`` (all
        ``'N/A'`` without data) and ``hourly_activity``: for each hour 0-23
        the percentage of days-with-a-start-time whose first..last window
        covers that hour.
    """
    starts = sorted(d['first_active'] for d in daily_stats if d.get('first_active'))
    ends = sorted(d['last_active'] for d in daily_stats if d.get('last_active'))
    active_days = len(starts)

    hourly: Dict[int, int] = defaultdict(int)
    for day in daily_stats:
        first, last = day.get('first_active'), day.get('last_active')
        if not (first and last):
            continue
        try:
            first_hour = int(first.split(':')[0])
            last_hour = int(last.split(':')[0])
        except ValueError:
            continue  # time value that does not start with an hour number
        for hour in range(first_hour, last_hour + 1):
            hourly[hour] += 1

    return {
        'typical_start': starts[len(starts) // 2] if starts else 'N/A',
        'typical_end': ends[len(ends) // 2] if ends else 'N/A',
        'earliest_ever': starts[0] if starts else 'N/A',
        'latest_ever': ends[-1] if ends else 'N/A',
        'hourly_activity': {
            h: round(hourly[h] / active_days * 100, 1) if active_days else 0
            for h in range(24)
        },
    }


# ---------------------------------------------------------------------------
# HTML generation
# ---------------------------------------------------------------------------

# NOTE(review): the original tags, CSS and script were not present in the
# reviewed copy of this file (only the text content survived). The markup
# below is a minimal reconstruction around that text - confirm it against
# the intended template, including the Chart.js CDN reference.
_REPORT_CSS = """
body { font-family: -apple-system, 'Segoe UI', Roboto, Arial, sans-serif;
       margin: 0; background: #f5f6fa; color: #1f2937; }
.container { max-width: 1400px; margin: 0 auto; padding: 24px; }
.header { margin-bottom: 24px; }
.legend-item { display: inline-block; margin-right: 16px; font-weight: 600; }
.dot { display: inline-block; width: 12px; height: 12px; border-radius: 50%;
       margin-right: 6px; }
.section { background: #fff; border-radius: 10px; padding: 20px;
           margin-bottom: 24px; box-shadow: 0 1px 3px rgba(0, 0, 0, .08); }
.grid { display: grid; gap: 16px; }
.card { border-radius: 8px; padding: 14px; background: #fafafa; }
.stat { display: flex; justify-content: space-between; padding: 4px 0; }
.chart-box { margin-bottom: 24px; }
.hour-row { display: flex; align-items: center; gap: 8px; margin: 3px 0;
            font-size: 12px; }
.hour-label { width: 44px; }
.hour-track { flex: 1; height: 12px; background: #eee; border-radius: 6px;
              overflow: hidden; }
.hour-fill { height: 100%; }
.hour-pct { width: 40px; text-align: right; }
.month { border: 1px solid #e5e7eb; border-radius: 8px; margin-bottom: 12px; }
.month-header { display: flex; justify-content: space-between; gap: 12px;
                padding: 12px 16px; cursor: pointer; font-weight: 600; }
.month-body { display: none; padding: 16px; }
.month.open .month-body { display: block; }
.month-stats span { margin-right: 12px; }
.day-table { width: 100%; border-collapse: collapse; margin-top: 16px;
             font-size: 13px; }
.day-table th, .day-table td { padding: 6px 8px; text-align: center;
                               border-bottom: 1px solid #eee; }
tr.weekend { background: #f9fafb; color: #9ca3af; }
.muted { color: #9ca3af; }
.badge { padding: 2px 8px; border-radius: 10px; }
.badge.absent { background: #fee2e2; color: #b91c1c; }
.badge.low { background: #fef3c7; color: #92400e; }
.badge.high { background: #dbeafe; color: #1e40af; }
.badge.ok { background: #dcfce7; color: #166534; }
"""

_TOGGLE_JS = """
function toggleMonth(header) {
  var month = header.parentElement;
  var isOpen = month.classList.toggle('open');
  header.querySelector('.toggle').textContent = isOpen ? '▼' : '▶';
}
"""

_CHART_OPTIONS = {'responsive': True, 'scales': {'y': {'beginAtZero': True}}}


def _js_literal(value) -> str:
    """Serialise ``value`` as JSON that is safe inside a ``<script>`` block."""
    # "</" would otherwise let a string such as "</script>" end the block.
    return json.dumps(value).replace('</', '<\\/')


def _chart_datasets(user_names: List[str], colors: List[Dict],
                    series: List[list], chart_type: str = 'bar') -> List[Dict]:
    """Build one Chart.js dataset dict per user for a bar or line chart."""
    datasets = []
    for name, data, color in zip(user_names, series, colors):
        if chart_type == 'line':
            datasets.append({
                'label': name, 'data': data,
                'borderColor': color['border'],
                'backgroundColor': 'rgba(0,0,0,.05)',
                'borderWidth': 2, 'tension': 0.3, 'fill': False,
                'pointRadius': 3,
            })
        else:
            datasets.append({
                'label': name, 'data': data,
                'backgroundColor': color['bg'],
                'borderColor': color['border'],
                'borderWidth': 2, 'borderRadius': 4,
            })
    return datasets


def _weekly_hours(daily: List[Dict]) -> Dict[str, float]:
    """Total active hours per calendar week, keyed by that week's Monday.

    Keying on the Monday keeps every user on the same x-axis positions and
    keeps a week that crosses a month boundary in one piece.
    """
    totals: Dict[str, float] = defaultdict(float)
    for day in daily:
        day_date = datetime.strptime(day['date'], DATE_FMT).date()
        monday = day_date - timedelta(days=day_date.weekday())
        totals[monday.isoformat()] += day['active_hours']
    return {monday: round(hours, 2) for monday, hours in totals.items()}


def _hours_badge(hours: float, is_weekend: bool) -> str:
    """Render the hours cell content for one user/day."""
    label = f'{hours:.1f}h'
    if is_weekend:
        return f'<span class="badge weekend">{label}</span>'
    if hours == 0:
        return '<span class="badge absent">Absent</span>'
    if hours < LOW_HOURS_THRESHOLD:
        return f'<span class="badge low">{label} ▼</span>'
    if hours > HIGH_HOURS_THRESHOLD:
        return f'<span class="badge high">{label} ▲</span>'
    return f'<span class="badge ok">{label}</span>'


def _render_day_table(dates: List[str], user_date_maps: List[Dict],
                      safe_names: List[str], colors: List[Dict]) -> str:
    """Render the day-by-day table of one month (one column group per user)."""
    user_th_cells = ''.join(
        f'<th colspan="3" style="color:{color["border"]}">{name}</th>'
        for name, color in zip(safe_names, colors)
    )
    sub_th_cells = '<th>Hours</th><th>Start</th><th>End</th>' * len(safe_names)
    out = [
        '<table class="day-table">\n<thead>\n'
        f'<tr><th rowspan="2">Date</th><th rowspan="2">Day</th>{user_th_cells}</tr>\n'
        f'<tr>{sub_th_cells}</tr>\n</thead>\n<tbody>\n'
    ]
    for date_str in dates:
        dt = datetime.strptime(date_str, DATE_FMT)
        is_weekend = dt.weekday() >= 5
        row_cls = 'weekend' if is_weekend else ''
        cells = [f'<td>{dt.strftime("%b %d")}</td><td>{dt.strftime("%a")}</td>']
        for date_map in user_date_maps:
            day_data = date_map.get(date_str)
            if day_data:
                badge = _hours_badge(day_data['active_hours'], is_weekend)
                start = escape(str(day_data.get('first_active') or '—'))
                end = escape(str(day_data.get('last_active') or '—'))
                cells.append(f'<td>{badge}</td><td>{start}</td><td>{end}</td>')
            else:
                cells.append('<td class="muted">—</td>' * 3)
        out.append(f'<tr class="{row_cls}">{"".join(cells)}</tr>\n')
    out.append('</tbody>\n</table>\n')
    return ''.join(out)


def generate_html(user_names: List[str], users_data: List[Dict],
                  start_date, end_date) -> str:
    """Render the side-by-side comparison report as one HTML document.

    Args:
        user_names: Display names, in the same order as ``users_data``.
        users_data: One dict per user with ``daily`` (output of
            ``analyze_daily_activity``), ``months`` (``group_by_month``) and
            ``patterns`` (``calculate_time_patterns``).
        start_date, end_date: Period bounds; only ``strftime`` is called on
            them.

    Returns:
        The full HTML text. Names and time values are HTML-escaped before
        they are placed in markup.
    """
    n = len(user_names)
    # Cycle the palette so more users than colours cannot raise IndexError.
    colors = [USER_COLORS[i % len(USER_COLORS)] for i in range(n)]
    safe_names = [escape(name) for name in user_names]
    title = ' vs '.join(safe_names)
    cols_tpl = f'repeat({n}, 1fr)'

    # --- Chart data ---
    month_keys = sorted({m['month'] for ud in users_data for m in ud['months']})
    monthly_labels = [
        datetime.strptime(mk, '%Y-%m').strftime('%B %Y') for mk in month_keys
    ]
    monthly_series = []
    for ud in users_data:
        hours_by_month = {m['month']: m['total_hours'] for m in ud['months']}
        monthly_series.append([hours_by_month.get(mk, 0) for mk in month_keys])

    weekly_maps = [_weekly_hours(ud['daily']) for ud in users_data]
    week_keys = sorted({wk for week_map in weekly_maps for wk in week_map})
    weekly_labels = [
        datetime.strptime(wk, DATE_FMT).strftime('%b %d') for wk in week_keys
    ]
    weekly_series = [
        [week_map.get(wk, 0) for wk in week_keys] for week_map in weekly_maps
    ]

    dow_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
                 'Saturday', 'Sunday']
    dow_short = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    dow_series = []
    for ud in users_data:
        bucket: Dict[str, list] = defaultdict(list)
        for day in ud['daily']:
            bucket[day['day_of_week']].append(day['active_hours'])
        dow_series.append([
            round(sum(bucket[dow]) / len(bucket[dow]), 2) if bucket.get(dow) else 0
            for dow in dow_order
        ])

    # --- Per-user summary stats ---
    stats = []
    for ud, week_map in zip(users_data, weekly_maps):
        daily = ud['daily']
        active = [d for d in daily if d['active_hours'] > 0]
        total_h = sum(d['active_hours'] for d in daily)
        stats.append({
            'total_days': len(daily),
            'active_days': len(active),
            'absent_days': len(daily) - len(active),
            'total_hours': round(total_h, 1),
            'avg_per_day': round(total_h / len(active), 1) if active else 0,
            # Averaged over whole calendar weeks, not month-split fragments.
            'avg_per_week': (round(sum(week_map.values()) / len(week_map), 1)
                             if week_map else 0),
            'typical_start': escape(str(ud['patterns']['typical_start'])),
            'typical_end': escape(str(ud['patterns']['typical_end'])),
        })

    # --- Date lookup for the daily table ---
    user_date_maps = [{day['date']: day for day in ud['daily']} for ud in users_data]

    parts: List[str] = []
    add = parts.append

    # ---- Document head + header -----------------------------------
    add(f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Comparison: {title}</title>
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
<style>{_REPORT_CSS}</style>
</head>
<body>
<div class="container">
<div class="header">
<h1>Employee Comparison Report</h1>
<p>Period: {start_date.strftime('%B %d, %Y')} — {end_date.strftime('%B %d, %Y')}</p>
<div class="legend">
""")
    for name, color in zip(safe_names, colors):
        add(f'<span class="legend-item"><span class="dot" '
            f'style="background:{color["border"]}"></span>{name}</span>\n')
    add('</div>\n</div>\n')

    # ---- Section: Overview ----------------------------------------
    add(f'<div class="section">\n<h2>Overview</h2>\n'
        f'<div class="grid" style="grid-template-columns:{cols_tpl}">\n')
    for name, s, color in zip(safe_names, stats, colors):
        add(f"""<div class="card" style="border-top:4px solid {color['border']};background:{color['light']}">
<h3>{name}</h3>
<div class="stat"><span>Total Hours</span><strong>{s['total_hours']}h</strong></div>
<div class="stat"><span>Active Days</span><strong>{s['active_days']}</strong></div>
<div class="stat"><span>Absent Days</span><strong>{s['absent_days']}</strong></div>
<div class="stat"><span>Avg / Active Day</span><strong>{s['avg_per_day']}h</strong></div>
<div class="stat"><span>Avg / Week</span><strong>{s['avg_per_week']}h</strong></div>
<div class="stat"><span>Typical Hours</span><strong>{s['typical_start']} – {s['typical_end']}</strong></div>
</div>
""")
    add('</div>\n</div>\n')

    # ---- Section: Monthly + Weekly charts -------------------------
    add("""<div class="section">
<h2>Monthly &amp; Weekly Trends</h2>
<div class="chart-box"><h3>Monthly Hours Comparison</h3><canvas id="monthlyChart"></canvas></div>
<div class="chart-box"><h3>Weekly Trend</h3><canvas id="weeklyChart"></canvas></div>
</div>
""")

    # ---- Section: Day-of-week chart -------------------------------
    add("""<div class="section">
<h2>Average Hours by Day of Week</h2>
<div class="chart-box"><canvas id="dowChart"></canvas></div>
</div>
""")

    # ---- Section: Hourly distribution -----------------------------
    add(f'<div class="section">\n'
        f'<h2>Work Hours Distribution (06:00 – 20:00)</h2>\n'
        f'<div class="grid" style="grid-template-columns:{cols_tpl}">\n')
    for name, ud, color in zip(safe_names, users_data, colors):
        add(f'<div class="card">\n<h3 style="color:{color["border"]}">{name}</h3>\n')
        hourly_activity = ud['patterns']['hourly_activity']
        for h in range(6, 21):
            pct = hourly_activity.get(h, 0)
            add(f'<div class="hour-row"><span class="hour-label">{h:02d}:00</span>'
                f'<div class="hour-track"><div class="hour-fill" '
                f'style="width:{pct:.0f}%;background:{color["bg"]}"></div></div>'
                f'<span class="hour-pct">{pct:.0f}%</span></div>\n')
        add('</div>\n')
    add('</div>\n</div>\n')

    # ---- Section: Monthly detail accordion ------------------------
    add('<div class="section">\n<h2>Monthly Breakdown</h2>\n')
    for mi, mk in enumerate(month_keys):
        month_name = datetime.strptime(mk, '%Y-%m').strftime('%B %Y')
        user_month = [
            next((m for m in ud['months'] if m['month'] == mk), None)
            for ud in users_data
        ]
        hours_summary = ' &nbsp;|&nbsp; '.join(
            f"{name}: {mdata['total_hours']:.1f}h" if mdata else f"{name}: —"
            for name, mdata in zip(safe_names, user_month)
        )
        # Only the most recent month starts expanded.
        is_last = (mi == len(month_keys) - 1)
        open_cls = ' open' if is_last else ''
        tog_char = '▼' if is_last else '▶'
        add(f"""<div class="month{open_cls}">
<div class="month-header" onclick="toggleMonth(this)">
<span class="month-name">{month_name}</span>
<span class="month-hours">{hours_summary}</span>
<span class="toggle">{tog_char}</span>
</div>
<div class="month-body">
<div class="grid" style="grid-template-columns:{cols_tpl}">
""")
        for name, mdata, color in zip(safe_names, user_month, colors):
            if mdata:
                add(f"""<div class="card" style="border-left:4px solid {color['border']}">
<h3>{name}</h3>
<div class="month-stats"><span>{mdata['total_hours']:.1f}h total</span> <span>{mdata['working_days']}/{mdata['total_days']} days</span> <span>{mdata['avg_hours_per_day']:.1f}h/day</span></div>
</div>
""")
            else:
                add(f"""<div class="card" style="border-left:4px solid {color['border']}">
<h3>{name}</h3>
<div class="muted">No data</div>
</div>
""")
        add('</div>\n')

        # Day-by-day table
        all_dates = sorted({
            day['date']
            for ud in users_data
            for m in ud['months'] if m['month'] == mk
            for day in m['days']
        })
        if all_dates:
            add(_render_day_table(all_dates, user_date_maps, safe_names, colors))
        add('</div>\n</div>\n')
    add('</div>\n')  # close Monthly Breakdown section

    # ---- Footer ---------------------------------------------------
    add('</div>\n')  # close container

    # ---- JavaScript -----------------------------------------------
    charts = [
        ('monthlyChart', 'bar', monthly_labels, monthly_series),
        ('weeklyChart', 'line', weekly_labels, weekly_series),
        ('dowChart', 'bar', dow_short, dow_series),
    ]
    script = [_TOGGLE_JS]
    for canvas_id, chart_type, labels, series in charts:
        config = {
            'type': chart_type,
            'data': {
                'labels': labels,
                'datasets': _chart_datasets(user_names, colors, series, chart_type),
            },
            'options': _CHART_OPTIONS,
        }
        script.append(
            f'new Chart(document.getElementById({json.dumps(canvas_id)}), '
            f'{_js_literal(config)});\n'
        )
    add(f'<script>{"".join(script)}</script>\n</body>\n</html>\n')

    return ''.join(parts)


# ---------------------------------------------------------------------------
# Entry points
# ---------------------------------------------------------------------------

def _filename_slug(name: str) -> str:
    """Lower-case ``name`` and replace anything unsafe in a file name by ``_``."""
    return ''.join(
        ch if ch.isalnum() or ch in '-_.' else '_' for ch in name.lower()
    )


def generate_comparison(user_names: List[str], months: int = 3,
                        start_date: str = None, end_date: str = None) -> Path:
    """Load, analyse and render the comparison; return the written file path.

    The report is written to ``OUTPUT_DIR`` as
    ``comparison_<names>_<start>_<end>.html``. A user without any matching
    rows only triggers a warning and is still included (with empty data).
    """
    start, end = get_date_range(months, start_date, end_date)
    print(f"\nComparison: {' vs '.join(user_names)}")
    print(f"Period: {start} → {end}\n")

    users_data = []
    for name in user_names:
        print(f"Loading {name}...")
        raw = load_user_data(name, start, end)
        if not raw:
            print(f"  WARNING: no data found for '{name}' — check the exact name in the raw CSVs")
        daily = analyze_daily_activity(raw)
        months_data = group_by_month(daily)
        patterns = calculate_time_patterns(daily)
        users_data.append({'name': name, 'daily': daily,
                           'months': months_data, 'patterns': patterns})
        print(f"  {len(raw):,} records · {len(daily)} days · {len(months_data)} months")

    print("\nBuilding HTML report...")
    html = generate_html(user_names, users_data, start, end)

    # Slugged so a "/" or similar in a name cannot redirect the output path.
    safe = '_vs_'.join(_filename_slug(n) for n in user_names)
    filename = f"comparison_{safe}_{start}_{end}.html"
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    out = OUTPUT_DIR / filename
    out.write_text(html, encoding='utf-8')
    print(f"\nSaved: {out} ({out.stat().st_size / 1024:.1f} KB)")
    return out


def _date_arg(text: str) -> str:
    """argparse ``type``: accept a ``YYYY-MM-DD`` string and return it as-is."""
    try:
        _parse_day(text)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"invalid date '{text}' (expected YYYY-MM-DD)") from None
    return text


def main():
    """Command-line entry point: parse arguments and build the report."""
    parser = argparse.ArgumentParser(
        description='Generate a side-by-side employee comparison HTML report',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""Examples:
  python3 compare_employees.py --users "Vilius Ramanauskas" "Tomas Mockus" "Titas Surname" --months 3
  python3 compare_employees.py --users "Name1" "Name2" --start 2025-11-01 --end 2026-02-18
        """,
    )
    parser.add_argument('--users', nargs='+', required=True,
                        help='2 or 3 user full names (must match the user_name column in raw CSVs)')
    parser.add_argument('--months', type=int, default=3,
                        help='Months to include, counting back from today (default: 3)')
    parser.add_argument('--start', type=_date_arg, help='Start date YYYY-MM-DD')
    parser.add_argument('--end', type=_date_arg, help='End date YYYY-MM-DD')
    args = parser.parse_args()

    if len(args.users) < 2:
        parser.error("Provide at least 2 user names")
    if len(args.users) > 3:
        parser.error("Maximum 3 users are supported")
    if args.start and args.end and _parse_day(args.start) > _parse_day(args.end):
        parser.error("--start must not be after --end")

    generate_comparison(args.users, args.months, args.start, args.end)


if __name__ == '__main__':
    main()