#!/usr/bin/env python3
"""
Employee Comparison Report Generator
Generates a detailed side-by-side HTML report for 2-3 employees.

Usage:
    python3 compare_employees.py --users "Name1" "Name2" "Name3" --months 3
    python3 compare_employees.py --users "Name1" "Name2" --start 2025-11-01 --end 2026-02-18
"""

import csv
import argparse
import json
from pathlib import Path
from datetime import datetime, timedelta
from collections import defaultdict
from typing import Dict, List, Tuple, Optional

RAW_DATA_DIR = Path("data/raw")
OUTPUT_DIR = Path("reports")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

USER_COLORS = [
    {'bg': 'rgba(102, 126, 234, 0.8)', 'border': '#667eea', 'light': '#eef0fd'},
    {'bg': 'rgba(249, 115, 22, 0.8)', 'border': '#f97316', 'light': '#fff4ee'},
    {'bg': 'rgba(16, 185, 129, 0.8)', 'border': '#10b981', 'light': '#ecfdf5'},
]


# ---------------------------------------------------------------------------
# Data loading & analysis (mirrors generate_employee_report.py conventions)
# ---------------------------------------------------------------------------

def get_date_range(months: int = 3, start_date: Optional[str] = None,
                   end_date: Optional[str] = None):
    """Return the ``(start, end)`` reporting window as ``date`` objects.

    Args:
        months: Window length used when ``start_date`` is not given; the
            window is ``months * 30`` days long. A falsy value means 3.
        start_date: Optional ``YYYY-MM-DD`` first day of the window.
        end_date: Optional ``YYYY-MM-DD`` last day of the window; defaults
            to today.

    Returns:
        A ``(start, end)`` tuple of ``datetime.date`` objects.

    Raises:
        ValueError: If a supplied date string is not in ``YYYY-MM-DD`` form.
    """
    # Each bound is resolved independently so that a lone --start or --end is
    # honoured instead of being silently dropped.
    if end_date:
        end = datetime.strptime(end_date, '%Y-%m-%d').date()
    else:
        end = datetime.now().date()

    if start_date:
        start = datetime.strptime(start_date, '%Y-%m-%d').date()
    else:
        start = end - timedelta(days=(months or 3) * 30)
    return start, end


def _extract_hhmm(timestamp: str) -> Optional[str]:
    """Return the zero-padded ``HH:MM`` part of *timestamp*, or ``None``.

    The time portion is whatever follows a ``T`` or, failing that, a space;
    a string with neither separator is treated as a bare time.
    """
    if 'T' in timestamp:
        time_part = timestamp.split('T')[1]
    elif ' ' in timestamp:
        time_part = timestamp.split(' ')[1]
    else:
        time_part = timestamp

    # Drop a trailing UTC offset (+hh:mm / -hh:mm), fractional seconds and 'Z'.
    clean = time_part.split('+')[0].split('-')[0].split('.')[0].split('Z')[0]
    if ':' not in clean:
        return None
    parts = clean.split(':')
    return f"{parts[0].zfill(2)}:{parts[1].zfill(2)}"


def load_user_data(user_name: str, start_date, end_date) -> List[Dict]:
    """Collect one user's rows from the dated CSV files in ``RAW_DATA_DIR``.

    Only files named ``<anything>_<YYYY-MM-DD>.csv`` whose date lies within
    ``[start_date, end_date]`` are read. A row is kept when its ``user_name``
    or ``user_id`` column equals *user_name*. Each kept row gains ``date``
    (the file's date string), ``file_date`` (the parsed date) and, when the
    ``timestamp`` column contains a time, ``time`` as ``HH:MM``.

    Args:
        user_name: Display name or user id to match.
        start_date: First day to include (a ``datetime.date``).
        end_date: Last day to include (a ``datetime.date``).

    Returns:
        The matching rows as dicts, in file-name order.
    """
    all_data: List[Dict] = []
    for csv_file in sorted(RAW_DATA_DIR.glob("*.csv")):
        stem = csv_file.stem
        if '_' not in stem:
            continue
        date_str = stem.split('_')[-1]
        try:
            file_date = datetime.strptime(date_str, '%Y-%m-%d').date()
        except ValueError:
            # File name does not end in a date: not one of ours.
            continue
        if not (start_date <= file_date <= end_date):
            continue

        try:
            # newline='' lets the csv module do its own newline handling.
            with open(csv_file, 'r', newline='') as f:
                for row in csv.DictReader(f):
                    if (row.get('user_name') != user_name
                            and row.get('user_id') != user_name):
                        continue
                    row['date'] = date_str
                    row['file_date'] = file_date
                    hhmm = _extract_hhmm(row.get('timestamp') or '')
                    if hhmm is not None:
                        row['time'] = hhmm
                    all_data.append(row)
        except (OSError, UnicodeError, csv.Error):
            # Best effort: an unreadable or malformed file is skipped; rows
            # already read from it are kept.
            continue
    return all_data


def analyze_daily_activity(data: List[Dict]) -> List[Dict]:
    """Aggregate rows into one statistics dict per date, sorted by date.

    Every row adds 1 to ``total_minutes`` for its date, and rows whose
    ``presence`` is ``'active'`` also add 1 to ``active_minutes``
    (presumably one row per sampled minute -- confirm against the data
    source). ``first_active`` / ``last_active`` are the smallest / largest
    ``time`` strings among the active rows.

    Args:
        data: Rows carrying at least a ``date`` key (``YYYY-MM-DD``).

    Returns:
        Per-day dicts with ``date``, ``day_of_week``, ``total_minutes``,
        ``active_minutes``, ``first_active``, ``last_active``,
        ``active_hours`` and ``activity_rate`` (percent).
    """
    daily: Dict[str, Dict] = {}
    for row in data:
        day_key = row.get('date')
        if not day_key:
            continue

        stats = daily.get(day_key)
        if stats is None:
            # The weekday is derived once per date rather than once per row.
            stats = daily[day_key] = {
                'date': day_key,
                'day_of_week': datetime.strptime(day_key, '%Y-%m-%d').strftime('%A'),
                'total_minutes': 0,
                'active_minutes': 0,
                'first_active': None,
                'last_active': None,
            }

        stats['total_minutes'] += 1
        if row.get('presence') != 'active':
            continue
        stats['active_minutes'] += 1
        moment = row.get('time', '')
        if moment:
            # Zero-padded HH:MM strings order the same way as the times.
            if not stats['first_active'] or moment < stats['first_active']:
                stats['first_active'] = moment
            if not stats['last_active'] or moment > stats['last_active']:
                stats['last_active'] = moment

    result = []
    for day_key in sorted(daily):
        stats = daily[day_key]
        stats['active_hours'] = round(stats['active_minutes'] / 60, 2)
        stats['activity_rate'] = (
            round(stats['active_minutes'] / stats['total_minutes'] * 100, 1)
            if stats['total_minutes'] else 0
        )
        result.append(stats)
    return result


def group_by_week(daily_stats: List[Dict]) -> List[Dict]:
    """Split date-ordered daily stats into runs that share an ISO week.

    Runs are keyed by ``(ISO year, ISO week)``, so adjacent entries with the
    same week number in different years are not merged into one week.

    Args:
        daily_stats: Per-day dicts with ``date`` and ``active_hours``,
            ordered by date.

    Returns:
        One summary dict per week (see ``_make_week``), in input order.
    """
    weeks: List[Dict] = []
    current_key = None
    current_days: List[Dict] = []
    for day in daily_stats:
        iso = datetime.strptime(day['date'], '%Y-%m-%d').date().isocalendar()
        key = (iso[0], iso[1])
        if current_days and key != current_key:
            weeks.append(_make_week(current_key[1], current_days))
            current_days = []
        current_key = key
        current_days.append(day)
    if current_days:
        weeks.append(_make_week(current_key[1], current_days))
    return weeks


def _make_week(week_num, days):
    """Build the summary dict for one non-empty week of daily stats."""
    return {
        'week_num': week_num,
        'start_date': days[0]['date'],
        'end_date': days[-1]['date'],
        'days': days,
        'total_hours': round(sum(d['active_hours'] for d in days), 2),
        'working_days': sum(1 for d in days if d['active_hours'] > 0),
    }


def group_by_month(daily_stats: List[Dict]) -> List[Dict]:
    """Group daily stats by calendar month (``YYYY-MM``), sorted by month.

    Args:
        daily_stats: Per-day dicts with ``date`` and ``active_hours``.

    Returns:
        One dict per month with ``month``, ``month_name``, ``days``,
        ``weeks``, ``total_hours``, ``avg_hours_per_day``, ``working_days``
        and ``total_days``.
    """
    by_month: Dict[str, list] = defaultdict(list)
    for day in daily_stats:
        by_month[day['date'][:7]].append(day)

    result = []
    for month_key in sorted(by_month):
        days = by_month[month_key]  # never empty: created on first append
        month_total = sum(d['active_hours'] for d in days)
        result.append({
            'month': month_key,
            'month_name': datetime.strptime(month_key, '%Y-%m').strftime('%B %Y'),
            'days': days,
            'weeks': group_by_week(days),
            'total_hours': round(month_total, 2),
            'avg_hours_per_day': round(month_total / len(days), 2),
            'working_days': sum(1 for d in days if d['active_hours'] > 0),
            'total_days': len(days),
        })
    return result


def calculate_time_patterns(daily_stats: List[Dict]) -> Dict:
    """Summarise start/end times and hour-of-day coverage across days.

    ``typical_start`` / ``typical_end`` are the middle elements of the sorted
    ``first_active`` / ``last_active`` values (the upper one for an even
    count). ``hourly_activity[h]`` is the percentage of days having a
    ``first_active`` whose first-to-last hour span includes hour ``h``.

    Args:
        daily_stats: Per-day dicts with optional ``first_active`` and
            ``last_active`` ``HH:MM`` strings.

    Returns:
        Dict with ``typical_start``, ``typical_end``, ``earliest_ever``,
        ``latest_ever`` (``'N/A'`` when there is no data) and
        ``hourly_activity`` keyed by hour 0-23.
    """
    starts = sorted(d['first_active'] for d in daily_stats if d.get('first_active'))
    ends = sorted(d['last_active'] for d in daily_stats if d.get('last_active'))
    active_days = len(starts)

    hourly: Dict[int, int] = defaultdict(int)
    for day in daily_stats:
        first, last = day.get('first_active'), day.get('last_active')
        if not (first and last):
            continue
        try:
            first_hour = int(first.split(':')[0])
            last_hour = int(last.split(':')[0])
        except (ValueError, AttributeError):
            # A malformed time leaves this day out of the hourly histogram.
            continue
        for hour in range(first_hour, last_hour + 1):
            hourly[hour] += 1

    return {
        'typical_start': starts[len(starts) // 2] if starts else 'N/A',
        'typical_end': ends[len(ends) // 2] if ends else 'N/A',
        'earliest_ever': starts[0] if starts else 'N/A',
        'latest_ever': ends[-1] if ends else 'N/A',
        'hourly_activity': {
            h: round(hourly[h] / active_days * 100, 1) if active_days else 0
            for h in range(24)
        },
    }


# ---------------------------------------------------------------------------
# HTML generation
# ---------------------------------------------------------------------------

# NOTE(review): generate_html continues past the end of this block; only its
# opening statements fall inside the replaced span and they are reproduced
# unchanged.
def generate_html(user_names: List[str], users_data: List[Dict], start_date, end_date) -> str:
    n = len(user_names)
    colors = USER_COLORS[:n]

    # --- Chart data ---
    all_months_set = sorted({m['month'] for ud in users_data for m in ud['months']})
    monthly_labels = [datetime.strptime(mk, '%Y-%m').strftime('%B %Y') for mk in all_months_set]
    monthly_series = [
        [{m['month']: m['total_hours'] for m in ud['months']}.get(mk, 0) for mk in all_months_set]
        for ud in users_data
    ]
all_weeks_set = sorted({ w['start_date'] for ud in users_data for m in ud['months'] for w in m['weeks'] }) weekly_labels = [datetime.strptime(w, '%Y-%m-%d').strftime('%b %d') for w in all_weeks_set] weekly_series = [] for ud in users_data: wmap = {w['start_date']: w['total_hours'] for m in ud['months'] for w in m['weeks']} weekly_series.append([wmap.get(w, 0) for w in all_weeks_set]) dow_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'] dow_short = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] dow_series = [] for ud in users_data: bucket: Dict[str, list] = defaultdict(list) for day in ud['daily']: bucket[day['day_of_week']].append(day['active_hours']) dow_series.append([ round(sum(bucket[dow]) / len(bucket[dow]), 2) if bucket.get(dow) else 0 for dow in dow_order ]) # --- Per-user summary stats --- stats = [] for ud in users_data: daily = ud['daily'] active = [d for d in daily if d['active_hours'] > 0] total_h = sum(d['active_hours'] for d in daily) all_weeks = [w for m in ud['months'] for w in m['weeks']] stats.append({ 'total_days': len(daily), 'active_days': len(active), 'absent_days': len(daily) - len(active), 'total_hours': round(total_h, 1), 'avg_per_day': round(total_h / len(active), 1) if active else 0, 'avg_per_week': round(sum(w['total_hours'] for w in all_weeks) / len(all_weeks), 1) if all_weeks else 0, 'typical_start': ud['patterns']['typical_start'], 'typical_end': ud['patterns']['typical_end'], }) # --- Date lookup for the daily table --- user_date_maps = [{day['date']: day for day in ud['daily']} for ud in users_data] cols_tpl = f'repeat({n}, 1fr)' title = ' vs '.join(user_names) # ----------------------------------------------------------------------- html = f"""
| Date | Day | {user_th_cells}||||||
|---|---|---|---|---|---|---|---|
| {sub_th_cells} | |||||||
| {date_disp} | \n' html += f'{dow_disp} | \n' for i, udm in enumerate(user_date_maps): c = colors[i] day_data = udm.get(date_str) if day_data: h = day_data['active_hours'] start = day_data.get('first_active') or '—' end = day_data.get('last_active') or '—' if is_weekend: badge = f'{h:.1f}h' elif h == 0: badge = 'Absent' elif h < 6: badge = f'{h:.1f}h ▼' elif h > 9: badge = f'{h:.1f}h ▲' else: badge = f'{h:.1f}h' html += f'{badge} | \n' html += f'{start} | \n' html += f'{end} | \n' else: html += f'— | \n' html += f'— | \n' html += f'— | \n' html += '