"""
파일 검증 유틸리티 모듈
Excel 파일의 구조와 데이터를 검증합니다.
"""

import pandas as pd
import re
import numpy as np
from typing import Dict, List, Tuple, Any
from utils.common import clean_dataframe_data

class FileValidationResult:
    """Mutable container for the outcome of one file validation run.

    Recording an error flips ``is_valid`` to False; warnings and info
    entries never change it. Each mutation is also echoed through the
    module-level ``console`` logger.
    """

    def __init__(self):
        self.is_valid = True  # stays True until the first error is recorded
        self.errors = []      # error messages, in the order they were added
        self.warnings = []    # warning messages, in the order they were added
        self.info = {}        # free-form metadata keyed by name

    def add_error(self, message: str):
        """Record an error message and mark the result as invalid."""
        console.log(f"검증 오류 추가: {message}")  # debug trace
        self.is_valid = False
        self.errors.append(message)

    def add_warning(self, message: str):
        """Record a warning message; validity is left untouched."""
        console.log(f"검증 경고 추가: {message}")  # debug trace
        self.warnings.append(message)

    def add_info(self, key: str, value: Any):
        """Store (or overwrite) a metadata entry under ``key``."""
        console.log(f"검증 정보 추가: {key} = {value}")  # debug trace
        self.info.update({key: value})

def validate_excel_file(file_content) -> FileValidationResult:
    """
    Validate an uploaded Excel file and collect every finding.

    The workbook is read with ``pd.read_excel``, passed through the project
    helper ``clean_dataframe_data`` and then checked in several stages
    (required columns, numeric types, values, per-row quantity checks).
    A table preview and summary statistics are attached to the result.

    Args:
        file_content: Anything ``pd.read_excel`` accepts (path, bytes buffer,
            file-like object).

    Returns:
        FileValidationResult: Errors, warnings and info entries. Any
        exception raised while processing is caught and reported as an
        error on the result instead of propagating.
    """
    result = FileValidationResult()

    try:
        console.log("Excel 파일 검증 시작")  # debug trace

        # Read the workbook
        df = pd.read_excel(file_content)

        # Project helper: cleans/trims the frame (implementation not shown here)
        df = clean_dataframe_data(df)

        console.log(f"파일 읽기 완료: {len(df)}행, {len(df.columns)}열")  # debug trace

        # Normalise the text columns. Missing cells are mapped to '' BEFORE
        # the string conversion: a bare astype(str) turns NaN into the literal
        # text "nan", which none of the later emptiness checks recognise.
        string_columns = ['부재명', '부재위치', '손상내용', '단위']
        for col in string_columns:
            if col in df.columns:
                df[col] = df[col].fillna('').astype(str).str.strip()
                console.log(f"{col} 컬럼 공백 제거 완료")  # debug trace

        # Basic shape information
        result.add_info('total_rows', len(df))
        result.add_info('total_columns', len(df.columns))

        # 1. Required columns; nothing else can run without them
        required_columns = ['부재명', '부재위치', '손상내용', '손상물량', '개소', '단위']
        missing_columns = [col for col in required_columns if col not in df.columns]

        if missing_columns:
            result.add_error(f"필수 컬럼이 누락되었습니다: {', '.join(missing_columns)}")
            return result

        console.log("필수 컬럼 검증 통과")  # debug trace

        # 2. Numeric type checks
        result = validate_data_types(df, result)

        # 3. Value checks (empty cells, units)
        result = validate_data_values(df, result)

        # 4. Component name / damage description checks
        result = validate_component_and_damage(df, result)

        # 5. Per-row damage quantity verification
        detailed_validation = perform_damage_quantity_validation(df)
        result.add_info('validation_details', detailed_validation)

        # 6. HTML preview with the failing rows highlighted
        table_preview = generate_table_preview_with_highlighting(df, detailed_validation.get('error_rows', []))
        result.add_info('table_preview', table_preview)

        # 7. Summary statistics
        result = generate_statistics(df, result)

        console.log(f"검증 완료: valid={result.is_valid}, errors={len(result.errors)}, warnings={len(result.warnings)}")  # debug trace

    except Exception as e:
        # Top-level boundary: report the failure to the caller via the result
        console.log(f"검증 중 오류 발생: {str(e)}")  # debug trace
        result.add_error(f"파일 처리 중 오류가 발생했습니다: {str(e)}")

    return result

def validate_data_types(df: pd.DataFrame, result: FileValidationResult) -> FileValidationResult:
    """Check that the quantity and count columns hold non-negative numbers.

    Values are coerced with ``pd.to_numeric(errors='coerce')``; every cell
    that ends up NaN (including empty cells) is counted as non-numeric.
    Findings are added to ``result`` as errors, and the same ``result``
    object is returned.
    """
    console.log("데이터 타입 검증 시작")  # debug trace

    # Damage quantity column ('손상물량')
    try:
        quantities = pd.to_numeric(df['손상물량'], errors='coerce')

        non_numeric = quantities.isna().sum()
        if non_numeric > 0:
            result.add_error(f"손상물량에 숫자가 아닌 값이 {non_numeric}개 있습니다.")

        below_zero = (quantities < 0).sum()
        if below_zero > 0:
            result.add_error(f"손상물량에 음수 값이 {below_zero}개 있습니다.")
    except Exception as exc:
        result.add_error(f"손상물량 검증 중 오류: {str(exc)}")

    # Count column ('개소')
    try:
        counts = pd.to_numeric(df['개소'], errors='coerce')

        non_numeric = counts.isna().sum()
        if non_numeric > 0:
            result.add_error(f"개소에 숫자가 아닌 값이 {non_numeric}개 있습니다.")

        below_zero = (counts < 0).sum()
        if below_zero > 0:
            result.add_error(f"개소에 음수 값이 {below_zero}개 있습니다.")
    except Exception as exc:
        result.add_error(f"개소 검증 중 오류: {str(exc)}")

    console.log("데이터 타입 검증 완료")  # debug trace
    return result

def validate_data_values(df: pd.DataFrame, result: FileValidationResult) -> FileValidationResult:
    """Check for empty text cells and unsupported units.

    A text cell counts as empty when it is missing (NaN/None) or contains
    only whitespace. Units are compared against a fixed whitelist; any
    other value is reported as an error.

    Args:
        df: Frame containing the '부재명', '부재위치', '손상내용' and '단위' columns.
        result: Result object that receives the error messages.

    Returns:
        FileValidationResult: The same ``result`` object.
    """
    console.log("데이터 값 검증 시작")  # debug trace

    def _is_blank(value) -> bool:
        # Missing, or a string made only of whitespace
        return bool(pd.isna(value)) or (isinstance(value, str) and value.strip() == '')

    def _count_blank(column: str) -> int:
        return int(df[column].map(_is_blank).sum())

    # Empty-value checks
    empty_component = _count_blank('부재명')
    empty_position = _count_blank('부재위치')
    empty_damage = _count_blank('손상내용')

    if empty_component > 0:
        result.add_error(f"부재명이 비어있는 행이 {empty_component}개 있습니다.")

    if empty_position > 0:
        result.add_error(f"부재위치가 비어있는 행이 {empty_position}개 있습니다.")

    if empty_damage > 0:
        result.add_error(f"손상내용이 비어있는 행이 {empty_damage}개 있습니다.")

    # Unit check (reported as an error, not a warning)
    valid_units = ['m', 'mm', 'm²', '㎡', 'm2', 'cm', 'ea', 'EA', '개소', '개', '식', 'set']
    units = df['단위']
    invalid_units = units[~units.isin(valid_units)].unique()

    if len(invalid_units) > 0:
        # map(str, ...) keeps the join from raising when a unit is NaN or numeric
        result.add_error(f"지원되지 않는 단위가 사용되었습니다: {', '.join(map(str, invalid_units))}")

    console.log("데이터 값 검증 완료")  # debug trace
    return result

def validate_component_and_damage(df: pd.DataFrame, result: FileValidationResult) -> FileValidationResult:
    """Log the number of distinct component names; nothing is reported.

    Screening component names against a standard list (and damage
    descriptions against keywords) used to produce warnings. Those warnings
    were removed on purpose: non-standard names are accepted. The leftover
    screening loop computed a list that was never read, so it has been
    dropped; the function now only emits its debug traces.

    Args:
        df: Frame containing the '부재명' column.
        result: Result object, returned unchanged.

    Returns:
        FileValidationResult: The same ``result`` object.
    """
    console.log("부재명 및 손상내용 검증 시작")  # debug trace

    # Distinct, non-missing component names (only used for the trace below)
    unique_components = df['부재명'].dropna().unique()
    console.log(f"유니크 부재명 개수: {len(unique_components)}")  # debug trace

    console.log("부재명 및 손상내용 검증 완료")  # debug trace
    return result

def generate_statistics(df: pd.DataFrame, result: FileValidationResult) -> FileValidationResult:
    """Attach summary statistics to ``result`` as info entries.

    Adds the ten most frequent damage descriptions, the per-position row
    counts, any component names that differ only by whitespace (also raised
    as a warning), and the totals/averages of the quantity and count
    columns. Non-numeric cells are treated as 0 in the totals and averages.
    Any exception is downgraded to a warning on ``result``.
    """
    console.log("통계 정보 생성 시작")  # debug trace

    try:
        # Top-10 damage descriptions by frequency
        top_damage = df['손상내용'].value_counts().head(10)
        result.add_info('damage_count', top_damage.to_dict())

        # Row count per component position
        per_position = df['부재위치'].value_counts()
        result.add_info('position_count', per_position.to_dict())

        # Component names that collide once whitespace is ignored
        duplicate_groups = check_component_duplicates(df)
        if duplicate_groups:
            result.add_warning(f"띄어쓰기 차이로 인한 중복 부재명 발견: {duplicate_groups}")
            result.add_info('duplicate_components', duplicate_groups)

        # Damage quantity totals
        quantities = pd.to_numeric(df['손상물량'], errors='coerce').fillna(0)
        result.add_info('total_damage_quantity', float(quantities.sum()))
        result.add_info('average_damage_quantity', float(quantities.mean()))

        # Count totals
        counts = pd.to_numeric(df['개소'], errors='coerce').fillna(0)
        result.add_info('total_count', int(counts.sum()))
        result.add_info('average_count', float(counts.mean()))

        console.log("통계 정보 생성 완료")  # debug trace

    except Exception as exc:
        console.log(f"통계 정보 생성 중 오류: {str(exc)}")  # debug trace
        result.add_warning(f"통계 정보 생성 중 오류가 발생했습니다: {str(exc)}")

    return result

def check_component_duplicates(df: pd.DataFrame) -> list:
    """
    Find component names that differ only by whitespace.

    Every distinct, non-missing value of the '부재명' column is converted to
    text and grouped by that text with all whitespace removed (any character
    ``str.split()`` treats as whitespace, so tabs, non-breaking and
    full-width spaces are covered too).

    Args:
        df: Frame containing the '부재명' column.

    Returns:
        list: One list per colliding group, holding the distinct spellings
        in first-seen order. Groups appear in first-seen order as well.
        Empty when there are no collisions.
    """
    # normalized name -> spellings; the inner dict is used as an ordered set
    # so the output order is deterministic (a plain set is not)
    variants_by_key = {}

    for component in df['부재명'].dropna().unique():
        spelling = str(component)
        normalized = ''.join(spelling.split())
        variants_by_key.setdefault(normalized, {})[spelling] = None

    return [list(variants) for variants in variants_by_key.values() if len(variants) > 1]

def console_log(message: str):
    """JavaScript-style console log stub.

    Output is currently disabled, so the call does nothing and returns None.
    """
    return None

def _find_dimension_column(columns, keywords):
    """Return the header that holds the dimension named by ``keywords``.

    An exact header match wins; otherwise the first header containing one of
    the keywords is used. Non-string headers are skipped. Returns None when
    nothing matches.
    """
    text_columns = [col for col in columns if isinstance(col, str)]
    for col in text_columns:
        if col in keywords:
            return col
    for col in text_columns:
        if any(key in col for key in keywords):
            return col
    return None


def _append_quantity_errors(row_errors, unit, damage_quantity, count, length, width):
    """Append the calculation and sign errors of one row to ``row_errors``.

    ``damage_quantity`` and ``count`` must already be valid numbers;
    ``length`` and ``width`` may be None or NaN when unavailable.
    """
    if unit == 'm':
        # m: length × count must equal the recorded quantity
        if pd.notna(length):
            expected_quantity = length * count
            if abs(damage_quantity - expected_quantity) > 0.01:  # tolerance
                row_errors.append(f"m 단위 손상물량 계산 오류1: 현재 {damage_quantity}, 올바른 값 {expected_quantity:.2f} (길이 {length} × 개소 {count})")
        else:
            row_errors.append("m 단위 검증을 위한 정보가 없음: 길이")

    elif unit in ('㎡', 'm²', 'm2'):
        # ㎡: length × width × count must equal the recorded quantity
        missing_info = [label for label, value in (('길이', length), ('너비', width)) if pd.isna(value)]
        if not missing_info:
            expected_quantity = length * width * count
            if abs(damage_quantity - expected_quantity) > 0.01:  # tolerance
                row_errors.append(f"㎡ 단위 손상물량 계산 오류2: 현재 {damage_quantity}, 올바른 값 {expected_quantity:.2f} (길이 {length} × 너비 {width} × 개소 {count})")
        else:
            row_errors.append(f"㎡ 단위 검증을 위한 정보가 없음2: {', '.join(missing_info)}")

    elif unit.lower() in ('ea', '개', '개소'):
        # piece units: the quantity must equal the count
        if abs(damage_quantity - count) > 0.01:  # tolerance
            row_errors.append(f"개수 단위 손상물량 계산 오류3: 현재 {damage_quantity}, 올바른 값 {count} (개소와 동일해야 함)")

    # Sign checks apply to every unit
    if damage_quantity < 0:
        row_errors.append("손상물량이 음수임")

    if count < 0:
        row_errors.append("개소가 음수임")


def perform_damage_quantity_validation(df: pd.DataFrame) -> dict:
    """
    Verify the damage quantity of every row against its unit rule.

    Rules (0.01 tolerance):
        * unit 'm'              -> length × count
        * unit '㎡' / 'm²' / 'm2' -> length × width × count
        * unit 'ea' / '개' / '개소' (case-insensitive) -> count
    Other units are not recalculated. The calculation runs only when the
    row has a unit and numeric quantity and count; empty required cells and
    negative values are reported as well.

    The count is always read from the required '개소' column. Length and
    width columns are optional and located by header name (exact match
    first, then substring).

    Args:
        df: Frame to verify.

    Returns:
        dict: ``error_rows`` (list of dicts with ``row_index`` = index label
        + 2, ``errors`` and the row's ``data``), ``valid_rows`` and
        ``total_rows``. When required columns are absent the dict carries
        ``missing_columns`` instead and no rows are checked.
    """
    console.log("손상물량 계산 검증 시작")

    error_rows = []
    valid_rows = 0

    # Required columns
    required_columns = ['부재명', '부재위치', '손상내용', '손상물량', '개소', '단위']
    missing_columns = [col for col in required_columns if col not in df.columns]

    if missing_columns:
        return {
            'error_rows': [],
            'valid_rows': 0,
            'total_rows': len(df),
            'missing_columns': missing_columns
        }

    # Optional dimension columns
    available_length_col = _find_dimension_column(df.columns, ['길이', 'L', 'length', '연장'])
    available_width_col = _find_dimension_column(df.columns, ['너비', 'W', 'width', 'B'])

    console.log(f"사용 가능한 길이 컬럼: {available_length_col}, 너비 컬럼: {available_width_col}")  # debug trace

    def _to_number(value):
        # None for a missing cell, NaN for non-numeric text, else the number
        return pd.to_numeric(value, errors='coerce') if pd.notna(value) else None

    for idx, row in df.iterrows():
        row_errors = []

        try:
            # Required cells must not be empty
            if pd.isna(row['부재명']) or str(row['부재명']).strip() == '':
                row_errors.append("부재명이 비어있음")

            if pd.isna(row['부재위치']) or str(row['부재위치']).strip() == '':
                row_errors.append("부재위치가 비어있음")

            if pd.isna(row['손상내용']) or str(row['손상내용']).strip() == '':
                row_errors.append("손상내용이 비어있음")

            if pd.isna(row['손상물량']):
                row_errors.append("손상물량이 비어있음")

            if pd.isna(row['개소']):
                row_errors.append("개소가 비어있음")

            if pd.isna(row['단위']) or str(row['단위']).strip() == '':
                row_errors.append("단위가 비어있음")

            unit = str(row['단위']).strip() if pd.notna(row['단위']) else ''
            damage_quantity = _to_number(row['손상물량'])
            count = _to_number(row['개소'])

            # Recalculate only when all three inputs are usable
            if pd.notna(damage_quantity) and pd.notna(count) and unit:
                length = _to_number(row[available_length_col]) if available_length_col else None
                width = _to_number(row[available_width_col]) if available_width_col else None
                _append_quantity_errors(row_errors, unit, damage_quantity, count, length, width)

        except Exception as e:
            # Keep going: one bad row must not abort the whole verification
            row_errors.append(f"검증 중 오류 발생: {str(e)}")

        if row_errors:
            error_rows.append({
                'row_index': idx + 2,  # Excel row number (header row included)
                'errors': row_errors,
                'data': {
                    col: str(row[col]) if pd.notna(row[col]) else ''
                    for col in required_columns
                }
            })
        else:
            valid_rows += 1

    console.log(f"손상물량 계산 검증 완료: 총 {len(df)}행 중 {len(error_rows)}개 오류")

    return {
        'error_rows': error_rows,
        'valid_rows': valid_rows,
        'total_rows': len(df)
    }

def generate_table_preview_with_highlighting(df: pd.DataFrame, error_rows: list = None) -> str:
    """
    Render an HTML preview table with the failing rows highlighted.

    At most the first 500 rows are rendered. Only the required columns and
    any exactly-named length/width columns are shown (all columns when none
    of them exist). A failing row gets the ``table-danger`` class and a
    tooltip listing its errors; cells whose column name occurs in one of the
    row's error messages are highlighted individually. Cell text longer
    than 30 characters is truncated.

    All text taken from the frame or from the error messages is
    HTML-escaped, because it originates from an uploaded file.

    Args:
        df: Frame to preview.
        error_rows: Entries with ``row_index`` (Excel row number, i.e.
            0-based position + 2) and ``errors`` (list of messages).

    Returns:
        str: HTML markup, or a short fallback paragraph when rendering fails.
    """
    # Function-scope import: the name ``html`` is not imported at file level
    from html import escape

    try:
        # Render at most this many rows
        max_rows = 500
        preview_df = df.head(max_rows).copy()

        required_columns = ['부재명', '부재위치', '손상내용', '개소', '단위', '손상물량']
        length_columns = ['길이', 'L', 'length', '연장']
        width_columns = ['폭', '너비', '너비(m)', 'W', 'width', 'B']

        # Required columns first, then the optional dimension columns
        available_columns = []
        for col in required_columns + length_columns + width_columns:
            if col in preview_df.columns and col not in available_columns:
                available_columns.append(col)

        if available_columns:
            preview_df = preview_df[available_columns]

        # 0-based preview position -> first error entry reported for that row
        errors_by_position = {}
        for error_row in error_rows or []:
            position = error_row['row_index'] - 2  # Excel row number -> position
            if 0 <= position < len(preview_df):
                errors_by_position.setdefault(position, error_row)

        narrow_columns = ('개소', '단위')  # rendered left-aligned, 50px wide

        parts = ['<table class="table table-striped table-bordered table-hover" id="preview-table" style="font-size: 12px;">']

        # Header row
        parts.append('<thead class="table-dark"><tr><th style="position: sticky; top: 0; z-index: 10;">행번호</th>')
        for col in preview_df.columns:
            label = escape(str(col))
            if col in narrow_columns:
                parts.append(f'<th style="position: sticky; top: 0; z-index: 10; min-width: 50px; text-align: left;">{label}</th>')
            else:
                parts.append(f'<th style="position: sticky; top: 0; z-index: 10; min-width: 100px;">{label}</th>')
        parts.append('</tr></thead>')

        # Data rows
        parts.append('<tbody>')
        for position, (_, row) in enumerate(preview_df.iterrows()):
            error_row = errors_by_position.get(position)
            row_class = 'table-danger' if error_row is not None else ''
            error_title = ''
            error_cols = set()

            if error_row is not None:
                messages = error_row['errors']
                # quote=True so a quote inside a message cannot end the attribute
                tooltip = escape('오류: ' + ' | '.join(messages), quote=True)
                error_title = f'title="{tooltip}"'
                error_cols = {
                    col for col in preview_df.columns
                    if any(str(col) in message for message in messages)
                }

            parts.append(f'<tr class="{row_class}" {error_title}>')
            parts.append(f'<td><strong>{position + 2}</strong></td>')  # Excel row number (header included)

            for col in preview_df.columns:
                cell_value = str(row[col]) if pd.notna(row[col]) else ''
                if len(cell_value) > 30:
                    cell_value = cell_value[:30] + '...'
                # Escape after truncating so an entity is never cut in half
                cell_value = escape(cell_value)

                style = 'text-align: left; min-width: 50px;' if col in narrow_columns else ''

                # Highlight the cell when its column is named in an error message
                if col in error_cols:
                    parts.append(f'<td style="background-color: #f8d7da; color: #721c24;{style}"><i class="fas fa-exclamation-triangle text-danger me-1"></i>{cell_value}</td>')
                else:
                    parts.append(f'<td style="{style}">{cell_value}</td>')
            parts.append('</tr>')

        parts.append('</tbody></table>')

        # Tell the reader when rows were left out
        if len(df) > max_rows:
            parts.append(f'<div class="alert alert-info mt-2"><small><i class="fas fa-info-circle"></i> 전체 {len(df)}행 중 처음 {max_rows}행만 표시됩니다.</small></div>')

        return ''.join(parts)

    except Exception as e:
        console.log(f"테이블 미리보기 생성 중 오류: {str(e)}")
        return "<p>테이블 미리보기를 생성할 수 없습니다.</p>"

# Minimal stand-in for a JavaScript-style ``console`` object
class Console:
    """Tiny logger that prints each message with a module tag."""

    def log(self, message: str):
        """Print ``message`` prefixed with ``[FILE_VALIDATION]``."""
        tagged = f"[FILE_VALIDATION] {message}"
        print(tagged)


# Shared instance used by every function in this module
console = Console()