import re
import os
from collections import Counter, defaultdict
from html import escape

# Read the entire program listing to be analysed into memory as one string.
with open("./qlxc.lsb", "r") as source_file:
    program_code = source_file.read()

# Step 1: Parse and structure all declared subroutines with parameters, locals, and dims.
#
# `subs` maps each declared subroutine name to a record with:
#   "sub_kind" - the lower-cased declaration keyword ("defproc" or "deffn")
#   "params"   - {parameter name: {"type": "string" | "numeric"}}
#   "locals"   - {local name: {"type": "string" | "numeric" | "string array" | "numeric array"}}
#   "dims"     - {array name: {"type": "string array" | "numeric array", "dimensions": "(...)"}}
subs = defaultdict(lambda: {"sub_kind": None, "params": defaultdict(dict), "locals": defaultdict(dict), "dims": defaultdict(dict)})

# Patterns are compiled once because they are applied to every source line.
# The negative lookahead after a keyword stops identifiers that merely start
# with the keyword (e.g. "localx", "dimmer(3)") from being treated as statements.
_DECLARATION_RE = re.compile(r'\b(defproc|deffn)\s+(\w+\$?)\s*(\(([^)]*)\))?', re.IGNORECASE)
_ENDDEF_RE = re.compile(r'enddef(?![\w$%])', re.IGNORECASE)
_LOCAL_RE = re.compile(r'local(?![\w$%])(.*)', re.IGNORECASE)
_DIM_RE = re.compile(r'dim(?![\w$%])', re.IGNORECASE)
_DIM_VAR_RE = re.compile(r'\b(\w+\$?\([^)]*\))')


def _split_top_level_commas(text):
    """Split *text* on commas that are not nested inside parentheses.

    Keeps array declarations such as ``a(2,3)`` in one piece.
    """
    parts = []
    depth = 0
    start = 0
    for index, char in enumerate(text):
        if char == "(":
            depth += 1
        elif char == ")":
            # Clamp at zero so a stray ")" cannot hide later separators.
            depth = max(depth - 1, 0)
        elif char == "," and depth == 0:
            parts.append(text[start:index])
            start = index + 1
    parts.append(text[start:])
    return parts


current_sub = None
for line in program_code.splitlines():
    line = line.strip()

    # Subroutine declaration start
    proc_match = _DECLARATION_RE.match(line)
    if proc_match:
        sub_kind, sub_name, _, param_str = proc_match.groups()
        current_sub = sub_name
        subs[sub_name]["sub_kind"] = sub_kind.lower()

        if param_str:
            for param in param_str.split(","):
                param = param.strip()
                # Filter after stripping so blank entries are never recorded.
                if param:
                    param_type = "string" if param.endswith('$') else "numeric"
                    subs[sub_name]["params"][param] = {"type": param_type}
        continue

    # Locals and dims are only recorded while inside a subroutine body.
    if current_sub is None:
        continue

    # End of subroutine block (anything may follow the keyword, e.g. a name)
    if _ENDDEF_RE.match(line):
        current_sub = None
        continue

    # Parse locals
    local_match = _LOCAL_RE.match(line)
    if local_match:
        for var in _split_top_level_commas(local_match.group(1)):
            var = var.strip()
            if not var:
                continue
            if '(' in var:
                var_name = var.split('(')[0].strip()
                # Classify by the bare name: the full text ends with ")", never "$".
                var_type = "string array" if var_name.endswith('$') else "numeric array"
            else:
                var_name = var
                var_type = "string" if var.endswith('$') else "numeric"
            subs[current_sub]["locals"][var_name] = {"type": var_type}
        continue

    # Parse dims: every "name(...)" occurrence on a dim line
    if _DIM_RE.match(line):
        for dim_var in _DIM_VAR_RE.findall(line):
            dim_name = dim_var.split('(')[0].strip()
            dim_type = "string array" if dim_name.endswith('$') else "numeric array"
            subs[current_sub]["dims"][dim_name] = {"type": dim_type, "dimensions": dim_var[len(dim_name):]}

# Step 2: Exclusion list -- every declared subroutine name plus every
# parameter, local and dim name recorded for any subroutine.
exclusion_set = set(subs)
for record in subs.values():
    for section in ("params", "locals", "dims"):
        exclusion_set.update(record[section])

# Analyze usage of each subroutine in the source code.
# Counts are case-insensitive and include the declaration line itself.
# Names starting with a lowercase letter go to `lowercase_counter`,
# all others to `uppercase_counter`; keys are (name, kind) tuples.
lowercase_counter = Counter()
uppercase_counter = Counter()

for sub_name, sub_data in subs.items():
    # re.escape is required because names may end in "$", which is a regex
    # anchor when unescaped. Lookarounds replace \b so that "foo" does not
    # match inside "foo$"/"foo%" and "foo$" can match before "(" or a space.
    usage_pattern = rf'(?<!\w){re.escape(sub_name)}(?![\w$%])'
    count = len(re.findall(usage_pattern, program_code, re.IGNORECASE))
    target_counter = lowercase_counter if sub_name[0].islower() else uppercase_counter
    target_counter[(sub_name, sub_data["sub_kind"])] += count

# Identify undeclared internal functions: every "name(" / "name$(" occurrence
# whose name is not a declared subroutine, parameter, local or dim.
# The comparison is case-insensitive, matching the case-insensitive usage
# count, so a declared "Foo" called as "foo(" is not reported as undeclared.
declared_names = {name.lower() for name in exclusion_set}

internal_numeric_functions = re.findall(r'\b(\w+)\(', program_code)
internal_string_functions = re.findall(r'\b(\w+)\$\(', program_code)

# Keys are (name, "numeric" | "string"); names keep the case used in the source.
internal_function_counter = Counter()
internal_function_counter.update(
    (func_name, "numeric")
    for func_name in internal_numeric_functions
    if func_name.lower() not in declared_names
)
internal_function_counter.update(
    (func_name + "$", "string")
    for func_name in internal_string_functions
    if (func_name + "$").lower() not in declared_names
)

# Order each tally from most to least used, ready for the HTML tables.
lowercase_sorted, uppercase_sorted, internal_sorted = (
    tally.most_common()
    for tally in (lowercase_counter, uppercase_counter, internal_function_counter)
)

# Debug Output: Display parsed subroutines, params, locals, and dims.
# Built with plain joins rather than nested f-strings: reusing the same quote
# character inside an f-string is a SyntaxError before Python 3.12.
print("\033[91mDeclared Subroutines with Params, Locals, and Dims:\033[0m")
for sub_name, sub_data in subs.items():
    params = sub_data["params"]
    local_vars = sub_data["locals"]
    dims = sub_data["dims"]

    # Signature line: name followed by "(p1, p2)" when parameters exist.
    params_str = "(" + ", ".join(params) + ")" if params else ""
    print(f"{sub_name}{params_str}")

    if local_vars:
        print("    local " + ", ".join(local_vars))
    if dims:
        # Each dim is shown as its name followed by its "(...)" dimension text.
        dim_list = ", ".join(name + spec["dimensions"] for name, spec in dims.items())
        print("    dim " + dim_list)
    print()  # Add empty line between subs for clarity

# Generate HTML report with lowercase, uppercase, and internal function sections.
# All text taken from the analysed source (names, parameters, locals, dims) is
# HTML-escaped. Strings are built with joins rather than nested f-strings,
# because reusing the same quote inside an f-string is a SyntaxError before
# Python 3.12.


def _sub_details_html(sub_name):
    """Return the escaped "details" cell content for one declared subroutine.

    The name and its parameter list come first; locals and dims, when present,
    follow on their own indented lines separated by ``<br>``.
    """
    record = subs[sub_name]
    line_break = "<br>&nbsp;&nbsp;&nbsp;&nbsp;"

    details = escape(sub_name)
    if record["params"]:
        details += escape("(" + ", ".join(record["params"]) + ")")
    if record["locals"]:
        details += line_break + escape("local " + ", ".join(record["locals"]))
    if record["dims"]:
        dim_list = ", ".join(name + spec["dimensions"] for name, spec in record["dims"].items())
        details += line_break + escape("dim " + dim_list)
    return details


def _sub_rows_html(sorted_counts):
    """Return the table rows for a list of ((name, kind), count) entries."""
    return "".join(
        f"<tr><td>{sub_kind}</td><td class='indent'>{_sub_details_html(sub_name)}</td><td>{count}</td></tr>"
        for (sub_name, sub_kind), count in sorted_counts
    )


html_parts = [
    """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Subroutine Usage Analysis</title>
    <style>
        body { font-family: Arial, sans-serif; }
        h1 { text-align: center; }
        table { width: 80%; margin: 20px auto; border-collapse: collapse; }
        th, td { padding: 8px 12px; border: 1px solid #ccc; text-align: left; }
        th { background-color: #f4f4f4; }
        .indent { padding-left: 20px; white-space: pre; }
    </style>
</head>
<body>
    <h1>Subroutine Usage Analysis</h1>
    <h2>Lowercase Subroutines</h2>
    <table>
        <tr><th>Type</th><th>Subroutine Details</th><th>Usage Count</th></tr>
""",
    _sub_rows_html(lowercase_sorted),
    """
    </table>
    <h2>Uppercase Subroutines</h2>
    <table>
        <tr><th>Type</th><th>Subroutine Details</th><th>Usage Count</th></tr>
""",
    _sub_rows_html(uppercase_sorted),
    """
    </table>
    <h2>Undeclared Internal Functions</h2>
    <table>
        <tr><th>Type</th><th>Function</th><th>Usage Count</th></tr>
""",
    "".join(
        f"<tr><td>{func_type}</td><td>{escape(func_name)}</td><td>{count}</td></tr>"
        for (func_name, func_type), count in internal_sorted
    ),
    """
    </table>
</body>
</html>
""",
]

# Assemble once instead of growing the string with repeated "+=".
html_content = "".join(html_parts)

# Write HTML to file in the current working directory.
output_file_path = os.path.join(os.getcwd(), "subroutine_usage_analysis.html")
# The page declares <meta charset="UTF-8">, so write it as UTF-8 explicitly
# instead of relying on the platform's default encoding.
with open(output_file_path, "w", encoding="utf-8") as file:
    file.write(html_content)

print(f"HTML report '{output_file_path}' generated successfully.")
