release: prepare v1.37.0 with excel-automation and capture-screen

2026-03-02 20:01:18 +08:00
parent 2896870061
commit 4f07976825
16 changed files with 1629 additions and 18 deletions
--- a/excel-automation/scripts/create_formatted_excel.py
+++ b/excel-automation/scripts/create_formatted_excel.py
@@ -0,0 +1,259 @@
+# /// script
+# requires-python = ">=3.11"
+# dependencies = ["openpyxl"]
+# ///
+"""
+Create a professionally formatted Excel workbook with investment banking
+standard styling.
+
+Usage:
+    uv run scripts/create_formatted_excel.py [output_path]
+
+This is a reusable template. Adapt the data section for your use case.
+"""
+
+import sys
+from pathlib import Path
+from openpyxl import Workbook
+from openpyxl.styles import (
+    Alignment,
+    Border,
+    Font,
+    PatternFill,
+    Side,
+)
+from openpyxl.formatting.rule import ColorScaleRule
+from openpyxl.utils import get_column_letter
+
+
+# ── Color Palette (Investment Banking Standard) ──────────────────────
+
+# Small private factories so each palette entry states only what differs.
+def _calibri(color, size=10, **traits):
+    """Return a Calibri Font in hex ``color``; ``traits`` forwards bold/italic."""
+    return Font(name="Calibri", color=color, size=size, **traits)
+
+
+def _solid(hex_color):
+    """Return a solid PatternFill whose foreground is ``hex_color``."""
+    return PatternFill("solid", fgColor=hex_color)
+
+
+# Fonts (10pt unless a size is given)
+BLUE_FONT = _calibri("0000FF")
+BLUE_FONT_BOLD = _calibri("0000FF", bold=True)
+BLACK_FONT = _calibri("000000")
+BLACK_FONT_BOLD = _calibri("000000", bold=True)
+GREEN_FONT = _calibri("008000")
+GREEN_FONT_BOLD = _calibri("008000", bold=True)
+WHITE_FONT_BOLD = _calibri("FFFFFF", bold=True)
+HEADER_FONT = _calibri("FFFFFF", size=12, bold=True)
+TITLE_FONT = _calibri("1F4E79", size=14, bold=True)
+SUBTITLE_FONT = _calibri("404040", italic=True)
+
+# Fills
+DARK_BLUE_FILL = _solid("4472C4")
+LIGHT_BLUE_FILL = _solid("D9E1F2")
+INPUT_GREEN_FILL = _solid("E2EFDA")
+WHITE_FILL = _solid("FFFFFF")
+LIGHT_GRAY_FILL = _solid("F2F2F2")
+
+# Sensitivity gradient fills (manual, for when conditional formatting isn't suitable)
+SENS_DEEP_RED = _solid("F4CCCC")
+SENS_LIGHT_RED = _solid("FCE4D6")
+SENS_NEUTRAL = _solid("FFF2CC")
+SENS_LIGHT_GREEN = _solid("D9EAD3")
+SENS_DEEP_GREEN = _solid("B6D7A8")
+
+# Borders
+_GRAY_THIN = Side(style="thin", color="B2B2B2")
+_BLACK = "000000"
+THIN_BORDER = Border(bottom=_GRAY_THIN)
+BOTTOM_MEDIUM = Border(bottom=Side(style="medium", color=_BLACK))
+BOTTOM_DOUBLE = Border(bottom=Side(style="double", color=_BLACK))
+ALL_THIN = Border(
+    left=_GRAY_THIN,
+    right=_GRAY_THIN,
+    top=_GRAY_THIN,
+    bottom=_GRAY_THIN,
+)
+
+# Alignment (all vertically centered; only the horizontal anchor differs)
+CENTER, RIGHT, LEFT = (
+    Alignment(horizontal=anchor, vertical="center")
+    for anchor in ("center", "right", "left")
+)
+
+
+# ── Helper Functions ─────────────────────────────────────────────────
+
+def apply_header_row(ws, row, labels, start_col=1):
+    """Write ``labels`` across ``row`` as a header band.
+
+    Labels fill consecutive columns beginning at ``start_col``; each cell gets
+    the bold white font, the dark blue fill and centered alignment.
+    """
+    for column, text in enumerate(labels, start=start_col):
+        header = ws.cell(row=row, column=column, value=text)
+        header.alignment = CENTER
+        header.fill = DARK_BLUE_FILL
+        header.font = WHITE_FONT_BOLD
+
+
+def apply_data_row(ws, row, values, start_col=1, font=None, number_format=None,
+                   fill=None, border=None):
+    """Write ``values`` into consecutive cells of ``row`` with uniform styling.
+
+    ``font`` falls back to the black body font. ``number_format``, ``fill``
+    and ``border`` are applied only when given. Numbers are right-aligned,
+    everything else is left-aligned.
+    """
+    if not font:
+        font = BLACK_FONT
+    # Optional attributes, applied in this order whenever a value was supplied.
+    optional_styles = (
+        ("number_format", number_format),
+        ("fill", fill),
+        ("border", border),
+    )
+    for column, value in enumerate(values, start=start_col):
+        target = ws.cell(row=row, column=column, value=value)
+        target.font = font
+        for attribute, setting in optional_styles:
+            if setting:
+                setattr(target, attribute, setting)
+        is_numeric = isinstance(value, (int, float))
+        target.alignment = RIGHT if is_numeric else LEFT
+
+
+def apply_input_cell(ws, row, col, value, number_format=None):
+    """Write ``value`` at (``row``, ``col``) styled as a user input.
+
+    The cell gets the blue font and the green input fill; ``number_format``
+    is set only when provided. Returns the styled cell.
+    """
+    input_cell = ws.cell(row=row, column=col, value=value)
+    input_cell.fill = INPUT_GREEN_FILL
+    input_cell.font = BLUE_FONT
+    if not number_format:
+        return input_cell
+    input_cell.number_format = number_format
+    return input_cell
+
+
+def add_sensitivity_table(ws, start_row, start_col, row_header, col_header,
+                          row_values, col_values, data_matrix,
+                          number_format='$#,##0'):
+    """
+    Create a sensitivity table with conditional formatting.
+
+    The grid's top-left corner sits at ``start_row``/``start_col``: the corner
+    cell holds the axis labels, the rest of the first row holds
+    ``col_values``, the rest of the first column holds ``row_values``, and the
+    interior holds ``data_matrix``.
+
+    Args:
+        ws: Worksheet
+        start_row/start_col: Top-left corner of the table
+        row_header/col_header: Labels for the axes; written together into the
+            corner cell as "<row_header> vs. <col_header>"
+        row_values: List of values for rows (e.g., WACC rates)
+        col_values: List of values for columns (e.g., growth rates)
+        data_matrix: 2D list of result values, one inner list per row value
+        number_format: Excel number format applied to every data cell
+    """
+    # Both axis labels share the corner cell. Writing them into the first
+    # value cell of each axis would be overwritten by the axis values below.
+    axis_label = " vs. ".join(str(h) for h in (row_header, col_header) if h)
+    corner = ws.cell(row=start_row, column=start_col, value=axis_label or None)
+    corner.font = BLACK_FONT_BOLD
+
+    # Column axis values across the first row
+    for j, cv in enumerate(col_values):
+        cell = ws.cell(row=start_row, column=start_col + 1 + j, value=cv)
+        cell.font = BLUE_FONT_BOLD
+        cell.alignment = CENTER
+
+    for i, rv in enumerate(row_values):
+        # Row axis value down the first column
+        cell = ws.cell(row=start_row + 1 + i, column=start_col, value=rv)
+        cell.font = BLUE_FONT_BOLD
+        cell.alignment = CENTER
+
+        # Result cells for this row
+        for j, dv in enumerate(data_matrix[i]):
+            cell = ws.cell(row=start_row + 1 + i, column=start_col + 1 + j, value=dv)
+            cell.font = BLACK_FONT
+            cell.number_format = number_format
+            cell.alignment = CENTER
+            cell.border = ALL_THIN
+
+    n_rows, n_cols = len(row_values), len(col_values)
+    if n_rows == 0 or n_cols == 0:
+        # An empty axis would yield an inverted range; nothing to color.
+        return
+
+    # Apply conditional formatting (red-yellow-green gradient) to the interior
+    data_range = (
+        f"{get_column_letter(start_col + 1)}{start_row + 1}:"
+        f"{get_column_letter(start_col + n_cols)}{start_row + n_rows}"
+    )
+    rule = ColorScaleRule(
+        start_type="min", start_color="F8696B",
+        mid_type="percentile", mid_value=50, mid_color="FFEB84",
+        end_type="max", end_color="63BE7B",
+    )
+    ws.conditional_formatting.add(data_range, rule)
+
+
+def auto_column_widths(ws, min_width=10, max_width=20):
+    """Auto-adjust column widths based on content.
+
+    Each column is sized to its widest cell plus 2 characters of padding,
+    clamped to ``[min_width, max_width]``.
+
+    East Asian wide and fullwidth characters (CJK ideographs, kana, Hangul,
+    fullwidth forms) are counted as 2 columns, as classified by
+    ``unicodedata.east_asian_width``; everything else counts as 1.
+    """
+    import unicodedata  # function-scope import: only this helper needs it
+
+    def display_width(text):
+        """Return the approximate on-screen width of ``text`` in columns."""
+        return sum(
+            2 if unicodedata.east_asian_width(ch) in ("W", "F") else 1
+            for ch in text
+        )
+
+    for col_cells in ws.columns:
+        col_letter = get_column_letter(col_cells[0].column)
+        # ``is not None`` so that 0 and False still count as content.
+        max_len = max(
+            (display_width(str(cell.value))
+             for cell in col_cells if cell.value is not None),
+            default=0,
+        )
+        ws.column_dimensions[col_letter].width = max(min_width, min(max_len + 2, max_width))
+
+
+# ── Example: Create a DCF Summary ───────────────────────────────────
+
+def create_example_workbook(output_path: str):
+    """Build the sample "DCF Summary" workbook and save it to ``output_path``.
+
+    The sheet contains a title block, a table of key assumptions, the implied
+    share price, and a WACC x terminal-growth sensitivity grid. Missing parent
+    directories of ``output_path`` are created before saving.
+    """
+    workbook = Workbook()
+    sheet = workbook.active
+    sheet.title = "DCF Summary"
+
+    # Title block
+    title = sheet.cell(row=1, column=1, value="DCF Valuation Summary")
+    title.font = TITLE_FONT
+    subtitle = sheet.cell(row=2, column=1, value="Example Company — Base Case")
+    subtitle.font = SUBTITLE_FONT
+
+    # Key assumptions: header on row 4, one assumption per row from row 5
+    apply_header_row(sheet, 4, ["Parameter", "Value", "Source"])
+    assumptions = [
+        ("WACC", 0.10, "Calculated"),
+        ("Terminal Growth Rate", 0.03, "Assumption"),
+        ("Shares Outstanding (M)", 2580, "10-K Filing"),
+        ("Net Debt ($M)", 28000, "Balance Sheet"),
+    ]
+    for row_idx, (param, value, source) in enumerate(assumptions, start=5):
+        # Floats below 1 are shown as percentages, everything else as integers.
+        is_rate = isinstance(value, float) and value < 1
+        sheet.cell(row=row_idx, column=1, value=param).font = BLACK_FONT
+        apply_input_cell(sheet, row_idx, 2, value,
+                         number_format='0.0%' if is_rate else '#,##0')
+        sheet.cell(row=row_idx, column=3, value=source).font = GREEN_FONT
+
+    # Separator line under the assumptions
+    for col in (1, 2, 3):
+        sheet.cell(row=9, column=col).border = BOTTOM_MEDIUM
+
+    # Valuation output
+    sheet.cell(row=10, column=1, value="Implied Share Price").font = BLACK_FONT_BOLD
+    price = sheet.cell(row=10, column=2, value=580)
+    price.font = BLACK_FONT_BOLD
+    price.number_format = '$#,##0'
+    price.border = BOTTOM_DOUBLE
+
+    # Sensitivity table
+    sheet.cell(row=12, column=1, value="Sensitivity Analysis").font = TITLE_FONT
+
+    table_row, table_col = 14, 1
+    wacc_values = [0.08, 0.09, 0.10, 0.11, 0.12]
+    growth_values = [0.01, 0.02, 0.03, 0.04, 0.05]
+    # Example data matrix (WACC rows x Growth cols)
+    data_matrix = [
+        [720, 780, 850, 940, 1050],
+        [640, 690, 740, 800, 870],
+        [570, 610, 650, 700, 750],
+        [510, 540, 580, 620, 660],
+        [460, 490, 520, 550, 580],
+    ]
+    add_sensitivity_table(
+        sheet, start_row=table_row, start_col=table_col,
+        row_header="WACC", col_header="Terminal Growth Rate",
+        row_values=wacc_values, col_values=growth_values,
+        data_matrix=data_matrix,
+    )
+
+    # Show both axes as percentages: WACC down column 1, growth across row 14
+    for offset in range(1, len(wacc_values) + 1):
+        sheet.cell(row=table_row + offset, column=table_col).number_format = '0.0%'
+    for offset in range(1, len(growth_values) + 1):
+        sheet.cell(row=table_row, column=table_col + offset).number_format = '0.0%'
+
+    auto_column_widths(sheet)
+    destination = Path(output_path)
+    destination.parent.mkdir(parents=True, exist_ok=True)
+    workbook.save(output_path)
+    print(f"Created: {output_path}")
+
+
+if __name__ == "__main__":
+    # First CLI argument, when present, overrides the default output file.
+    cli_args = sys.argv[1:]
+    create_example_workbook(cli_args[0] if cli_args else "example_output.xlsx")
--- a/excel-automation/scripts/parse_complex_excel.py
+++ b/excel-automation/scripts/parse_complex_excel.py
@@ -0,0 +1,278 @@
+# /// script
+# requires-python = ">=3.11"
+# dependencies = []
+# ///
+"""
+Parse complex xlsx/xlsm files using stdlib zipfile + xml.etree.
+
+No external dependencies required — uses only Python standard library.
+
+Usage:
+    uv run scripts/parse_complex_excel.py <excel_file> [sheet_name]
+    uv run scripts/parse_complex_excel.py <excel_file> --fix
+
+This handles files that openpyxl cannot open (corrupted DefinedNames,
+complex VBA macros, investment bank financial models).
+"""
+
+import json
+import re
+import subprocess
+import sys
+import xml.etree.ElementTree as ET
+import zipfile
+from pathlib import Path
+
+# XML namespaces used in Office Open XML
+# SpreadsheetML namespace: qualifies the sheet, row, c, v, si and
+# definedNames elements looked up by the functions below.
+MAIN_NS = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
+# Namespace of the ``id`` attribute read from each <sheet> element.
+REL_NS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships'
+# Namespace of the <Relationship> entries in xl/_rels/workbook.xml.rels.
+RELS_NS = 'http://schemas.openxmlformats.org/package/2006/relationships'
+
+
+def verify_format(file_path: str) -> str:
+    """Describe the actual on-disk format of ``file_path``.
+
+    The `file` command is used when it is available. If it cannot be run or
+    prints nothing, the first bytes of the file are inspected instead, so the
+    check also works on systems without `file`.
+
+    Returns:
+        A human-readable description. The fallback returns
+        ``'Zip archive data'`` for ZIP containers,
+        ``'Composite Document File V2 Document'`` for OLE2 containers,
+        ``'data'`` for anything else, or a ``'cannot open ...'`` message when
+        the file is unreadable.
+    """
+    try:
+        result = subprocess.run(
+            ['file', '--brief', file_path],
+            capture_output=True, text=True
+        )
+        description = result.stdout.strip()
+    except OSError:
+        # `file` is not installed or not executable on this platform.
+        description = ''
+    if description:
+        return description
+
+    # Fallback: sniff the magic number ourselves.
+    try:
+        with open(file_path, 'rb') as fh:
+            magic = fh.read(8)
+    except OSError as err:
+        return f"cannot open ({err})"
+    # Local file header, empty archive, and spanned archive signatures.
+    if magic.startswith((b'PK\x03\x04', b'PK\x05\x06', b'PK\x07\x08')):
+        return 'Zip archive data'
+    if magic == b'\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1':
+        return 'Composite Document File V2 Document'
+    return 'data'
+
+
+def _workbook_rel_targets(zf: zipfile.ZipFile) -> dict[str, str]:
+    """Map each relationship id of the workbook part to a ZIP member path.
+
+    Targets are resolved relative to the ``xl/`` folder that holds
+    workbook.xml. A target starting with ``/`` is taken as relative to the
+    package root, and ``.``/``..`` segments are collapsed.
+    """
+    import posixpath  # ZIP member names always use forward slashes
+
+    rels_xml = ET.fromstring(zf.read('xl/_rels/workbook.xml.rels'))
+    targets = {}
+    for rel in rels_xml.findall(f'{{{RELS_NS}}}Relationship'):
+        target = rel.get('Target') or ''
+        if target.startswith('/'):
+            member = posixpath.normpath(target.lstrip('/'))
+        else:
+            member = posixpath.normpath(posixpath.join('xl', target))
+        targets[rel.get('Id')] = member
+    return targets
+
+
+def _sheet_entries(zf: zipfile.ZipFile) -> list[tuple]:
+    """Return ``(name, rId, member_path_or_None)`` per <sheet>, in workbook order."""
+    wb_xml = ET.fromstring(zf.read('xl/workbook.xml'))
+    targets = _workbook_rel_targets(zf)
+    entries = []
+    for sheet_el in wb_xml.findall(f'.//{{{MAIN_NS}}}sheet'):
+        rid = sheet_el.get(f'{{{REL_NS}}}id')
+        entries.append((sheet_el.get('name'), rid, targets.get(rid)))
+    return entries
+
+
+def list_sheets(zf: zipfile.ZipFile) -> list[dict]:
+    """List all sheet names and their physical XML paths.
+
+    Returns:
+        One dict per sheet with keys ``name``, ``rId`` and ``path``;
+        ``path`` is ``'?'`` when the relationship id has no mapping.
+    """
+    return [
+        {'name': name, 'rId': rid, 'path': path if path is not None else '?'}
+        for name, rid, path in _sheet_entries(zf)
+    ]
+
+
+def get_sheet_path(zf: zipfile.ZipFile, sheet_name: str) -> str:
+    """Resolve a sheet name to its physical XML path inside the ZIP.
+
+    Raises:
+        ValueError: if no sheet with that name (and a relationship id)
+            exists, or if its relationship id has no entry in
+            workbook.xml.rels.
+    """
+    entries = _sheet_entries(zf)
+    # First sheet with a matching name wins.
+    match = next((entry for entry in entries if entry[0] == sheet_name), None)
+    if match is None or not match[1]:
+        available = [name for name, _rid, _path in entries]
+        raise ValueError(
+            f"Sheet '{sheet_name}' not found. Available: {available}"
+        )
+
+    _name, rid, path = match
+    if path is None:
+        raise ValueError(f"No file mapping for {rid}")
+    return path
+
+
+def build_shared_strings(zf: zipfile.ZipFile) -> list[str]:
+    """Build the shared strings lookup table.
+
+    Each <si> item contributes one string, assembled from its direct <t>
+    child and from the <t> of every rich-text run (<r>). Other children,
+    such as phonetic runs (<rPh>), and whitespace between elements are
+    ignored so they do not leak into the cell text.
+
+    Returns:
+        The strings in file order (index = shared-string id), or an empty
+        list when the package has no xl/sharedStrings.xml.
+    """
+    try:
+        raw = zf.read('xl/sharedStrings.xml')
+    except KeyError:
+        return []  # No shared strings in this file
+
+    t_tag = f'{{{MAIN_NS}}}t'
+    r_tag = f'{{{MAIN_NS}}}r'
+    shared = []
+    for si in ET.fromstring(raw).findall(f'{{{MAIN_NS}}}si'):
+        parts = []
+        for child in si:
+            if child.tag == t_tag:
+                parts.append(child.text or '')
+            elif child.tag == r_tag:
+                parts.extend(t.text or '' for t in child.findall(t_tag))
+        shared.append(''.join(parts))
+    return shared
+
+
+def parse_cell_ref(ref: str) -> tuple[str, int]:
+    """Split an A1-style reference into (column letters, row number).
+
+    ``'AB123'`` becomes ``('AB', 123)``. A reference that is not uppercase
+    letters followed by digits is returned unchanged with row ``0``.
+    """
+    found = re.match(r'^([A-Z]+)(\d+)$', ref)
+    if found:
+        letters, digits = found.groups()
+        return letters, int(digits)
+    return ref, 0
+
+
+def _column_letters(index: int) -> str:
+    """Convert a 1-based column index to letters (1 -> 'A', 27 -> 'AA')."""
+    letters = ''
+    while index > 0:
+        index, rem = divmod(index - 1, 26)
+        letters = chr(ord('A') + rem) + letters
+    return letters
+
+
+def _column_index(ref: str) -> int:
+    """Return the 1-based column index of an A1 reference, or 0 if unparseable."""
+    letters = ref.rstrip('0123456789')
+    if not (letters.isascii() and letters.isalpha() and letters.isupper()):
+        return 0
+    index = 0
+    for ch in letters:
+        index = index * 26 + (ord(ch) - ord('A') + 1)
+    return index
+
+
+def extract_cells(zf: zipfile.ZipFile, sheet_path: str,
+                  shared: list[str]) -> dict[str, int | float | bool | str]:
+    """Extract all cell values from a sheet XML.
+
+    Args:
+        zf: Open workbook archive.
+        sheet_path: ZIP member path of the worksheet XML.
+        shared: Shared-strings table, indexed by shared-string id.
+
+    Returns:
+        Mapping of cell reference (e.g. ``'B7'``) to value. Cells without a
+        value are omitted. Values are decoded by the cell's ``t`` attribute:
+        ``s`` -> shared string (``'[SSI:<idx>]'`` if the index is invalid),
+        ``inlineStr`` -> inline text, ``b`` -> bool, ``str``/``e``/``d`` ->
+        the stored text unchanged, anything else -> int or float when the text
+        is numeric, otherwise the raw text.
+
+    Cells (or rows) that omit the ``r`` attribute get a reference derived
+    from their position: one past the previous cell/row.
+    """
+    main = f'{{{MAIN_NS}}}'
+    sheet_xml = ET.fromstring(zf.read(sheet_path))
+
+    def inline_text(is_el):
+        """Join the <t> text of an inline string, including rich-text runs."""
+        parts = []
+        for child in is_el:
+            if child.tag == main + 't':
+                parts.append(child.text or '')
+            elif child.tag == main + 'r':
+                parts.extend(t.text or '' for t in child.findall(main + 't'))
+        return ''.join(parts)
+
+    data = {}
+    row_num = 0
+    for row in sheet_xml.findall(f'.//{main}row'):
+        row_attr = row.get('r', '')
+        row_num = int(row_attr) if row_attr.isdecimal() else row_num + 1
+
+        col_num = 0
+        for cell in row.findall(main + 'c'):
+            ref = cell.get('r')
+            if ref:
+                col_num = _column_index(ref) or col_num + 1
+            else:
+                col_num += 1
+                ref = f'{_column_letters(col_num)}{row_num}'
+
+            cell_type = cell.get('t')  # None means a plain number
+
+            if cell_type == 'inlineStr':
+                # Inline strings live in <is>, not in <v>.
+                is_el = cell.find(main + 'is')
+                text = inline_text(is_el) if is_el is not None else ''
+                if text:
+                    data[ref] = text
+                continue
+
+            val_el = cell.find(main + 'v')
+            if val_el is None or not val_el.text:
+                continue
+            raw = val_el.text
+
+            if cell_type == 's':
+                try:
+                    idx = int(raw)
+                except ValueError:
+                    data[ref] = f'[SSI:{raw}]'
+                    continue
+                # Lower bound matters: a negative index would wrap around.
+                data[ref] = shared[idx] if 0 <= idx < len(shared) else f'[SSI:{idx}]'
+            elif cell_type == 'b':
+                data[ref] = raw.strip().lower() not in ('0', 'false', '')
+            elif cell_type in ('str', 'e', 'd'):
+                # Formula strings, errors and ISO dates are text; keep e.g. "007".
+                data[ref] = raw
+            else:
+                try:
+                    num = float(raw)
+                except ValueError:
+                    data[ref] = raw
+                else:
+                    # is_integer() is False for inf/nan, so int() never overflows.
+                    data[ref] = int(num) if num.is_integer() else num
+
+    return data
+
+
+def extract_rows(cells: dict, start_row: int = 1,
+                 end_row: int | None = None) -> list[dict]:
+    """Organize cells into row-based structure for easier consumption.
+
+    Args:
+        cells: Mapping of cell reference to value.
+        start_row: First row number to include.
+        end_row: Last row number to include; ``None`` (or 0) means no upper
+            bound.
+
+    Returns:
+        ``{'row': <number>, 'cells': {ref: value, ...}}`` dicts in ascending
+        row order, covering only rows that contain at least one cell.
+        References that ``parse_cell_ref`` cannot parse are ignored.
+    """
+    # Group once by row number instead of rescanning every cell per row.
+    by_row: dict[int, dict] = {}
+    for ref, val in cells.items():
+        _, row_num = parse_cell_ref(ref)
+        if row_num > 0:
+            by_row.setdefault(row_num, {})[ref] = val
+
+    if not by_row:
+        return []
+
+    start = max(start_row, min(by_row))
+    end = min(end_row, max(by_row)) if end_row else max(by_row)
+
+    return [
+        {'row': r, 'cells': by_row[r]}
+        for r in sorted(by_row)
+        if start <= r <= end
+    ]
+
+
+def fix_defined_names(input_path: str, output_path: str) -> int:
+    """
+    Remove corrupted DefinedNames entries (containing "Formula removed")
+    and repackage the file.
+
+    The archive is extracted to a temporary directory, xl/workbook.xml is
+    edited there, and every extracted file is written to a new ZIP at
+    ``output_path``. workbook.xml is rewritten only when something was
+    removed; otherwise the original bytes are repackaged unchanged.
+
+    Side effect: the namespace prefixes declared in workbook.xml are
+    registered globally with ElementTree so the rewritten file keeps them
+    instead of generated ``ns0:``-style prefixes.
+
+    Returns the number of removed entries.
+    """
+    import tempfile
+
+    with tempfile.TemporaryDirectory() as tmp_str:
+        tmp = Path(tmp_str)
+
+        # Extract
+        with zipfile.ZipFile(input_path, 'r') as zf:
+            zf.extractall(tmp)
+
+        wb_path = tmp / 'xl' / 'workbook.xml'
+
+        # Register the document's own prefixes before serializing.
+        with wb_path.open('rb') as fh:
+            for _event, (prefix, uri) in ET.iterparse(fh, events=('start-ns',)):
+                try:
+                    ET.register_namespace(prefix, uri)
+                except ValueError:
+                    pass  # "ns<digits>" prefixes are reserved by ElementTree
+
+        # Fix workbook.xml
+        tree = ET.parse(wb_path)
+        root = tree.getroot()
+
+        defined_names = root.find(f'.//{{{MAIN_NS}}}definedNames')
+        removed = 0
+        if defined_names is not None:
+            for name in list(defined_names):
+                if name.text and "Formula removed" in name.text:
+                    defined_names.remove(name)
+                    removed += 1
+
+        if removed:
+            # NOTE(review): ElementTree emits declarations only for namespaces
+            # used in tag/attribute names; prefixes referenced solely inside
+            # attribute values may be dropped. Confirm the output opens in Excel.
+            tree.write(wb_path, encoding='utf-8', xml_declaration=True)
+
+        # Repackage (sorted so member order does not depend on the filesystem)
+        with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zf:
+            for fp in sorted(tmp.rglob('*')):
+                if fp.is_file():
+                    zf.write(fp, fp.relative_to(tmp).as_posix())
+
+    return removed
+
+
+# ── CLI Entry Point ──────────────────────────────────────────────────
+
+def main():
+    """Command-line entry point.
+
+    With only a file argument, prints the detected format and the sheet list.
+    With a sheet name, also prints up to 8 cells from each of the first 20
+    rows of that sheet. With ``--fix``, writes a ``*_fixed`` copy with
+    corrupted DefinedNames removed.
+
+    Exits with status 1 on bad usage, a missing file, a non-ZIP format, an
+    unreadable archive or an unknown sheet; ``--fix`` exits with status 0.
+    """
+    if len(sys.argv) < 2:
+        print("Usage: parse_complex_excel.py <excel_file> [sheet_name]")
+        print("\nExamples:")
+        print("  parse_complex_excel.py model.xlsm           # List all sheets")
+        print("  parse_complex_excel.py model.xlsm DCF       # Extract DCF sheet")
+        print("  parse_complex_excel.py model.xlsm --fix     # Fix corrupted names")
+        sys.exit(1)
+
+    file_path = sys.argv[1]
+    path = Path(file_path)
+
+    if not path.exists():
+        print(f"File not found: {file_path}")
+        sys.exit(1)
+
+    # Verify format
+    fmt = verify_format(file_path)
+    print(f"File: {path.name}")
+    print(f"Format: {fmt}")
+
+    # "Microsoft Excel 2007+" = ZIP-based xlsx/xlsm
+    # "Zip archive" = generic ZIP (also valid)
+    # "Composite Document File" = old BIFF .xls format
+    fmt_lower = fmt.lower()
+    is_zip_based = any(kw in fmt_lower for kw in ['zip', 'excel 2007', 'ooxml'])
+    if not is_zip_based:
+        print("WARNING: File is not ZIP-based xlsx/xlsm.")
+        if 'composite' in fmt_lower or 'biff' in fmt_lower:
+            print("This appears to be an old .xls (BIFF format). Use xlrd instead.")
+        else:
+            print("Unexpected format. If it should be xlsx/xlsm, check the file.")
+        sys.exit(1)
+
+    # Handle --fix flag
+    if len(sys.argv) > 2 and sys.argv[2] == '--fix':
+        out_path = str(path.with_stem(path.stem + '_fixed'))
+        removed = fix_defined_names(file_path, out_path)
+        print(f"Removed {removed} corrupted DefinedNames entries.")
+        print(f"Fixed file: {out_path}")
+        sys.exit(0)
+
+    def column_order(item):
+        """Sort key giving spreadsheet column order (A..Z before AA..)."""
+        col, row = parse_cell_ref(item[0])
+        return row, len(col), col
+
+    try:
+        with zipfile.ZipFile(file_path, 'r') as zf:
+            # List sheets
+            sheets = list_sheets(zf)
+            print(f"\nSheets ({len(sheets)}):")
+            for i, s in enumerate(sheets, 1):
+                print(f"  {i}. {s['name']} → {s['path']}")
+
+            # If sheet name given, extract it
+            if len(sys.argv) > 2:
+                sheet_name = sys.argv[2]
+                print(f"\nExtracting sheet: {sheet_name}")
+
+                sheet_path = get_sheet_path(zf, sheet_name)
+                shared = build_shared_strings(zf)
+                cells = extract_cells(zf, sheet_path, shared)
+
+                print(f"Total cells: {len(cells)}")
+
+                # Show first 20 rows
+                rows = extract_rows(cells, start_row=1, end_row=20)
+                for row in rows:
+                    print(f"  Row {row['row']:3d}: ", end="")
+                    items = sorted(row['cells'].items(), key=column_order)
+                    for ref, val in items[:8]:
+                        val_str = str(val)[:25]
+                        print(f"{ref}={val_str}  ", end="")
+                    print()
+    except (zipfile.BadZipFile, KeyError, ValueError) as err:
+        # Corrupt archive, missing workbook part, or unknown sheet name:
+        # report it like the other CLI errors instead of a traceback.
+        print(f"Error: {err}")
+        sys.exit(1)
+
+
+# Run the CLI only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()