release: prepare v1.37.0 with excel-automation and capture-screen

This commit is contained in:
daymade
2026-03-02 20:01:18 +08:00
parent 2896870061
commit 4f07976825
16 changed files with 1629 additions and 18 deletions

View File

@@ -0,0 +1,259 @@
# /// script
# requires-python = ">=3.11"
# dependencies = ["openpyxl"]
# ///
"""
Create a professionally formatted Excel workbook with investment banking
standard styling.
Usage:
uv run scripts/create_formatted_excel.py [output_path]
This is a reusable template. Adapt the data section for your use case.
"""
import sys
from pathlib import Path
from openpyxl import Workbook
from openpyxl.styles import (
Alignment,
Border,
Font,
PatternFill,
Side,
)
from openpyxl.formatting.rule import ColorScaleRule
from openpyxl.utils import get_column_letter
# ── Color Palette (Investment Banking Standard) ──────────────────────
# Module-level style objects shared by the helper functions below. All fonts
# are Calibri; most are size 10 and differ only in color / weight.
# Fonts
# Blue: used by apply_input_cell() for user-editable inputs and by
# add_sensitivity_table() for the axis values.
BLUE_FONT = Font(color="0000FF", size=10, name="Calibri")
BLUE_FONT_BOLD = Font(color="0000FF", size=10, name="Calibri", bold=True)
# Black: default font for ordinary data (see apply_data_row()).
BLACK_FONT = Font(color="000000", size=10, name="Calibri")
BLACK_FONT_BOLD = Font(color="000000", size=10, name="Calibri", bold=True)
# Green: the example workbook uses it for the "Source" column.
GREEN_FONT = Font(color="008000", size=10, name="Calibri")
GREEN_FONT_BOLD = Font(color="008000", size=10, name="Calibri", bold=True)
# White bold: text on the dark-blue header band (see apply_header_row()).
WHITE_FONT_BOLD = Font(color="FFFFFF", size=10, name="Calibri", bold=True)
# Larger (size 12) white header variant; not referenced by the helpers visible
# in this file — presumably kept for adapting the template.
HEADER_FONT = Font(color="FFFFFF", size=12, name="Calibri", bold=True)
# Sheet / section titles (size 14) and the italic subtitle line beneath them.
TITLE_FONT = Font(color="1F4E79", size=14, name="Calibri", bold=True)
SUBTITLE_FONT = Font(color="404040", size=10, name="Calibri", italic=True)
# Fills
# Solid background fills; the hex strings are RGB colors.
DARK_BLUE_FILL = PatternFill("solid", fgColor="4472C4")  # header band
LIGHT_BLUE_FILL = PatternFill("solid", fgColor="D9E1F2")
INPUT_GREEN_FILL = PatternFill("solid", fgColor="E2EFDA")  # input cells
WHITE_FILL = PatternFill("solid", fgColor="FFFFFF")
LIGHT_GRAY_FILL = PatternFill("solid", fgColor="F2F2F2")
# Sensitivity gradient fills (manual, for when conditional formatting isn't suitable)
# Five steps from red through a neutral yellow to green.
SENS_DEEP_RED = PatternFill("solid", fgColor="F4CCCC")
SENS_LIGHT_RED = PatternFill("solid", fgColor="FCE4D6")
SENS_NEUTRAL = PatternFill("solid", fgColor="FFF2CC")
SENS_LIGHT_GREEN = PatternFill("solid", fgColor="D9EAD3")
SENS_DEEP_GREEN = PatternFill("solid", fgColor="B6D7A8")
# Borders
# The first three draw a bottom edge only; ALL_THIN boxes the whole cell.
THIN_BORDER = Border(bottom=Side(style="thin", color="B2B2B2"))
BOTTOM_MEDIUM = Border(bottom=Side(style="medium", color="000000"))  # section separator
BOTTOM_DOUBLE = Border(bottom=Side(style="double", color="000000"))  # under the headline output
ALL_THIN = Border(
    left=Side(style="thin", color="B2B2B2"),
    right=Side(style="thin", color="B2B2B2"),
    top=Side(style="thin", color="B2B2B2"),
    bottom=Side(style="thin", color="B2B2B2"),
)
# Alignment
# Horizontal variants; every one is vertically centered.
CENTER = Alignment(horizontal="center", vertical="center")
RIGHT = Alignment(horizontal="right", vertical="center")
LEFT = Alignment(horizontal="left", vertical="center")
# ── Helper Functions ─────────────────────────────────────────────────
def apply_header_row(ws, row, labels, start_col=1):
    """Write ``labels`` left-to-right on ``row`` as a header band.

    Each label goes into its own cell, beginning at ``start_col``, and is
    given the white bold font, dark-blue fill and centered alignment.
    """
    for column, text in enumerate(labels, start=start_col):
        header = ws.cell(row=row, column=column, value=text)
        header.alignment = CENTER
        header.fill = DARK_BLUE_FILL
        header.font = WHITE_FONT_BOLD
def apply_data_row(ws, row, values, start_col=1, font=None, number_format=None,
                   fill=None, border=None):
    """Write ``values`` across ``row`` with uniform styling.

    ``font`` falls back to the default black font. ``number_format``,
    ``fill`` and ``border`` are applied only when a truthy value is given.
    Numeric values (``int``/``float``) are right-aligned; everything else is
    left-aligned.
    """
    row_font = font or BLACK_FONT
    # Optional style attributes, paired with the cell attribute they set.
    optional_styles = (
        ("number_format", number_format),
        ("fill", fill),
        ("border", border),
    )
    for offset, value in enumerate(values):
        target = ws.cell(row=row, column=start_col + offset, value=value)
        target.font = row_font
        for attribute, setting in optional_styles:
            if setting:
                setattr(target, attribute, setting)
        if isinstance(value, (int, float)):
            target.alignment = RIGHT
        else:
            target.alignment = LEFT
def apply_input_cell(ws, row, col, value, number_format=None):
    """Write ``value`` at (``row``, ``col``) styled as a user input.

    Input cells get the blue font and the light-green fill. A truthy
    ``number_format`` is applied as well. The styled cell is returned.
    """
    input_cell = ws.cell(row=row, column=col, value=value)
    input_cell.fill = INPUT_GREEN_FILL
    input_cell.font = BLUE_FONT
    if not number_format:
        return input_cell
    input_cell.number_format = number_format
    return input_cell
def add_sensitivity_table(ws, start_row, start_col, row_header, col_header,
                          row_values, col_values, data_matrix,
                          number_format='$#,##0'):
    """
    Create a sensitivity table with conditional formatting.

    Layout (relative to the top-left corner):

    * corner cell (start_row, start_col): the axis labels, "row / col"
    * row ``start_row``, columns ``start_col + 1 ...``: ``col_values``
    * column ``start_col``, rows ``start_row + 1 ...``: ``row_values``
    * the remaining grid: ``data_matrix``

    Args:
        ws: Worksheet
        start_row/start_col: Top-left corner of the table
        row_header/col_header: Labels for the axes
        row_values: List of values for rows (e.g., WACC rates)
        col_values: List of values for columns (e.g., growth rates)
        data_matrix: 2D list of result values, one inner list per row value
        number_format: Number format applied to every result cell

    Raises:
        IndexError: if ``data_matrix`` has fewer rows than ``row_values``.
    """
    first_data_row = start_row + 1
    first_data_col = start_col + 1

    # Axis labels live in the corner cell. Writing them into the first value
    # cell of each axis (as was done before) lets the axis values overwrite
    # them, so the labels never showed up in the workbook.
    corner_label = " / ".join(str(h) for h in (row_header, col_header) if h)
    if corner_label:
        corner = ws.cell(row=start_row, column=start_col, value=corner_label)
        corner.font = BLACK_FONT_BOLD
        corner.alignment = CENTER

    # Column axis values
    for j, cv in enumerate(col_values):
        cell = ws.cell(row=start_row, column=first_data_col + j, value=cv)
        cell.font = BLUE_FONT_BOLD
        cell.alignment = CENTER

    # Row axis values and the result grid
    for i, rv in enumerate(row_values):
        cell = ws.cell(row=first_data_row + i, column=start_col, value=rv)
        cell.font = BLUE_FONT_BOLD
        cell.alignment = CENTER
        for j, dv in enumerate(data_matrix[i]):
            cell = ws.cell(row=first_data_row + i, column=first_data_col + j, value=dv)
            cell.font = BLACK_FONT
            cell.number_format = number_format
            cell.alignment = CENTER
            cell.border = ALL_THIN

    # With an empty axis the range below would be inverted (e.g. "B15:A14"),
    # so there is nothing to format.
    if not row_values or not col_values:
        return

    # Apply conditional formatting (red-yellow-green gradient)
    data_range = (
        f"{get_column_letter(first_data_col)}{first_data_row}:"
        f"{get_column_letter(start_col + len(col_values))}{start_row + len(row_values)}"
    )
    rule = ColorScaleRule(
        start_type="min", start_color="F8696B",
        mid_type="percentile", mid_value=50, mid_color="FFEB84",
        end_type="max", end_color="63BE7B",
    )
    ws.conditional_formatting.add(data_range, rule)
def auto_column_widths(ws, min_width=10, max_width=20):
    """Auto-adjust column widths based on content.

    Each column is sized to its widest cell text plus 2 characters of
    padding, clamped to ``[min_width, max_width]``.

    East Asian wide and fullwidth characters (CJK ideographs, kana, Hangul,
    fullwidth forms, ...) occupy ~2 character widths in Excel, so they are
    counted as 2 instead of 1 to avoid truncated columns.

    Args:
        ws: Worksheet whose column dimensions are updated in place.
        min_width: Smallest width any column may get.
        max_width: Largest width any column may get.
    """
    # Function-scope import keeps this helper self-contained.
    from unicodedata import east_asian_width

    for col_cells in ws.columns:
        widest = 0
        for cell in col_cells:
            # Only truly empty cells are skipped; 0 and False are real content.
            if cell.value is None:
                continue
            text = str(cell.value)
            # "W" (wide) and "F" (fullwidth) render at double width.
            width = sum(2 if east_asian_width(ch) in ('W', 'F') else 1
                        for ch in text)
            widest = max(widest, width)
        col_letter = get_column_letter(col_cells[0].column)
        ws.column_dimensions[col_letter].width = max(
            min_width, min(widest + 2, max_width)
        )
# ── Example: Create a DCF Summary ───────────────────────────────────
def create_example_workbook(output_path: str):
    """Build the sample "DCF Summary" workbook and save it to ``output_path``.

    The sheet holds a title block, a small table of key assumptions styled
    as inputs, the implied share price, and a WACC x terminal-growth
    sensitivity table. Missing parent directories of ``output_path`` are
    created before saving.
    """
    workbook = Workbook()
    sheet = workbook.active
    sheet.title = "DCF Summary"

    # Title block
    title_cell = sheet.cell(row=1, column=1, value="DCF Valuation Summary")
    title_cell.font = TITLE_FONT
    subtitle_cell = sheet.cell(row=2, column=1, value="Example Company — Base Case")
    subtitle_cell.font = SUBTITLE_FONT

    # Key assumptions: header on row 4, data from row 5
    apply_header_row(sheet, 4, ["Parameter", "Value", "Source"])
    assumptions = [
        ("WACC", 0.10, "Calculated"),
        ("Terminal Growth Rate", 0.03, "Assumption"),
        ("Shares Outstanding (M)", 2580, "10-K Filing"),
        ("Net Debt ($M)", 28000, "Balance Sheet"),
    ]
    for row_idx, (param, value, source) in enumerate(assumptions, start=5):
        sheet.cell(row=row_idx, column=1, value=param).font = BLACK_FONT
        # Fractions below 1 are rates; everything else is a plain quantity.
        if isinstance(value, float) and value < 1:
            value_format = '0.0%'
        else:
            value_format = '#,##0'
        apply_input_cell(sheet, row_idx, 2, value, number_format=value_format)
        sheet.cell(row=row_idx, column=3, value=source).font = GREEN_FONT

    # Separator line under the assumptions
    for col in (1, 2, 3):
        sheet.cell(row=9, column=col).border = BOTTOM_MEDIUM

    # Valuation output
    sheet.cell(row=10, column=1, value="Implied Share Price").font = BLACK_FONT_BOLD
    price_cell = sheet.cell(row=10, column=2, value=580)
    price_cell.font = BLACK_FONT_BOLD
    price_cell.number_format = '$#,##0'
    price_cell.border = BOTTOM_DOUBLE

    # Sensitivity table
    sheet.cell(row=12, column=1, value="Sensitivity Analysis").font = TITLE_FONT
    table_row = 14
    table_col = 1
    wacc_values = [0.08, 0.09, 0.10, 0.11, 0.12]
    growth_values = [0.01, 0.02, 0.03, 0.04, 0.05]
    # Example data matrix (WACC rows x Growth cols)
    data_matrix = [
        [720, 780, 850, 940, 1050],
        [640, 690, 740, 800, 870],
        [570, 610, 650, 700, 750],
        [510, 540, 580, 620, 660],
        [460, 490, 520, 550, 580],
    ]
    add_sensitivity_table(
        sheet, start_row=table_row, start_col=table_col,
        row_header="WACC", col_header="Terminal Growth Rate",
        row_values=wacc_values, col_values=growth_values,
        data_matrix=data_matrix,
    )

    # Show both axes as percentages: WACC runs down the first column,
    # growth rates run across the table's first row.
    for offset in range(len(wacc_values)):
        axis_cell = sheet.cell(row=table_row + 1 + offset, column=table_col)
        axis_cell.number_format = '0.0%'
    for offset in range(len(growth_values)):
        axis_cell = sheet.cell(row=table_row, column=table_col + 1 + offset)
        axis_cell.number_format = '0.0%'

    auto_column_widths(sheet)

    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    workbook.save(output_path)
    print(f"Created: {output_path}")


if __name__ == "__main__":
    # The optional first CLI argument overrides the default output file.
    if len(sys.argv) > 1:
        output = sys.argv[1]
    else:
        output = "example_output.xlsx"
    create_example_workbook(output)

View File

@@ -0,0 +1,278 @@
# /// script
# requires-python = ">=3.11"
# dependencies = []
# ///
"""
Parse complex xlsx/xlsm files using stdlib zipfile + xml.etree.
No external dependencies required — uses only Python standard library.
Usage:
uv run scripts/parse_complex_excel.py <excel_file> [sheet_name]
This handles files that openpyxl cannot open (corrupted DefinedNames,
complex VBA macros, investment bank financial models).
"""
import json
import re
import subprocess
import sys
import xml.etree.ElementTree as ET
import zipfile
from pathlib import Path
# XML namespaces used in Office Open XML
# MAIN_NS:  SpreadsheetML elements (workbook, sheet, row, c, v, si, ...).
# REL_NS:   namespace of the ``id`` attribute on <sheet> that carries the
#           relationship id (read in list_sheets / get_sheet_path).
# RELS_NS:  <Relationship> elements inside xl/_rels/workbook.xml.rels.
MAIN_NS = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
REL_NS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships'
RELS_NS = 'http://schemas.openxmlformats.org/package/2006/relationships'
def verify_format(file_path: str) -> str:
    """Describe the actual on-disk format of ``file_path``.

    The `file` command is used when it is available. On systems without it
    (or when it prints nothing) the leading magic bytes are inspected
    instead, and a description in the same wording as `file` is returned, so
    callers can keep matching on keywords such as "zip" or "composite".

    Returns:
        A one-line, human-readable format description.
    """
    try:
        result = subprocess.run(
            ['file', '--brief', file_path],
            capture_output=True, text=True
        )
    except OSError:
        # `file` is not installed / not executable (typical on Windows).
        description = ''
    else:
        description = result.stdout.strip()
    if description:
        return description

    # Fallback: sniff the file signature ourselves.
    try:
        with open(file_path, 'rb') as fh:
            magic = fh.read(8)
    except OSError as exc:
        return f"cannot open `{file_path}' ({exc.strerror or exc})"
    if magic[:4] in (b'PK\x03\x04', b'PK\x05\x06', b'PK\x07\x08'):
        return 'Zip archive data'
    if magic == b'\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1':
        return 'Composite Document File V2 Document'
    return 'empty' if not magic else 'data'
def list_sheets(zf: zipfile.ZipFile) -> list[dict]:
    """List all sheet names and their physical XML paths.

    Sheet names and relationship ids come from ``xl/workbook.xml``; the ids
    are resolved to archive member names through
    ``xl/_rels/workbook.xml.rels``.

    Returns:
        One dict per sheet, in workbook order, with keys ``name``, ``rId``
        and ``path``. ``path`` is ``'?'`` when the id has no relationship.
    """
    wb_xml = ET.fromstring(zf.read('xl/workbook.xml'))
    rels_xml = ET.fromstring(zf.read('xl/_rels/workbook.xml.rels'))

    rid_to_path = {}
    for rel in rels_xml.findall(f'{{{RELS_NS}}}Relationship'):
        target = rel.get('Target') or ''
        if target.startswith('/'):
            # Absolute part name ("/xl/worksheets/sheet1.xml"): already
            # rooted at the package, so only the leading slash goes.
            member = target.lstrip('/')
        else:
            # Relative targets are relative to the xl/ folder of workbook.xml.
            member = 'xl/' + target
        rid_to_path[rel.get('Id')] = member

    sheets = []
    for sheet_el in wb_xml.findall(f'.//{{{MAIN_NS}}}sheet'):
        rid = sheet_el.get(f'{{{REL_NS}}}id')
        sheets.append({
            'name': sheet_el.get('name'),
            'rId': rid,
            'path': rid_to_path.get(rid, '?'),
        })
    return sheets
def get_sheet_path(zf: zipfile.ZipFile, sheet_name: str) -> str:
    """Resolve a sheet name to its physical XML path inside the ZIP.

    Raises:
        ValueError: if no sheet has that name, or its relationship id has
            no entry in ``xl/_rels/workbook.xml.rels``.
    """
    # Step 1: workbook.xml — find rId for the named sheet
    wb_xml = ET.fromstring(zf.read('xl/workbook.xml'))
    sheets = wb_xml.findall(f'.//{{{MAIN_NS}}}sheet')
    rid = next(
        (s.get(f'{{{REL_NS}}}id') for s in sheets if s.get('name') == sheet_name),
        None,
    )
    if not rid:
        available = [s.get('name') for s in sheets]
        raise ValueError(
            f"Sheet '{sheet_name}' not found. Available: {available}"
        )

    # Step 2: workbook.xml.rels — map rId to file path
    rels_xml = ET.fromstring(zf.read('xl/_rels/workbook.xml.rels'))
    for rel in rels_xml.findall(f'{{{RELS_NS}}}Relationship'):
        if rel.get('Id') != rid:
            continue
        target = rel.get('Target') or ''
        if target.startswith('/'):
            # Absolute part name: already rooted at the package, so
            # prefixing "xl/" would yield a member that does not exist.
            return target.lstrip('/')
        # Relative targets are relative to the xl/ folder of workbook.xml.
        return 'xl/' + target
    raise ValueError(f"No file mapping for {rid}")
def build_shared_strings(zf: zipfile.ZipFile) -> list[str]:
    """Build the shared strings lookup table.

    Returns the text of every ``<si>`` item of ``xl/sharedStrings.xml`` in
    document order, or an empty list when the workbook has no such part.

    Only the visible text is collected: a plain ``<t>`` child, or the
    ``<t>`` of each rich-text run ``<r>``. Phonetic runs (``<rPh>``) and
    whitespace between elements are not part of the cell text and are
    ignored.
    """
    try:
        raw = zf.read('xl/sharedStrings.xml')
    except KeyError:
        return []  # No shared strings in this file

    text_tag = f'{{{MAIN_NS}}}t'
    run_tag = f'{{{MAIN_NS}}}r'
    shared = []
    for si in ET.fromstring(raw).findall(f'{{{MAIN_NS}}}si'):
        parts = []
        for child in si:
            if child.tag == text_tag:
                parts.append(child.text or '')
            elif child.tag == run_tag:
                parts.extend(t.text or '' for t in child.findall(text_tag))
        shared.append(''.join(parts))
    return shared
def parse_cell_ref(ref: str) -> tuple[str, int]:
    """Split an A1-style reference into column letters and row number.

    'AB123' becomes ('AB', 123). A reference that does not consist of
    uppercase letters followed by digits is returned unchanged with row 0.
    """
    parsed = re.match(r'^([A-Z]+)(\d+)$', ref)
    if parsed is None:
        return ref, 0
    letters, digits = parsed.groups()
    return letters, int(digits)
def _inline_text(container: ET.Element) -> str:
    """Return the visible text of an inline-string element (<t> and <r>/<t>)."""
    text_tag = f'{{{MAIN_NS}}}t'
    run_tag = f'{{{MAIN_NS}}}r'
    parts = []
    for child in container:
        if child.tag == text_tag:
            parts.append(child.text or '')
        elif child.tag == run_tag:
            parts.extend(t.text or '' for t in child.findall(text_tag))
    return ''.join(parts)


def extract_cells(zf: zipfile.ZipFile, sheet_path: str,
                  shared: list[str]) -> dict[str, str | int | float | bool]:
    """Extract all cell values from a sheet XML.

    Args:
        zf: Open workbook archive.
        sheet_path: Archive member name of the worksheet XML.
        shared: Shared-strings table (see build_shared_strings).

    Returns:
        Mapping of cell reference (e.g. ``'B7'``) to value. Cells without a
        value, and cells without an ``r`` reference, are omitted.

    Values are decoded by the cell's ``t`` attribute:

    * ``s``          index into ``shared`` (``'[SSI:n]'`` if out of range)
    * ``inlineStr``  text stored inline in ``<is>``
    * ``b``          boolean
    * ``str``/``e``/``d``  kept as text (formula string, error, ISO date)
    * otherwise      number; whole numbers become ``int``
    """
    sheet_xml = ET.fromstring(zf.read(sheet_path))
    value_tag = f'{{{MAIN_NS}}}v'
    inline_tag = f'{{{MAIN_NS}}}is'
    cell_tag = f'{{{MAIN_NS}}}c'

    data = {}
    for row in sheet_xml.findall(f'.//{{{MAIN_NS}}}row'):
        for cell in row.findall(cell_tag):
            ref = cell.get('r')
            if not ref:
                # Without a reference the value cannot be addressed.
                continue
            cell_type = cell.get('t')

            if cell_type == 'inlineStr':
                # Inline strings have no <v>; the text lives under <is>.
                inline_el = cell.find(inline_tag)
                text = _inline_text(inline_el) if inline_el is not None else ''
                if text:
                    data[ref] = text
                continue

            val_el = cell.find(value_tag)
            if val_el is None or not val_el.text:
                continue
            raw = val_el.text

            if cell_type == 's':
                try:
                    idx = int(raw)
                except ValueError:
                    data[ref] = raw
                    continue
                # 0 <= idx guards against negative indexes wrapping around.
                data[ref] = shared[idx] if 0 <= idx < len(shared) else f'[SSI:{idx}]'
            elif cell_type == 'b':
                data[ref] = bool(int(raw))
            elif cell_type in ('str', 'e', 'd'):
                # Text results must stay text: "007" is not the number 7.
                data[ref] = raw
            else:
                try:
                    num = float(raw)
                except ValueError:
                    data[ref] = raw
                else:
                    # is_integer() is False for inf/nan, so int() cannot fail.
                    data[ref] = int(num) if num.is_integer() else num
    return data
def extract_rows(cells: dict, start_row: int = 1,
                 end_row: int | None = None) -> list[dict]:
    """Organize cells into row-based structure for easier consumption.

    Args:
        cells: Mapping of cell reference to value (see extract_cells).
        start_row: First row to include (1-based).
        end_row: Last row to include; ``None`` (or 0) means "through the
            last populated row".

    Returns:
        A list of ``{'row': n, 'cells': {ref: value, ...}}`` dicts in
        ascending row order, containing only rows that have cells.
        References that parse_cell_ref cannot parse are ignored.
    """
    # Group once by row number instead of rescanning every cell per row.
    by_row: dict[int, dict] = {}
    for ref, val in cells.items():
        row_num = parse_cell_ref(ref)[1]
        if row_num > 0:
            by_row.setdefault(row_num, {})[ref] = val
    if not by_row:
        return []

    first = max(start_row, min(by_row))
    last = min(end_row, max(by_row)) if end_row else max(by_row)
    return [
        {'row': r, 'cells': by_row[r]}
        for r in sorted(by_row)
        if first <= r <= last
    ]
def fix_defined_names(input_path: str, output_path: str) -> int:
    """
    Remove corrupted DefinedNames entries (containing "Formula removed")
    and repackage the file.

    The archive is unpacked to a temporary directory, ``xl/workbook.xml`` is
    edited, and everything is zipped again to ``output_path``. The workbook
    XML is rewritten only when at least one entry was removed, so an
    already-clean file is repackaged with its original workbook.xml.

    Note: the namespace prefixes found in workbook.xml are registered with
    ElementTree's process-wide prefix registry so they survive rewriting.

    Returns the number of removed entries.
    """
    import tempfile

    with tempfile.TemporaryDirectory() as tmp_str:
        tmp = Path(tmp_str)

        # Extract
        with zipfile.ZipFile(input_path, 'r') as zf:
            zf.extractall(tmp)

        # Fix workbook.xml
        wb_path = tmp / 'xl' / 'workbook.xml'

        # Keep the document's own prefixes (including the default namespace);
        # otherwise ElementTree serializes every element as ns0:, ns1:, ...
        for _event, (prefix, uri) in ET.iterparse(wb_path, events=('start-ns',)):
            try:
                ET.register_namespace(prefix, uri)
            except ValueError:
                # "nsN" prefixes are reserved; ElementTree picks its own.
                pass

        tree = ET.parse(wb_path)
        root = tree.getroot()
        defined_names = root.find('.//main:definedNames', {'main': MAIN_NS})
        removed = 0
        if defined_names is not None:
            # Iterate over a copy: removing while iterating skips siblings.
            for name in list(defined_names):
                if name.text and "Formula removed" in name.text:
                    defined_names.remove(name)
                    removed += 1
        if removed:
            tree.write(wb_path, encoding='utf-8', xml_declaration=True)

        # Repackage
        with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zf:
            for fp in tmp.rglob('*'):
                if fp.is_file():
                    zf.write(fp, fp.relative_to(tmp))
    return removed
# ── CLI Entry Point ──────────────────────────────────────────────────
def main():
    """Command-line entry point.

    ``<excel_file>``               list the sheets
    ``<excel_file> <sheet_name>``  also print the first 20 rows of a sheet
    ``<excel_file> --fix``         write ``<stem>_fixed`` without corrupted
                                   DefinedNames entries

    Exits with status 1 on usage errors, a missing file, a non-ZIP format,
    an unreadable archive, or an unknown sheet name.
    """
    if len(sys.argv) < 2:
        print("Usage: parse_complex_excel.py <excel_file> [sheet_name]")
        print("\nExamples:")
        print(" parse_complex_excel.py model.xlsm # List all sheets")
        print(" parse_complex_excel.py model.xlsm DCF # Extract DCF sheet")
        print(" parse_complex_excel.py model.xlsm --fix # Fix corrupted names")
        sys.exit(1)

    file_path = sys.argv[1]
    path = Path(file_path)
    if not path.exists():
        print(f"File not found: {file_path}")
        sys.exit(1)

    # Verify format
    fmt = verify_format(file_path)
    print(f"File: {path.name}")
    print(f"Format: {fmt}")

    # "Microsoft Excel 2007+" = ZIP-based xlsx/xlsm
    # "Zip archive" = generic ZIP (also valid)
    # "Composite Document File" = old BIFF .xls format
    fmt_lower = fmt.lower()
    is_zip_based = any(kw in fmt_lower for kw in ('zip', 'excel 2007', 'ooxml'))
    if not is_zip_based:
        print("WARNING: File is not ZIP-based xlsx/xlsm.")
        if 'composite' in fmt_lower or 'biff' in fmt_lower:
            print("This appears to be an old .xls (BIFF format). Use xlrd instead.")
        else:
            print("Unexpected format. If it should be xlsx/xlsm, check the file.")
        sys.exit(1)

    option = sys.argv[2] if len(sys.argv) > 2 else None

    try:
        # Handle --fix flag
        if option == '--fix':
            out_path = str(path.with_stem(path.stem + '_fixed'))
            removed = fix_defined_names(file_path, out_path)
            print(f"Removed {removed} corrupted DefinedNames entries.")
            print(f"Fixed file: {out_path}")
            sys.exit(0)

        with zipfile.ZipFile(file_path, 'r') as zf:
            # List sheets
            sheets = list_sheets(zf)
            print(f"\nSheets ({len(sheets)}):")
            for i, s in enumerate(sheets, 1):
                print(f" {i}. {s['name']} -> {s['path']}")

            # If sheet name given, extract it
            if option is not None:
                sheet_name = option
                print(f"\nExtracting sheet: {sheet_name}")
                try:
                    sheet_path = get_sheet_path(zf, sheet_name)
                except ValueError as exc:
                    # Unknown sheet: report it instead of dumping a traceback.
                    print(f"ERROR: {exc}")
                    sys.exit(1)
                shared = build_shared_strings(zf)
                cells = extract_cells(zf, sheet_path, shared)
                print(f"Total cells: {len(cells)}")

                # Show first 20 rows, at most 8 cells per row
                for row in extract_rows(cells, start_row=1, end_row=20):
                    print(f" Row {row['row']:3d}: ", end="")
                    items = sorted(row['cells'].items(),
                                   key=lambda x: parse_cell_ref(x[0]))
                    for ref, val in items[:8]:
                        val_str = str(val)[:25]
                        print(f"{ref}={val_str} ", end="")
                    print()
    except zipfile.BadZipFile as exc:
        # The signature looked like a ZIP but the archive is damaged.
        print(f"ERROR: cannot read '{file_path}' as a ZIP archive: {exc}")
        sys.exit(1)


if __name__ == "__main__":
    main()