Loading...
Loading...
Comprehensive spreadsheet creation, editing, and analysis with support for formulas, formatting, data analysis, and visualization. When Claude needs to work with spreadsheets (.xlsx, .xlsm, .csv, .tsv, etc) for: (1) Creating new spreadsheets with formulas and formatting, (2) Reading or analyzing data, (3) Modify existing spreadsheets while preserving formulas, (4) Data analysis and visualization in spreadsheets, or (5) Recalculating formulas
npx skill4agent add appautomaton/document-skills xlsxrecalc.pyimport pandas as pd
# Read Excel
df = pd.read_excel('file.xlsx') # Default: first sheet
all_sheets = pd.read_excel('file.xlsx', sheet_name=None) # All sheets as dict
# Analyze
df.head() # Preview data
df.info() # Column info
df.describe() # Statistics
# Write Excel
df.to_excel('output.xlsx', index=False)# Bad: Calculating in Python and hardcoding result
total = df['Sales'].sum()
sheet['B10'] = total # Hardcodes 5000
# Bad: Computing growth rate in Python
growth = (df.iloc[-1]['Revenue'] - df.iloc[0]['Revenue']) / df.iloc[0]['Revenue']
sheet['C5'] = growth # Hardcodes 0.15
# Bad: Python calculation for average
avg = sum(values) / len(values)
sheet['D20'] = avg # Hardcodes 42.5# Good: Let Excel calculate the sum
sheet['B10'] = '=SUM(B2:B9)'
# Good: Growth rate as Excel formula
sheet['C5'] = '=(C4-C2)/C2'
# Good: Average using Excel function
sheet['D20'] = '=AVERAGE(D2:D19)'python recalc.py output.xlsxstatuserrors_founderror_summary#REF!#DIV/0!#VALUE!#NAME?# Using openpyxl for formulas and formatting
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment
wb = Workbook()
sheet = wb.active
# Add data
sheet['A1'] = 'Hello'
sheet['B1'] = 'World'
sheet.append(['Row', 'of', 'data'])
# Add formula
sheet['B2'] = '=SUM(A1:A10)'
# Formatting
sheet['A1'].font = Font(bold=True, color='FF0000')
sheet['A1'].fill = PatternFill('solid', start_color='FFFF00')
sheet['A1'].alignment = Alignment(horizontal='center')
# Column width
sheet.column_dimensions['A'].width = 20
wb.save('output.xlsx')# Using openpyxl to preserve formulas and formatting
from openpyxl import load_workbook
# Load existing file
wb = load_workbook('existing.xlsx')
sheet = wb.active # or wb['SheetName'] for specific sheet
# Working with multiple sheets
for sheet_name in wb.sheetnames:
sheet = wb[sheet_name]
print(f"Sheet: {sheet_name}")
# Modify cells
sheet['A1'] = 'New Value'
sheet.insert_rows(2) # Insert row at position 2
sheet.delete_cols(3) # Delete column 3
# Add new sheet
new_sheet = wb.create_sheet('NewSheet')
new_sheet['A1'] = 'Data'
wb.save('modified.xlsx')recalc.pypython recalc.py <excel_file> [timeout_seconds]python recalc.py output.xlsx 30pd.notna()/{
"status": "success", // or "errors_found"
"total_errors": 0, // Total error count
"total_formulas": 42, // Number of formulas in file
"error_summary": { // Only present if errors found
"#REF!": {
"count": 2,
"locations": ["Sheet1!B5", "Sheet1!C10"]
}
}
}data_only=Trueload_workbook('file.xlsx', data_only=True)data_only=Trueread_only=Truewrite_only=Truepd.read_excel('file.xlsx', dtype={'id': str})pd.read_excel('file.xlsx', usecols=['A', 'C', 'E'])pd.read_excel('file.xlsx', parse_dates=['date_column'])import pandas as pd
excel_file = pd.ExcelFile("workbook.xlsx")
for sheet_name in excel_file.sheet_names:
df = pd.read_excel(excel_file, sheet_name=sheet_name)
print(f"{sheet_name}: {len(df)} rows")import pandas as pd
df = pd.read_excel("sales_data.xlsx")
pivot = pd.pivot_table(
df,
values="sales",
index="region",
columns="product",
aggfunc="sum",
fill_value=0
)
pivot.to_excel("pivot_report.xlsx")df = pd.read_excel("sales.xlsx")
# Group and sum
sales_by_region = df.groupby("region")["sales"].sum()
# Multiple aggregations
summary = df.groupby("region").agg({
"sales": "sum",
"quantity": "mean",
"profit": ["min", "max"]
})# Simple filter
high_sales = df[df["sales"] > 10000]
# Multiple conditions
filtered = df[(df["region"] == "West") & (df["sales"] > 5000)]
# Calculate new columns
df["profit_margin"] = (df["revenue"] - df["cost"]) / df["revenue"]
# Sort
df_sorted = df.sort_values("sales", ascending=False)import pandas as pd
df = pd.read_excel("messy_data.xlsx")
# Remove duplicates
df = df.drop_duplicates()
# Handle missing values
df = df.fillna(0) # Fill with value
df = df.dropna() # Drop rows with missing values
df = df.dropna(subset=["important_col"]) # Drop only if specific column is null
# Remove whitespace from strings
df["name"] = df["name"].str.strip()
# Convert data types
df["date"] = pd.to_datetime(df["date"])
df["amount"] = pd.to_numeric(df["amount"], errors="coerce")
# Save cleaned data
df.to_excel("cleaned_data.xlsx", index=False)import pandas as pd
# Concatenate files vertically (stack rows)
df1 = pd.read_excel("sales_q1.xlsx")
df2 = pd.read_excel("sales_q2.xlsx")
combined = pd.concat([df1, df2], ignore_index=True)
# Merge on common column (like SQL JOIN)
customers = pd.read_excel("customers.xlsx")
sales = pd.read_excel("sales.xlsx")
merged = pd.merge(sales, customers, on="customer_id", how="left")
merged.to_excel("merged_data.xlsx", index=False)import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_excel("data.xlsx")
# Bar chart
df.plot(x="category", y="value", kind="bar")
plt.title("Sales by Category")
plt.xlabel("Category")
plt.ylabel("Sales")
plt.tight_layout()
plt.savefig("bar_chart.png")
plt.close()
# Pie chart
df.set_index("category")["value"].plot(kind="pie", autopct="%1.1f%%")
plt.title("Market Share")
plt.ylabel("")
plt.savefig("pie_chart.png")
plt.close()
# Line chart
df.plot(x="date", y="revenue", kind="line")
plt.savefig("trend.png")
plt.close()import pandas as pd
from openpyxl import load_workbook
from openpyxl.styles import PatternFill, Font
df = pd.DataFrame({
"Product": ["A", "B", "C"],
"Sales": [100, 200, 150]
})
df.to_excel("formatted.xlsx", index=False)
wb = load_workbook("formatted.xlsx")
ws = wb.active
# Define fills
red_fill = PatternFill(start_color="FF0000", end_color="FF0000", fill_type="solid")
green_fill = PatternFill(start_color="00FF00", end_color="00FF00", fill_type="solid")
# Apply conditional formatting
for row in range(2, len(df) + 2):
cell = ws[f"B{row}"]
if cell.value < 150:
cell.fill = red_fill
else:
cell.fill = green_fill
# Bold headers
for cell in ws[1]:
cell.font = Font(bold=True)
wb.save("formatted.xlsx")import pandas as pd
# Read only specific columns
df = pd.read_excel("large.xlsx", usecols=["A", "C", "E"])
# Read in chunks for very large files
for chunk in pd.read_excel("huge.xlsx", chunksize=10000):
# Process each chunk
process(chunk)
# Specify dtypes to avoid inference overhead
df = pd.read_excel("data.xlsx", dtype={"id": str, "amount": float})
# For openpyxl with large files
from openpyxl import load_workbook
wb = load_workbook("large.xlsx", read_only=True) # Read-only modeimport pandas as pd
df = pd.DataFrame({"Product": ["Widget A", "Widget B"], "Sales": [100, 200]})
writer = pd.ExcelWriter("output.xlsx", engine="openpyxl")
df.to_excel(writer, sheet_name="Sales", index=False)
worksheet = writer.sheets["Sales"]
for column in worksheet.columns:
max_length = 0
column_letter = column[0].column_letter
for cell in column:
try:
if len(str(cell.value)) > max_length:
max_length = len(str(cell.value))
except:
pass
worksheet.column_dimensions[column_letter].width = max_length + 2
writer.close()