Loading...
Loading...
Process PDF files - extract text, create PDFs, merge documents. Use when user asks to read PDF, create PDF, or work with PDF files.
npx skill4agent add shareai-lab/learn-claude-code pdf# Using pdftotext (poppler-utils)
pdftotext input.pdf - # Output to stdout
pdftotext input.pdf output.txt # Output to file
# If pdftotext not available, try:
python3 -c "
import fitz # PyMuPDF
doc = fitz.open('input.pdf')
for page in doc:
print(page.get_text())
"import fitz # pip install pymupdf
doc = fitz.open("input.pdf")
print(f"Pages: {len(doc)}")
print(f"Metadata: {doc.metadata}")
for i, page in enumerate(doc):
text = page.get_text()
print(f"--- Page {i+1} ---")
print(text)# Using pandoc
pandoc input.md -o output.pdf
# With custom styling
pandoc input.md -o output.pdf --pdf-engine=xelatex -V geometry:margin=1infrom reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
c = canvas.Canvas("output.pdf", pagesize=letter)
c.drawString(100, 750, "Hello, PDF!")
c.save()# Using wkhtmltopdf
wkhtmltopdf input.html output.pdf
# Or with Python
python3 -c "
import pdfkit
pdfkit.from_file('input.html', 'output.pdf')
"import fitz
result = fitz.open()
for pdf_path in ["file1.pdf", "file2.pdf", "file3.pdf"]:
doc = fitz.open(pdf_path)
result.insert_pdf(doc)
result.save("merged.pdf")import fitz
doc = fitz.open("input.pdf")
for i in range(len(doc)):
single = fitz.open()
single.insert_pdf(doc, from_page=i, to_page=i)
single.save(f"page_{i+1}.pdf")| Task | Library | Install |
|---|---|---|
| Read/Write/Merge | PyMuPDF | |
| Create from scratch | ReportLab | |
| HTML to PDF | pdfkit | |
| Text extraction | pdftotext | |
pytesseract