import base64
import os
import sys
from opperai import Opper
from pydantic import BaseModel, Field
# Module-level Opper client used by extract_invoice_data. The bearer token is
# taken from the OPPER_API_KEY environment variable; if that variable is not
# set, None is passed through to the client.
opper = Opper(
    http_bearer=os.environ.get("OPPER_API_KEY"),
)
class InvoiceData(BaseModel):
    # Fields requested from the invoice-extraction call; the JSON schema
    # generated from this model is what gets passed as `output_schema`.
    #
    # NOTE: documented with comments instead of a class docstring on purpose:
    # pydantic copies a model's docstring into the generated JSON schema as
    # its description, which would change the schema that is sent.

    # Identifier printed on the invoice.
    invoice_number: str = Field(
        description="The invoice number",
    )
    # Invoice total as a float.
    total_amount: float = Field(
        description="The total amount in dollars",
    )
    # Issuer of the invoice.
    vendor_name: str = Field(
        description="The name of the vendor or company",
    )
    # Kept as a plain string; no date parsing or format validation is applied.
    date: str = Field(
        description="The invoice date",
    )
    # Untyped list: the shape of each entry is not constrained by the schema.
    items: list = Field(
        description="List of items and their prices",
    )
def extract_invoice_data(pdf_path: str) -> dict:
    """Send a PDF to the Opper ``extract_invoice`` call and return its payload.

    The file is read in binary mode, base64-encoded, and wrapped in a
    ``data:application/pdf;base64,...`` URI. That URI is passed to
    ``opper.call`` under the ``_opper_media_input`` key, with the JSON schema
    of ``InvoiceData`` as the requested output schema.

    Args:
        pdf_path: Path of the PDF file to read and submit.

    Returns:
        The ``json_payload`` attribute of the call response (presumably a
        dict shaped like ``InvoiceData`` -- confirm against the Opper SDK).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Use a context manager so the file handle is closed as soon as the bytes
    # are read, instead of being left open until garbage collection.
    with open(pdf_path, "rb") as pdf_file:
        pdf_bytes = pdf_file.read()

    encoded_pdf = base64.b64encode(pdf_bytes).decode("utf-8")
    media_input = f"data:application/pdf;base64,{encoded_pdf}"

    response = opper.call(
        name="extract_invoice",
        instructions="Extract structured invoice data from the PDF document",
        input={
            "_opper_media_input": media_input,
        },
        output_schema=InvoiceData.model_json_schema(),
    )
    return response.json_payload
def main():
    """Command-line entry point.

    Takes the first command-line argument as the PDF path, runs
    ``extract_invoice_data`` on it, and prints the result. When no argument
    is given, prints a usage line and returns without doing anything else.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python extract_invoice.py <path_to_pdf>")
        return

    # Only the first argument is used; any extra arguments are ignored.
    print(extract_invoice_data(cli_args[0]))
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
# Example output (illustrative; actual values depend on the input PDF):
# {
# 'invoice_number': 'INV-2024-001',
# 'total_amount': 1250.00,
# 'vendor_name': 'Tech Solutions Inc.',
# 'date': '2024-01-15',
# 'items': [
# {'item': 'Software License', 'price': 1000.00},
# {'item': 'Support Services', 'price': 250.00}
# ]
# }