OpenAI's Batch API provides a 50% cost reduction on all model pricing for requests that do not need real-time responses. You submit a JSONL file of requests, OpenAI processes them within 24 hours, and you pay half the standard per-token rate. For any workload that does not require an immediate response — data labeling, bulk analysis, nightly reports, content moderation — this is the most straightforward cost reduction available.
How the Batch API Works
The Batch API does not use a different set of models. Each batched request targets the same endpoints and the same models (GPT-4o, GPT-4o-mini, text-embedding-3-small, and others) as a regular API call; what changes is the pricing, which is discounted in exchange for relaxed latency requirements.
The workflow:
- Create a JSONL file where each line is one API request
- Upload the file to OpenAI's Files API
- Create a batch job referencing the uploaded file
- Poll the batch job status (or have a webhook notify you when the job finishes)
- When complete, download the results JSONL file
from openai import OpenAI
import json
client = OpenAI()

# Step 1: Describe every request as one JSON object.
# The two requests are identical except for their custom_id and the user
# text, so they are generated from a single template.
requests = [
    {
        "custom_id": f"request-{position}",
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {
            "model": "gpt-4o-mini",
            "messages": [
                {"role": "system", "content": "Classify the sentiment of the following text as positive, negative, or neutral."},
                {"role": "user", "content": user_text},
            ],
            "max_tokens": 10,
        },
    }
    for position, user_text in enumerate(
        (
            "The product arrived on time and works perfectly.",
            "Terrible experience, would not recommend.",
        ),
        start=1,
    )
]

# Serialize the requests as JSONL: one JSON document per line.
with open("batch_requests.jsonl", "w") as jsonl_out:
    jsonl_out.writelines(json.dumps(entry) + "\n" for entry in requests)

# Step 2: Send the JSONL file to the Files API, tagged for batch use.
with open("batch_requests.jsonl", "rb") as jsonl_in:
    batch_file = client.files.create(purpose="batch", file=jsonl_in)

# Step 3: Start a batch job that reads its requests from the uploaded file.
batch_job = client.batches.create(
    completion_window="24h",
    endpoint="/v1/chat/completions",
    input_file_id=batch_file.id,
)
print(f"Batch job created: {batch_job.id}")
Checking Status and Retrieving Results
import time
def wait_for_batch(batch_id: str, poll_interval: int = 60):
    """Poll a batch job until it reaches a terminal status.

    Args:
        batch_id: ID of the batch job to watch.
        poll_interval: Seconds to sleep between two status checks.

    Returns:
        The batch object from the last status check, once its status is
        "completed".

    Raises:
        RuntimeError: If the batch ends with status "failed", "expired" or
            "cancelled". The message includes any error details the batch
            object carries.
    """
    while True:
        batch = client.batches.retrieve(batch_id)

        # request_counts is optional on the batch object; missing progress
        # numbers must not crash the polling loop.
        counts = batch.request_counts
        if counts is None:
            print(f"Status: {batch.status}")
        else:
            print(f"Status: {batch.status}, completed: {counts.completed}/{counts.total}")

        if batch.status == "completed":
            return batch

        if batch.status in ("failed", "expired", "cancelled"):
            # Surface the reported errors (if any) so the caller can see
            # why the batch did not complete, not just that it did not.
            reported = getattr(getattr(batch, "errors", None), "data", None) or []
            details = "; ".join(f"{err.code}: {err.message}" for err in reported)
            message = f"Batch failed with status: {batch.status}"
            if details:
                message += f" ({details})"
            raise RuntimeError(message)

        time.sleep(poll_interval)
# Wait for completion (in production, use a scheduled job or webhook)
completed_batch = wait_for_batch(batch_job.id)

# Download results.
# A batch can complete without an output file (for example when every
# request failed); the failures are then listed in the error file instead.
results = []
if completed_batch.output_file_id is None:
    print(f"Batch produced no output file; error file: {completed_batch.error_file_id}")
else:
    result_file = client.files.content(completed_batch.output_file_id)
    # Split on "\n" only (JSONL framing) and skip blank lines so an empty
    # or newline-terminated file does not reach json.loads as "".
    results = [
        json.loads(line)
        for line in result_file.text.split("\n")
        if line.strip()
    ]

# Match results to requests through custom_id rather than by position.
for result in results:
    custom_id = result["custom_id"]
    response = result.get("response")
    request_failed = (
        result.get("error") is not None
        or response is None
        or response.get("status_code", 200) != 200
    )
    if request_failed:
        # A failed request has no choices to read; report what came back
        # instead of raising on the missing keys.
        failure = result.get("error") or (response or {}).get("body")
        print(f"{custom_id}: request failed: {failure}")
        continue
    response_content = response["body"]["choices"][0]["message"]["content"]
    print(f"{custom_id}: {response_content}")