- Add date_added to all 950+ skills for complete tracking - Update version to 6.5.0 in package.json and README - Regenerate all indexes and catalog - Sync all generated files Features from merged PR #150: - Stars/Upvotes system for community-driven discovery - Auto-update mechanism via START_APP.bat - Interactive Prompt Builder - Date tracking badges - Smart auto-categorization All skills validated and indexed. Made-with: Cursor
329 lines
9.0 KiB
Markdown
329 lines
9.0 KiB
Markdown
---
|
|
name: azure-ai-document-intelligence-ts
|
|
description: "Extract text, tables, and structured data from documents using Azure Document Intelligence (@azure-rest/ai-document-intelligence). Use when processing invoices, receipts, IDs, forms, or building custom document models and classifiers."
|
|
risk: unknown
|
|
source: community
|
|
date_added: "2026-02-27"
|
|
---
|
|
|
|
# Azure Document Intelligence REST SDK for TypeScript
|
|
|
|
Extract text, tables, and structured data from documents using prebuilt and custom models.
|
|
|
|
## Installation
|
|
|
|
```bash
|
|
npm install @azure-rest/ai-document-intelligence @azure/identity
|
|
```
|
|
|
|
## Environment Variables
|
|
|
|
```bash
|
|
DOCUMENT_INTELLIGENCE_ENDPOINT=https://<resource>.cognitiveservices.azure.com
|
|
DOCUMENT_INTELLIGENCE_API_KEY=<api-key>
|
|
```
|
|
|
|
## Authentication
|
|
|
|
**Important**: This is a REST client. `DocumentIntelligence` is a **function**, not a class.
|
|
|
|
### DefaultAzureCredential
|
|
|
|
```typescript
|
|
import DocumentIntelligence from "@azure-rest/ai-document-intelligence";
|
|
import { DefaultAzureCredential } from "@azure/identity";
|
|
|
|
const client = DocumentIntelligence(
|
|
process.env.DOCUMENT_INTELLIGENCE_ENDPOINT!,
|
|
new DefaultAzureCredential()
|
|
);
|
|
```
|
|
|
|
### API Key
|
|
|
|
```typescript
|
|
import DocumentIntelligence from "@azure-rest/ai-document-intelligence";
|
|
|
|
const client = DocumentIntelligence(
|
|
process.env.DOCUMENT_INTELLIGENCE_ENDPOINT!,
|
|
{ key: process.env.DOCUMENT_INTELLIGENCE_API_KEY! }
|
|
);
|
|
```
|
|
|
|
## Analyze Document (URL)
|
|
|
|
```typescript
|
|
import DocumentIntelligence, {
|
|
isUnexpected,
|
|
getLongRunningPoller,
|
|
AnalyzeOperationOutput
|
|
} from "@azure-rest/ai-document-intelligence";
|
|
|
|
const initialResponse = await client
|
|
.path("/documentModels/{modelId}:analyze", "prebuilt-layout")
|
|
.post({
|
|
contentType: "application/json",
|
|
body: {
|
|
urlSource: "https://example.com/document.pdf"
|
|
},
|
|
queryParameters: { locale: "en-US" }
|
|
});
|
|
|
|
if (isUnexpected(initialResponse)) {
|
|
throw initialResponse.body.error;
|
|
}
|
|
|
|
const poller = getLongRunningPoller(client, initialResponse);
|
|
const result = (await poller.pollUntilDone()).body as AnalyzeOperationOutput;
|
|
|
|
console.log("Pages:", result.analyzeResult?.pages?.length);
|
|
console.log("Tables:", result.analyzeResult?.tables?.length);
|
|
```
|
|
|
|
## Analyze Document (Local File)
|
|
|
|
```typescript
|
|
import { readFile } from "node:fs/promises";
|
|
|
|
const fileBuffer = await readFile("./document.pdf");
|
|
const base64Source = fileBuffer.toString("base64");
|
|
|
|
const initialResponse = await client
|
|
.path("/documentModels/{modelId}:analyze", "prebuilt-invoice")
|
|
.post({
|
|
contentType: "application/json",
|
|
body: { base64Source }
|
|
});
|
|
|
|
if (isUnexpected(initialResponse)) {
|
|
throw initialResponse.body.error;
|
|
}
|
|
|
|
const poller = getLongRunningPoller(client, initialResponse);
|
|
const result = (await poller.pollUntilDone()).body as AnalyzeOperationOutput;
|
|
```
|
|
|
|
## Prebuilt Models
|
|
|
|
| Model ID | Description |
|
|
|----------|-------------|
|
|
| `prebuilt-read` | OCR - text and language extraction |
|
|
| `prebuilt-layout` | Text, tables, selection marks, structure |
|
|
| `prebuilt-invoice` | Invoice fields |
|
|
| `prebuilt-receipt` | Receipt fields |
|
|
| `prebuilt-idDocument` | ID document fields |
|
|
| `prebuilt-tax.us.w2` | W-2 tax form fields |
|
|
| `prebuilt-healthInsuranceCard.us` | Health insurance card fields |
|
|
| `prebuilt-contract` | Contract fields |
|
|
| `prebuilt-bankStatement.us` | Bank statement fields |
|
|
|
|
## Extract Invoice Fields
|
|
|
|
```typescript
|
|
const initialResponse = await client
|
|
.path("/documentModels/{modelId}:analyze", "prebuilt-invoice")
|
|
.post({
|
|
contentType: "application/json",
|
|
body: { urlSource: invoiceUrl }
|
|
});
|
|
|
|
if (isUnexpected(initialResponse)) {
|
|
throw initialResponse.body.error;
|
|
}
|
|
|
|
const poller = getLongRunningPoller(client, initialResponse);
|
|
const result = (await poller.pollUntilDone()).body as AnalyzeOperationOutput;
|
|
|
|
const invoice = result.analyzeResult?.documents?.[0];
|
|
if (invoice) {
|
|
console.log("Vendor:", invoice.fields?.VendorName?.content);
|
|
console.log("Total:", invoice.fields?.InvoiceTotal?.content);
|
|
console.log("Due Date:", invoice.fields?.DueDate?.content);
|
|
}
|
|
```
|
|
|
|
## Extract Receipt Fields
|
|
|
|
```typescript
|
|
const initialResponse = await client
|
|
.path("/documentModels/{modelId}:analyze", "prebuilt-receipt")
|
|
.post({
|
|
contentType: "application/json",
|
|
body: { urlSource: receiptUrl }
|
|
});
|
|
|
|
const poller = getLongRunningPoller(client, initialResponse);
|
|
const result = (await poller.pollUntilDone()).body as AnalyzeOperationOutput;
|
|
|
|
const receipt = result.analyzeResult?.documents?.[0];
|
|
if (receipt) {
|
|
console.log("Merchant:", receipt.fields?.MerchantName?.content);
|
|
console.log("Total:", receipt.fields?.Total?.content);
|
|
|
|
for (const item of receipt.fields?.Items?.values || []) {
|
|
console.log("Item:", item.properties?.Description?.content);
|
|
console.log("Price:", item.properties?.TotalPrice?.content);
|
|
}
|
|
}
|
|
```
|
|
|
|
## List Document Models
|
|
|
|
```typescript
|
|
import DocumentIntelligence, { isUnexpected, paginate } from "@azure-rest/ai-document-intelligence";
|
|
|
|
const response = await client.path("/documentModels").get();
|
|
|
|
if (isUnexpected(response)) {
|
|
throw response.body.error;
|
|
}
|
|
|
|
for await (const model of paginate(client, response)) {
|
|
console.log(model.modelId);
|
|
}
|
|
```
|
|
|
|
## Build Custom Model
|
|
|
|
```typescript
|
|
const initialResponse = await client.path("/documentModels:build").post({
|
|
body: {
|
|
modelId: "my-custom-model",
|
|
description: "Custom model for purchase orders",
|
|
buildMode: "template", // or "neural"
|
|
azureBlobSource: {
|
|
containerUrl: process.env.TRAINING_CONTAINER_SAS_URL!,
|
|
prefix: "training-data/"
|
|
}
|
|
}
|
|
});
|
|
|
|
if (isUnexpected(initialResponse)) {
|
|
throw initialResponse.body.error;
|
|
}
|
|
|
|
const poller = getLongRunningPoller(client, initialResponse);
|
|
const result = await poller.pollUntilDone();
|
|
console.log("Model built:", result.body);
|
|
```
|
|
|
|
## Build Document Classifier
|
|
|
|
```typescript
|
|
import { DocumentClassifierBuildOperationDetailsOutput } from "@azure-rest/ai-document-intelligence";
|
|
|
|
const containerSasUrl = process.env.TRAINING_CONTAINER_SAS_URL!;
|
|
|
|
const initialResponse = await client.path("/documentClassifiers:build").post({
|
|
body: {
|
|
classifierId: "my-classifier",
|
|
description: "Invoice vs Receipt classifier",
|
|
docTypes: {
|
|
invoices: {
|
|
azureBlobSource: { containerUrl: containerSasUrl, prefix: "invoices/" }
|
|
},
|
|
receipts: {
|
|
azureBlobSource: { containerUrl: containerSasUrl, prefix: "receipts/" }
|
|
}
|
|
}
|
|
}
|
|
});
|
|
|
|
if (isUnexpected(initialResponse)) {
|
|
throw initialResponse.body.error;
|
|
}
|
|
|
|
const poller = getLongRunningPoller(client, initialResponse);
|
|
const result = (await poller.pollUntilDone()).body as DocumentClassifierBuildOperationDetailsOutput;
|
|
console.log("Classifier:", result.result?.classifierId);
|
|
```
|
|
|
|
## Classify Document
|
|
|
|
```typescript
|
|
const initialResponse = await client
|
|
.path("/documentClassifiers/{classifierId}:analyze", "my-classifier")
|
|
.post({
|
|
contentType: "application/json",
|
|
body: { urlSource: documentUrl },
|
|
queryParameters: { split: "auto" }
|
|
});
|
|
|
|
if (isUnexpected(initialResponse)) {
|
|
throw initialResponse.body.error;
|
|
}
|
|
|
|
const poller = getLongRunningPoller(client, initialResponse);
|
|
const result = await poller.pollUntilDone();
|
|
console.log("Classification:", result.body.analyzeResult?.documents);
|
|
```
|
|
|
|
## Get Service Info
|
|
|
|
```typescript
|
|
const response = await client.path("/info").get();
|
|
|
|
if (isUnexpected(response)) {
|
|
throw response.body.error;
|
|
}
|
|
|
|
console.log("Custom model limit:", response.body.customDocumentModels.limit);
|
|
console.log("Custom model count:", response.body.customDocumentModels.count);
|
|
```
|
|
|
|
## Polling Pattern
|
|
|
|
```typescript
|
|
import DocumentIntelligence, {
|
|
isUnexpected,
|
|
getLongRunningPoller,
|
|
AnalyzeOperationOutput
|
|
} from "@azure-rest/ai-document-intelligence";
|
|
|
|
// 1. Start operation
|
|
const initialResponse = await client
|
|
.path("/documentModels/{modelId}:analyze", "prebuilt-layout")
|
|
.post({ contentType: "application/json", body: { urlSource } });
|
|
|
|
// 2. Check for errors
|
|
if (isUnexpected(initialResponse)) {
|
|
throw initialResponse.body.error;
|
|
}
|
|
|
|
// 3. Create poller
|
|
const poller = getLongRunningPoller(client, initialResponse);
|
|
|
|
// 4. Optional: Monitor progress
|
|
poller.onProgress((state) => {
|
|
console.log("Status:", state.status);
|
|
});
|
|
|
|
// 5. Wait for completion
|
|
const result = (await poller.pollUntilDone()).body as AnalyzeOperationOutput;
|
|
```
|
|
|
|
## Key Types
|
|
|
|
```typescript
|
|
import DocumentIntelligence, {
|
|
isUnexpected,
|
|
getLongRunningPoller,
|
|
paginate,
|
|
parseResultIdFromResponse,
|
|
AnalyzeOperationOutput,
|
|
DocumentClassifierBuildOperationDetailsOutput
|
|
} from "@azure-rest/ai-document-intelligence";
|
|
```
|
|
|
|
## Best Practices
|
|
|
|
1. **Use getLongRunningPoller()** - Document analysis is asynchronous; always poll until the operation completes before reading results
|
|
2. **Check isUnexpected()** - Type guard for proper error handling
|
|
3. **Choose the right model** - Use prebuilt models when possible, custom for specialized docs
|
|
4. **Handle confidence scores** - Fields have confidence values, set thresholds for your use case
|
|
5. **Use pagination** - Use `paginate()` helper for listing models
|
|
6. **Prefer neural mode** - For custom models, neural handles more variation than template
|
|
|
|
## When to Use
|
|
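Use this skill when a TypeScript project needs to extract text, tables, or structured fields from documents with Azure Document Intelligence, such as analyzing invoices, receipts, IDs, or forms with prebuilt models, or building and running custom document models and classifiers.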
|