Skip to main content
DELETE
/
api
/
corpora
/
{id}

Overview

Permanently delete a corpus and all its associated resources. This operation cascades through all files, strings, URLs, and social media imports, removing their storage objects and deleting their vector embeddings from Qdrant.
Irreversible Operation: This action cannot be undone. All indexed data, resources, and query history will be permanently deleted.
Alternative: If you want to temporarily disable access, set is_published: false via PATCH instead of deleting.

Authentication

Requires valid JWT token or session authentication. You must own the target corpus.

Path Parameters

id
UUID
required
Identifier of the corpus to delete. Example: 8d0f0a5d-4b5e-4c09-9db6-0e9d2aa8a9fd

Example request

# Send the DELETE request with the bearer token taken from the environment.
curl --request DELETE \
  --header "Authorization: Bearer $SOAR_LABS_TOKEN" \
  https://{your-host}/api/corpora/8d0f0a5d-4b5e-4c09-9db6-0e9d2aa8a9fd/

Response Codes

204
No Content
Deletion successful. No response body is returned.
403
Forbidden
The corpus belongs to another user. Cannot delete corpora you don’t own.
404
Not Found
Corpus doesn’t exist or you don’t have access to it.
409
Conflict
Backend failed to remove remote index or storage objects. Retry once and contact support if the issue persists. Possible causes:
  • Vector database connection timeout
  • Storage service unavailable
  • Partial deletion occurred

What Gets Deleted

The delete operation removes:
  • Corpus metadata - Name, description, settings
  • All resources - Files, strings, URLs, social media imports
  • Storage objects - Uploaded files and processed content
  • Vector embeddings - All chunks removed from Qdrant
  • Query history - All past queries and retrievals for this corpus
  • Index data - Vector database collections and metadata
Total data loss: Everything associated with the corpus is permanently deleted across all storage systems.

Safe Deletion Workflow

Verify what will be deleted before proceeding:
# Pre-deletion review: print what the corpus contains, delete only after an
# explicit "yes", and report the real outcome of the DELETE call.

# 1. Check corpus details
corpus_response = requests.get(
    f"{base_url}/api/corpora/{corpus_id}/",
    headers=headers
)
# Stop here on 403/404 instead of failing below with a KeyError on the error body.
corpus_response.raise_for_status()
corpus = corpus_response.json()

print(f"Corpus: {corpus['corpora_name']}")
print(f"Created: {corpus['created_at']}")
print(f"Size: {corpus['size_on_disk'] / (1024*1024):.2f} MB")

# 2. List all resources
# NOTE(review): each count is len() of the returned "results" list; if these
# endpoints paginate, that is only the first page - confirm before relying on it.
files = requests.get(
    f"{base_url}/api/data/files/?corpora={corpus_id}",
    headers=headers
).json()
print(f"Files: {len(files['results'])}")

strings = requests.get(
    f"{base_url}/api/data/strings/?corpora={corpus_id}",
    headers=headers
).json()
print(f"Strings: {len(strings['results'])}")

urls = requests.get(
    f"{base_url}/api/data/urls/?corpora={corpus_id}",
    headers=headers
).json()
print(f"URLs: {len(urls['results'])}")

# 3. Confirm deletion
if input("Delete this corpus? (yes/no): ") == "yes":
    response = requests.delete(
        f"{base_url}/api/corpora/{corpus_id}/",
        headers=headers
    )
    # Only 204 means the corpus is gone; 403, 404 and 409 all mean it was not
    # (or not completely) deleted, so do not claim success for them.
    if response.status_code == 204:
        print("Corpus deleted")
    else:
        print(f"Deletion failed: HTTP {response.status_code}")
Export data before permanent deletion:
import json
from datetime import datetime

# Export the corpus record and its resource listings to a local JSON file,
# then delete the corpus. Every fetch must succeed before anything is deleted.


def fetch_json(url):
    """GET ``url`` with the shared headers and return the decoded JSON body.

    Raises requests.HTTPError on a 4xx/5xx response so an error payload is
    never mistaken for backup data.
    """
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    return response.json()


# Get corpus and all resources
corpus = fetch_json(f"{base_url}/api/corpora/{corpus_id}/")

# NOTE(review): only the "results" list of each response is saved; if these
# endpoints paginate, follow the remaining pages before deleting - confirm.
resources = {
    "corpus": corpus,
    "files": fetch_json(
        f"{base_url}/api/data/files/?corpora={corpus_id}"
    )["results"],
    "strings": fetch_json(
        f"{base_url}/api/data/strings/?corpora={corpus_id}"
    )["results"],
    "urls": fetch_json(
        f"{base_url}/api/data/urls/?corpora={corpus_id}"
    )["results"]
}

# Save backup
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
# A path separator in the corpus name would make open() target a missing directory.
safe_name = corpus['corpora_name'].replace("/", "_").replace("\\", "_")
backup_file = f"corpus_backup_{safe_name}_{timestamp}.json"

with open(backup_file, 'w', encoding="utf-8") as f:
    json.dump(resources, f, indent=2)

print(f"Backup saved to {backup_file}")

# Now safe to delete
response = requests.delete(f"{base_url}/api/corpora/{corpus_id}/", headers=headers)
# Anything other than 204 means the corpus was not (or not completely) deleted.
if response.status_code != 204:
    print(f"Deletion failed: HTTP {response.status_code} (backup kept at {backup_file})")
Instead of deleting, make the corpus private and add an archive marker:
# Mark as archived instead of deleting
# Assumes `corpus` holds the JSON of this corpus from an earlier GET and that
# `datetime` has been imported with `from datetime import datetime`.
# A null or missing description must not end up as the literal text "None".
existing_description = corpus.get("description") or ""
response = requests.patch(
    f"{base_url}/api/corpora/{corpus_id}/",
    headers=headers,
    json={
        "is_published": False,
        "description": f"[ARCHIVED {datetime.now().date()}] {existing_description}".rstrip()
    }
)
# Fail loudly if the PATCH was rejected, so the corpus is not assumed archived.
response.raise_for_status()

# Corpus remains accessible but hidden
# Can be unarchived later by removing the marker
Benefits:
  • Reversible operation
  • Data preserved for future reference
  • Can restore if needed
  • Maintains audit trail
Handle 409 errors gracefully:
import time

def safe_delete_corpus(base_url, headers, corpus_id, max_retries=3):
    """Delete a corpus, retrying only failures that a retry can fix.

    Retried with exponential backoff: 409 responses, 5xx responses, and
    network-level errors (timeouts, connection failures). Any other
    non-204 status (e.g. 403 or 404) is reported and ends the attempt loop
    immediately, since repeating the same request cannot change the outcome.

    Args:
        base_url: Scheme and host of the API, without a trailing slash.
        headers: Request headers carrying the authentication credentials.
        corpus_id: UUID of the corpus to delete.
        max_retries: Maximum number of DELETE attempts.

    Returns:
        True if the server answered 204, False otherwise.
    """
    url = f"{base_url}/api/corpora/{corpus_id}/"

    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1
        wait_time = 2 ** attempt  # Exponential backoff

        try:
            response = requests.delete(url, headers=headers, timeout=60)

            if response.status_code == 204:
                print("Corpus deleted successfully")
                return True

            if response.status_code == 409:
                if is_last_attempt:
                    print("Deletion failed after retries. Contact support.")
                    return False
                print(f"Deletion conflict. Retrying in {wait_time}s...")
                time.sleep(wait_time)
                continue

            if response.status_code < 500:
                # Not a transient failure: the same request would get the
                # same answer, so stop instead of re-sending the DELETE.
                print(f"Error: unexpected HTTP {response.status_code}; not retrying")
                return False

            # 5xx: raises HTTPError, handled below as a transient failure.
            response.raise_for_status()

        except requests.RequestException as e:
            print(f"Error: {e}")
            if is_last_attempt:
                return False
            time.sleep(wait_time)

    return False
Delete multiple corpora safely:
# Delete several corpora one by one, recording each outcome so that a single
# failure does not stop the rest of the batch.
corpus_ids = ["id1", "id2", "id3"]
deleted = []
failed = []

for corpus_id in corpus_ids:
    corpus_url = f"{base_url}/api/corpora/{corpus_id}/"
    try:
        # Optional: Verify corpus name before deleting
        lookup = requests.get(corpus_url, headers=headers)
        # Report a 403/404 as an HTTP error rather than as a KeyError on the
        # missing 'corpora_name' field of the error body.
        lookup.raise_for_status()
        corpus = lookup.json()

        print(f"Deleting {corpus['corpora_name']}...")

        response = requests.delete(corpus_url, headers=headers)

        if response.status_code == 204:
            deleted.append(corpus_id)
            print(f"✓ Deleted {corpus['corpora_name']}")
        else:
            failed.append((corpus_id, response.status_code))
            print(f"✗ Failed: {response.status_code}")

    except Exception as e:
        # Per-item boundary: record the failure and continue with the next ID.
        failed.append((corpus_id, str(e)))
        print(f"✗ Error: {e}")

print(f"\nSummary: {len(deleted)} deleted, {len(failed)} failed")
Security: Deletion requests for corpora owned by other users return 403 Forbidden without revealing whether the corpus exists.
Production Best Practice: Implement a two-step deletion process: mark for deletion first, then actually delete after a grace period (e.g., 30 days).

Client examples

import os
import requests

BASE_URL = "https://your-soar-instance.com"
TOKEN = os.environ["SOAR_LABS_TOKEN"]
CORPUS_ID = "8d0f0a5d-4b5e-4c09-9db6-0e9d2aa8a9fd"

# Build the request pieces first so the call itself stays on one line.
delete_url = f"{BASE_URL}/api/corpora/{CORPUS_ID}/"
auth_headers = {"Authorization": f"Bearer {TOKEN}"}

response = requests.delete(delete_url, headers=auth_headers, timeout=30)

# 204 is the success status; for anything else, raise if it is an HTTP error.
if not response.status_code == 204:
    response.raise_for_status()

Authorizations

Authorization
string
header
required

Bearer authentication header of the form Bearer <token>, where <token> is your auth token.

Path Parameters

id
string<uuid>
required

A UUID string identifying this Corpora.

Response

204

No response body