Repository

Model repository management

Functions

Function          Description
list_models()     List all cached model IDs (both Talu-local and HuggingFace).
list_files()      List files in a model repository.
resolve_path()    Resolve a model URI to a local filesystem path.
cache_path()      Get local cache path for a model.
is_cached()       Check if a model is in local cache.
size()            Get size of cached models in bytes.
delete()          Delete a model from local cache.
clear()           Delete all models from local cache.
fetch()           Fetch a model from remote to local cache.
fetch_file()      Fetch a single file from a model repository.
search()          Search for models on the remote source.
is_model_id()     Check if a string looks like a model ID.
cache_dir()       Get the cache directory path.
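
A typical workflow chains these together: check the cache, fetch on a miss, then resolve to a local path. A minimal sketch using only the functions listed above:
>>> from talu.repository import is_cached, fetch, resolve_path
>>> model_id = "Qwen/Qwen3-0.6B"
>>> if not is_cached(model_id):
...     _ = fetch(model_id)  # download into the local cache
>>> path = resolve_path(model_id)  # doctest: +SKIP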

def list_models

talu.repository.list_models() → Iterator[str]

List all cached model IDs (both Talu-local and HuggingFace).

Talu-local models are yielded first, then HuggingFace cached models.

Yields

str Model IDs in the local cache (e.g., "Qwen/Qwen3-0.6B").

Example
>>> from talu.repository import list_models, size
>>> models = list(list_models())
>>> for model_id in models:
...     size_mb = size(model_id) / 1e6
...     print(f"{model_id}: {size_mb:.1f} MB")  # doctest: +SKIP

def list_files

talu.repository.list_files(ref: str, token: str | None = None) → Iterator[str]

List files in a model repository.

Parameters
ref : str

Repository identifier. Can be:

  • Model ID string ("Qwen/Qwen3-0.6B"): lists files from the remote hub or local cache.
  • Local path ("./models/my-model" or "/abs/path/to/model"): lists files in that directory.
token : str, optional

API token for private models. Falls back to HF_TOKEN env var.

Yields

str Filenames in the model repository.

Example
>>> from talu.repository import list_files
>>> files = list(list_files("Qwen/Qwen3-0.6B"))  # doctest: +SKIP
>>> "config.json" in files  # doctest: +SKIP
True
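
Since filenames are yielded as plain strings, ordinary filtering applies; for instance, to keep only the weight shards (assuming the repository ships .safetensors files):
>>> weights = [f for f in list_files("Qwen/Qwen3-0.6B")
...            if f.endswith(".safetensors")]  # doctest: +SKIP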

def resolve_path

talu.repository.resolve_path(
    uri: str,
    offline: bool = False,
    token: str | None = None,
    endpoint_url: str | None = None
) → str

Resolve a model URI to a local filesystem path.

Parameters
uri : str

Model URI or identifier (e.g., "Qwen/Qwen3-0.6B").

offline : bool

If True, do not use network; requires cached/local availability.

token : str, optional

API token for private models. Falls back to HF_TOKEN env var.

endpoint_url : str, optional

Custom HuggingFace endpoint URL (overrides HF_ENDPOINT env var).

Returns

str Resolved local filesystem path.

Raises
IOError

If the path cannot be resolved.

Example
>>> from talu.repository import resolve_path
>>> path = resolve_path("Qwen/Qwen3-0.6B")  # doctest: +SKIP
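
With offline=True the function cannot fall back to the network, so a cache miss surfaces as IOError. One way to handle that, as a sketch:
>>> try:
...     path = resolve_path("Qwen/Qwen3-0.6B", offline=True)
... except IOError:
...     path = None  # not cached locally; fetch first  # doctest: +SKIP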

def cache_path

talu.repository.cache_path(model_id: str) → str | None

Get local cache path for a model.

This is a cache lookup only; it does not fetch missing models.

Parameters
model_id : str

Model ID (e.g., "Qwen/Qwen3-0.6B").

Returns

str or None Path to cached model directory, or None if not cached.

Example
>>> from talu.repository import cache_path
>>> path = cache_path("Qwen/Qwen3-0.6B")
>>> if path:
...     print("Model is cached")  # doctest: +SKIP

def is_cached

talu.repository.is_cached(model_id: str) → bool

Check if a model is in local cache.

This is an explicit cache-only check. No network requests are made.

Parameters
model_id : str

Model ID.

Returns

bool True if model is cached locally with valid weights.

Example
>>> from talu.repository import is_cached, cache_path
>>> if is_cached("Qwen/Qwen3-0.6B"):
...     path = cache_path("Qwen/Qwen3-0.6B")  # doctest: +SKIP

def size

talu.repository.size(model_id: str | None = None) → int

Get size of cached models in bytes.

Parameters
model_id : str, optional

Specific model to check. If None, returns total cache size.

Returns

int Size in bytes.

Example
>>> from talu.repository import size
>>> total = size()  # Total cache size
>>> model_size = size("Qwen/Qwen3-0.6B")  # doctest: +SKIP

def delete

talu.repository.delete(model_id: str) → bool

Delete a model from local cache.

Parameters
model_id : str

Model ID.

Returns

bool True if model was deleted, False if not cached.

Example
>>> from talu.repository import delete
>>> deleted = delete("Qwen/Qwen3-0.6B")  # doctest: +SKIP

def clear

talu.repository.clear() → int

Delete all models from local cache.

Returns

int Number of models deleted.

Example
>>> from talu.repository import clear
>>> count = clear()  # Deletes all cached models  # doctest: +SKIP

def fetch

talu.repository.fetch(
    model_id: str,
    force: bool = False,
    on_progress: Callable[[int, int, str], None] | None = None,
    token: str | None = None,
    endpoint_url: str | None = None
) → str | None

Fetch a model from remote to local cache.

Downloads the model if not already cached (or if force=True).

Parameters
model_id : str

Model ID (e.g., "Qwen/Qwen3-0.6B").

force : bool, optional

Force re-fetch even if cached. Default False.

on_progress : callable, optional

Progress callback: fn(downloaded_bytes, total_bytes, filename).

token : str, optional

API token for private models. Falls back to HF_TOKEN env var.

endpoint_url : str, optional

Custom HuggingFace endpoint URL (overrides HF_ENDPOINT env var).

Returns

str or None Path to fetched model, or None on error.

Example
>>> from talu.repository import fetch
>>> path = fetch("Qwen/Qwen3-0.6B")  # doctest: +SKIP
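
The on_progress callback receives (downloaded_bytes, total_bytes, filename), which is enough for a simple progress readout. A sketch (the callback name is illustrative):
>>> def report(downloaded, total, filename):
...     pct = 100 * downloaded / total if total else 0.0
...     print(f"{filename}: {pct:.0f}%")
>>> path = fetch("Qwen/Qwen3-0.6B", on_progress=report)  # doctest: +SKIP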

def fetch_file

talu.repository.fetch_file(
    model_id: str,
    filename: str,
    force: bool = False,
    token: str | None = None,
    endpoint_url: str | None = None
) → str | None

Fetch a single file from a model repository.

Downloads one file (e.g., "config.json") without fetching the full model weights.

Parameters
model_id : str

Model ID (e.g., "Qwen/Qwen3-0.6B").

filename : str

Name of file to fetch (e.g., "config.json").

force : bool, optional

Force re-download even if cached. Default False.

token : str, optional

API token for private models. Falls back to HF_TOKEN env var.

endpoint_url : str, optional

Custom HuggingFace endpoint URL.

Returns

str or None Path to fetched file, or None on error.

Example
>>> from talu.repository import fetch_file
>>> path = fetch_file("Qwen/Qwen3-0.6B", "config.json")  # doctest: +SKIP
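
Because the return value is a local path (or None on error), the fetched file can be read directly; for example, to inspect a model's configuration without pulling its weights:
>>> import json
>>> path = fetch_file("Qwen/Qwen3-0.6B", "config.json")
>>> if path:
...     with open(path) as f:
...         config = json.load(f)  # doctest: +SKIP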

def search

talu.repository.search(
    query: str,
    limit: int = 10,
    token: str | None = None,
    endpoint_url: str | None = None
) → Iterator[str]

Search for models on the remote source.

Searches for text-generation models matching the query.

Parameters
query : str

Search query (e.g., "qwen", "llama").

limit : int, optional

Maximum number of results. Default 10.

token : str, optional

API token. Falls back to HF_TOKEN env var.

endpoint_url : str, optional

Custom HuggingFace endpoint URL (overrides HF_ENDPOINT env var).

Yields

str Model IDs matching the search query.

Example
>>> from talu.repository import search
>>> results = list(search("qwen", limit=5))  # doctest: +SKIP
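
search composes with fetch: take the first match and download it. A sketch, assuming the query returns at least one result:
>>> from talu.repository import search, fetch
>>> first = next(search("qwen", limit=1), None)
>>> if first:
...     path = fetch(first)  # doctest: +SKIP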

def is_model_id

talu.repository.is_model_id(path: str) → bool

Check if a string looks like a model ID.

Parameters
path : str

String to check.

Returns

bool True if it looks like "org/model" format.

Example
>>> from talu.repository import is_model_id
>>> is_model_id("Qwen/Qwen3-0.6B")
True
>>> is_model_id("/path/to/model")
False

def cache_dir

talu.repository.cache_dir() → str

Get the cache directory path.

Returns

str Path to the hub cache (e.g., ~/.cache/huggingface/hub).

Raises
IOError

If the cache home directory cannot be determined.
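
Example
>>> from talu.repository import cache_dir
>>> root = cache_dir()  # e.g., ~/.cache/huggingface/hub  # doctest: +SKIP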