initial commit

2026-01-18 22:01:50 +01:00
parent ab25613358
commit c40874d7f0
20 changed files with 6287 additions and 11 deletions

scripts/hf-llm-install.py (executable file)

@@ -0,0 +1,638 @@
#!/usr/bin/env python3
"""
HuggingFace LLM Installer for Ollama
Automatically downloads GGUF files from HuggingFace and creates Ollama models.
Features:
- SHA256 checksum verification
- Disk space checking
- Dry run mode
- Parallel processing
- Skip existing models
"""
import argparse
import hashlib
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from urllib.parse import urlparse
import urllib.request
def parse_model_name_from_gguf(gguf_filename):
"""
Parse model name and tag from GGUF filename.
Args:
gguf_filename: Name of the GGUF file
Returns:
        Tuple of (model_base, tag, full_name), or (stem, 'latest', stem) when no size variant is found
"""
filename_stem = Path(gguf_filename).stem.lower()
# Split on hyphens
parts = filename_stem.split('-')
if len(parts) >= 3:
# Find where the size variant starts (e.g., "0.5b", "3b", "8b", "14b")
base_parts = []
tag_parts = []
found_variant = False
for part in parts:
# Check if this looks like a size variant (e.g., "3b", "8b", "0.5b")
if not found_variant and re.match(r'^\d+(\.\d+)?b$', part):
found_variant = True
tag_parts.append(part)
elif found_variant:
# Include everything after the variant (including quantization)
tag_parts.append(part)
else:
# Before the variant = base name
base_parts.append(part)
if base_parts and tag_parts:
model_base = '-'.join(base_parts)
model_tag = '-'.join(tag_parts)
full_name = f"{model_base}:{model_tag}"
return (model_base, model_tag, full_name)
# Fallback to filename without extension
return (filename_stem, 'latest', filename_stem)
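# Illustrative parses under the size-variant heuristic above (hypothetical
# filenames):
#   "Qwen2.5-7B-Instruct-Q4_K_M.gguf" -> ("qwen2.5", "7b-instruct-q4_k_m", "qwen2.5:7b-instruct-q4_k_m")
#   "no-size-variant.gguf"            -> ("no-size-variant", "latest", "no-size-variant")  (fallback)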
def parse_modelfile(modelfile_path):
"""
Parse a Modelfile to extract HuggingFace upstream URL and model info.
Args:
modelfile_path: Path to the .Modelfile
Returns:
dict with model metadata or None if invalid
"""
with open(modelfile_path, 'r') as f:
content = f.read()
# Look for hf_upstream in the header comments
hf_match = re.search(r'#\s*hf_upstream:\s*(https://huggingface\.co/[^\s]+)', content)
if not hf_match:
return None
hf_url = hf_match.group(1)
# Look for optional quantization specification (default: q4_k_m)
quant_match = re.search(r'#\s*quantization:\s*([a-zA-Z0-9_]+)', content)
quantization = quant_match.group(1).upper() if quant_match else 'Q4_K_M'
# Look for optional SHA256 checksum
sha256_match = re.search(r'#\s*sha256:\s*([a-fA-F0-9]{64})', content)
sha256 = sha256_match.group(1) if sha256_match else None
# Look for optional capabilities (comma-separated list)
# Format: # capabilities: tools, vision
capabilities_match = re.search(r'#\s*capabilities:\s*([^\n]+)', content)
capabilities = None
if capabilities_match:
# Parse comma-separated capabilities and clean whitespace
caps_str = capabilities_match.group(1).strip()
capabilities = [cap.strip() for cap in caps_str.split(',') if cap.strip()]
# Check if URL points to a specific GGUF file or just the repo
if hf_url.endswith('.gguf') or '/blob/' in hf_url or '/resolve/' in hf_url:
# Specific file provided - use as-is
resolve_url = hf_url.replace('/blob/', '/resolve/')
gguf_filename = os.path.basename(urlparse(resolve_url).path)
else:
# Repository root provided - construct filename from repo name and quantization
# URL format: https://huggingface.co/{org}/{repo}
url_parts = urlparse(hf_url).path.strip('/').split('/')
if len(url_parts) >= 2:
repo_name = url_parts[1] # e.g., "Ministral-3-3B-Instruct-2512-GGUF"
# Remove -GGUF suffix if present (case-insensitive)
if repo_name.upper().endswith('-GGUF'):
repo_name = repo_name[:-5]
# Construct filename: RepoName-Quantization.gguf
gguf_filename = f"{repo_name}-{quantization}.gguf"
resolve_url = f"{hf_url.rstrip('/')}/resolve/main/{gguf_filename}"
else:
print(f"✗ Invalid HuggingFace URL format: {hf_url}")
return None
# Extract model name and tag from the GGUF filename
# Format: Model-Version-Variant-Year-Quant.gguf -> model:version-variant-year-quant
# Example: Ministral-3-3B-Instruct-2512-Q5_K_M.gguf -> ministral-3:3b-instruct-2512-q5_k_m
model_base, model_tag, model_name = parse_model_name_from_gguf(gguf_filename)
return {
'hf_url': hf_url,
'resolve_url': resolve_url,
'gguf_filename': gguf_filename,
'model_name': model_name,
'modelfile_path': modelfile_path,
'sha256': sha256,
'capabilities': capabilities
}
def get_file_size(url):
"""
Get the size of a file from URL without downloading it.
Args:
url: File URL
Returns:
Size in bytes or None if unavailable
"""
try:
req = urllib.request.Request(url, method='HEAD')
with urllib.request.urlopen(req, timeout=10) as response:
size = response.headers.get('Content-Length')
return int(size) if size else None
except Exception:
return None
def check_disk_space(required_bytes, path='.'):
"""
Check if there's enough disk space available.
Args:
required_bytes: Required space in bytes
path: Path to check space on (default: current directory)
Returns:
Tuple of (has_space, available_bytes, required_bytes)
"""
# Get absolute path to check actual filesystem
abs_path = os.path.abspath(path)
stat = shutil.disk_usage(abs_path)
# Add 10% safety margin
required_with_margin = int(required_bytes * 1.1)
return (stat.free >= required_with_margin, stat.free, required_with_margin)
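# Worked example (illustrative numbers): a 4.0 GB GGUF passes this check only
# when at least 4.4 GB is free on the target filesystem (4.0 GB * 1.1 margin).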
def calculate_sha256(filepath, chunk_size=8192):
"""
Calculate SHA256 checksum of a file.
Args:
filepath: Path to file
chunk_size: Bytes to read at once
Returns:
SHA256 hex digest
"""
sha256_hash = hashlib.sha256()
with open(filepath, 'rb') as f:
for chunk in iter(lambda: f.read(chunk_size), b''):
sha256_hash.update(chunk)
return sha256_hash.hexdigest()
def verify_checksum(filepath, expected_sha256):
"""
Verify file checksum matches expected value.
Args:
filepath: Path to file
expected_sha256: Expected SHA256 hash
Returns:
True if match, False otherwise
"""
print(f" Verifying checksum...")
actual = calculate_sha256(filepath)
if actual.lower() == expected_sha256.lower():
print(f" ✓ Checksum verified: {actual[:16]}...")
return True
else:
print(f" ✗ Checksum mismatch!")
print(f" Expected: {expected_sha256}")
print(f" Actual: {actual}")
return False
def get_existing_models():
"""
Get list of existing Ollama models.
Returns:
Set of model names
"""
try:
result = subprocess.run(
['ollama', 'list'],
capture_output=True,
text=True,
check=True
)
# Parse output to get model names
# Format: NAME ID SIZE MODIFIED
models = set()
for line in result.stdout.strip().split('\n')[1:]: # Skip header
if line.strip():
                # First column is the full name, e.g. "llama3:8b"
                name = line.split()[0]
                # Store both the full "name:tag" and the tag-less base name,
                # since skip-existing checks compare full model names
                models.add(name)
                models.add(name.split(':')[0])
return models
except (subprocess.CalledProcessError, FileNotFoundError):
return set()
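# Illustrative `ollama list` row (hypothetical model and ID):
#   llama3:8b    365c0bd3c000    4.7 GB    2 days ago
# contributes both "llama3:8b" and the tag-less "llama3" to the returned set.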
def download_file(url, dest_path, filename, should_cancel=None, progress_callback=None):
"""
Download a file from URL to destination with progress indication.
Args:
url: Source URL
dest_path: Destination file path
filename: Name for display purposes
should_cancel: Optional callback function that returns True if download should be cancelled
progress_callback: Optional callback function to report progress messages
"""
def log(msg):
"""Helper to print and optionally call progress callback."""
print(msg)
if progress_callback:
progress_callback(msg)
log(f"Downloading {filename}...")
log(f" From: {url}")
log(f" To: {dest_path}")
def show_progress(block_num, block_size, total_size):
# Check for cancellation
if should_cancel and should_cancel():
raise InterruptedError("Download cancelled")
downloaded = block_num * block_size
if total_size > 0:
percent = min(100, downloaded * 100 / total_size)
mb_downloaded = downloaded / (1024 * 1024)
mb_total = total_size / (1024 * 1024)
msg = f"\r Progress: {percent:.1f}% ({mb_downloaded:.1f}/{mb_total:.1f} MB)"
print(msg, end='')
if progress_callback:
progress_callback(f"Progress: {percent:.1f}% ({mb_downloaded:.1f}/{mb_total:.1f} MB)")
try:
urllib.request.urlretrieve(url, dest_path, show_progress)
print() # New line after progress
log(f"✓ Download complete")
except Exception as e:
print(f"\n✗ Download failed: {e}")
if progress_callback:
progress_callback(f"✗ Download failed: {e}")
raise
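# Note: urlretrieve invokes the reporthook as (block_num, block_size,
# total_size); an exception raised inside it (e.g. the InterruptedError on
# cancellation above) aborts the transfer and propagates to the caller.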
def create_ollama_model(modelfile_path, gguf_path, model_name, capabilities=None):
"""
Create an Ollama model from the Modelfile and GGUF file.
Args:
modelfile_path: Path to the .Modelfile
gguf_path: Path to the downloaded GGUF file
model_name: Name for the Ollama model
capabilities: Optional list of capabilities to add (e.g., ['tools', 'vision'])
"""
print(f"\nCreating Ollama model: {model_name}")
# Note: Capabilities are detected from the GGUF file metadata by Ollama automatically
if capabilities:
print(f" Expected capabilities from GGUF metadata: {', '.join(capabilities)}")
# Read the Modelfile and update the FROM path to point to the downloaded GGUF
with open(modelfile_path, 'r') as f:
modelfile_content = f.read()
# Replace the FROM line to use the actual GGUF path
# Handle both relative paths like "./filename.gguf" and URLs like "https://..."
original_content = modelfile_content
modelfile_content = re.sub(
r'FROM\s+(?:\./[^\s]+\.gguf|https?://[^\n]+)',
f'FROM {gguf_path}',
modelfile_content
)
# Debug: check if replacement happened
if original_content == modelfile_content:
print(f" WARNING: FROM line was not replaced!")
print(f" Looking for pattern in: {original_content[:200]}")
else:
print(f" ✓ Replaced FROM line with local path: {gguf_path}")
# Create a temporary Modelfile with the correct path
with tempfile.NamedTemporaryFile(mode='w', suffix='.Modelfile', delete=False) as tmp_modelfile:
tmp_modelfile.write(modelfile_content)
tmp_modelfile_path = tmp_modelfile.name
try:
# Run ollama create
cmd = ['ollama', 'create', model_name, '-f', tmp_modelfile_path]
print(f" Running: {' '.join(cmd)}")
result = subprocess.run(
cmd,
capture_output=True,
text=True
)
if result.returncode == 0:
print(f"✓ Model '{model_name}' created successfully")
if result.stdout:
print(f" {result.stdout.strip()}")
else:
print(f"✗ Failed to create model")
if result.stderr:
print(f" Error: {result.stderr.strip()}")
raise subprocess.CalledProcessError(result.returncode, cmd)
finally:
# Clean up temporary Modelfile
os.unlink(tmp_modelfile_path)
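# Illustrative FROM rewrite performed above (hypothetical paths):
#   FROM ./Example-3B-Q4_K_M.gguf
# becomes, in the temporary Modelfile handed to `ollama create`:
#   FROM /tmp/tmpabc123/Example-3B-Q4_K_M.gguf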
def install_model(modelfile_path, dry_run=False, skip_existing=False, existing_models=None, should_cancel=None, progress_callback=None):
"""
Install a single model from a Modelfile.
Args:
modelfile_path: Path to the .Modelfile
dry_run: If True, only simulate installation
skip_existing: If True, skip models already in Ollama
existing_models: Set of existing model names
should_cancel: Optional callback function that returns True if installation should be cancelled
progress_callback: Optional callback function to report progress messages
Returns:
Tuple of (success: bool, skipped: bool, model_name: str)
"""
def log(msg):
"""Helper to print and optionally call progress callback."""
print(msg)
if progress_callback:
progress_callback(msg)
log(f"\n{'='*80}")
log(f"Processing: {modelfile_path}")
log(f"{'='*80}")
# Parse the Modelfile
model_info = parse_modelfile(modelfile_path)
if not model_info:
log(f"✗ No hf_upstream found in {modelfile_path}")
return (False, False, None)
log(f"Model name: {model_info['model_name']}")
log(f"GGUF file: {model_info['gguf_filename']}")
if model_info['sha256']:
log(f"SHA256: {model_info['sha256'][:16]}...")
if model_info.get('capabilities'):
log(f"Capabilities: {', '.join(model_info['capabilities'])}")
# Check if model already exists
if skip_existing and existing_models and model_info['model_name'] in existing_models:
log(f"⊘ Model '{model_info['model_name']}' already exists, skipping")
return (True, True, model_info['model_name'])
# Get file size and check disk space
file_size = get_file_size(model_info['resolve_url'])
if file_size:
size_gb = file_size / (1024**3)
log(f"File size: {size_gb:.2f} GB")
        if not dry_run:
            # Check space where the download will actually land (the temp dir)
            has_space, available, required = check_disk_space(file_size, tempfile.gettempdir())
if not has_space:
log(f"✗ Insufficient disk space!")
log(f" Required: {required / (1024**3):.2f} GB (with 10% margin)")
log(f" Available: {available / (1024**3):.2f} GB")
return (False, False, model_info['model_name'])
else:
log(f"✓ Disk space check passed ({available / (1024**3):.2f} GB available)")
if dry_run:
log(f"\n[DRY RUN] Would download and install model: {model_info['model_name']}")
return (True, False, model_info['model_name'])
# Create temporary directory for download
with tempfile.TemporaryDirectory() as tmp_dir:
gguf_path = os.path.join(tmp_dir, model_info['gguf_filename'])
try:
# Download the GGUF file
download_file(model_info['resolve_url'], gguf_path, model_info['gguf_filename'], should_cancel, progress_callback)
            # Verify checksum if provided
            if model_info['sha256']:
                if not verify_checksum(gguf_path, model_info['sha256']):
                    log("✗ Checksum verification failed!")
                    return (False, False, model_info['model_name'])
            # Create the Ollama model
            create_ollama_model(
                modelfile_path,
                gguf_path,
                model_info['model_name'],
                model_info.get('capabilities')
            )
            log(f"\n✓ Successfully installed model: {model_info['model_name']}")
            return (True, False, model_info['model_name'])
        except Exception as e:
            log(f"\n✗ Failed to install model: {e}")
            return (False, False, model_info['model_name'])
def install_model_wrapper(args):
"""Wrapper for parallel execution."""
return install_model(*args)
def main():
parser = argparse.ArgumentParser(
description='Install Ollama models from HuggingFace using Modelfiles',
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
# Install a single model
%(prog)s path/to/model.Modelfile
# Install all models in the default repo directory
%(prog)s
# Dry run to see what would be installed
%(prog)s --dry-run
# Skip models that already exist
%(prog)s --skip-existing
# Install with 3 parallel downloads
%(prog)s --parallel 3
"""
)
parser.add_argument(
'modelfile',
nargs='?',
help='Path to a specific .Modelfile to install (optional)'
)
parser.add_argument(
'--dir',
default='./modelfile-repo',
help='Directory containing .Modelfile files (default: ./modelfile-repo)'
)
parser.add_argument(
'--dry-run',
action='store_true',
help='Simulate installation without downloading or creating models'
)
parser.add_argument(
'--skip-existing',
action='store_true',
help='Skip models that already exist in Ollama'
)
parser.add_argument(
'--parallel',
type=int,
default=1,
metavar='N',
help='Number of parallel downloads/installations (default: 1)'
)
args = parser.parse_args()
# Validate parallel argument
if args.parallel < 1:
print("✗ Error: --parallel must be at least 1")
sys.exit(1)
# Check if ollama is available
try:
subprocess.run(['ollama', '--version'], capture_output=True, check=True)
except (subprocess.CalledProcessError, FileNotFoundError):
print("✗ Error: 'ollama' command not found. Please install Ollama first.")
print(" Visit: https://ollama.ai")
sys.exit(1)
# Get existing models if skip_existing is enabled
existing_models = None
if args.skip_existing:
existing_models = get_existing_models()
if existing_models:
print(f"Found {len(existing_models)} existing model(s)")
# Determine which Modelfiles to process
if args.modelfile:
# Single file mode
modelfile_path = Path(args.modelfile)
if not modelfile_path.exists():
print(f"✗ Error: File not found: {modelfile_path}")
sys.exit(1)
        if modelfile_path.suffix != '.Modelfile':
print(f"✗ Error: File must have .Modelfile extension")
sys.exit(1)
modelfiles = [modelfile_path]
else:
# Batch mode - process all .Modelfile files in directory
modelfile_dir = Path(args.dir)
if not modelfile_dir.exists():
print(f"✗ Error: Directory not found: {modelfile_dir}")
sys.exit(1)
modelfiles = sorted(modelfile_dir.glob('*.Modelfile'))
if not modelfiles:
print(f"✗ No .Modelfile files found in {modelfile_dir}")
sys.exit(1)
print(f"Found {len(modelfiles)} Modelfile(s) to process")
if args.dry_run:
print("\n*** DRY RUN MODE - No files will be downloaded or models created ***\n")
# Process all Modelfiles
results = []
if args.parallel > 1 and len(modelfiles) > 1:
# Parallel processing
print(f"\nUsing {args.parallel} parallel worker(s)")
with ThreadPoolExecutor(max_workers=args.parallel) as executor:
# Submit all tasks
future_to_modelfile = {
executor.submit(
install_model_wrapper,
(modelfile, args.dry_run, args.skip_existing, existing_models)
): modelfile
for modelfile in modelfiles
}
# Collect results as they complete
for future in as_completed(future_to_modelfile):
modelfile = future_to_modelfile[future]
try:
success, skipped, model_name = future.result()
results.append((modelfile.name, success, skipped))
except Exception as e:
print(f"\n✗ Exception processing {modelfile.name}: {e}")
results.append((modelfile.name, False, False))
else:
# Sequential processing
for modelfile in modelfiles:
success, skipped, model_name = install_model(
modelfile,
args.dry_run,
args.skip_existing,
existing_models
)
results.append((modelfile.name, success, skipped))
# Summary
print(f"\n{'='*80}")
print("INSTALLATION SUMMARY")
print(f"{'='*80}")
successful = sum(1 for _, success, skipped in results if success and not skipped)
skipped = sum(1 for _, success, skip in results if skip)
failed = len(results) - successful - skipped
    for name, success, skip in results:
        if skip:
            status = "⊘"
        elif success:
            status = "✓"
        else:
            status = "✗"
        print(f"{status} {name}")
print(f"\nTotal: {len(results)} | Successful: {successful} | Skipped: {skipped} | Failed: {failed}")
if failed > 0:
sys.exit(1)
if __name__ == '__main__':
main()