#!/usr/bin/env python3
"""
HuggingFace LLM Installer for Ollama

Automatically downloads GGUF files from HuggingFace and creates Ollama models.

Features:
- SHA256 checksum verification
- Disk space checking
- Dry run mode
- Parallel processing
- Skip existing models
"""

import argparse
import hashlib
import os
import re
import shutil
import subprocess
import sys
import tempfile
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from urllib.parse import urlparse
import urllib.request


def parse_model_name_from_gguf(gguf_filename):
    """
    Parse model name and tag from GGUF filename.

    Args:
        gguf_filename: Name of the GGUF file

    Returns:
        Tuple of (model_base, tag, full_name), or
        (filename, 'latest', filename) if parsing fails
    """
    filename_stem = Path(gguf_filename).stem.lower()

    # Split on hyphens
    parts = filename_stem.split('-')

    if len(parts) >= 3:
        # Find where the size variant starts (e.g., "0.5b", "3b", "8b", "14b")
        base_parts = []
        tag_parts = []
        found_variant = False

        for part in parts:
            # Check if this looks like a size variant (e.g., "3b", "8b", "0.5b")
            if not found_variant and re.match(r'^\d+(\.\d+)?b$', part):
                found_variant = True
                tag_parts.append(part)
            elif found_variant:
                # Include everything after the variant (including quantization)
                tag_parts.append(part)
            else:
                # Before the variant = base name
                base_parts.append(part)

        if base_parts and tag_parts:
            model_base = '-'.join(base_parts)
            model_tag = '-'.join(tag_parts)
            full_name = f"{model_base}:{model_tag}"
            return (model_base, model_tag, full_name)

    # Fallback to filename without extension
    return (filename_stem, 'latest', filename_stem)


def parse_modelfile(modelfile_path):
    """
    Parse a Modelfile to extract the HuggingFace upstream URL and model info.

    Args:
        modelfile_path: Path to the .Modelfile

    Returns:
        dict with model metadata, or None if invalid
    """
    with open(modelfile_path, 'r') as f:
        content = f.read()

    # Look for hf_upstream in the header comments
    hf_match = re.search(r'#\s*hf_upstream:\s*(https://huggingface\.co/[^\s]+)', content)
    if not hf_match:
        return None

    hf_url = hf_match.group(1)

    # Look for optional quantization specification (default: q4_k_m)
    quant_match = re.search(r'#\s*quantization:\s*([a-zA-Z0-9_]+)', content)
    quantization = quant_match.group(1).upper() if quant_match else 'Q4_K_M'

    # Look for optional SHA256 checksum
    sha256_match = re.search(r'#\s*sha256:\s*([a-fA-F0-9]{64})', content)
    sha256 = sha256_match.group(1) if sha256_match else None

    # Look for optional capabilities (comma-separated list)
    # Format: # capabilities: tools, vision
    capabilities_match = re.search(r'#\s*capabilities:\s*([^\n]+)', content)
    capabilities = None
    if capabilities_match:
        # Parse comma-separated capabilities and clean whitespace
        caps_str = capabilities_match.group(1).strip()
        capabilities = [cap.strip() for cap in caps_str.split(',') if cap.strip()]

    # Look for optional mmproj (multimodal projection) configuration
    # Format: # mmproj_url: https://huggingface.co/org/repo
    mmproj_url_match = re.search(r'#\s*mmproj_url:\s*(https://huggingface\.co/[^\s]+)', content)
    mmproj_url = mmproj_url_match.group(1) if mmproj_url_match else None

    # Format: # mmproj_quant: BF16 (or F16, F32)
    mmproj_quant_match = re.search(r'#\s*mmproj_quant:\s*([a-zA-Z0-9_]+)', content)
    mmproj_quant = mmproj_quant_match.group(1) if mmproj_quant_match else 'BF16'  # Default to BF16

    # Format: # mmproj_sha256: <64 hex characters>
    mmproj_sha256_match = re.search(r'#\s*mmproj_sha256:\s*([a-fA-F0-9]{64})', content)
    mmproj_sha256 = mmproj_sha256_match.group(1) if mmproj_sha256_match else None
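
    # Putting the fields above together, a header this parser understands looks
    # like the following (illustrative values; only hf_upstream is required):
    #
    #   # hf_upstream: https://huggingface.co/org/Model-GGUF
    #   # quantization: q5_k_m
    #   # sha256: <64 hex characters>
    #   # capabilities: tools, vision
    #   # mmproj_url: https://huggingface.co/org/Model-GGUF
    #   # mmproj_quant: BF16
    #   # mmproj_sha256: <64 hex characters>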

    # Check if URL points to a specific GGUF file or just the repo
    if hf_url.endswith('.gguf') or '/blob/' in hf_url or '/resolve/' in hf_url:
        # Specific file provided - use as-is
        resolve_url = hf_url.replace('/blob/', '/resolve/')
        gguf_filename = os.path.basename(urlparse(resolve_url).path)
    else:
        # Repository root provided - construct filename from repo name and quantization
        # URL format: https://huggingface.co/{org}/{repo}
        url_parts = urlparse(hf_url).path.strip('/').split('/')
        if len(url_parts) >= 2:
            repo_name = url_parts[1]  # e.g., "Ministral-3-3B-Instruct-2512-GGUF"
            # Remove -GGUF suffix if present (case-insensitive)
            if repo_name.upper().endswith('-GGUF'):
                repo_name = repo_name[:-5]
            # Construct filename: RepoName-Quantization.gguf
            gguf_filename = f"{repo_name}-{quantization}.gguf"
            resolve_url = f"{hf_url.rstrip('/')}/resolve/main/{gguf_filename}"
        else:
            print(f"✗ Invalid HuggingFace URL format: {hf_url}")
            return None

    # Extract model name and tag from the GGUF filename
    # Format: Model-Version-Variant-Year-Quant.gguf -> model:version-variant-year-quant
    # Example: Ministral-3-3B-Instruct-2512-Q5_K_M.gguf -> ministral-3:3b-instruct-2512-q5_k_m
    model_base, model_tag, model_name = parse_model_name_from_gguf(gguf_filename)

    # Construct mmproj info if mmproj_url is provided
    mmproj_info = None
    if mmproj_url:
        # Determine mmproj filename based on URL pattern
        if mmproj_url.endswith('.gguf') or '/blob/' in mmproj_url or '/resolve/' in mmproj_url:
            # Specific file provided
            mmproj_resolve_url = mmproj_url.replace('/blob/', '/resolve/')
            mmproj_filename = os.path.basename(urlparse(mmproj_resolve_url).path)
        else:
            # Repository root - construct filename
            # Two common patterns:
            # 1. mmproj-BF16.gguf (unsloth pattern)
            # 2. ModelName-BF16-mmproj.gguf (mistralai pattern)
            # Try to detect which pattern by checking the URL
            url_parts = urlparse(mmproj_url).path.strip('/').split('/')
            if len(url_parts) >= 2:
                repo_org = url_parts[0]
                if repo_org == 'unsloth':
                    # unsloth pattern: mmproj-{QUANT}.gguf
                    mmproj_filename = f"mmproj-{mmproj_quant}.gguf"
                else:
                    # mistralai/others pattern: extract base name from main repo
                    repo_name = url_parts[1]
                    if repo_name.upper().endswith('-GGUF'):
                        repo_name = repo_name[:-5]
                    mmproj_filename = f"{repo_name}-{mmproj_quant}-mmproj.gguf"
                mmproj_resolve_url = f"{mmproj_url.rstrip('/')}/resolve/main/{mmproj_filename}"
            else:
                print(f"✗ Invalid mmproj URL format: {mmproj_url}")
                mmproj_resolve_url = None
                mmproj_filename = None

        if mmproj_resolve_url and mmproj_filename:
            mmproj_info = {
                'url': mmproj_url,
                'resolve_url': mmproj_resolve_url,
                'filename': mmproj_filename,
                'sha256': mmproj_sha256
            }

    return {
        'hf_url': hf_url,
        'resolve_url': resolve_url,
        'gguf_filename': gguf_filename,
        'model_name': model_name,
        'modelfile_path': modelfile_path,
        'sha256': sha256,
        'capabilities': capabilities,
        'mmproj': mmproj_info
    }
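

# Illustrative end-to-end parse (the repo URL is hypothetical, assembled from
# the naming comments above): a Modelfile header of
#
#   # hf_upstream: https://huggingface.co/mistralai/Ministral-3-3B-Instruct-2512-GGUF
#   # quantization: q5_k_m
#
# yields gguf_filename "Ministral-3-3B-Instruct-2512-Q5_K_M.gguf", resolve_url
# ".../Ministral-3-3B-Instruct-2512-GGUF/resolve/main/Ministral-3-3B-Instruct-2512-Q5_K_M.gguf",
# and model_name "ministral-3:3b-instruct-2512-q5_k_m".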


def get_file_size(url):
    """
    Get the size of a file from URL without downloading it.

    Args:
        url: File URL

    Returns:
        Size in bytes or None if unavailable
    """
    try:
        req = urllib.request.Request(url, method='HEAD')
        with urllib.request.urlopen(req, timeout=10) as response:
            size = response.headers.get('Content-Length')
            return int(size) if size else None
    except Exception:
        return None


def check_disk_space(required_bytes, path='.'):
    """
    Check if there's enough disk space available.

    Args:
        required_bytes: Required space in bytes
        path: Path to check space on (default: current directory)

    Returns:
        Tuple of (has_space, available_bytes, required_bytes)
    """
    # Get absolute path to check the actual filesystem
    abs_path = os.path.abspath(path)
    stat = shutil.disk_usage(abs_path)

    # Add 10% safety margin
    required_with_margin = int(required_bytes * 1.1)

    return (stat.free >= required_with_margin, stat.free, required_with_margin)


def calculate_sha256(filepath, chunk_size=8192):
    """
    Calculate SHA256 checksum of a file.

    Args:
        filepath: Path to file
        chunk_size: Bytes to read at once

    Returns:
        SHA256 hex digest
    """
    sha256_hash = hashlib.sha256()
    with open(filepath, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            sha256_hash.update(chunk)
    return sha256_hash.hexdigest()


def verify_checksum(filepath, expected_sha256):
    """
    Verify file checksum matches expected value.

    Args:
        filepath: Path to file
        expected_sha256: Expected SHA256 hash

    Returns:
        True if match, False otherwise
    """
    print("  Verifying checksum...")
    actual = calculate_sha256(filepath)

    if actual.lower() == expected_sha256.lower():
        print(f"  ✓ Checksum verified: {actual[:16]}...")
        return True
    else:
        print("  ✗ Checksum mismatch!")
        print(f"    Expected: {expected_sha256}")
        print(f"    Actual:   {actual}")
        return False
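

# To fill in the sha256/mmproj_sha256 header fields, hash the downloaded GGUF
# locally, e.g. with this module's own helper (the path is illustrative):
#
#   >>> calculate_sha256('Ministral-3-3B-Instruct-2512-Q5_K_M.gguf')
#   '<64 hex characters>'
#
# or from a shell with `sha256sum <file>.gguf` (coreutils) or
# `shasum -a 256 <file>.gguf` (macOS).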


def get_existing_models():
    """
    Get list of existing Ollama models.

    Returns:
        Set of model names (both full "name:tag" and bare base names)
    """
    try:
        result = subprocess.run(
            ['ollama', 'list'],
            capture_output=True,
            text=True,
            check=True
        )
        # Parse output to get model names
        # Format: NAME  ID  SIZE  MODIFIED
        models = set()
        for line in result.stdout.strip().split('\n')[1:]:  # Skip header
            if line.strip():
                # Get first column (name)
                name = line.split()[0]
                # Record both the full name and the base name without the tag,
                # so existence checks work whether or not the caller's model
                # name includes a tag
                models.add(name)
                models.add(name.split(':')[0])
        return models
    except (subprocess.CalledProcessError, FileNotFoundError):
        return set()


def download_file(url, dest_path, filename, should_cancel=None, progress_callback=None):
    """
    Download a file from URL to destination with progress indication.

    Args:
        url: Source URL
        dest_path: Destination file path
        filename: Name for display purposes
        should_cancel: Optional callback function that returns True if download
            should be cancelled
        progress_callback: Optional callback function to report progress messages
    """
    def log(msg):
        """Helper to print and optionally call progress callback."""
        print(msg)
        if progress_callback:
            progress_callback(msg)

    log(f"Downloading {filename}...")
    log(f"  From: {url}")
    log(f"  To:   {dest_path}")

    def show_progress(block_num, block_size, total_size):
        # Check for cancellation
        if should_cancel and should_cancel():
            raise InterruptedError("Download cancelled")

        downloaded = block_num * block_size
        if total_size > 0:
            percent = min(100, downloaded * 100 / total_size)
            mb_downloaded = downloaded / (1024 * 1024)
            mb_total = total_size / (1024 * 1024)
            msg = f"\r  Progress: {percent:.1f}% ({mb_downloaded:.1f}/{mb_total:.1f} MB)"
            print(msg, end='')
            if progress_callback:
                progress_callback(f"Progress: {percent:.1f}% ({mb_downloaded:.1f}/{mb_total:.1f} MB)")

    try:
        urllib.request.urlretrieve(url, dest_path, show_progress)
        print()  # New line after progress
        log("✓ Download complete")
    except Exception as e:
        print(f"\n✗ Download failed: {e}")
        if progress_callback:
            progress_callback(f"✗ Download failed: {e}")
        raise


def create_ollama_model(modelfile_path, gguf_path, model_name, capabilities=None, mmproj_path=None):
    """
    Create an Ollama model from the Modelfile and GGUF file.

    Args:
        modelfile_path: Path to the .Modelfile
        gguf_path: Path to the downloaded GGUF file
        model_name: Name for the Ollama model
        capabilities: Optional list of expected capabilities (e.g., ['tools', 'vision'])
        mmproj_path: Optional path to the mmproj file for vision models
    """
    print(f"\nCreating Ollama model: {model_name}")

    # Note: Capabilities are detected from the GGUF file metadata by Ollama automatically
    if capabilities:
        print(f"  ℹ Expected capabilities from GGUF metadata: {', '.join(capabilities)}")
    if mmproj_path:
        print("  ℹ Including mmproj file for vision support")

    # Read the Modelfile and update the FROM path to point to the downloaded GGUF
    with open(modelfile_path, 'r') as f:
        modelfile_content = f.read()

    # Replace the FROM line to use the actual GGUF path
    # Handle both relative paths like "./filename.gguf" and URLs like "https://..."
    original_content = modelfile_content
    modelfile_content = re.sub(
        r'FROM\s+(?:\./[^\s]+\.gguf|https?://[^\n]+)',
        f'FROM {gguf_path}',
        modelfile_content
    )

    # Add mmproj FROM line if provided
    if mmproj_path:
        # Add the mmproj FROM line after the main model FROM line
        modelfile_content = modelfile_content.replace(
            f'FROM {gguf_path}',
            f'FROM {gguf_path}\nFROM {mmproj_path}'
        )

    # Debug: check if replacement happened
    if original_content == modelfile_content:
        print("  WARNING: FROM line was not replaced!")
        print(f"  Looking for pattern in: {original_content[:200]}")
    else:
        print(f"  ✓ Replaced FROM line with local path: {gguf_path}")
        if mmproj_path:
            print(f"  ✓ Added mmproj FROM line: {mmproj_path}")

    # Create a temporary Modelfile with the correct path
    with tempfile.NamedTemporaryFile(mode='w', suffix='.Modelfile', delete=False) as tmp_modelfile:
        tmp_modelfile.write(modelfile_content)
        tmp_modelfile_path = tmp_modelfile.name

    try:
        # Run ollama create
        cmd = ['ollama', 'create', model_name, '-f', tmp_modelfile_path]
        print(f"  Running: {' '.join(cmd)}")

        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True
        )

        if result.returncode == 0:
            # Success - output will be shown by the caller
            if result.stdout:
                print(result.stdout.strip())
        else:
            print("✗ Failed to create model")
            if result.stderr:
                print(f"  Error: {result.stderr.strip()}")
            raise subprocess.CalledProcessError(result.returncode, cmd)
    finally:
        # Clean up temporary Modelfile
        os.unlink(tmp_modelfile_path)
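

# For reference, the FROM rewrite in create_ollama_model() turns a shipped
# Modelfile line such as (paths illustrative):
#
#   FROM ./Ministral-3-3B-Instruct-2512-Q5_K_M.gguf
#
# into one pointing at the temporary download, e.g.:
#
#   FROM /tmp/tmpXXXXXX/Ministral-3-3B-Instruct-2512-Q5_K_M.gguf
#   FROM /tmp/tmpXXXXXX/mmproj-BF16.gguf   (second line added only for vision models)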


def install_model(modelfile_path, dry_run=False, skip_existing=False,
                  existing_models=None, should_cancel=None, progress_callback=None):
    """
    Install a single model from a Modelfile.

    Args:
        modelfile_path: Path to the .Modelfile
        dry_run: If True, only simulate installation
        skip_existing: If True, skip models already in Ollama
        existing_models: Set of existing model names
        should_cancel: Optional callback function that returns True if
            installation should be cancelled
        progress_callback: Optional callback function to report progress messages

    Returns:
        Tuple of (success: bool, skipped: bool, model_name: str)
    """
    def log(msg):
        """Helper to print and optionally call progress callback."""
        print(msg)
        if progress_callback:
            progress_callback(msg)

    log(f"\n{'='*80}")
    log(f"Processing: {modelfile_path}")
    log(f"{'='*80}")

    # Parse the Modelfile
    model_info = parse_modelfile(modelfile_path)
    if not model_info:
        log(f"✗ No hf_upstream found in {modelfile_path}")
        return (False, False, None)

    log(f"Model name: {model_info['model_name']}")
    log(f"GGUF file: {model_info['gguf_filename']}")
    if model_info['sha256']:
        log(f"SHA256: {model_info['sha256'][:16]}...")
    if model_info.get('capabilities'):
        log(f"Capabilities: {', '.join(model_info['capabilities'])}")
    if model_info.get('mmproj'):
        log(f"MMProj file: {model_info['mmproj']['filename']}")
        if model_info['mmproj']['sha256']:
            log(f"MMProj SHA256: {model_info['mmproj']['sha256'][:16]}...")

    # Check if model already exists
    if skip_existing and existing_models and model_info['model_name'] in existing_models:
        log(f"⊘ Model '{model_info['model_name']}' already exists, skipping")
        return (True, True, model_info['model_name'])

    # Get file size and check disk space
    file_size = get_file_size(model_info['resolve_url'])
    mmproj_file_size = None
    if model_info.get('mmproj'):
        mmproj_file_size = get_file_size(model_info['mmproj']['resolve_url'])

    total_size = file_size or 0
    if mmproj_file_size:
        total_size += mmproj_file_size

    if file_size:
        size_gb = file_size / (1024**3)
        log(f"GGUF file size: {size_gb:.2f} GB")
        if mmproj_file_size:
            mmproj_size_gb = mmproj_file_size / (1024**3)
            log(f"MMProj file size: {mmproj_size_gb:.2f} GB")
            log(f"Total size: {total_size / (1024**3):.2f} GB")

    # Use the combined size from here on (0 if the HEAD requests failed, in
    # which case the disk space check below is skipped rather than crashing)
    file_size = total_size

    if not dry_run and file_size:
        has_space, available, required = check_disk_space(file_size)
        if not has_space:
            log("✗ Insufficient disk space!")
            log(f"  Required:  {required / (1024**3):.2f} GB (with 10% margin)")
            log(f"  Available: {available / (1024**3):.2f} GB")
            return (False, False, model_info['model_name'])
        else:
            log(f"✓ Disk space check passed ({available / (1024**3):.2f} GB available)")

    if dry_run:
        log(f"\n[DRY RUN] Would download and install model: {model_info['model_name']}")
        return (True, False, model_info['model_name'])

    # Create temporary directory for download
    with tempfile.TemporaryDirectory() as tmp_dir:
        gguf_path = os.path.join(tmp_dir, model_info['gguf_filename'])
        mmproj_path = None

        try:
            # Download the GGUF file
            download_file(model_info['resolve_url'], gguf_path,
                          model_info['gguf_filename'], should_cancel, progress_callback)

            # Verify checksum if provided
            if model_info['sha256']:
                if not verify_checksum(gguf_path, model_info['sha256']):
                    print("✗ Checksum verification failed!")
                    return (False, False, model_info['model_name'])

            # Download mmproj file if specified
            if model_info.get('mmproj'):
                mmproj_path = os.path.join(tmp_dir, model_info['mmproj']['filename'])
                download_file(
                    model_info['mmproj']['resolve_url'],
                    mmproj_path,
                    model_info['mmproj']['filename'],
                    should_cancel,
                    progress_callback
                )

                # Verify mmproj checksum if provided
                if model_info['mmproj']['sha256']:
                    if not verify_checksum(mmproj_path, model_info['mmproj']['sha256']):
                        print("✗ MMProj checksum verification failed!")
                        return (False, False, model_info['model_name'])
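
            # Note: gguf_path/mmproj_path live inside the TemporaryDirectory
            # above and are deleted when this block exits; that is safe because
            # `ollama create` imports the weights into Ollama's own model store
            # during the create step below.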

            # Create the Ollama model
            create_ollama_model(
                modelfile_path,
                gguf_path,
                model_info['model_name'],
                model_info.get('capabilities'),
                mmproj_path
            )

            print(f"\n✓ Successfully installed model: {model_info['model_name']}")
            return (True, False, model_info['model_name'])

        except Exception as e:
            print(f"\n✗ Failed to install model: {e}")
            return (False, False, model_info['model_name'])


def install_model_wrapper(args):
    """Wrapper for parallel execution."""
    return install_model(*args)
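

# install_model() can also be driven programmatically, e.g. from a GUI, via the
# optional hooks (a sketch; the path, stop_event, and status_label are
# hypothetical):
#
#   ok, was_skipped, name = install_model(
#       Path('modelfile-repo/example.Modelfile'),
#       should_cancel=lambda: stop_event.is_set(),
#       progress_callback=status_label.set_text,
#   )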


def main():
    parser = argparse.ArgumentParser(
        description='Install Ollama models from HuggingFace using Modelfiles',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Install a single model
  %(prog)s path/to/model.Modelfile

  # Install all models in the default repo directory
  %(prog)s

  # Dry run to see what would be installed
  %(prog)s --dry-run

  # Skip models that already exist
  %(prog)s --skip-existing

  # Install with 3 parallel downloads
  %(prog)s --parallel 3
"""
    )
    parser.add_argument(
        'modelfile',
        nargs='?',
        help='Path to a specific .Modelfile to install (optional)'
    )
    parser.add_argument(
        '--dir',
        default='./modelfile-repo',
        help='Directory containing .Modelfile files (default: ./modelfile-repo)'
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Simulate installation without downloading or creating models'
    )
    parser.add_argument(
        '--skip-existing',
        action='store_true',
        help='Skip models that already exist in Ollama'
    )
    parser.add_argument(
        '--parallel',
        type=int,
        default=1,
        metavar='N',
        help='Number of parallel downloads/installations (default: 1)'
    )

    args = parser.parse_args()

    # Validate parallel argument
    if args.parallel < 1:
        print("✗ Error: --parallel must be at least 1")
        sys.exit(1)

    # Check if ollama is available
    try:
        subprocess.run(['ollama', '--version'], capture_output=True, check=True)
    except (subprocess.CalledProcessError, FileNotFoundError):
        print("✗ Error: 'ollama' command not found. Please install Ollama first.")
        print("  Visit: https://ollama.ai")
        sys.exit(1)

    # Get existing models if skip_existing is enabled
    existing_models = None
    if args.skip_existing:
        existing_models = get_existing_models()
        if existing_models:
            print(f"Found {len(existing_models)} existing model(s)")

    # Determine which Modelfiles to process
    if args.modelfile:
        # Single file mode
        modelfile_path = Path(args.modelfile)
        if not modelfile_path.exists():
            print(f"✗ Error: File not found: {modelfile_path}")
            sys.exit(1)
        if modelfile_path.suffix != '.Modelfile':
            print("✗ Error: File must have .Modelfile extension")
            sys.exit(1)
        modelfiles = [modelfile_path]
    else:
        # Batch mode - process all .Modelfile files in directory
        modelfile_dir = Path(args.dir)
        if not modelfile_dir.exists():
            print(f"✗ Error: Directory not found: {modelfile_dir}")
            sys.exit(1)

        modelfiles = sorted(modelfile_dir.glob('*.Modelfile'))
        if not modelfiles:
            print(f"✗ No .Modelfile files found in {modelfile_dir}")
            sys.exit(1)

    print(f"Found {len(modelfiles)} Modelfile(s) to process")

    if args.dry_run:
        print("\n*** DRY RUN MODE - No files will be downloaded or models created ***\n")

    # Process all Modelfiles
    results = []

    if args.parallel > 1 and len(modelfiles) > 1:
        # Parallel processing
        print(f"\nUsing {args.parallel} parallel worker(s)")
        with ThreadPoolExecutor(max_workers=args.parallel) as executor:
            # Submit all tasks
            future_to_modelfile = {
                executor.submit(
                    install_model_wrapper,
                    (modelfile, args.dry_run, args.skip_existing, existing_models)
                ): modelfile
                for modelfile in modelfiles
            }

            # Collect results as they complete
            for future in as_completed(future_to_modelfile):
                modelfile = future_to_modelfile[future]
                try:
                    success, skipped, model_name = future.result()
                    results.append((modelfile.name, success, skipped))
                except Exception as e:
                    print(f"\n✗ Exception processing {modelfile.name}: {e}")
                    results.append((modelfile.name, False, False))
    else:
        # Sequential processing
        for modelfile in modelfiles:
            success, skipped, model_name = install_model(
                modelfile, args.dry_run, args.skip_existing, existing_models
            )
            results.append((modelfile.name, success, skipped))

    # Summary
    print(f"\n{'='*80}")
    print("INSTALLATION SUMMARY")
    print(f"{'='*80}")

    successful = sum(1 for _, success, skip in results if success and not skip)
    skipped_count = sum(1 for _, _, skip in results if skip)
    failed = len(results) - successful - skipped_count

    for name, success, skip in results:
        if skip:
            status = "⊘"
        elif success:
            status = "✓"
        else:
            status = "✗"
        print(f"{status} {name}")

    print(f"\nTotal: {len(results)} | Successful: {successful} | "
          f"Skipped: {skipped_count} | Failed: {failed}")

    if failed > 0:
        sys.exit(1)


if __name__ == '__main__':
    main()