mmproj upgrade

2026-01-21 12:13:39 +01:00
parent 8149ac8c8b
commit b83f2e6e38
11 changed files with 258 additions and 31 deletions

View File

@@ -119,6 +119,22 @@ Example:
 # capabilities: tools, vision
 ```
+
+**Vision Model Support (MMProj):**
+
+For vision-capable models, you can specify an mmproj (multimodal projection) file that contains the vision encoder. See [MMProj Support Documentation](docs/MMPROJ_SUPPORT.md) for detailed information.
+
+```dockerfile
+# hf_upstream: https://huggingface.co/mistralai/Ministral-3-14B-Reasoning-2512-GGUF
+# quantization: Q5_K_M
+# capabilities: vision, reasoning, tools
+#
+# mmproj_url: https://huggingface.co/unsloth/Ministral-3-14B-Reasoning-2512-GGUF
+# mmproj_quant: BF16
+# mmproj_sha256: abc123... (optional)
+```
+
+The script will automatically download both the main GGUF and mmproj files, and create an Ollama model with vision support.
+
 **Note:** Capabilities are read from the GGUF file's metadata by Ollama. The `# capabilities:` comment serves as documentation to track expected model features. If a model doesn't show the expected capabilities after installation, it may be due to the GGUF file lacking that metadata.

 The script will:
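
As a rough sketch of how a repo-root `mmproj_url` plus `mmproj_quant` resolve to a concrete download URL (the helper name and structure here are illustrative, not part of the script's API; the real logic lives in `parse_modelfile` below):

```python
from urllib.parse import urlparse

def resolve_mmproj_url(mmproj_url: str, mmproj_quant: str = "BF16") -> str:
    """Illustrative: build the direct download URL for an mmproj file."""
    org, repo = urlparse(mmproj_url).path.strip("/").split("/")[:2]
    if org == "unsloth":
        filename = f"mmproj-{mmproj_quant}.gguf"  # unsloth naming pattern
    else:
        base = repo[:-5] if repo.upper().endswith("-GGUF") else repo
        filename = f"{base}-{mmproj_quant}-mmproj.gguf"  # mistralai-style naming
    return f"{mmproj_url.rstrip('/')}/resolve/main/{filename}"

print(resolve_mmproj_url("https://huggingface.co/unsloth/Ministral-3-14B-Reasoning-2512-GGUF"))
# https://huggingface.co/unsloth/Ministral-3-14B-Reasoning-2512-GGUF/resolve/main/mmproj-BF16.gguf
```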

View File

@@ -1,6 +1,10 @@
 # ollama-utils-metadata
-# hf_upstream: https://huggingface.co/mistralai/Ministral-3-14B-Instruct-2512-GGUF/blob/main/Ministral-3-14B-Instruct-2512-Q5_K_M.gguf
-# capabilities: tools,vision
+# hf_upstream: https://huggingface.co/mistralai/Ministral-3-14B-Instruct-2512-GGUF
+# quantization: Q5_K_M
+# capabilities: vision, tools
+#
+# mmproj_url: https://huggingface.co/unsloth/Ministral-3-14B-Instruct-2512-GGUF
+# mmproj_quant: BF16
 FROM ./Ministral-3-14B-Instruct-2512-Q5_K_M.gguf
 # Specialized parser for Mistral 3 logic

View File

@@ -1,7 +1,10 @@
 # ollama-utils-metadata
-# hf_upstream: https://huggingface.co/mistralai/Ministral-3-14B-Reasoning-2512-gguf
-# quantization: q5_k_m
-# capabilities: tools,vision,thinking
+# hf_upstream: https://huggingface.co/mistralai/Ministral-3-14B-Reasoning-2512-GGUF
+# quantization: Q5_K_M
+# capabilities: vision, reasoning, tools
+#
+# mmproj_url: https://huggingface.co/unsloth/Ministral-3-14B-Reasoning-2512-GGUF
+# mmproj_quant: BF16
 FROM ./Ministral-3-14B-Reasoning-2512-Q5_K_M.gguf
 # Specialized parser

View File

@@ -1,6 +1,10 @@
 # ollama-utils-metadata
-# hf_upstream: https://huggingface.co/mistralai/Ministral-3-3B-Instruct-2512-GGUF/blob/main/Ministral-3-3B-Instruct-2512-Q5_K_M.gguf
-# capabilities: tools,vision
+# hf_upstream: https://huggingface.co/mistralai/Ministral-3-3B-Instruct-2512-GGUF
+# quantization: Q5_K_M
+# capabilities: vision, tools
+#
+# mmproj_url: https://huggingface.co/unsloth/Ministral-3-3B-Instruct-2512-GGUF
+# mmproj_quant: BF16
 FROM ./Ministral-3-3B-Instruct-2512-Q5_K_M.gguf
 # Specialized parser for Mistral 3 logic

View File

@@ -1,7 +1,10 @@
 # ollama-utils-metadata
-# hf_upstream: https://huggingface.co/mistralai/Ministral-3-3B-Reasoning-2512-gguf
-# quantization: q5_k_m
-# capabilities: tools,vision,thinking
+# hf_upstream: https://huggingface.co/mistralai/Ministral-3-3B-Reasoning-2512-GGUF
+# quantization: Q5_K_M
+# capabilities: vision, reasoning, tools
+#
+# mmproj_url: https://huggingface.co/unsloth/Ministral-3-3B-Reasoning-2512-GGUF
+# mmproj_quant: BF16
 FROM ./Ministral-3-3B-Reasoning-2512-Q5_K_M.gguf
 # Specialized parser

View File

@@ -1,6 +1,10 @@
 # ollama-utils-metadata
-# hf_upstream: https://huggingface.co/mistralai/Ministral-3-8B-Instruct-2512-GGUF/blob/main/Ministral-3-8B-Instruct-2512-Q5_K_M.gguf
-# capabilities: tools,vision
+# hf_upstream: https://huggingface.co/mistralai/Ministral-3-8B-Instruct-2512-GGUF
+# quantization: Q5_K_M
+# capabilities: vision, tools
+#
+# mmproj_url: https://huggingface.co/unsloth/Ministral-3-8B-Instruct-2512-GGUF
+# mmproj_quant: BF16
 FROM ./Ministral-3-8B-Instruct-2512-Q5_K_M.gguf
 # Specialized parser for Mistral 3 logic

View File

@@ -1,7 +1,10 @@
 # ollama-utils-metadata
-# hf_upstream: https://huggingface.co/mistralai/Ministral-3-8B-Reasoning-2512-gguf
-# quantization: q5_k_m
-# capabilities: tools,vision,thinking
+# hf_upstream: https://huggingface.co/mistralai/Ministral-3-8B-Reasoning-2512-GGUF
+# quantization: Q5_K_M
+# capabilities: vision, reasoning, tools
+#
+# mmproj_url: https://huggingface.co/unsloth/Ministral-3-8B-Reasoning-2512-GGUF
+# mmproj_quant: BF16
 FROM ./Ministral-3-8B-Reasoning-2512-Q5_K_M.gguf
 # Specialized parser

View File

@@ -105,6 +105,19 @@ def parse_modelfile(modelfile_path):
         caps_str = capabilities_match.group(1).strip()
         capabilities = [cap.strip() for cap in caps_str.split(',') if cap.strip()]

+    # Look for optional mmproj (multimodal projection) configuration
+    # Format: # mmproj_url: https://huggingface.co/org/repo
+    mmproj_url_match = re.search(r'#\s*mmproj_url:\s*(https://huggingface\.co/[^\s]+)', content)
+    mmproj_url = mmproj_url_match.group(1) if mmproj_url_match else None
+
+    # Format: # mmproj_quant: BF16 (or F16, F32)
+    mmproj_quant_match = re.search(r'#\s*mmproj_quant:\s*([a-zA-Z0-9_]+)', content)
+    mmproj_quant = mmproj_quant_match.group(1) if mmproj_quant_match else 'BF16'  # Default to BF16
+
+    # Format: # mmproj_sha256: <hash>
+    mmproj_sha256_match = re.search(r'#\s*mmproj_sha256:\s*([a-fA-F0-9]{64})', content)
+    mmproj_sha256 = mmproj_sha256_match.group(1) if mmproj_sha256_match else None
+
     # Check if URL points to a specific GGUF file or just the repo
     if hf_url.endswith('.gguf') or '/blob/' in hf_url or '/resolve/' in hf_url:
         # Specific file provided - use as-is
@@ -133,6 +146,47 @@ def parse_modelfile(modelfile_path):
     # Example: Ministral-3-3B-Instruct-2512-Q5_K_M.gguf -> ministral-3:3b-instruct-2512-q5_k_m
     model_base, model_tag, model_name = parse_model_name_from_gguf(gguf_filename)

+    # Construct mmproj info if mmproj_url is provided
+    mmproj_info = None
+    if mmproj_url:
+        # Determine mmproj filename based on URL pattern
+        if mmproj_url.endswith('.gguf') or '/blob/' in mmproj_url or '/resolve/' in mmproj_url:
+            # Specific file provided
+            mmproj_resolve_url = mmproj_url.replace('/blob/', '/resolve/')
+            mmproj_filename = os.path.basename(urlparse(mmproj_resolve_url).path)
+        else:
+            # Repository root - construct filename
+            # Two common patterns:
+            #   1. mmproj-BF16.gguf (unsloth pattern)
+            #   2. ModelName-BF16-mmproj.gguf (mistralai pattern)
+            # Try to detect which pattern by checking the URL
+            url_parts = urlparse(mmproj_url).path.strip('/').split('/')
+            if len(url_parts) >= 2:
+                repo_org = url_parts[0]
+                if repo_org == 'unsloth':
+                    # unsloth pattern: mmproj-{QUANT}.gguf
+                    mmproj_filename = f"mmproj-{mmproj_quant}.gguf"
+                else:
+                    # mistralai/others pattern: extract base name from main repo
+                    repo_name = url_parts[1]
+                    if repo_name.upper().endswith('-GGUF'):
+                        repo_name = repo_name[:-5]
+                    mmproj_filename = f"{repo_name}-{mmproj_quant}-mmproj.gguf"
+                mmproj_resolve_url = f"{mmproj_url.rstrip('/')}/resolve/main/{mmproj_filename}"
+            else:
+                print(f"✗ Invalid mmproj URL format: {mmproj_url}")
+                mmproj_resolve_url = None
+                mmproj_filename = None
+
+        if mmproj_resolve_url and mmproj_filename:
+            mmproj_info = {
+                'url': mmproj_url,
+                'resolve_url': mmproj_resolve_url,
+                'filename': mmproj_filename,
+                'sha256': mmproj_sha256
+            }
+
     return {
         'hf_url': hf_url,
         'resolve_url': resolve_url,
@@ -140,7 +194,8 @@ def parse_modelfile(modelfile_path):
         'model_name': model_name,
         'modelfile_path': modelfile_path,
         'sha256': sha256,
-        'capabilities': capabilities
+        'capabilities': capabilities,
+        'mmproj': mmproj_info
     }
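
A quick standalone sanity check of the comment regexes introduced above, using only the patterns shown in this diff:

```python
import re

content = """\
# mmproj_url: https://huggingface.co/unsloth/Ministral-3-8B-Instruct-2512-GGUF
# mmproj_quant: BF16
"""

url_match = re.search(r'#\s*mmproj_url:\s*(https://huggingface\.co/[^\s]+)', content)
quant_match = re.search(r'#\s*mmproj_quant:\s*([a-zA-Z0-9_]+)', content)

assert url_match.group(1) == "https://huggingface.co/unsloth/Ministral-3-8B-Instruct-2512-GGUF"
assert (quant_match.group(1) if quant_match else 'BF16') == "BF16"
```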
@@ -302,7 +357,7 @@ def download_file(url, dest_path, filename, should_cancel=None, progress_callbac
         raise

-def create_ollama_model(modelfile_path, gguf_path, model_name, capabilities=None):
+def create_ollama_model(modelfile_path, gguf_path, model_name, capabilities=None, mmproj_path=None):
     """
     Create an Ollama model from the Modelfile and GGUF file.
@@ -311,12 +366,15 @@ def create_ollama_model(modelfile_path, gguf_path, model_name, capabilities=None
         gguf_path: Path to the downloaded GGUF file
         model_name: Name for the Ollama model
         capabilities: Optional list of capabilities to add (e.g., ['tools', 'vision'])
+        mmproj_path: Optional path to the mmproj file for vision models
     """
     print(f"\nCreating Ollama model: {model_name}")

     # Note: Capabilities are detected from the GGUF file metadata by Ollama automatically
     if capabilities:
         print(f"  Expected capabilities from GGUF metadata: {', '.join(capabilities)}")
+    if mmproj_path:
+        print(f"  Including mmproj file for vision support")

     # Read the Modelfile and update the FROM path to point to the downloaded GGUF
     with open(modelfile_path, 'r') as f:
@@ -331,12 +389,22 @@ def create_ollama_model(modelfile_path, gguf_path, model_name, capabilities=None
         modelfile_content
     )

+    # Add mmproj FROM line if provided
+    if mmproj_path:
+        # Add the mmproj FROM line after the main model FROM line
+        modelfile_content = modelfile_content.replace(
+            f'FROM {gguf_path}',
+            f'FROM {gguf_path}\nFROM {mmproj_path}'
+        )
+
     # Debug: check if replacement happened
     if original_content == modelfile_content:
         print(f"  WARNING: FROM line was not replaced!")
         print(f"  Looking for pattern in: {original_content[:200]}")
     else:
         print(f"  ✓ Replaced FROM line with local path: {gguf_path}")
+        if mmproj_path:
+            print(f"  ✓ Added mmproj FROM line: {mmproj_path}")

     # Create a temporary Modelfile with the correct path
     with tempfile.NamedTemporaryFile(mode='w', suffix='.Modelfile', delete=False) as tmp_modelfile:
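
The net effect of the replacement above: a vision model's temporary Modelfile carries two FROM lines, the main GGUF followed by the projector, which is how Ollama attaches the vision encoder. A standalone sketch with placeholder paths:

```python
modelfile_content = "FROM ./Ministral-3-8B-Instruct-2512-Q5_K_M.gguf\nPARAMETER temperature 0.7\n"
gguf_path = "/tmp/downloads/Ministral-3-8B-Instruct-2512-Q5_K_M.gguf"
mmproj_path = "/tmp/downloads/mmproj-BF16.gguf"

# Point FROM at the downloaded file, then append the mmproj FROM line after it
modelfile_content = modelfile_content.replace(
    "FROM ./Ministral-3-8B-Instruct-2512-Q5_K_M.gguf", f"FROM {gguf_path}")
modelfile_content = modelfile_content.replace(
    f"FROM {gguf_path}", f"FROM {gguf_path}\nFROM {mmproj_path}")

print(modelfile_content)
# FROM /tmp/downloads/Ministral-3-8B-Instruct-2512-Q5_K_M.gguf
# FROM /tmp/downloads/mmproj-BF16.gguf
# PARAMETER temperature 0.7
```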
@@ -405,6 +473,10 @@ def install_model(modelfile_path, dry_run=False, skip_existing=False, existing_m
     log(f"SHA256: {model_info['sha256'][:16]}...")
     if model_info.get('capabilities'):
         log(f"Capabilities: {', '.join(model_info['capabilities'])}")
+    if model_info.get('mmproj'):
+        log(f"MMProj file: {model_info['mmproj']['filename']}")
+        if model_info['mmproj']['sha256']:
+            log(f"MMProj SHA256: {model_info['mmproj']['sha256'][:16]}...")

     # Check if model already exists
     if skip_existing and existing_models and model_info['model_name'] in existing_models:
@@ -413,9 +485,22 @@ def install_model(modelfile_path, dry_run=False, skip_existing=False, existing_m
     # Get file size and check disk space
     file_size = get_file_size(model_info['resolve_url'])
+    mmproj_file_size = None
+    if model_info.get('mmproj'):
+        mmproj_file_size = get_file_size(model_info['mmproj']['resolve_url'])
+
+    total_size = file_size or 0
+    if mmproj_file_size:
+        total_size += mmproj_file_size
+
     if file_size:
         size_gb = file_size / (1024**3)
-        log(f"File size: {size_gb:.2f} GB")
+        log(f"GGUF file size: {size_gb:.2f} GB")
+        if mmproj_file_size:
+            mmproj_size_gb = mmproj_file_size / (1024**3)
+            log(f"MMProj file size: {mmproj_size_gb:.2f} GB")
+            log(f"Total size: {total_size / (1024**3):.2f} GB")
+        file_size = total_size

     if not dry_run:
         has_space, available, required = check_disk_space(file_size)
@@ -434,6 +519,7 @@ def install_model(modelfile_path, dry_run=False, skip_existing=False, existing_m
     # Create temporary directory for download
     with tempfile.TemporaryDirectory() as tmp_dir:
         gguf_path = os.path.join(tmp_dir, model_info['gguf_filename'])
+        mmproj_path = None

         try:
             # Download the GGUF file
@@ -445,12 +531,30 @@ def install_model(modelfile_path, dry_run=False, skip_existing=False, existing_m
                     print(f"✗ Checksum verification failed!")
                     return (False, False, model_info['model_name'])

+            # Download mmproj file if specified
+            if model_info.get('mmproj'):
+                mmproj_path = os.path.join(tmp_dir, model_info['mmproj']['filename'])
+                download_file(
+                    model_info['mmproj']['resolve_url'],
+                    mmproj_path,
+                    model_info['mmproj']['filename'],
+                    should_cancel,
+                    progress_callback
+                )
+
+                # Verify mmproj checksum if provided
+                if model_info['mmproj']['sha256']:
+                    if not verify_checksum(mmproj_path, model_info['mmproj']['sha256']):
+                        print(f"✗ MMProj checksum verification failed!")
+                        return (False, False, model_info['model_name'])
+
             # Create the Ollama model
             create_ollama_model(
                 modelfile_path,
                 gguf_path,
                 model_info['model_name'],
-                model_info.get('capabilities')
+                model_info.get('capabilities'),
+                mmproj_path
             )

             print(f"\n✓ Successfully installed model: {model_info['model_name']}")
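
`verify_checksum` itself is not part of this diff; assuming it is the usual streaming SHA-256 comparison, it would look roughly like this sketch:

```python
import hashlib

def verify_checksum(path: str, expected_sha256: str, chunk_size: int = 1 << 20) -> bool:
    """Stream the file through SHA-256 and compare against the expected hex digest."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        # Read in 1 MiB chunks so multi-GB GGUF files never load fully into memory
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest().lower() == expected_sha256.lower()
```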

View File

@@ -760,6 +760,14 @@ async function fetchHuggingFaceInfo() {
         modelfileSection.dataset.ggufFilename = data.gguf_filename;
         modelfileSection.style.display = 'block';

+        // Show mmproj info if modelfile includes mmproj configuration
+        const mmprojInfo = document.getElementById('mmproj-info');
+        if (data.modelfile_content && data.modelfile_content.includes('# mmproj_url:')) {
+            mmprojInfo.style.display = 'block';
+        } else {
+            mmprojInfo.style.display = 'none';
+        }
+
         outputBox.innerHTML = '<div class="success-message">Model information fetched! Please review and customize the Modelfile below.</div>';
     } else {
         fileSelectSection.style.display = 'none';
@@ -811,6 +819,14 @@ async function generateModelfileFromSelection() {
         modelfileSection.dataset.ggufFilename = data.gguf_filename;
         modelfileSection.style.display = 'block';

+        // Show mmproj info if modelfile includes mmproj configuration
+        const mmprojInfo = document.getElementById('mmproj-info');
+        if (data.modelfile_content && data.modelfile_content.includes('# mmproj_url:')) {
+            mmprojInfo.style.display = 'block';
+        } else {
+            mmprojInfo.style.display = 'none';
+        }
+
         fileSelectSection.style.display = 'none';
         outputBox.innerHTML = '<div class="success-message">Modelfile generated! Please review and customize below.</div>';
     } else {
@@ -835,6 +851,35 @@ async function createHuggingFaceModel() {
         return;
     }

+    // Parse mmproj info from modelfile content
+    let mmprojUrl = null;
+    let mmprojFilename = null;
+    const mmprojUrlMatch = modelfileContent.match(/#\s*mmproj_url:\s*([^\s]+)/);
+    const mmprojQuantMatch = modelfileContent.match(/#\s*mmproj_quant:\s*([^\s]+)/);
+
+    if (mmprojUrlMatch) {
+        mmprojUrl = mmprojUrlMatch[1];
+        const mmprojQuant = mmprojQuantMatch ? mmprojQuantMatch[1] : 'BF16';
+
+        // Determine mmproj filename based on repo pattern
+        if (mmprojUrl.includes('/unsloth/')) {
+            mmprojFilename = `mmproj-${mmprojQuant}.gguf`;
+        } else {
+            // Try to extract base name from modelfile content or gguf filename
+            const baseMatch = ggufFilename.match(/^(.+?)-Q[0-9]/i);
+            if (baseMatch) {
+                mmprojFilename = `${baseMatch[1]}-${mmprojQuant}-mmproj.gguf`;
+            } else {
+                mmprojFilename = `mmproj-${mmprojQuant}.gguf`;
+            }
+        }
+
+        // Convert to resolve URL if needed
+        if (!mmprojUrl.includes('/resolve/')) {
+            mmprojUrl = `${mmprojUrl}/resolve/main/${mmprojFilename}`;
+        }
+    }
+
     try {
         const response = await fetch('/api/install/huggingface/create', {
@@ -845,7 +890,9 @@ async function createHuggingFaceModel() {
                 model_name: modelName,
                 modelfile_content: modelfileContent,
                 file_url: fileUrl,
-                gguf_filename: ggufFilename
+                gguf_filename: ggufFilename,
+                mmproj_url: mmprojUrl,
+                mmproj_filename: mmprojFilename
             })
         });
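
For reference, the request body the front end now sends to `/api/install/huggingface/create` can be reproduced outside the browser. A sketch using Python's requests library (host, port, and field values are placeholders; the field names come from this diff):

```python
import requests

payload = {
    "model_name": "ministral-3:8b-instruct-2512-q5_k_m",
    "modelfile_content": "...",  # the edited Modelfile text (elided here)
    "file_url": "https://huggingface.co/mistralai/Ministral-3-8B-Instruct-2512-GGUF/resolve/main/Ministral-3-8B-Instruct-2512-Q5_K_M.gguf",
    "gguf_filename": "Ministral-3-8B-Instruct-2512-Q5_K_M.gguf",
    # New in this commit: resolved mmproj URL and filename (null for text-only models)
    "mmproj_url": "https://huggingface.co/unsloth/Ministral-3-8B-Instruct-2512-GGUF/resolve/main/mmproj-BF16.gguf",
    "mmproj_filename": "mmproj-BF16.gguf",
}
resp = requests.post("http://localhost:5000/api/install/huggingface/create", json=payload)
print(resp.json())  # per the diff: {'job_id': ..., 'message': 'Installation started'}
```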

View File

@@ -198,6 +198,13 @@
             like tool calling or vision. Ollama detects these automatically from the GGUF file metadata.
             This comment helps you track which models support which features.
         </p>
+        <p class="info-text" id="mmproj-info" style="display: none;">
+            🖼️ <strong>Vision Models:</strong> This model appears to support vision capabilities.
+            The <code># mmproj_url:</code> and <code># mmproj_quant:</code> fields specify the multimodal projection file
+            needed for image processing. Without the mmproj file, you'll get an error:
+            <em>"failed to process inputs: this model is missing data required for image input"</em>.
+            The BF16 quantization is recommended for best vision quality (879 MB).
+        </p>
         <div class="form-group">
             <label for="hf-model-name">Model Name:</label>

View File

@@ -288,7 +288,8 @@ def run_install_job(job_id: str, modelfile_path: str):
             install_jobs[job_id]['error'] = str(e)

-def run_huggingface_install_job(job_id: str, model_name: str, modelfile_content: str, file_url: str, gguf_filename: str):
+def run_huggingface_install_job(job_id: str, model_name: str, modelfile_content: str, file_url: str, gguf_filename: str,
+                                mmproj_url: str = None, mmproj_filename: str = None):
     """Run HuggingFace model installation in background thread."""
     with install_lock:
         install_jobs[job_id]['status'] = 'running'
@@ -305,6 +306,7 @@ def run_huggingface_install_job(job_id: str, model_name: str, modelfile_content:
         return install_jobs[job_id].get('cancelled', False)

     temp_gguf = None
+    temp_mmproj = None
     temp_modelfile = None

     try:
@@ -314,16 +316,27 @@ def run_huggingface_install_job(job_id: str, model_name: str, modelfile_content:
         temp_gguf.close()
         gguf_path = temp_gguf.name

+        mmproj_path = None
+        if mmproj_url and mmproj_filename:
+            temp_mmproj = tempfile.NamedTemporaryFile(suffix='.gguf', delete=False)
+            temp_mmproj.close()
+            mmproj_path = temp_mmproj.name
+
         temp_modelfile = tempfile.NamedTemporaryFile(mode='w', suffix='.Modelfile', delete=False)
         temp_modelfile.write(modelfile_content)
         temp_modelfile.close()
         modelfile_path = temp_modelfile.name

-        # Use existing download_file function with callbacks
+        # Download main GGUF file
         hf_install_module.download_file(file_url, gguf_path, gguf_filename, should_cancel, update_progress)

-        # Use existing create_ollama_model function
-        hf_install_module.create_ollama_model(modelfile_path, gguf_path, model_name)
+        # Download mmproj file if specified
+        if mmproj_path and mmproj_url:
+            update_progress('Downloading mmproj file for vision support...')
+            hf_install_module.download_file(mmproj_url, mmproj_path, mmproj_filename, should_cancel, update_progress)
+
+        # Create Ollama model with both files
+        hf_install_module.create_ollama_model(modelfile_path, gguf_path, model_name, mmproj_path=mmproj_path)

         # Save Modelfile to repo
         normalized_name = model_name.replace(':', '-')
@@ -353,6 +366,8 @@ def run_huggingface_install_job(job_id: str, model_name: str, modelfile_content:
         # Clean up temp files
         if temp_gguf and os.path.exists(temp_gguf.name):
             os.unlink(temp_gguf.name)
+        if temp_mmproj and os.path.exists(temp_mmproj.name):
+            os.unlink(temp_mmproj.name)
         if temp_modelfile and os.path.exists(temp_modelfile.name):
             os.unlink(temp_modelfile.name)
@@ -707,11 +722,31 @@ def generate_modelfile_response(org: str, repo: str, gguf_filename: str, file_ur
     quant_match = re.search(r'[._-](Q[0-9]+_[KLM0-9]+(?:_[LSM])?)', gguf_filename, re.IGNORECASE)
     quantization = quant_match.group(1).upper() if quant_match else 'unspecified'

+    # Detect if model might support vision (multimodal models)
+    # Common patterns: ministral-3, qwen-vl, llava, etc.
+    is_multimodal = any(pattern in repo.lower() for pattern in
+                        ['ministral-3', 'qwen-vl', 'qwen2-vl', 'qwen3-vl', 'llava', 'minicpm-v', 'phi-3-vision'])
+
+    # Build capabilities list
+    capabilities = ['tools']  # Most modern models support tools
+    if is_multimodal:
+        capabilities.append('vision')
+
+    # Build mmproj config if multimodal
+    mmproj_config = ''
+    if is_multimodal:
+        # Try to use unsloth for mmproj (usually has more options)
+        mmproj_org = 'unsloth' if 'ministral' in repo.lower() or 'qwen' in repo.lower() else org
+        mmproj_config = f"""
+#
+# mmproj_url: https://huggingface.co/{mmproj_org}/{repo}
+# mmproj_quant: BF16"""
+
     # Create Modelfile skeleton with relative path (like CLI does)
     modelfile_content = f"""# Modelfile for {full_name}
 # hf_upstream: {file_url}
 # quantization: {quantization}
-# capabilities: tools
+# capabilities: {', '.join(capabilities)}{mmproj_config}
 # sha256: <add_sha256_checksum_here>

 FROM ./{gguf_filename}
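
The vision heuristic above keys purely off the repo name. Isolated for illustration (the function name is ours; the pattern list is taken from the diff):

```python
VISION_PATTERNS = ['ministral-3', 'qwen-vl', 'qwen2-vl', 'qwen3-vl', 'llava', 'minicpm-v', 'phi-3-vision']

def looks_multimodal(repo: str) -> bool:
    """Substring match against known multimodal model families."""
    return any(pattern in repo.lower() for pattern in VISION_PATTERNS)

assert looks_multimodal("Ministral-3-8B-Instruct-2512-GGUF")
assert not looks_multimodal("Mistral-7B-Instruct-v0.3-GGUF")
```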
@@ -764,6 +799,8 @@ def api_create_from_modelfile():
         modelfile_content = data.get('modelfile_content', '')
         file_url = data.get('file_url', '')
         gguf_filename = data.get('gguf_filename', '')
+        mmproj_url = (data.get('mmproj_url') or '').strip() or None
+        mmproj_filename = (data.get('mmproj_filename') or '').strip() or None

         if not model_name or not modelfile_content or not file_url:
             return jsonify({'error': 'Missing required parameters'}), 400
@@ -785,7 +822,7 @@ def api_create_from_modelfile():
         # Start background thread
         thread = threading.Thread(
             target=run_huggingface_install_job,
-            args=(job_id, model_name, modelfile_content, file_url, gguf_filename)
+            args=(job_id, model_name, modelfile_content, file_url, gguf_filename, mmproj_url, mmproj_filename)
         )
         thread.daemon = True
         thread.start()
@@ -795,11 +832,6 @@ def api_create_from_modelfile():
             'job_id': job_id,
             'message': 'Installation started'
         })
-    except Exception as e:
-        return jsonify({'error': str(e)}), 500

 @app.route('/api/install/modelfile', methods=['POST'])
 def api_install_from_modelfile():
     """Start installation of a model from an existing Modelfile as background job."""