From b83f2e6e38c86f7c4fa5788baa3f7819744d9feff4b4354b495cb485d34a8a0d Mon Sep 17 00:00:00 2001
From: mstoeck3
Date: Wed, 21 Jan 2026 12:13:39 +0100
Subject: [PATCH] mmproj upgrade

---
 README.md                                     |  16 +++
 ...stral-3-14b-instruct-2512-q5_k_m.Modelfile |   8 +-
 ...tral-3-14b-reasoning-2512-q5_k_m.Modelfile |   9 +-
 ...istral-3-3b-instruct-2512-q5_k_m.Modelfile |   8 +-
 ...stral-3-3b-reasoning-2512-q5_k_m.Modelfile |   9 +-
 ...istral-3-8b-instruct-2512-q5_k_m.Modelfile |   8 +-
 ...stral-3-8b-reasoning-2512-q5_k_m.Modelfile |   9 +-
 scripts/hf-llm-install.py                     | 112 +++++++++++++++++-
 static/script.js                              |  49 +++++++-
 templates/index.html                          |   7 ++
 web_app.py                                    |  54 +++++++--
 11 files changed, 258 insertions(+), 31 deletions(-)

diff --git a/README.md b/README.md
index 984844d..6e8cf14 100644
--- a/README.md
+++ b/README.md
@@ -119,6 +119,22 @@ Example:
 # capabilities: tools, vision
 ```
 
+**Vision Model Support (MMProj):**
+
+For vision-capable models, you can specify an mmproj (multimodal projection) file that contains the vision encoder. See [MMProj Support Documentation](docs/MMPROJ_SUPPORT.md) for detailed information.
+
+```dockerfile
+# hf_upstream: https://huggingface.co/mistralai/Ministral-3-14B-Reasoning-2512-GGUF
+# quantization: Q5_K_M
+# capabilities: vision, reasoning, tools
+#
+# mmproj_url: https://huggingface.co/unsloth/Ministral-3-14B-Reasoning-2512-GGUF
+# mmproj_quant: BF16
+# mmproj_sha256: abc123... (optional)
+```
+
+The script will automatically download both the main GGUF and mmproj files, and create an Ollama model with vision support.
+
 **Note:** Capabilities are read from the GGUF file's metadata by Ollama. The `# capabilities:` comment serves as documentation to track expected model features. If a model doesn't show the expected capabilities after installation, it may be due to the GGUF file lacking that metadata.
 
 The script will:
diff --git a/modelfile-repo/ministral-3-14b-instruct-2512-q5_k_m.Modelfile b/modelfile-repo/ministral-3-14b-instruct-2512-q5_k_m.Modelfile
index 41d9fbb..4605379 100644
--- a/modelfile-repo/ministral-3-14b-instruct-2512-q5_k_m.Modelfile
+++ b/modelfile-repo/ministral-3-14b-instruct-2512-q5_k_m.Modelfile
@@ -1,6 +1,10 @@
 # ollama-utils-metadata
-# hf_upstream: https://huggingface.co/mistralai/Ministral-3-14B-Instruct-2512-GGUF/blob/main/Ministral-3-14B-Instruct-2512-Q5_K_M.gguf
-# capabilities: tools,vision
+# hf_upstream: https://huggingface.co/mistralai/Ministral-3-14B-Instruct-2512-GGUF
+# quantization: Q5_K_M
+# capabilities: vision, tools
+#
+# mmproj_url: https://huggingface.co/unsloth/Ministral-3-14B-Instruct-2512-GGUF
+# mmproj_quant: BF16
 FROM ./Ministral-3-14B-Instruct-2512-Q5_K_M.gguf
 
 # Specialized parser for Mistral 3 logic
diff --git a/modelfile-repo/ministral-3-14b-reasoning-2512-q5_k_m.Modelfile b/modelfile-repo/ministral-3-14b-reasoning-2512-q5_k_m.Modelfile
index 3880db6..bc40d98 100644
--- a/modelfile-repo/ministral-3-14b-reasoning-2512-q5_k_m.Modelfile
+++ b/modelfile-repo/ministral-3-14b-reasoning-2512-q5_k_m.Modelfile
@@ -1,7 +1,10 @@
 # ollama-utils-metadata
-# hf_upstream: https://huggingface.co/mistralai/Ministral-3-14B-Reasoning-2512-gguf
-# quantization: q5_k_m
-# capabilities: tools,vision,thinking
+# hf_upstream: https://huggingface.co/mistralai/Ministral-3-14B-Reasoning-2512-GGUF
+# quantization: Q5_K_M
+# capabilities: vision, reasoning, tools
+#
+# mmproj_url: https://huggingface.co/unsloth/Ministral-3-14B-Reasoning-2512-GGUF
+# mmproj_quant: BF16
 FROM ./Ministral-3-14B-Reasoning-2512-Q5_K_M.gguf
 
 # Specialized parser
diff --git a/modelfile-repo/ministral-3-3b-instruct-2512-q5_k_m.Modelfile b/modelfile-repo/ministral-3-3b-instruct-2512-q5_k_m.Modelfile
index 0a4beaa..2e9e224 100644
--- a/modelfile-repo/ministral-3-3b-instruct-2512-q5_k_m.Modelfile
+++ b/modelfile-repo/ministral-3-3b-instruct-2512-q5_k_m.Modelfile
@@ -1,6 +1,10 @@
 # ollama-utils-metadata
-# hf_upstream: https://huggingface.co/mistralai/Ministral-3-3B-Instruct-2512-GGUF/blob/main/Ministral-3-3B-Instruct-2512-Q5_K_M.gguf
-# capabilities: tools,vision
+# hf_upstream: https://huggingface.co/mistralai/Ministral-3-3B-Instruct-2512-GGUF
+# quantization: Q5_K_M
+# capabilities: vision, tools
+#
+# mmproj_url: https://huggingface.co/unsloth/Ministral-3-3B-Instruct-2512-GGUF
+# mmproj_quant: BF16
 FROM ./Ministral-3-3B-Instruct-2512-Q5_K_M.gguf
 
 # Specialized parser for Mistral 3 logic
diff --git a/modelfile-repo/ministral-3-3b-reasoning-2512-q5_k_m.Modelfile b/modelfile-repo/ministral-3-3b-reasoning-2512-q5_k_m.Modelfile
index 21158dc..0d17af5 100644
--- a/modelfile-repo/ministral-3-3b-reasoning-2512-q5_k_m.Modelfile
+++ b/modelfile-repo/ministral-3-3b-reasoning-2512-q5_k_m.Modelfile
@@ -1,7 +1,10 @@
 # ollama-utils-metadata
-# hf_upstream: https://huggingface.co/mistralai/Ministral-3-3B-Reasoning-2512-gguf
-# quantization: q5_k_m
-# capabilities: tools,vision,thinking
+# hf_upstream: https://huggingface.co/mistralai/Ministral-3-3B-Reasoning-2512-GGUF
+# quantization: Q5_K_M
+# capabilities: vision, reasoning, tools
+#
+# mmproj_url: https://huggingface.co/unsloth/Ministral-3-3B-Reasoning-2512-GGUF
+# mmproj_quant: BF16
 FROM ./Ministral-3-3B-Reasoning-2512-Q5_K_M.gguf
 
 # Specialized parser
diff --git a/modelfile-repo/ministral-3-8b-instruct-2512-q5_k_m.Modelfile b/modelfile-repo/ministral-3-8b-instruct-2512-q5_k_m.Modelfile
index 4fcfaad..28b1e0f 100644
--- a/modelfile-repo/ministral-3-8b-instruct-2512-q5_k_m.Modelfile
+++ b/modelfile-repo/ministral-3-8b-instruct-2512-q5_k_m.Modelfile
@@ -1,6 +1,10 @@
 # ollama-utils-metadata
-# hf_upstream: https://huggingface.co/mistralai/Ministral-3-8B-Instruct-2512-GGUF/blob/main/Ministral-3-8B-Instruct-2512-Q5_K_M.gguf
-# capabilities: tools,vision
+# hf_upstream: https://huggingface.co/mistralai/Ministral-3-8B-Instruct-2512-GGUF
+# quantization: Q5_K_M
+# capabilities: vision, tools
+#
+# mmproj_url: https://huggingface.co/unsloth/Ministral-3-8B-Instruct-2512-GGUF
+# mmproj_quant: BF16
 FROM ./Ministral-3-8B-Instruct-2512-Q5_K_M.gguf
 
 # Specialized parser for Mistral 3 logic
diff --git a/modelfile-repo/ministral-3-8b-reasoning-2512-q5_k_m.Modelfile b/modelfile-repo/ministral-3-8b-reasoning-2512-q5_k_m.Modelfile
index 2133e2c..062b436 100644
--- a/modelfile-repo/ministral-3-8b-reasoning-2512-q5_k_m.Modelfile
+++ b/modelfile-repo/ministral-3-8b-reasoning-2512-q5_k_m.Modelfile
@@ -1,7 +1,10 @@
 # ollama-utils-metadata
-# hf_upstream: https://huggingface.co/mistralai/Ministral-3-8B-Reasoning-2512-gguf
-# quantization: q5_k_m
-# capabilities: tools,vision,thinking
+# hf_upstream: https://huggingface.co/mistralai/Ministral-3-8B-Reasoning-2512-GGUF
+# quantization: Q5_K_M
+# capabilities: vision, reasoning, tools
+#
+# mmproj_url: https://huggingface.co/unsloth/Ministral-3-8B-Reasoning-2512-GGUF
+# mmproj_quant: BF16
 FROM ./Ministral-3-8B-Reasoning-2512-Q5_K_M.gguf
 
 # Specialized parser
diff --git a/scripts/hf-llm-install.py b/scripts/hf-llm-install.py
index ff86aee..78f8914 100755
--- a/scripts/hf-llm-install.py
+++ b/scripts/hf-llm-install.py
@@ -105,6 +105,19 @@ def parse_modelfile(modelfile_path):
         caps_str = capabilities_match.group(1).strip()
         capabilities = [cap.strip() for cap in caps_str.split(',') if cap.strip()]
 
+    # Look for optional mmproj (multimodal projection) configuration
+    # Format: # mmproj_url: https://huggingface.co/org/repo
+    mmproj_url_match = re.search(r'#\s*mmproj_url:\s*(https://huggingface\.co/[^\s]+)', content)
+    mmproj_url = mmproj_url_match.group(1) if mmproj_url_match else None
+
+    # Format: # mmproj_quant: BF16 (or F16, F32)
+    mmproj_quant_match = re.search(r'#\s*mmproj_quant:\s*([a-zA-Z0-9_]+)', content)
+    mmproj_quant = mmproj_quant_match.group(1) if mmproj_quant_match else 'BF16'  # Default to BF16
+
+    # Format: # mmproj_sha256:
+    mmproj_sha256_match = re.search(r'#\s*mmproj_sha256:\s*([a-fA-F0-9]{64})', content)
+    mmproj_sha256 = mmproj_sha256_match.group(1) if mmproj_sha256_match else None
+
     # Check if URL points to a specific GGUF file or just the repo
     if hf_url.endswith('.gguf') or '/blob/' in hf_url or '/resolve/' in hf_url:
         # Specific file provided - use as-is
@@ -133,6 +146,47 @@ def parse_modelfile(modelfile_path):
     # Example: Ministral-3-3B-Instruct-2512-Q5_K_M.gguf -> ministral-3:3b-instruct-2512-q5_k_m
     model_base, model_tag, model_name = parse_model_name_from_gguf(gguf_filename)
 
+    # Construct mmproj info if mmproj_url is provided
+    mmproj_info = None
+    if mmproj_url:
+        # Determine mmproj filename based on URL pattern
+        if mmproj_url.endswith('.gguf') or '/blob/' in mmproj_url or '/resolve/' in mmproj_url:
+            # Specific file provided
+            mmproj_resolve_url = mmproj_url.replace('/blob/', '/resolve/')
+            mmproj_filename = os.path.basename(urlparse(mmproj_resolve_url).path)
+        else:
+            # Repository root - construct filename
+            # Two common patterns:
+            #   1. mmproj-BF16.gguf (unsloth pattern)
+            #   2. ModelName-BF16-mmproj.gguf (mistralai pattern)
+            # Try to detect which pattern by checking the URL
+            url_parts = urlparse(mmproj_url).path.strip('/').split('/')
+            if len(url_parts) >= 2:
+                repo_org = url_parts[0]
+                if repo_org == 'unsloth':
+                    # unsloth pattern: mmproj-{QUANT}.gguf
+                    mmproj_filename = f"mmproj-{mmproj_quant}.gguf"
+                else:
+                    # mistralai/others pattern: extract base name from main repo
+                    repo_name = url_parts[1]
+                    if repo_name.upper().endswith('-GGUF'):
+                        repo_name = repo_name[:-5]
+                    mmproj_filename = f"{repo_name}-{mmproj_quant}-mmproj.gguf"
+
+                mmproj_resolve_url = f"{mmproj_url.rstrip('/')}/resolve/main/{mmproj_filename}"
+            else:
+                print(f"✗ Invalid mmproj URL format: {mmproj_url}")
+                mmproj_resolve_url = None
+                mmproj_filename = None
+
+        if mmproj_resolve_url and mmproj_filename:
+            mmproj_info = {
+                'url': mmproj_url,
+                'resolve_url': mmproj_resolve_url,
+                'filename': mmproj_filename,
+                'sha256': mmproj_sha256
+            }
+
     return {
         'hf_url': hf_url,
         'resolve_url': resolve_url,
@@ -140,7 +194,8 @@ def parse_modelfile(modelfile_path):
         'model_name': model_name,
         'modelfile_path': modelfile_path,
         'sha256': sha256,
-        'capabilities': capabilities
+        'capabilities': capabilities,
+        'mmproj': mmproj_info
     }
 
 
@@ -302,7 +357,7 @@ def download_file(url, dest_path, filename, should_cancel=None, progress_callbac
             raise
 
 
-def create_ollama_model(modelfile_path, gguf_path, model_name, capabilities=None):
+def create_ollama_model(modelfile_path, gguf_path, model_name, capabilities=None, mmproj_path=None):
     """
     Create an Ollama model from the Modelfile and GGUF file.
 
@@ -311,12 +366,15 @@ def create_ollama_model(modelfile_path, gguf_path, model_name, capabilities=None
         gguf_path: Path to the downloaded GGUF file
         model_name: Name for the Ollama model
         capabilities: Optional list of capabilities to add (e.g., ['tools', 'vision'])
+        mmproj_path: Optional path to the mmproj file for vision models
     """
     print(f"\nCreating Ollama model: {model_name}")
 
     # Note: Capabilities are detected from the GGUF file metadata by Ollama automatically
     if capabilities:
         print(f"  ℹ Expected capabilities from GGUF metadata: {', '.join(capabilities)}")
+    if mmproj_path:
+        print(f"  ℹ Including mmproj file for vision support")
 
     # Read the Modelfile and update the FROM path to point to the downloaded GGUF
     with open(modelfile_path, 'r') as f:
@@ -331,12 +389,22 @@ def create_ollama_model(modelfile_path, gguf_path, model_name, capabilities=None
         modelfile_content
     )
 
+    # Add mmproj FROM line if provided
+    if mmproj_path:
+        # Add the mmproj FROM line after the main model FROM line
+        modelfile_content = modelfile_content.replace(
+            f'FROM {gguf_path}',
+            f'FROM {gguf_path}\nFROM {mmproj_path}'
+        )
+
     # Debug: check if replacement happened
     if original_content == modelfile_content:
         print(f"  WARNING: FROM line was not replaced!")
         print(f"  Looking for pattern in: {original_content[:200]}")
     else:
         print(f"  ✓ Replaced FROM line with local path: {gguf_path}")
+        if mmproj_path:
+            print(f"  ✓ Added mmproj FROM line: {mmproj_path}")
 
     # Create a temporary Modelfile with the correct path
     with tempfile.NamedTemporaryFile(mode='w', suffix='.Modelfile', delete=False) as tmp_modelfile:
@@ -405,6 +473,10 @@ def install_model(modelfile_path, dry_run=False, skip_existing=False, existing_m
         log(f"SHA256: {model_info['sha256'][:16]}...")
     if model_info.get('capabilities'):
         log(f"Capabilities: {', '.join(model_info['capabilities'])}")
+    if model_info.get('mmproj'):
+        log(f"MMProj file: {model_info['mmproj']['filename']}")
+        if model_info['mmproj']['sha256']:
log(f"MMProj SHA256: {model_info['mmproj']['sha256'][:16]}...") # Check if model already exists if skip_existing and existing_models and model_info['model_name'] in existing_models: @@ -413,9 +485,22 @@ def install_model(modelfile_path, dry_run=False, skip_existing=False, existing_m # Get file size and check disk space file_size = get_file_size(model_info['resolve_url']) + mmproj_file_size = None + if model_info.get('mmproj'): + mmproj_file_size = get_file_size(model_info['mmproj']['resolve_url']) + + total_size = file_size or 0 + if mmproj_file_size: + total_size += mmproj_file_size + if file_size: size_gb = file_size / (1024**3) - log(f"File size: {size_gb:.2f} GB") + log(f"GGUF file size: {size_gb:.2f} GB") + if mmproj_file_size: + mmproj_size_gb = mmproj_file_size / (1024**3) + log(f"MMProj file size: {mmproj_size_gb:.2f} GB") + log(f"Total size: {total_size / (1024**3):.2f} GB") + file_size = total_size if not dry_run: has_space, available, required = check_disk_space(file_size) @@ -434,6 +519,7 @@ def install_model(modelfile_path, dry_run=False, skip_existing=False, existing_m # Create temporary directory for download with tempfile.TemporaryDirectory() as tmp_dir: gguf_path = os.path.join(tmp_dir, model_info['gguf_filename']) + mmproj_path = None try: # Download the GGUF file @@ -445,12 +531,30 @@ def install_model(modelfile_path, dry_run=False, skip_existing=False, existing_m print(f"✗ Checksum verification failed!") return (False, False, model_info['model_name']) + # Download mmproj file if specified + if model_info.get('mmproj'): + mmproj_path = os.path.join(tmp_dir, model_info['mmproj']['filename']) + download_file( + model_info['mmproj']['resolve_url'], + mmproj_path, + model_info['mmproj']['filename'], + should_cancel, + progress_callback + ) + + # Verify mmproj checksum if provided + if model_info['mmproj']['sha256']: + if not verify_checksum(mmproj_path, model_info['mmproj']['sha256']): + print(f"✗ MMProj checksum verification failed!") + return (False, False, model_info['model_name']) + # Create the Ollama model create_ollama_model( modelfile_path, gguf_path, model_info['model_name'], - model_info.get('capabilities') + model_info.get('capabilities'), + mmproj_path ) print(f"\n✓ Successfully installed model: {model_info['model_name']}") diff --git a/static/script.js b/static/script.js index 30f46d0..32f3a91 100644 --- a/static/script.js +++ b/static/script.js @@ -760,6 +760,14 @@ async function fetchHuggingFaceInfo() { modelfileSection.dataset.ggufFilename = data.gguf_filename; modelfileSection.style.display = 'block'; + // Show mmproj info if modelfile includes mmproj configuration + const mmprojInfo = document.getElementById('mmproj-info'); + if (data.modelfile_content && data.modelfile_content.includes('# mmproj_url:')) { + mmprojInfo.style.display = 'block'; + } else { + mmprojInfo.style.display = 'none'; + } + outputBox.innerHTML = '
Model information fetched! Please review and customize the Modelfile below.
'; } else { fileSelectSection.style.display = 'none'; @@ -811,6 +819,14 @@ async function generateModelfileFromSelection() { modelfileSection.dataset.ggufFilename = data.gguf_filename; modelfileSection.style.display = 'block'; + // Show mmproj info if modelfile includes mmproj configuration + const mmprojInfo = document.getElementById('mmproj-info'); + if (data.modelfile_content && data.modelfile_content.includes('# mmproj_url:')) { + mmprojInfo.style.display = 'block'; + } else { + mmprojInfo.style.display = 'none'; + } + fileSelectSection.style.display = 'none'; outputBox.innerHTML = '
Modelfile generated! Please review and customize below.
'; } else { @@ -835,6 +851,35 @@ async function createHuggingFaceModel() { return; } + // Parse mmproj info from modelfile content + let mmprojUrl = null; + let mmprojFilename = null; + const mmprojUrlMatch = modelfileContent.match(/#\s*mmproj_url:\s*([^\s]+)/); + const mmprojQuantMatch = modelfileContent.match(/#\s*mmproj_quant:\s*([^\s]+)/); + + if (mmprojUrlMatch) { + mmprojUrl = mmprojUrlMatch[1]; + const mmprojQuant = mmprojQuantMatch ? mmprojQuantMatch[1] : 'BF16'; + + // Determine mmproj filename based on repo pattern + if (mmprojUrl.includes('/unsloth/')) { + mmprojFilename = `mmproj-${mmprojQuant}.gguf`; + } else { + // Try to extract base name from modelfile content or gguf filename + const baseMatch = ggufFilename.match(/^(.+?)-Q[0-9]/i); + if (baseMatch) { + mmprojFilename = `${baseMatch[1]}-${mmprojQuant}-mmproj.gguf`; + } else { + mmprojFilename = `mmproj-${mmprojQuant}.gguf`; + } + } + + // Convert to resolve URL if needed + if (!mmprojUrl.includes('/resolve/')) { + mmprojUrl = `${mmprojUrl}/resolve/main/${mmprojFilename}`; + } + } + try { const response = await fetch('/api/install/huggingface/create', { method: 'POST', @@ -845,7 +890,9 @@ async function createHuggingFaceModel() { model_name: modelName, modelfile_content: modelfileContent, file_url: fileUrl, - gguf_filename: ggufFilename + gguf_filename: ggufFilename, + mmproj_url: mmprojUrl, + mmproj_filename: mmprojFilename }) }); diff --git a/templates/index.html b/templates/index.html index f7961e6..1c17c0a 100644 --- a/templates/index.html +++ b/templates/index.html @@ -198,6 +198,13 @@ like tool calling or vision. Ollama detects these automatically from the GGUF file metadata. This comment helps you track which models support which features.

+
diff --git a/web_app.py b/web_app.py
index b3e140b..966f481 100644
--- a/web_app.py
+++ b/web_app.py
@@ -288,7 +288,8 @@ def run_install_job(job_id: str, modelfile_path: str):
         install_jobs[job_id]['error'] = str(e)
 
 
-def run_huggingface_install_job(job_id: str, model_name: str, modelfile_content: str, file_url: str, gguf_filename: str):
+def run_huggingface_install_job(job_id: str, model_name: str, modelfile_content: str, file_url: str, gguf_filename: str,
+                                mmproj_url: str = None, mmproj_filename: str = None):
     """Run HuggingFace model installation in background thread."""
     with install_lock:
         install_jobs[job_id]['status'] = 'running'
@@ -305,6 +306,7 @@ def run_huggingface_install_job(job_id: str, model_name: str, modelfile_content:
         return install_jobs[job_id].get('cancelled', False)
 
     temp_gguf = None
+    temp_mmproj = None
    temp_modelfile = None
 
     try:
@@ -314,16 +316,27 @@ def run_huggingface_install_job(job_id: str, model_name: str, modelfile_content:
         temp_gguf.close()
         gguf_path = temp_gguf.name
 
+        mmproj_path = None
+        if mmproj_url and mmproj_filename:
+            temp_mmproj = tempfile.NamedTemporaryFile(suffix='.gguf', delete=False)
+            temp_mmproj.close()
+            mmproj_path = temp_mmproj.name
+
         temp_modelfile = tempfile.NamedTemporaryFile(mode='w', suffix='.Modelfile', delete=False)
         temp_modelfile.write(modelfile_content)
         temp_modelfile.close()
         modelfile_path = temp_modelfile.name
 
-        # Use existing download_file function with callbacks
+        # Download main GGUF file
         hf_install_module.download_file(file_url, gguf_path, gguf_filename, should_cancel, update_progress)
 
-        # Use existing create_ollama_model function
-        hf_install_module.create_ollama_model(modelfile_path, gguf_path, model_name)
+        # Download mmproj file if specified
+        if mmproj_path and mmproj_url:
+            update_progress('Downloading mmproj file for vision support...')
+            hf_install_module.download_file(mmproj_url, mmproj_path, mmproj_filename, should_cancel, update_progress)
+
+        # Create Ollama model with both files
+        hf_install_module.create_ollama_model(modelfile_path, gguf_path, model_name, mmproj_path=mmproj_path)
 
         # Save Modelfile to repo
         normalized_name = model_name.replace(':', '-')
@@ -353,6 +366,8 @@ def run_huggingface_install_job(job_id: str, model_name: str, modelfile_content:
         # Clean up temp files
         if temp_gguf and os.path.exists(temp_gguf.name):
             os.unlink(temp_gguf.name)
+        if temp_mmproj and os.path.exists(temp_mmproj.name):
+            os.unlink(temp_mmproj.name)
         if temp_modelfile and os.path.exists(temp_modelfile.name):
             os.unlink(temp_modelfile.name)
 
@@ -707,11 +722,31 @@ def generate_modelfile_response(org: str, repo: str, gguf_filename: str, file_ur
     quant_match = re.search(r'[._-](Q[0-9]+_[KLM0-9]+(?:_[LSM])?)', gguf_filename, re.IGNORECASE)
     quantization = quant_match.group(1).upper() if quant_match else 'unspecified'
 
+    # Detect if model might support vision (multimodal models)
+    # Common patterns: ministral-3, qwen-vl, llava, etc.
+    is_multimodal = any(pattern in repo.lower() for pattern in
+                        ['ministral-3', 'qwen-vl', 'qwen2-vl', 'qwen3-vl', 'llava', 'minicpm-v', 'phi-3-vision'])
+
+    # Build capabilities list
+    capabilities = ['tools']  # Most modern models support tools
+    if is_multimodal:
+        capabilities.append('vision')
+
+    # Build mmproj config if multimodal
+    mmproj_config = ''
+    if is_multimodal:
+        # Try to use unsloth for mmproj (usually has more options)
+        mmproj_org = 'unsloth' if 'ministral' in repo.lower() or 'qwen' in repo.lower() else org
+        mmproj_config = f"""#
+# mmproj_url: https://huggingface.co/{mmproj_org}/{repo}
+# mmproj_quant: BF16
+"""
+
     # Create Modelfile skeleton with relative path (like CLI does)
     modelfile_content = f"""# Modelfile for {full_name}
 # hf_upstream: {file_url}
 # quantization: {quantization}
-# capabilities: tools
+# capabilities: {', '.join(capabilities)}{mmproj_config}
 # sha256: 
 
 FROM ./{gguf_filename}
@@ -764,6 +799,8 @@ def api_create_from_modelfile():
         modelfile_content = data.get('modelfile_content', '')
         file_url = data.get('file_url', '')
         gguf_filename = data.get('gguf_filename', '')
+        mmproj_url = data.get('mmproj_url', '').strip() or None
+        mmproj_filename = data.get('mmproj_filename', '').strip() or None
 
         if not model_name or not modelfile_content or not file_url:
             return jsonify({'error': 'Missing required parameters'}), 400
@@ -785,7 +822,7 @@ def api_create_from_modelfile():
         # Start background thread
         thread = threading.Thread(
             target=run_huggingface_install_job,
-            args=(job_id, model_name, modelfile_content, file_url, gguf_filename)
+            args=(job_id, model_name, modelfile_content, file_url, gguf_filename, mmproj_url, mmproj_filename)
         )
         thread.daemon = True
         thread.start()
@@ -795,11 +832,6 @@ def api_create_from_modelfile():
             'job_id': job_id,
             'message': 'Installation started'
         })
-
-    except Exception as e:
-        return jsonify({'error': str(e)}), 500
-
-
 @app.route('/api/install/modelfile', methods=['POST'])
 def api_install_from_modelfile():
     """Start installation of a model from an existing Modelfile as background job."""