diff --git a/owui-site-crawler.py b/owui-site-crawler.py
index 937be55..3e6b6c9 100644
--- a/owui-site-crawler.py
+++ b/owui-site-crawler.py
@@ -9,23 +9,68 @@
 from bs4 import BeautifulSoup
 from markitdown import MarkItDown
 import json
 import logging
+from io import BytesIO
+import re
+import tempfile
+import shutil
+from pathlib import Path
 
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
 
+# Try to import Selenium, but make it optional
+try:
+    from selenium import webdriver
+    from selenium.webdriver.chrome.options import Options
+    from selenium.webdriver.common.by import By
+    from selenium.webdriver.support.ui import WebDriverWait
+    from selenium.webdriver.support import expected_conditions as EC
+    from selenium.common.exceptions import TimeoutException, WebDriverException
+    SELENIUM_AVAILABLE = True
+except ImportError:
+    SELENIUM_AVAILABLE = False
+    logger.warning("Selenium not available. Falling back to simple crawler.")
+
 class WebScraper:
-    def __init__(self, base_url, max_depth=2, delay=1.0, exclude_patterns=None):
+    def __init__(self, base_url, max_depth=2, delay=1.0, exclude_patterns=None, use_selenium=False):
         self.base_url = base_url
         self.domain = urlparse(base_url).netloc
         self.visited_urls = set()
         self.max_depth = max_depth
         self.delay = delay
         self.exclude_patterns = exclude_patterns or []
-        self.pages = {}  # Dictionary to store URL: HTML content
-        self.session = requests.Session()
+        self.pages = {}
+        self.use_selenium = use_selenium and SELENIUM_AVAILABLE
+
+        if self.use_selenium:
+            self.setup_selenium()
+        else:
+            self.session = requests.Session()
+
+        self.base_path = urlparse(base_url).path.rstrip('/')
 
+    def setup_selenium(self):
+        """Setup Selenium WebDriver with headless Chrome."""
+        try:
+            chrome_options = Options()
+            chrome_options.add_argument("--headless")
+            chrome_options.add_argument("--no-sandbox")
+            chrome_options.add_argument("--disable-dev-shm-usage")
+            chrome_options.add_argument("--disable-gpu")
+            chrome_options.add_argument("--window-size=1920,1080")
+            chrome_options.add_argument("--user-agent=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
+
+            self.driver = webdriver.Chrome(options=chrome_options)
+            self.driver.set_page_load_timeout(30)
+            logger.info("Selenium WebDriver initialized successfully")
+        except Exception as e:
+            logger.error(f"Failed to initialize Selenium: {e}")
+            logger.info("Falling back to requests")
+            self.use_selenium = False
+            self.session = requests.Session()
+
     def should_exclude(self, url):
         """Check if URL should be excluded based on patterns."""
         for pattern in self.exclude_patterns:
@@ -36,48 +81,102 @@ class WebScraper:
     def is_valid_url(self, url):
         """Check if the URL is valid and belongs to the same domain."""
         parsed = urlparse(url)
-        return bool(parsed.netloc) and parsed.netloc == self.domain
+        if not (parsed.netloc and parsed.netloc == self.domain):
+            return False
+        return parsed.path.startswith(self.base_path)
 
-    def get_links(self, url, html):
-        """Extract all links from the HTML content."""
+    def get_links_selenium(self, url):
+        """Extract all links from the page using Selenium."""
+        try:
+            self.driver.get(url)
+            # Wait for page to load
+            WebDriverWait(self.driver, 10).until(
+                EC.presence_of_element_located((By.TAG_NAME, "body"))
+            )
+
+            # Try to wait for main content to load
+            try:
+                WebDriverWait(self.driver, 5).until(
+                    EC.presence_of_element_located((By.TAG_NAME, "main"))
+                )
+            except TimeoutException:
+                pass
+
+            # Get page source after JavaScript execution
+            html = self.driver.page_source
+            self.pages[url] = html
+
+            # Extract links
+            links = set()
+            for a_tag in self.driver.find_elements(By.TAG_NAME, "a"):
+                href = a_tag.get_attribute("href")
+                if href:
+                    full_url = urljoin(url, href)
+                    if self.is_valid_url(full_url) and not self.should_exclude(full_url):
+                        links.add(full_url)
+
+            return list(links), html
+
+        except Exception as e:
+            logger.error(f"Error getting links with Selenium from {url}: {e}")
+            return [], ""
+
+    def get_links_requests(self, url, html):
+        """Extract all links from the HTML content using requests."""
         soup = BeautifulSoup(html, 'html.parser')
+        links = set()
         for a_tag in soup.find_all('a', href=True):
             href = a_tag['href']
-            # Handle relative URLs
             full_url = urljoin(url, href)
-            # Filter URLs to only include those from the same domain
             if self.is_valid_url(full_url) and not self.should_exclude(full_url):
-                yield full_url
+                links.add(full_url)
+        return list(links)
 
+    def get_page_requests(self, url):
+        """Get page content using requests."""
+        try:
+            response = self.session.get(url, timeout=10)
+            if response.status_code == 200:
+                return response.text
+            else:
+                logger.warning(f"Failed to fetch {url}: HTTP {response.status_code}")
+                return None
+        except Exception as e:
+            logger.error(f"Error fetching {url} with requests: {e}")
+            return None
+
     def crawl(self, url=None, depth=0):
         """Crawl the website starting from the URL up to max_depth."""
         if url is None:
             url = self.base_url
 
-        # Stop if we've reached max depth or already visited this URL
         if depth > self.max_depth or url in self.visited_urls:
             return
 
-        # Mark this URL as visited
         self.visited_urls.add(url)
 
         try:
             logger.info(f"Crawling: {url} (Depth: {depth})")
-            response = self.session.get(url, timeout=10)
-            if response.status_code == 200:
-                # Store the HTML content
-                self.pages[url] = response.text
-
-                # Extract and follow links
-                if depth < self.max_depth:
-                    for link in self.get_links(url, response.text):
-                        # Be nice to the server - add delay
-                        time.sleep(self.delay)
-                        self.crawl(link, depth + 1)
+            if self.use_selenium:
+                links, html = self.get_links_selenium(url)
+                if html:
+                    self.pages[url] = html
             else:
-                logger.warning(f"Failed to fetch {url}: HTTP {response.status_code}")
-
+                html = self.get_page_requests(url)
+                if html:
+                    self.pages[url] = html
+                    links = self.get_links_requests(url, html)
+                else:
+                    links = []
+
+            # Follow links
+            if depth < self.max_depth and links:
+                logger.info(f"Found {len(links)} links to follow from {url}")
+                for link in links:
+                    time.sleep(self.delay)
+                    self.crawl(link, depth + 1)
+
         except Exception as e:
             logger.error(f"Error crawling {url}: {e}")
 
@@ -86,8 +185,10 @@ class WebScraper:
         return self.pages
 
     def close(self):
-        """Close the requests session."""
-        if hasattr(self, 'session') and self.session:
+        """Close the requests session or Selenium driver."""
+        if self.use_selenium and hasattr(self, 'driver'):
+            self.driver.quit()
+        elif hasattr(self, 'session'):
             self.session.close()
 
 
@@ -163,102 +264,94 @@ class OpenWebUIUploader:
             logger.error(f"Error creating knowledge base: {e}")
             raise
 
-    def upload_file(self, kb_id, content, filename, content_type="text/markdown"):
-        """Upload a file to the knowledge base."""
+    def validate_content(self, content, filename):
+        """Validate that content is not empty and has sufficient meaningful text."""
+        if not content or not content.strip():
+            return False, "Content is empty"
+
+        # Count meaningful lines (not just headers or empty lines)
+        lines = [line.strip() for line in content.split('\n') if line.strip()]
+        meaningful_lines = [line for line in lines if not line.startswith('#') and len(line) > 20]
+
+        if len(meaningful_lines) < 3:
+            return False, f"Not enough meaningful content ({len(meaningful_lines)} lines)"
+
+        # Count words in clean content
+        clean_content = re.sub(r'#.*?\n', '', content)
+        clean_content = re.sub(r'```.*?```', '', clean_content, flags=re.DOTALL)
+        clean_content = re.sub(r'`.*?`', '', clean_content)
+        clean_content = re.sub(r'\*.*?\*', '', clean_content)
+        clean_content = clean_content.strip()
+
+        words = clean_content.split()
+        if len(words) < 50:
+            return False, f"Content too short ({len(words)} words after cleaning)"
+
+        return True, "Valid content"
+
+    def upload_file_from_path(self, kb_id, file_path, filename, content_type="text/markdown"):
+        """Upload a file to the knowledge base from a file path."""
+        try:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                content = f.read()
+        except Exception as e:
+            logger.error(f"Error reading file {file_path}: {e}")
+            return {"status": "error", "reason": f"read_error: {str(e)}"}
+
+        is_valid, validation_msg = self.validate_content(content, filename)
+        if not is_valid:
+            logger.warning(f"Skipping invalid file {filename}: {validation_msg}")
+            return {"status": "skipped", "reason": validation_msg}
+
         upload_endpoint = f"{self.base_url}/api/v1/files/"
 
-        # Create a temporary file for the upload
-        temp_file_path = f"/tmp/{filename}"
-        with open(temp_file_path, 'w') as f:
-            f.write(content)
-
         try:
-            # Use context manager for file upload request
-            with open(temp_file_path, 'rb') as f:
+            with open(file_path, 'rb') as f:
                 files = {'file': (filename, f, content_type)}
-                with self.session.post(
+                upload_response = self.session.post(
                     upload_endpoint,
                     headers={"Authorization": f"Bearer {self.api_token}"},
                     files=files
-                ) as upload_response:
-                    upload_response.raise_for_status()
-                    file_id = upload_response.json().get('id')
+                )
+                upload_response.raise_for_status()
+                file_id = upload_response.json().get('id')
+
+            if not file_id:
+                raise ValueError("No file ID returned from upload")
 
-            # Add the file to the knowledge base
             add_file_endpoint = f"{self.base_url}/api/v1/knowledge/{kb_id}/file/add"
-            with self.session.post(
+            add_response = self.session.post(
                 add_file_endpoint,
                 headers={
                     "Authorization": f"Bearer {self.api_token}",
                     "Content-Type": "application/json"
                 },
                 json={'file_id': file_id}
-            ) as add_response:
-                add_response.raise_for_status()
-                return add_response.json()
+            )
+
+            if add_response.status_code == 400:
+                error_msg = add_response.text
+                if "empty" in error_msg.lower():
+                    logger.warning(f"OpenWebUI rejected file {filename} as empty content")
+                    try:
+                        delete_endpoint = f"{self.base_url}/api/v1/files/{file_id}"
+                        self.session.delete(delete_endpoint)
+                    except:
+                        pass
+                    return {"status": "skipped", "reason": "rejected_as_empty_by_openwebui"}
+                else:
+                    add_response.raise_for_status()
+
+            add_response.raise_for_status()
+            return add_response.json()
 
         except requests.exceptions.RequestException as e:
-            logger.error(f"Error uploading file: {e}")
-            raise
-        finally:
-            # Clean up the temporary file
-            if os.path.exists(temp_file_path):
-                os.unlink(temp_file_path)
-
-    def update_file(self, kb_id, existing_file_id, content, filename, content_type="text/markdown"):
-        """Update an existing file in the knowledge base."""
-        # First upload the new version of the file
-        upload_endpoint = f"{self.base_url}/api/v1/files/"
-
-        # Create a temporary file for the upload
temp_file_path = f"/tmp/{filename}" - with open(temp_file_path, 'w') as f: - f.write(content) - - try: - # Upload the new file - with open(temp_file_path, 'rb') as f: - files = {'file': (filename, f, content_type)} - with self.session.post( - upload_endpoint, - headers={"Authorization": f"Bearer {self.api_token}"}, - files=files - ) as upload_response: - upload_response.raise_for_status() - new_file_id = upload_response.json().get('id') - - # Remove the old file from the knowledge base - remove_endpoint = f"{self.base_url}/api/v1/knowledge/{kb_id}/file/remove" - with self.session.post( - remove_endpoint, - headers={ - "Authorization": f"Bearer {self.api_token}", - "Content-Type": "application/json" - }, - json={'file_id': existing_file_id} - ) as remove_response: - remove_response.raise_for_status() - - # Add the new file to the knowledge base - add_endpoint = f"{self.base_url}/api/v1/knowledge/{kb_id}/file/add" - with self.session.post( - add_endpoint, - headers={ - "Authorization": f"Bearer {self.api_token}", - "Content-Type": "application/json" - }, - json={'file_id': new_file_id} - ) as add_response: - add_response.raise_for_status() - return add_response.json() - - except requests.exceptions.RequestException as e: - logger.error(f"Error updating file: {e}") - raise - finally: - # Clean up the temporary file - if os.path.exists(temp_file_path): - os.unlink(temp_file_path) + logger.error(f"Error uploading file {filename}: {e}") + if hasattr(e, 'response') and e.response is not None: + if e.response.status_code == 400 and "empty" in str(e.response.text).lower(): + logger.warning(f"OpenWebUI rejected file {filename} as empty content") + return {"status": "skipped", "reason": "rejected_as_empty_by_openwebui"} + return {"status": "error", "reason": f"upload_error: {str(e)}"} def close(self): """Close the requests session.""" @@ -266,24 +359,82 @@ class OpenWebUIUploader: self.session.close() +def extract_clean_text(html_content, url): + """Extract clean, meaningful text from HTML.""" + soup = BeautifulSoup(html_content, 'html.parser') + + # Remove unwanted elements + for element in soup(["script", "style", "nav", "header", "footer", "aside", + "meta", "link", "button", "form", "input", "select"]): + element.decompose() + + # Try different content selectors + content_selectors = [ + 'main', 'article', '.content', '#content', '.main', '#main', + '.documentation', '#documentation', '.doc', '#doc', + '.page', '#page', '.post', '#post', + 'body' + ] + + content_element = None + for selector in content_selectors: + content_element = soup.select_one(selector) + if content_element: + logger.info(f"Found content using selector: {selector}") + break + + if not content_element: + content_element = soup + + # Extract text with structure + text_parts = [] + + for element in content_element.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'li', 'div']): + text = element.get_text(strip=True) + if text and len(text) > 10: + if element.name.startswith('h'): + level = int(element.name[1]) + text_parts.append(f"{'#' * level} {text}") + elif element.name == 'li': + text_parts.append(f"- {text}") + else: + text_parts.append(text) + + # Fallback to general text extraction + if len(text_parts) < 3: + text = content_element.get_text() + lines = (line.strip() for line in text.splitlines()) + chunks = (phrase.strip() for line in lines for phrase in line.split(" ")) + text_parts = [chunk for chunk in chunks if chunk and len(chunk) > 20] + + if text_parts: + content = f"# Source: {url}\n\n" + "\n\n".join(text_parts) 
+ logger.info(f"Extracted {len(text_parts)} content blocks, {len(content)} total chars") + return content + else: + logger.warning(f"No meaningful content extracted from {url}") + return f"# Source: {url}\n\n*No meaningful text content could be extracted from this page.*" + + def convert_to_markdown(html_content, url): - """Convert HTML content to Markdown using MarkItDown.""" + """Convert HTML to Markdown with robust fallbacks.""" + clean_text = extract_clean_text(html_content, url) + if len(clean_text.strip()) > 200: + return clean_text + try: md = MarkItDown() - - # Use BytesIO to provide a binary stream to convert_stream - from io import BytesIO html_bytes = BytesIO(html_content.encode('utf-8')) - - # Convert the HTML to Markdown result = md.convert_stream(html_bytes, mime_type='text/html') - # Add a header with the source URL - markdown_with_header = f"# {url}\n\n{result.text_content}" - return markdown_with_header + if result and hasattr(result, 'text_content') and result.text_content: + markdown_content = result.text_content.strip() + if markdown_content and len(markdown_content) > 200: + return f"# Source: {url}\n\n{markdown_content}" except Exception as e: - logger.error(f"Error converting to markdown: {e}") - return f"# {url}\n\nError converting content: {str(e)}" + logger.warning(f"MarkItDown failed for {url}: {e}") + + return clean_text def is_valid_json(content): @@ -295,6 +446,64 @@ def is_valid_json(content): return False +def create_unique_filename(url): + """Create a unique filename from URL including fragment.""" + parsed = urlparse(url) + + path = parsed.path + if not path or path == '/': + path = 'index' + + fragment = parsed.fragment + if fragment: + fragment_clean = re.sub(r'[^a-zA-Z0-9]', '_', fragment) + filename = f"{path.strip('/')}_{fragment_clean}" + else: + filename = path.strip('/') + + filename = re.sub(r'[^a-zA-Z0-9_.-]', '_', filename) + + if len(filename) < 5: + domain_part = re.sub(r'[^a-zA-Z0-9]', '_', parsed.netloc) + filename = f"{domain_part}_{filename}" + + if not filename.endswith('.md'): + filename = f"{filename}.md" + + return filename + + +def save_files_to_temp_dir(processed_files, temp_dir): + """Save processed files to temporary directory.""" + saved_files = [] + + for file_info in processed_files: + try: + file_path = os.path.join(temp_dir, file_info['filename']) + + counter = 1 + original_path = file_path + while os.path.exists(file_path): + name, ext = os.path.splitext(original_path) + file_path = f"{name}_{counter}{ext}" + counter += 1 + + with open(file_path, 'w', encoding='utf-8') as f: + f.write(file_info['content']) + + saved_files.append({ + 'file_path': file_path, + 'filename': os.path.basename(file_path), + 'content_type': file_info['content_type'], + 'url': file_info['url'] + }) + logger.info(f"Saved file to temp directory: {os.path.basename(file_path)}") + except Exception as e: + logger.error(f"Error saving file {file_info['filename']}: {e}") + + return saved_files + + def main(): parser = argparse.ArgumentParser(description='Scrape a website and create an Open WebUI knowledge base') parser.add_argument('--token', '-t', required=True, help='Your OpenWebUI API token') @@ -308,26 +517,36 @@ def main(): parser.add_argument('--include-json', '-j', action='store_true', help='Include JSON files and API endpoints') parser.add_argument('--update', action='store_true', help='Update existing files in the knowledge base') parser.add_argument('--skip-existing', action='store_true', help='Skip existing files in the knowledge base') + 
+    parser.add_argument('--min-content-length', type=int, default=200, help='Minimum content length to include (default: 200 characters)')
+    parser.add_argument('--keep-temp-files', action='store_true', help='Keep temporary files for debugging')
+    parser.add_argument('--use-selenium', action='store_true', help='Use Selenium for JavaScript-rendered sites')
 
     args = parser.parse_args()
 
-    # Check for conflicting options
     if args.update and args.skip_existing:
         logger.error("Cannot use both --update and --skip-existing flags at the same time")
         return 1
 
-    # Initialize resources that need to be closed
+    # Check if Selenium is requested but not available
+    if args.use_selenium and not SELENIUM_AVAILABLE:
+        logger.warning("Selenium requested but not available. Install with: pip install selenium webdriver-manager")
+        logger.warning("Falling back to simple crawler.")
+        args.use_selenium = False
+
     scraper = None
     uploader = None
+    temp_dir = None
 
     try:
-        # 1. Crawl the website
        logger.info(f"Starting web crawl of {args.website_url} to depth {args.depth}")
+        logger.info(f"Using {'Selenium' if args.use_selenium else 'simple'} crawler")
+
         scraper = WebScraper(
             base_url=args.website_url,
             max_depth=args.depth,
             delay=args.delay,
-            exclude_patterns=args.exclude or []
+            exclude_patterns=args.exclude or [],
+            use_selenium=args.use_selenium
         )
         scraper.crawl()
 
@@ -338,46 +557,49 @@ def main():
             logger.error("No pages were crawled. Exiting.")
             return 1
 
-        # 2. Process content (convert HTML to Markdown or handle JSON)
         logger.info("Processing crawled content")
         processed_files = []
+        empty_files = 0
 
         for url, html_content in crawled_pages.items():
-            # For JSON content, preserve it as JSON
+            if not html_content or len(html_content.strip()) < 100:
+                logger.warning(f"Skipping empty page: {url}")
+                empty_files += 1
+                continue
+
             if url.endswith('.json') or (is_valid_json(html_content) and args.include_json):
                 if is_valid_json(html_content):
                     try:
                         json_obj = json.loads(html_content)
                         pretty_json = json.dumps(json_obj, indent=2)
 
-                        # Create filename for JSON file
-                        parsed_url = urlparse(url)
-                        filename = f"{parsed_url.netloc}{parsed_url.path}"
-                        filename = filename.replace('/', '_').replace('.', '_')
-                        if not filename.endswith('.json'):
-                            filename = f"{filename}.json"
-
-                        processed_files.append({
-                            'content': pretty_json,
-                            'content_type': 'application/json',
-                            'filename': filename,
-                            'url': url
-                        })
-                        logger.info(f"Processed JSON content from {url}")
+                        if len(pretty_json.strip()) >= args.min_content_length:
+                            filename = create_unique_filename(url)
+                            if not filename.endswith('.json'):
+                                filename = f"{filename}.json"
+
+                            processed_files.append({
+                                'content': pretty_json,
+                                'content_type': 'application/json',
+                                'filename': filename,
+                                'url': url
+                            })
+                            logger.info(f"Processed JSON content from {url}")
+                        else:
+                            logger.warning(f"Skipping JSON file {url} - content too short")
+                            empty_files += 1
                         continue
                     except ValueError:
-                        # Not valid JSON despite the extension, fall back to Markdown
                        pass
 
-            # For all other content, convert to Markdown
             markdown_content = convert_to_markdown(html_content, url)
 
-            # Create a safe filename
-            parsed_url = urlparse(url)
-            filename = f"{parsed_url.netloc}{parsed_url.path}".replace('/', '_').replace('.', '_')
-            if not filename.endswith('.md'):
-                filename = f"{filename}.md"
-
+            if not markdown_content or len(markdown_content.strip()) < args.min_content_length:
+                logger.warning(f"Skipping {url} - no extractable content found after conversion")
+                empty_files += 1
+                continue
+
+            filename = create_unique_filename(url)
             processed_files.append({
                 'content': markdown_content,
                 'content_type': 'text/markdown',
@@ -385,10 +607,38 @@ def main():
                 'url': url
             })
 
-        logger.info(f"Processed {len(processed_files)} files")
+        logger.info(f"Processed {len(processed_files)} files, skipped {empty_files} empty files")
+
+        if not processed_files:
+            logger.error("No files with valid content were processed. Exiting.")
+            return 1
+
+        script_dir = Path(__file__).parent
+        temp_dir = script_dir / "temp_webscraper_files"
+        temp_dir.mkdir(exist_ok=True)
+        logger.info(f"Created temporary directory: {temp_dir}")
+
+        saved_files = save_files_to_temp_dir(processed_files, temp_dir)
+        logger.info(f"Saved {len(saved_files)} files to temporary directory")
+
+        logger.info("=== DEBUG: File Content Analysis ===")
+        for file_info in saved_files:
+            try:
+                with open(file_info['file_path'], 'r', encoding='utf-8') as f:
+                    content = f.read()
+                lines = content.split('\n')
+                meaningful_lines = [line for line in lines if line.strip() and not line.startswith('#') and len(line.strip()) > 20]
+
+                logger.info(f"File: {file_info['filename']}")
+                logger.info(f"  Total size: {len(content)} chars")
+                logger.info(f"  Total lines: {len(lines)}")
+                logger.info(f"  Meaningful lines: {len(meaningful_lines)}")
+                if meaningful_lines:
+                    logger.info(f"  First meaningful line: {meaningful_lines[0][:100]}{'...' if len(meaningful_lines[0]) > 100 else ''}")
+            except Exception as e:
+                logger.error(f"Error reading saved file {file_info['filename']}: {e}")
+        logger.info("=== END DEBUG ===")
 
-        # 3. Upload to Open WebUI
-        # First check if a knowledge base with the specified name already exists
         uploader = OpenWebUIUploader(args.base_url, args.token)
         existing_kb = uploader.get_knowledge_base_by_name(args.kb_name)
 
@@ -396,7 +646,6 @@
             kb_id = existing_kb.get('id')
             logger.info(f"Found existing knowledge base '{args.kb_name}' with ID: {kb_id}")
         else:
-            # Create a new knowledge base if none exists with that name
             logger.info(f"Creating new knowledge base '{args.kb_name}' in Open WebUI")
             kb = uploader.create_knowledge_base(args.kb_name, args.kb_purpose)
             kb_id = kb.get('id')
@@ -405,61 +654,46 @@
                 return 1
             logger.info(f"Created knowledge base with ID: {kb_id}")
 
-        # 4. Upload each file
         success_count = 0
         skip_count = 0
         update_count = 0
         error_count = 0
+        empty_skip_count = 0
 
-        for file_info in processed_files:
+        for file_info in saved_files:
             try:
                 filename = file_info['filename']
+                file_path = file_info['file_path']
                 existing_file_id = uploader.file_exists_in_kb(kb_id, filename)
 
-                # Handle existing files based on options
-                if existing_file_id:
-                    if args.skip_existing:
-                        logger.info(f"Skipping existing file: {filename}")
-                        skip_count += 1
-                        continue
-                    elif args.update:
-                        logger.info(f"Updating existing file: {filename}")
-                        uploader.update_file(
-                            kb_id,
-                            existing_file_id,
-                            file_info['content'],
-                            filename,
-                            file_info['content_type']
-                        )
-                        update_count += 1
+                if existing_file_id and args.skip_existing:
+                    logger.info(f"Skipping existing file: {filename}")
+                    skip_count += 1
+                    continue
+
+                logger.info(f"Uploading file: {filename}")
+                result = uploader.upload_file_from_path(
+                    kb_id,
+                    file_path,
+                    filename,
+                    file_info['content_type']
+                )
+                if isinstance(result, dict) and result.get('status') in ['skipped', 'error']:
+                    if result.get('status') == 'skipped':
+                        empty_skip_count += 1
                     else:
-                        # Default behavior: add as new file
-                        logger.info(f"Adding duplicate file (existing file will remain): {filename}")
-                        uploader.upload_file(
-                            kb_id,
-                            file_info['content'],
-                            filename,
-                            file_info['content_type']
-                        )
-                        success_count += 1
+                        error_count += 1
+                    logger.warning(f"Failed to upload {filename}: {result.get('reason')}")
                 else:
-                    # New file
-                    logger.info(f"Uploading new file: {filename}")
-                    uploader.upload_file(
-                        kb_id,
-                        file_info['content'],
-                        filename,
-                        file_info['content_type']
-                    )
                     success_count += 1
 
-                # Add a small delay between uploads
                 time.sleep(0.5)
+
             except Exception as e:
                 logger.error(f"Failed to process {file_info['filename']}: {e}")
                 error_count += 1
 
-        logger.info(f"Upload complete: {success_count} files uploaded, {update_count} files updated, {skip_count} files skipped, {error_count} errors")
+        logger.info(f"Upload complete: {success_count} files uploaded, {update_count} files updated, {skip_count} files skipped, {empty_skip_count} empty/invalid files skipped, {error_count} errors")
 
         return 0
 
@@ -467,11 +701,20 @@
         logger.error(f"An unexpected error occurred: {e}")
         return 1
     finally:
-        # Ensure all resources are properly closed
         if scraper:
             scraper.close()
         if uploader:
             uploader.close()
+
+        if temp_dir and temp_dir.exists():
+            if args.keep_temp_files:
+                logger.info(f"Keeping temporary files in: {temp_dir}")
+            else:
+                try:
+                    shutil.rmtree(temp_dir)
+                    logger.info("Cleaned up temporary directory")
+                except Exception as e:
+                    logger.warning(f"Failed to clean up temporary directory {temp_dir}: {e}")
 
 
 if __name__ == "__main__":
diff --git a/requirements.txt b/requirements.txt
index 6afde0a..d97b41d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,5 @@
 requests
 beautifulsoup4
-markitdown[all]
\ No newline at end of file
+markitdown[all]
+selenium
+webdriver-manager
\ No newline at end of file
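
Note: requirements.txt now pins webdriver-manager, but setup_selenium() constructs the driver with webdriver.Chrome(options=chrome_options), which relies on a chromedriver already being on PATH (or on Selenium Manager in Selenium 4.6+). If automatic driver resolution is the goal, a minimal sketch of how the package could be wired in looks like this (illustrative only, not part of the patch above):

    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.chrome.service import Service
    from webdriver_manager.chrome import ChromeDriverManager

    chrome_options = Options()
    chrome_options.add_argument("--headless")

    # webdriver-manager downloads and caches a chromedriver matching the local
    # Chrome build, then hands its path to Selenium via a Service object.
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=chrome_options)

If the intent is to keep relying on Selenium Manager instead, webdriver-manager could simply be dropped from requirements.txt.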