1#!/usr/bin/env python3
   2# ============================================================================
   3#
   4# NAME
   5#
   6#     updateweb.py
   7#
   8# DESCRIPTION
   9#
  10#     Python script which updates my web sites.
  11#
  12#     It does miscellaneous cleanup on my local copy of the web site on disk,
  13#     including updating copyright information, then synchronizes the local
  14#     copy to my remote server web sites using FTP.
  15#
  16# USAGE
  17#
  18#     It's best to use the associated makefile.
  19#     But you can call this Python utility from the command line,
  20#
  21#     $ python updateweb.py          Clean up my local copy, then use it
  22#                                    to update my remote web server site.
  23#                                    Log warnings and errors.
  24#     $ python updateweb.py -v       Same, but log debug messages also.
  25#     $ python updateweb.py -c       Clean up my local copy only.
  26#     $ python updateweb.py -t       Run unit tests only.
  27#     $ python updateweb.py -m       Upload MathJax files (only need to do this once).
  28#
  29#     We get username and password information from the file PARAMETERS_FILE.
  30#
  31#     Logs are written to the files,
  32#
  33#         logLocal.txt       Local web site cleanup log.
  34#         logRemote.txt      Remote web server update log.
  35#
  36# AUTHOR
  37#
  38#     Sean E. O'Connor        23 Aug 2007  Version 1.0 released.
  39#
  40# LEGAL
  41#
  42#     updateweb.py Version 7.4 - A Python utility program which maintains my web site.
  43#     Copyright (C) 2007-2025 by Sean Erik O'Connor.  All Rights Reserved.
  44#
  45#     This program is free software: you can redistribute it and/or modify
  46#     it under the terms of the GNU General Public License as published by
  47#     the Free Software Foundation, either version 3 of the License, or
  48#     (at your option) any later version.
  49#
  50#     This program is distributed in the hope that it will be useful,
  51#     but WITHOUT ANY WARRANTY; without even the implied warranty of
  52#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  53#     GNU General Public License for more details.
  54#
  55#     You should have received a copy of the GNU General Public License
  56#     along with this program.  If not, see <http://www.gnu.org/licenses/>.
  57#
  58#     The author's address is seanerikoconnor!AT!gmail!DOT!com
  59#     with !DOT! replaced by . and the !AT! replaced by @
  60#
  61# NOTES
  62#
  63#    DOCUMENTATION
  64#
  65#    Python interpreter:               https://www.python.org/
  66#    Python tutorial and reference:    https://docs.python.org/lib/lib.html
  67#    Python debugger:                  https://docs.python.org/3/library/pdb.html
  68#    Python regular expression howto:  https://docs.python.org/3.7/howto/regex.html
  69#
  70# ============================================================================
  71
  72# ----------------------------------------------------------------------------
  73#  Load Python Packages
  74# ----------------------------------------------------------------------------
  75
  76# OS stuff
  77import sys
  78import os
  79import argparse
  80import subprocess
  81import shutil
  82from pathlib import Path
  83
  84# Regular expressions
  85import re
  86
  87# FTP stuff
  88import ftplib
  89
  90# Date and time
  91import time
  92import stat
  93import datetime
  94
  95# Logging
  96import logging
  97
  98# Unit testing
  99import unittest
 100
 101# Enumerated types (v3.4)
 102from enum import Enum
 103from typing import List, Any
 104
 105# YAML configuration files (a superset of JSON!)
 106import yaml 
 107# Recommended by https://www.cloudbees.com/blog/yaml-tutorial-everything-you-need-get-started
 108try:
 109    from yaml import CLoader as Loader
 110except ImportError:
 111    from yaml import Loader
 112
 113# Python syntax highlighter.  See https://pygments.org
 114from pygments import highlight
 115from pygments.lexers import HtmlLexer, CssLexer, JavascriptLexer, YamlLexer, MakefileLexer, BashLexer, VimLexer, TexLexer
 116from pygments.lexers import PythonLexer, CppLexer, CLexer, CommonLispLexer, FortranFixedLexer, MatlabLexer, OutputLexer
 117from pygments.formatters import HtmlFormatter
 118
 119
 120# ----------------------------------------------------------------------------
 121#  Custom Top Level Exceptions.
 122# ----------------------------------------------------------------------------
 123
 124class UpdateWebException(Exception):
    """Something went wrong at a deep level while searching local files, searching remote files, or trying to sync local and remote, and we could not recover.
       Derive from Exception, as recommended by the Python manual."""
 127    pass
 128
 129# ----------------------------------------------------------------------------
 130#  User settings.
 131# ----------------------------------------------------------------------------
 132
 133class TreeWalkSettings(Enum):
 134    """Enum types for how to walk the directory tree."""
 135    BREADTH_FIRST_SEARCH = 1
 136    DEPTH_FIRST_SEARCH = 2
 137
 138class FileType(Enum):
 139    """'Enum' types for properties of directories and files."""
 140    DIRECTORY = 0
 141    FILE = 1
 142    ON_LOCAL_ONLY = 2
 143    ON_REMOTE_ONLY = 3
 144    ON_BOTH_LOCAL_AND_REMOTE = 4
 145
 146class UserSettings:
 147    """Megatons of user selectable settings."""
 148    # Logging control.
 149    LOGFILENAME = ""
 150    VERBOSE = False  # Verbose mode.  Prints out everything.
 151    CLEAN = False  # Clean the local website only.
 152    UNITTEST = False  # Run a unit test of a function.
 153    MATHJAX = False  # Process and upload MathJax files to server.
 154
 155    # When diving into the MathJax directory, web walking the deep directories
 156    # may exceed Python's default recursion limit of 1000.
 157    RECURSION_DEPTH = 5000
 158    sys.setrecursionlimit(RECURSION_DEPTH)
 159
 160    # Fields in the file information (file_info) structure.
 161    # For example, file_info = 
 162    #   [ '/WebDesign/EquationImages/equation001.png',  -- The file name.
 163    #      1,                                           -- Enum type: Is it a file? dir? on local? on remote? on both?
 164    #      datetime.datetime(2010, 2, 3, 17, 15),       -- UTC encoded in a datetime class.
 165    #      4675]                                        -- File size in bytes.
 166    FILE_NAME = 0
 167    FILE_TYPE = 1
 168    FILE_DATE_TIME = 2
 169    FILE_SIZE = 3
 170
 171    # Server settings.
 172    SERVER_SETTINGS_FILE_NAME = "/private/updateweb.yaml"
 173    SERVER_NAME = None
 174    USER_NAME = None
 175    PASSWORD_NAME = None
 176    FTP_ROOT_NAME = None
 177    FILE_SIZE_LIMIT_NAME = None
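    # A minimal sketch of the two YAML documents expected in the private updateweb.yaml
    # settings file, inferred from the keys read in get_server_settings() below.
    # The values shown here are placeholders, not real settings:
    #
    #     ftp_server_name:    ftp.example.com
    #     ftp_user_name:      exampleuser
    #     ftp_password:       examplepassword
    #     remote_directory:   /
    #     file_size_limit_Kb: 50000
    #     ---
    #     pattern_match_replacement_string_list:
    #         - pattern:            oldtext
    #           replacement_string: newtext
    #     test_verify_string_list:
    #         - test_string:   a line containing oldtext somewhere
    #           verify_string: a line containing newtext somewhere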
 178
 179    # Map month names onto numbers.
 180    monthToNumber = {
 181        'Jan': 1,
 182        'Feb': 2,
 183        'Mar': 3,
 184        'Apr': 4,
 185        'May': 5,
 186        'Jun': 6,
 187        'Jul': 7,
 188        'Aug': 8,
 189        'Sep': 9,
 190        'Oct': 10,
 191        'Nov': 11,
 192        'Dec': 12}
 193
 194    # List of directories to skip over when processing or uploading the web page.
    # Some are private, but most are directories of temporary files.
 196    # They will be listed as WARNING in the log.
 197    # Examples:
 198    #     My private admin settings directory.
 199    #     Git or SVN local admin directories.
    #     Compiler build directories from XCode.
 201    #     PyCharm build directories.
 202    #     Python cache directories.
 203    #     Jupyter checkpoint directories.
 204    #     XCode temporary file crap.
    DIR_TO_SKIP = r"private|\.git|\.github|\.svn|build|XCodeOutput|Debug|Release|PyCharm|\.idea|__pycache__|\.ipynb_checkpoints|ModuleCache\.noindex|SymbolCache\.noindex|Primpoly-[a-z]"
 206
 207    # List of files to skip when processing or uploading to the web page.
 208    # They will be listed as WARNING in the log.
 209    # Examples:
 210    #     MathJax yml file.
 211    #     .htaccess (because it doesn't show up on the output of ftp LIST, so we must upload manually)
    FILE_TO_SKIP = r"\.travis\.yml|\.svnignore|\.htaccess"
 213
 214    # Suffixes for temporary files which will be deleted during the cleanup
 215    # phase.
 216    TEMP_FILE_SUFFIXES = r"""        # Use Python raw strings.
 217        \.                           # Match the dot in the file name.
 218                                     # Now begin matching the file name suffix.
 219                                     # (?: non-capturing match for the regex inside the parentheses,
 220                                     #   i.e. matching string cannot be retrieved later.
 221                                     # Now match any of the following file extensions:
 222        (?: o   | obj | lib |        #     Object files generated by C, C++, etc compilers
 223                              pyc |  #     Object file generated by the Python compiler
 224                  ilk | pdb | sup |  #     Temp files from VC++ compiler
 225            idb | ncb | opt | plg |  #     Temp files from VC++ compiler
 226            sbr | bsc | map | bce |  #     Temp files from VC++ compiler
 227            res | aps | dep | db  |  #     Temp files from VC++ compiler
 228                              jbf |  #     Paintshop Pro
 229                      class | jar |  #     Java compiler
 230                              fas |  #     CLISP compiler
 231                        swp | swo |  #     Vim editor
                        toc | aux |  #     TeX auxiliary files (not .synctex.gz or .log)
 233          DS_Store  | _\.DS_Store |  #     macOS finder folder settings.
 234                       _\.Trashes |  #     macOS recycle bin
 235        gdb_history)                 #     GDB history
 236        $                            #     Now we should see only the end of line.
 237        """
 238
 239    # Special case:  Vim temporary files contain a twiddle anywhere in the
 240    # name.
 241    VIM_TEMP_FILE_EXT = "~"
 242
 243    # Suffixes for temporary directories which should be deleted during the
 244    # cleanup phase.
 245    TEMP_DIR_SUFFIX = r"""           # Use Python raw strings.
 246        (?: Debug | Release |        # C++ compiler
 247           ipch   | \.vs    |        # Temp directories from VC++ compiler
 248        \.Trashes | \.Trash)         # macOS recycle bin
 249        $
 250        """
 251
 252    # File extension for an internally created temporary file.
 253    TEMP_FILE_EXT = ".new"
 254
 255    # Identify source file types.
 256    HYPERTEXT_FILE_PATTERN = r"""  # Use Python raw strings.
 257        (\.                        # Match the filename suffix after the .
 258            (?: html | htm |       # HTML hypertext
 259                css)               # CSS style sheet
 260        $)                         # End of line.
 261    """
 262
 263    SOURCE_FILE_PATTERN = r"""      # Use Python raw strings.
 264        (?: makefile$ |             # Any file called makefile is a source file.
 265                                    # Note the $ at the end so we don't reprocess .gitconfig.html -> .gitconfig.html.html
          \.vimrc$ |                # Vim script
          (\.bashrc$ |              # Bash configuration files.
           \.bash_profile$ |
           \.bash_logout$)
          |
          (\.gitignore$ |           # Git configuration files.
           \.gitignore_global$ |
           \.gitconfig$)
 274          |
 275          (\.                       # Match the filename suffix after the .
 276                                    # Now match any of these suffixes:
 277             (?: 
 278                  c | cpp | h | hpp |   #     C++ and C
 279                  js |                  #     JavaScript
 280                  py |                  #     Python
 281                  lsp |                 #     LISP
 282                  ipynb |               #     Jupyter notebook
 283                  m  |                  #     MATLAB
 284                  FOR | for | f |       #     FORTRAN
 285                  yaml |                #     YAML = JSON superset
 286                  tex |                 #     LaTeX
 287                  txt | dat |           #     Data files
 288                  sh)                   #     Bash
 289             $)                         # End of line.
 290         )
 291         """
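    # For example, this pattern (compiled with re.VERBOSE in precompile_regular_expressions below)
    # should match file names such as "makefile", ".vimrc", "hello.cpp" or "notes.txt",
    # but not hypertext files such as "index.html".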
 292
 293    # Special case of certain HTML and CSS files for which we want to generate a syntax highlighted source code listing.
 294    SPECIAL_FILE_TO_HIGHLIGHT_PATTERN = r"""
 295        (?: ^life\.html$          | # We want a listing of this particular HTML file.
 296            ^index\.html$         | # I want to list my top level HTML file.  (There is only one file with this name at the top level web directory.)
 297            ^webPageDesign\.html$ | # and also this HTML example file, but no others.
 298            ^StyleSheet\.css$ )     # I want to list my style sheet.
 299        """
 300
 301    # Files for which we want to generate a syntax highlighted source code listing.
 302    # Uses an f-string combined with a raw-string.
 303    FILE_TO_HIGHLIGHT_PATTERN = fr"""
 304        (?: {SPECIAL_FILE_TO_HIGHLIGHT_PATTERN} | 
 305            {SOURCE_FILE_PATTERN} )
 306        """
 307
 308    # Update my email address.
    # This is tricky:  Prevent matching and updating the name within this
 310    # Python source file by using the character class brackets.
 311    OLD_EMAIL_ADDRESS = r"""
 312        artificer\!AT\!sean[e]rikoconnor\!DOT\!freeservers\!DOT\!com
 313        """
 314    NEW_EMAIL_ADDRESS = "seanerikoconnor!AT!gmail!DOT!com"
 315
 316    # List of patterns to match, match groups to pull out of the old string, new strings to generate from these two items.  
 317    # Read patterns and strings from the updateweb.yaml file.
 318    STRING_REPLACEMENT_LIST = []
 319    # Pairs of test strings and their correct match/replacements.
 320    STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST = []
 321
 322    # Match a copyright line like this:
 323    #     Copyright (C) 1999-2025 by Sean Erik O&#39;Connor.  All Rights Reserved.
    # Extract the copyright symbol, which can be ASCII (C) or HTML &copy;, and extract the old year.
 325    TWO_DIGIT_YEAR_FORMAT = "%02d"
 326    COPYRIGHT_LINE = r"""
 327        Copyright                       # Copyright.
 328        \s+                             # One or more spaces.
 329        (?P<symbol> \(C\) | &copy;)     # Match and extract the copyright symbol.
 330        \D+                             # Any non-digits.
 331        (?P<old_year>[0-9]+)            # Match and extract the old copyright year, place it into variable 'old_year'
 332        -                               # hyphen
 333        ([0-9]+)                        # New copyright year.
 334        \s+                             # One or more spaces.
 335        by\s+Sean\sErik                 # Start of my name.  This way we don't rewrite somebody else's copyright notice.
 336        """
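    # Illustrative use of the named groups above, in the style of test_copyright_updating() below.
    # Here current_year stands for the value returned by WebSite.get_current_year():
    #
    #     pat = re.compile(UserSettings.COPYRIGHT_LINE, re.VERBOSE | re.IGNORECASE)
    #     new_copyright = r"Copyright \g<symbol> \g<old_year>-" + str(current_year) + " by Sean Erik"
    #     line = pat.sub(new_copyright, line)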
 337
 338    # Match a line containing the words,
 339    #    last updated YY
 340    # and extract the two digit year YY.
 341    LAST_UPDATED_LINE = r"""
 342        last\s+         # Match the words "last updated"
 343        updated\s+
 344        \d+             # Day number
        \s+             # One or more blanks or tabs.
 346        [A-Za-z]+       # Month
 347        \s+             # One or more blanks or tabs
 348        (?P<year>\d+)   # Two digit year.  Place it into the variable 'year'
 349        """
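    # For example, a line containing "last updated 01 Jun 25" should match,
    # with the named group 'year' capturing the two digit year "25".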
 350
 351    # Web server root directory.
 352    DEFAULT_ROOT_DIR = "/"
 353
 354    # The ftp listing occasionally shows a date newer than the actual date. 
    # On my server, it could be 6 months newer when we are near New Year's Day.  Typically the server file time is only 1 or 2 minutes newer.
 356    # But if the remote file time is much newer, it might be an old file with a bad date/time.  
 357    # Upload the file to be safe.
 358    # How to see the time differences from the log if they are large:
 359    #     egrep -o "Remote file.*is MUCH newer.*days" logRemote.txt
 360    #     Remote file Finance/MortgageLoanDerivation.tex.html is MUCH newer[8.0 minutes] by 885753.0 seconds = 14762.5 minutes = 246.0 hours = 10.3 days
 361    # How to see the time differences from the log if they are small and we wait and NOT upload:
 362    #    egrep -o "Remote file.*is newer.*days" logRemote.txt
 363    #    Remote file error404.html is newer by    102.0 seconds =      1.7 minutes =      0.0 hours =      0.0 days
 364    #    Remote file index.html is newer by    113.0 seconds =      1.9 minutes =      0.0 hours =      0.0 days
 365    MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD = 8.0
 366
 367    # Upload only if we are newer by more than a few minutes.  Allows for a little slop in time stamps on server or host.
 368    MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD = 3.0
 369
    # An ftp listing line should be at least this many characters long, or we'll
    # suspect an error.
 372    MIN_FTP_LINE_LENGTH = 7
 373
 374    # Parse an ftp listing, extracting <bytes> <mon> <day> <hour> <min> <year> <filename>
 375    # ftp listings are generally similar to UNIX ls -l listings.
 376    #
 377    # Some examples:
 378    #
 379    # (1) Freeservers ftp listing,
 380    #
 381    #          0        1   2                3           4    5   6   7      8
 382    #     drwxr-xr-x    3 1000             1000         4096 Nov 18  2006 Electronics
 383    #     -rw-r--r--    1 1000             1000        21984 Jun  4 03:46 StyleSheet.css
 384    #     -rw-r--r--    1 1000             1000         2901 Sep 26 17:12 allclasses-frame.html
 385    #
 386    # (2) atspace ftp listing,
 387    #
 388    #     drwxr-xr-x    3  seanerikoconnor vusers         49 Apr  7  2006 Electronics
 389    #     -rw-r--r--    1  seanerikoconnor vusers      21984 Jun  4 04:03 StyleSheet.css
 390    #
 391    FTP_LISTING = r"""
 392        [drwx-]+            # Unix type file mode.
 393        \s+                 # One or more blanks or tabs.
 394        \d+                 # Number of links.
 395        \s+
 396        \w+                 # Owner.
 397        \s+
 398        \w+                 # Group.
 399        \s+
 400        (?P<bytes> \d+)     # File size in bytes, placed into the variable 'bytes'.
 401        \s+
 402        (?P<mon> \w+)       # Month modified, placed into the variable 'mon'.
 403        \s+
 404        (?P<day> \d+)       # Day modified, placed into the variable 'day'.
 405        \s+
 406        (
 407            (?P<hour> \d+)  # Hour modified, placed into the variable 'hour'.
 408            :
 409            (?P<min> \d+)   # Minute modified, placed into the variable 'min'.
 410            |
 411            (?P<year> \d+)  # If hours and minutes are absent (happens when year is not the current year),
 412                            # extract the year instead.
 413        )
 414        \s+
 415        (?P<filename> [A-Za-z0-9"'.\-_,~()=+#]+)    # Path and file name containing letters, numbers,
 416                                                    # and funny characters.  We must escape some of
 417                                                    # these characters with a backslash, \.
 418        """
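    # Illustrative use of the named groups above, in the style of test_extract_filename_from_ftp_listing() below:
    #
    #     pat = re.compile(UserSettings.FTP_LISTING, re.VERBOSE | re.IGNORECASE)
    #     match = pat.search("-rw-r--r--    1 1000      1000     2901 Sep 26 17:12 allclasses-frame.html")
    #     if match:
    #         num_bytes = int(match.group('bytes'))    # 2901
    #         file_name = match.group('filename')      # "allclasses-frame.html"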
 419
 420    # HTML header up to the style sheet.
 421    BASIC_HTML_BEGIN = \
 422        """
 423        <!DOCTYPE html>
 424        <html lang="en-US">  <!-- Set language of this page to USA English. -->
 425        
 426        <head>
 427            <!-- This page uses Unicode characters. -->
 428            <meta charset="utf-8">
 429        
            <!-- Set viewport to actual device width.  Any other setting makes the web page initially appear zoomed-in on mobile devices. -->
 431            <meta name="viewport" content="width=device-width, initial-scale=1">
 432        
 433            <!-- Title appears in the web browser tab for this page.  The browser also uses it to bookmark this page. -->
 434            <title>Sean Erik O'Connor - Home Page and Free Mathematical Software.</title>
 435        
 436            <!-- Search engines will search using words in this description.  They will also display title in their search results. -->
 437            <meta name="description" content="Syntax Colored Source Code Listing">
 438        
 439            <!-- Some content management software uses the author's name. -->
 440            <meta name="author" content="Sean Erik O'Connor">
 441        
 442            <meta name="copyright" content="Copyright (C) 1986-2025 by Sean Erik O'Connor.  All Rights Reserved.">   
 443        
 444            <!-- Begin style sheet insertion -->
 445            <style>
 446                /* Default settings for all my main web pages. */
 447                body
 448                {
 449                    /* A wide sans-serif font is more readable on the web. */
 450                    font-family:            Verdana, Geneva, "Trebuchet MS", sans-serif ;
 451        
 452                    /* Set the body font size a little smaller than the user's default browser setting. */
 453                    font-size:              0.8em ; 
 454        
 455                    /* Black text is easier to read. */
 456                    color:                  black ;
 457        
 458                    /*  More vertical space between lines for more pleasant reading.  Use a unitless font height multiplier.  
                        Length and percentage values can give scrunched text due to poor inheritance behavior. */
 460                    line-height:            1.7 ;
 461                }
 462        
                /* Now prepare to add the syntax coloring style sheet from Pygments. */
 464        """
 465
 466    # After the style sheet and up to the start of the article in the body.
 467    BASIC_HTML_MIDDLE = \
 468        """
 469            </style>
 470        </head>
 471        
 472        <body>
 473            <article class="content">
 474        """
 475
 476    # After the source code listing, finish the article, body and html document.
 477    BASIC_HTML_END = \
 478        """
 479            </article>
 480        </body>
 481        
 482        </html>
 483        """
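    # A minimal sketch (illustrative only, not necessarily the exact code used later in this script)
    # of how these three fragments are meant to wrap a Pygments syntax highlighted listing.
    # Here source_code_text stands for the text of the file being listed:
    #
    #     formatter = HtmlFormatter()
    #     page = (UserSettings.BASIC_HTML_BEGIN
    #             + formatter.get_style_defs()                            # Pygments CSS for the <style> block above.
    #             + UserSettings.BASIC_HTML_MIDDLE
    #             + highlight(source_code_text, PythonLexer(), formatter)
    #             + UserSettings.BASIC_HTML_END)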
 484
 485    def __init__(self):
 486        """Set up the user settings."""
 487
 488        self.local_root_dir = ""
 489
 490        # Import the user settings from the parameter file.
 491        self.get_local_root_dir()
 492        self.get_server_settings()
 493
 494        self.precompile_regular_expressions()
 495
 496    def get_server_settings(self):
 497        """
 498        Read web account private settings from a secret offline parameter file.
 499        These also hold patterns to match and replace in all of our source pages.
 500        """
 501
 502        # Private file which contains my account settings.
 503        settings_file_name = self.local_root_dir + self.SERVER_SETTINGS_FILE_NAME
 504        # Recommended by
 505        #  https://www.cloudbees.com/blog/yaml-tutorial-everything-you-need-get-started
 506        try:
 507            stream = open(settings_file_name, "r")
 508        except OSError as detail:
 509            logging.error(f"Cannot open the YAML file {settings_file_name:s}.  Unable to read the settings because: {str(detail):s}")
 510            # Rethrow the exception higher.
 511            raise UpdateWebException("Cannot load the settings.  See the log file for details.  Aborting... ") from detail
        # Read all the YAML documents in the file, then close it.
        yaml_contents = yaml.load_all(stream, Loader)
        yaml_document_list: list[Any] = []
        for yaml_doc in yaml_contents:
            yaml_document_list.append(yaml_doc)
        stream.close()
        num_yaml_docs = len(yaml_document_list)
 518        if num_yaml_docs != 2:
 519            logging.error(f"Wrong number of YAML documents = {num_yaml_docs:3d} in the user settings file.  Aborting...")
 520            raise UpdateWebException("Cannot load the settings.  See the log file for details.  Aborting... ")
 521
 522        # Load all the server settings.
 523        self.SERVER_NAME = yaml_document_list[0]['ftp_server_name']
 524        self.USER_NAME = yaml_document_list[0]['ftp_user_name']
 525        self.PASSWORD_NAME = yaml_document_list[0]['ftp_password']
 526        self.FTP_ROOT_NAME = yaml_document_list[0]['remote_directory']
 527        self.FILE_SIZE_LIMIT_NAME = int(yaml_document_list[0]['file_size_limit_Kb'])
 528
 529        # Load all the tuples which contain patterns to match and the strings to replace, from document #1 in the YAML file.
 530        self.STRING_REPLACEMENT_LIST = []
 531        pat_rep_yaml_list = yaml_document_list[1]['pattern_match_replacement_string_list']
 532        for pat_rep in pat_rep_yaml_list:
 533            # Fetch the regular expression and compile it for speed.
 534            verbose_regex = pat_rep['pattern']
 535            pat = re.compile(verbose_regex, re.VERBOSE | re.IGNORECASE)
            # Since we use raw strings, we need to strip off leading and trailing whitespace.
            replacement_string = pat_rep['replacement_string'].strip()
 538            self.STRING_REPLACEMENT_LIST.append([pat, replacement_string])
 539
 540        # Load the test and verify strings.
 541        test_verify_strings_list = yaml_document_list[1]['test_verify_string_list']
 542        for test_verify_string in test_verify_strings_list:
            test_string = test_verify_string['test_string'].strip()
            verify_string = test_verify_string['verify_string'].strip()
 545            self.STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST.append([test_string,verify_string])
 546
 547        print("  ...done!", flush=True)
 548        return
 549
 550    def get_local_root_dir(self):
 551        """Get the local website root directory on this platform."""
 552
 553        # Each platform has a definite directory for the web page.
 554        local_web_dir_path = "/Desktop/Sean/WebSite"
 555
 556        if sys.platform.startswith('darwin'):
 557            self.local_root_dir = str(Path.home()) + local_web_dir_path
        # My Cyberpower PC running Ubuntu Linux.
 559        elif sys.platform.startswith('linux') or sys.platform.startswith('cygwin'):
 560            self.local_root_dir = str(Path.home()) + local_web_dir_path
 561        return
 562
 563    def precompile_regular_expressions(self):
 564        """For speed precompile the regular expression search patterns."""
 565        self.COPYRIGHT_LINE            = re.compile(self.COPYRIGHT_LINE,            re.VERBOSE | re.IGNORECASE)
 566        self.FTP_LISTING               = re.compile(self.FTP_LISTING,               re.VERBOSE | re.IGNORECASE)
 567        self.TEMP_FILE_SUFFIXES        = re.compile(self.TEMP_FILE_SUFFIXES,        re.VERBOSE | re.IGNORECASE)
 568        self.TEMP_DIR_SUFFIX           = re.compile(self.TEMP_DIR_SUFFIX,           re.VERBOSE)
 569        self.SOURCE_FILE_PATTERN       = re.compile(self.SOURCE_FILE_PATTERN,       re.VERBOSE)
 570        self.HYPERTEXT_FILE_PATTERN    = re.compile(self.HYPERTEXT_FILE_PATTERN,    re.VERBOSE)
 571        self.OLD_EMAIL_ADDRESS         = re.compile(self.OLD_EMAIL_ADDRESS,         re.VERBOSE | re.IGNORECASE)
 572        self.FILE_TO_HIGHLIGHT_PATTERN = re.compile(self.FILE_TO_HIGHLIGHT_PATTERN, re.VERBOSE)
 573        self.LAST_UPDATED_LINE         = re.compile(self.LAST_UPDATED_LINE,         re.VERBOSE | re.IGNORECASE)
 574
 575# ----------------------------------------------------------------------------
 576#  Unit test individual functions.
 577# ----------------------------------------------------------------------------
 578
class UnitTest(unittest.TestCase):
    """Unit test the individual functions."""
    def setUp(self):
        """Initialize the UnitTest class."""
        self.user_settings = UserSettings()
        self.user_settings.get_local_root_dir()
 584
 585    def tearDown(self):
 586        """Clean up the UnitTest class."""
 587        self.user_settings = None
 588
 589    def test_copyright_updating(self):
 590        """Test copyright line updating to the current year."""
 591        # Prevent web cleaning from rewriting strings by splitting them up and concatenating them.
 592        line_before_update = "Copyright (C) 19" + "99-20" + "20" + " by Sean Erik O'Connor.  All Rights Reserved. Copyright &copy; 1999-2025 by Sean Erik O'Connor"
 593        line_after_update_actual = "Copyright (C) 1999-2025 by Sean Erik O'Connor.  All Rights Reserved. Copyright &copy; 1999-2025 by Sean Erik O'Connor"
 594        pat = self.user_settings.COPYRIGHT_LINE
 595        match = pat.search(line_before_update)
 596
 597        if match:
 598            old_year = int(match.group('old_year'))
            # Same as calling WebSite.get_current_year():
 600            current_year = int(time.gmtime()[0])
 601            if old_year < current_year:
 602                # We matched and extracted the old copyright symbol into the variable
 603                # 'symbol' using the pattern syntax (?P<symbol> \(C\) | &copy;)
 604                # We now insert it back by placing the special syntax
 605                # \g<symbol> into the replacement string.
 606                new_copyright = r"Copyright \g<symbol> \g<old_year>-" + str(current_year) + " by Sean Erik"
 607                line_after_update_computed = pat.sub(new_copyright, line_before_update)
 608                self.assertEqual(
 609                    line_after_update_actual,
 610                    line_after_update_computed,
 611                    f"newline = |{line_after_update_actual:s}| line_after_update_computed = |{line_after_update_computed:s}|")
            else:
                self.fail("old_year >= current_year")
        else:
            self.fail("no match for the copyright pattern")
 618
 619    def test_extract_filename_from_ftp_listing(self):
 620        """Test parsing an FTP listing."""
 621        ftp_line = "-rw-r--r--    1 1000             1000         2901 Sep 26 17:12 allclasses-frame.html"
 622        extracted_file_name = "allclasses-frame.html"
 623        pat = self.user_settings.FTP_LISTING
 624        match = pat.search(ftp_line)
 625        if match:
 626            filename = match.group('filename')
 627            self.assertEqual(
 628                filename,
 629                extracted_file_name,
 630                f"ftp_line = {ftp_line:s} extracted file name = {extracted_file_name:s}")
 631        else:
 632            self.fail()
 633
 634    def test_get_file_time_and_date(self):
 635        """Test getting a file time and date."""
 636        # Point to an old file.
 637        file_name = "./Images/home.png"
 638        full_file_name = self.user_settings.local_root_dir + '/' + file_name
 639        # Get the UTC time.
 640        file_epoch_time = os.path.getmtime(full_file_name)
 641        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
 642        # Create a datetime object for the file.
 643        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class;  year, month, day, hour, minute, seconds.
 644        # Check if the file time matches what we would see if we did ls -l <file_name> and then converted to UTC.
 645        computed = f"file {file_name:s} datetime {d.ctime():s}"
 646        actual = "file ./Images/home.png datetime Tue Jul  1 03:53:16 2025"
 647        self.assertEqual(computed, actual)
 648
 649    def test_set_file_time_and_date(self):
 650        """Test setting a file time and date."""
 651        file_name = "./Images/home.png"
 652        full_file_name = self.user_settings.local_root_dir + '/' + file_name
 653        # Create a temporary file in the same directory.
 654        temp_file_name = "temporal.tmp"
        full_temp_file_name = self.user_settings.local_root_dir + '/' + temp_file_name
 656        try:
 657            with open(full_temp_file_name, 'w') as fp:
 658                fp.write("The End of Eternity")
 659        except OSError as detail:
 660            logging.error(f"Cannot open or write to the file {full_temp_file_name:s}: {str(detail):s}  Aborting...")
 661            raise UpdateWebException("Failed the unit test for setting time and date of a file.  See the log file for details.  Aborting...") from detail
 662        # Get the old file time.  Set the temporary file to the same time.
 663        file_stat = os.stat(full_file_name)
 664        os.utime(full_temp_file_name, (file_stat[stat.ST_ATIME], file_stat[stat.ST_MTIME]))
 665        # What is the temporary file's time now?
 666        file_epoch_time = os.path.getmtime(full_temp_file_name)
 667        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
 668        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class;  year, month, day, hour, minute, seconds.
 669        # Is the temporary file time set properly?
 670        computed = f"file {file_name:s} datetime {d.ctime():s}"
 671        actual = "file ./Images/home.png datetime Tue Jul  1 03:53:16 2025"
 672        self.assertEqual(computed, actual)
 673        os.remove(full_temp_file_name)
 674
 675    def test_difference_of_time_and_date(self):
 676        """Test a date difference calculation."""
 677        file_name = "./Images/home.png"
 678        full_file_name = self.user_settings.local_root_dir + '/' + file_name
 679        # Get the UTC time.
 680        file_epoch_time = os.path.getmtime(full_file_name)
 681        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
 682        # Create a datetime object for the file.
 683        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class;  year, month, day, hour, minute, seconds.
        # Slightly change the date and time by adding 1 second.  Using timedelta avoids creating an invalid datetime when the seconds field is 59.
        d2 = d + datetime.timedelta(seconds=1)
 686        time_delta = d2 - d
 687        seconds_different = time_delta.total_seconds()
 688        minutes_different = seconds_different / 60.0
 689        hours_different = minutes_different / 60.0
 690        days_different = hours_different / 24.0
 691        computed = f"difference {days_different:8.5f} days, {hours_different:8.5f} hours {minutes_different:8.5f} minutes, {seconds_different:8.5f} seconds"
 692        actual = "difference  0.00001 days,  0.00028 hours  0.01667 minutes,  1.00000 seconds"
 693        self.assertEqual(computed, actual)
 694
 695    def test_pattern_match_dir_to_skip(self):
        """Test that the directory-skipping pattern recognizes directory names to skip."""
 697        dir_skip = "Primpoly-cswhfrwgwdikgzfdpiorbeaiennz"
        pat = re.compile(self.user_settings.DIR_TO_SKIP)
        self.assertIsNotNone(pat.search(dir_skip))
 703
 704    def test_file_name_to_syntax_highlight(self):
 705        """Test if syntax highlighting recognizes file names to highlight."""
 706        file_name1 = "Computer/hello.lsp"
 707        file_name2 = "Computer/life.html"
        p = self.user_settings.FILE_TO_HIGHLIGHT_PATTERN
        self.assertIsNotNone(p.search(Path(file_name1).name))
        self.assertIsNotNone(p.search(Path(file_name2).name))
 713
 714    def test_user_settings(self):
 715        """Test whether user settings are correctly initialized."""
 716        computed = f"File size limit = {int(self.user_settings.FILE_SIZE_LIMIT_NAME):d} K"
 717        actual = "File size limit = 50000 K"
 718        self.assertEqual(computed, actual, "File size limit settings are incorrect.")
 719
 720    def test_check_replace_substring(self,debug=True):
 721        """Test the substring pattern match and replacement functions which use the list of match/replace pairs in the YAML file.
 722           For troubleshooting, turn on debug.
 723        """
 724        test_verify_pairs = self.user_settings.STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST
 725        # Iterate over all test strings.
 726        for pair in test_verify_pairs:
 727            [test_string, verify_string] = pair
 728            if debug:
 729                print( f">>>>>>> next test string   = {test_string}")
 730                print( f">>>>>>> next verify string = {verify_string}")
 731            # Iterate over all patterns and replacements.
 732            for match_replace_tuple in self.user_settings.STRING_REPLACEMENT_LIST:
 733                [pat, rep_string] = match_replace_tuple
                if debug:
                    print( f"\t-------> next pattern = {pat}")
                    print( f"\t-------> next replacement = {rep_string}")
 736                match = pat.search(test_string)
 737                # The pattern match succeeds.
 738                if match:
 739                    try:
 740                        sub = pat.sub(rep_string, test_string)
 741                        # String replacement succeeds for this pattern/replace pair iteration.
 742                        if debug:
 743                            print( f"\t\t.......> match and replace: {test_string} ---> {sub}")
 744                        test_string = sub
 745                    except IndexError as detail:
 746                        print(f"\t\t.......> Caught an exception: {str(detail):s}.  Replacement failed.")
 747                        if debug:
 748                            self.assertTrue(False)
 749                elif debug:
 750                    print( f"\t\t.......> no match for pattern = {pat} in test string = {test_string}")
 751                # No match, so go on to the next pattern and don't change test_string.
 752            # Done with all pattern/replace on test string.
 753            # Check this test string in the list.
 754            self.assertEqual(test_string, verify_string, f"\ntest_string = |{test_string:s}|\nverify_string = |{verify_string:s}|\n")
 755            if debug:
 756                print( f"\t******* DONE with all pattern matches and replacements on this test/verify string pair.\n" )
 757
 758# ----------------------------------------------------------------------------
 759#  Command line options.
 760# ----------------------------------------------------------------------------
 761
 762class CommandLineSettings(object):
 763    """Get the command line options."""
 764
 765    def __init__(self, user_settings, raw_args=None):
 766        """Get command line options"""
 767        command_line_parser = argparse.ArgumentParser(
 768            description="updateweb options")
 769
 770        # Log all changes, not just warnings and errors.
 771        command_line_parser.add_argument(
 772            "-v",
 773            "--verbose",
 774            help="Turn on verbose mode to log everything",
 775            action="store_true")
 776
 777        # Clean up the local website only.
 778        command_line_parser.add_argument(
 779            "-c",
 780            "--clean",
 781            help="Do a cleanup on the local web site only.",
 782            action="store_true")
 783
 784        # Also upload MathJax.
 785        command_line_parser.add_argument(
 786            "-m",
 787            "--mathjax",
            help="""ALSO upload the mathjax directory.\
            Do this if you have a new version of MathJax or if you have not yet created the /mathjax remote directory on the server.\
            We recommend first running the bash command:     find . -name '*' -exec touch {} \\;    This will ensure accurate times on the server.""",
 791            action="store_true")
 792
 793        # Run unit tests only.
 794        command_line_parser.add_argument("-t", "--test",
 795                                         help="Run unit tests.",
 796                                         action="store_true")
 797
 798        args = command_line_parser.parse_args(raw_args)
 799
 800        if args.verbose:
 801            user_settings.VERBOSE = True
 802        if args.clean:
 803            user_settings.CLEAN = True
 804        if args.test:
 805            user_settings.UNITTEST = True
 806        if args.mathjax:
 807            user_settings.MATHJAX = True
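    # Example (illustrative only) of driving the parser programmatically instead of from
    # sys.argv, using the raw_args hook above:
    #
    #     settings = UserSettings()
    #     CommandLineSettings(settings, raw_args=["--verbose", "--clean"])
    #     # Now settings.VERBOSE and settings.CLEAN are both True.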
 808
 809# ----------------------------------------------------------------------------
 810#  Base class which describes my web site overall.
 811# ----------------------------------------------------------------------------
 812
 813class WebSite(object):
 814    """
 815    Abstract class used for analyzing both local and remote (ftp server) websites.
 816    Contains the web-walking functions which traverse the directory structures and files.
    These are overridden in the subclasses with specialized methods:  one walks a local disk directory using OS calls, the other walks a remote directory using FTP commands.
 818    Child classes may define additional functions which only they need.
 819    """
 820
 821    def __init__(self, settings):
 822        """Set up root directories"""
 823
 824        # Import the user settings.
 825        self.user_settings = settings
 826
 827        # Queue keeps track of directories not yet processed.
 828        self.queue = []
 829
 830        # List of all directories traversed.
 831        self.directories = []
 832
 833        # List of files traversed, with file information.
 834        self.files = []
 835
 836        # Find out the root directory and go there.
 837        self.root_dir = self.get_root_dir()
 838        self.go_to_root_dir(self.root_dir)
 839
    # The @staticmethod decorator below makes get_current_year a static method:  there is no self first argument, and you can call it without creating an
    # instance of this class.  Call it from anywhere, inside or outside the class, using WebSite.get_current_year().  (You could just create a global function instead.)
 842    @staticmethod
 843    def get_current_year():
 844        """Get the current year."""
 845        return int(time.gmtime()[0])
 846
 847    @staticmethod
 848    def get_current_two_digit_year():
 849        """Get the last two digits of the current year."""
 850        return WebSite.get_current_year() % 100
 851
 852    @staticmethod
 853    def is_file_info_type(file_info):
 854        """Check if we have a file information structure or merely a simple file name."""
 855        try:
 856            if isinstance(file_info, list):
 857                return True
 858            elif isinstance(file_info, str):
 859                return False
 860            else:
 861                logging.error("is_file_info_type found a bad type.  Aborting...")
 862                raise UpdateWebException("Internal error for file type.  See the log file for details.  Aborting... ")
 863        except TypeError as detail:
 864            logging.error(f"is_file_info_type found a bad type {str(detail):s}.  Aborting...")
 865            raise UpdateWebException("Internal error for file type.  See the log file for details.  Aborting... ") from detail
 866
 867    def get_root_dir(self):
 868        """Subclass:  Put code here to get the root directory"""
 869        return ""
 870
 871    def go_to_root_dir(self, root_dir):
 872        """Subclass:  Put code here to go to the root directory"""
        pass  # Python's do-nothing statement.
 874
 875    def one_level_down(self, d):
 876        """Subclass:  Fill in with a method which returns a list of the
 877        directories and files immediately beneath dir"""
 878        return [], []
 879
 880    def walk(self, d, type_of_tree_search=TreeWalkSettings.BREADTH_FIRST_SEARCH):
 881        """Walk a directory in either depth first or breadth first order.  BFS is the default."""
 882
 883        # Get all subfiles and subdirectories off this node.
 884        subdirectories, subfiles = self.one_level_down(d)
 885
 886        # Add all the subfiles in order.
 887        for f in subfiles:
 888
 889            name = self.strip_root(f)
 890            logging.debug(f"Webwalking:  Adding file {name[self.user_settings.FILE_NAME]:s} to list.")
 891
 892            # Some files are private so skip them from consideration.
 893            pat = re.compile(self.user_settings.FILE_TO_SKIP)
 894
 895            if pat.search(name[self.user_settings.FILE_NAME]):
 896                logging.debug( f"Webwalking:  Skipping private file {name[self.user_settings.FILE_NAME]:s}")
 897            # Don't upload any *.log files either;  we are currently writing to this file.
 898            elif name[self.user_settings.FILE_NAME].find(self.user_settings.LOGFILENAME) >= 0:
 899                logging.debug(f"Webwalking:  Skipping log file {name[self.user_settings.FILE_NAME]:s}")
 900            else:
 901                # OK to add this file to the list for possible uploading.
 902                self.files.append(name)
 903
 904        # Queue up the subdirectories.
 905        for d in subdirectories:
 906            # Some directories are private such as .git or just temporary file
 907            # caches so skip them from consideration.
 908            pat = re.compile(self.user_settings.DIR_TO_SKIP)
 909            if pat.search(d):
 910                logging.debug(f"Webwalking:  Skipping private dir {d:s}")
 911            else:
 912                logging.debug(f"Webwalking:  Pushing dir {d:s} on the queue.")
 913                self.queue.append(d)
 914
 915        # Search through the directories.
 916        while len(self.queue) > 0:
 917            # For breadth first search, remove from beginning of queue.
 918            if type_of_tree_search == TreeWalkSettings.BREADTH_FIRST_SEARCH:
 919                d = self.queue.pop(0)
 920
 921            # For depth first search, remove from end of queue.
 922            elif type_of_tree_search == TreeWalkSettings.DEPTH_FIRST_SEARCH:
 923                d = self.queue.pop()
 924            else:
 925                d = self.queue.pop(0)
 926
 927            name = self.strip_root(d)
 928            logging.debug(f"Webwalking:  Adding relative directory {name:s} to list, full path = {d:s}.")
 929            self.directories.append(name)
 930
 931            self.walk(d)
 932
 933    def strip_root(self, file_info):
 934        """Return a path, but strip off the root directory"""
 935
 936        root = self.root_dir
 937
 938        # Extract the file name.
 939        if self.is_file_info_type(file_info):
 940            name = file_info[self.user_settings.FILE_NAME]
 941        else:
 942            name = file_info
 943
 944        # e.g. root = / and name = /Art/foo.txt yields stripped_path = Art/foo.txt
 945        # but root = /Sean and name = /Sean/Art/foo.txt yields stripped_path =
 946        # Art/foo.txt
 947        lenroot = len(root)
 948        if root == self.user_settings.DEFAULT_ROOT_DIR:
 949            pass
 950        else:
 951            lenroot = lenroot + 1
 952
 953        stripped_path = name[lenroot:]
 954
 955        if self.is_file_info_type(file_info):
 956            # Update the file name only.
 957            return [stripped_path,
 958                    file_info[self.user_settings.FILE_TYPE],
 959                    file_info[self.user_settings.FILE_DATE_TIME],
 960                    file_info[self.user_settings.FILE_SIZE]]
 961        else:
 962            return stripped_path
 963
 964    def append_root_dir(self, root_dir, name):
 965        """Append the root directory to a path"""
 966
 967        # e.g. root = /, and name = Art/foo.txt yields /Art/foo.txt
 968        # but root = /Sean, and name = Art/foo.txt yields /Sean/Art/foo.txt
 969        if root_dir == self.user_settings.DEFAULT_ROOT_DIR:
 970            return root_dir + name
 971        else:
 972            return root_dir + "/" + name
 973
 974    def scan(self):
 975        """Scan the directory tree recursively from the root"""
 976        logging.debug(f"Webwalking:  Beginning recursive directory scan from root directory {self.root_dir:s}")
 977        self.walk(self.root_dir)
 978
 979    def modtime(self, f):
 980        """Subclass:  Get file modification time"""
 981        pass
 982
    def finish(self):
        """Quit the web site."""
        logging.debug(f"Finished with WebSite object of class {type(self)}")
 987
 988# ----------------------------------------------------------------------------
 989#  Subclass which knows about the local web site on disk.
 990# ----------------------------------------------------------------------------
 991
 992class LocalWebSite(WebSite):
 993    """Walk the local web directory on local disk down from the root.
 994    Clean up temporary files and do other cleanup work."""
 995
 996    def __init__(self, settings):
 997        """Go to web page root and list all files and directories."""
 998
 999        # Initialize the parent class.
1000        WebSite.__init__(self, settings)
1001
1002        self.root_dir = self.get_root_dir()
1003        logging.debug(f"LocalWebSite.__init__():  \tRoot directory: {self.root_dir:s}")
1004
1005    def get_root_dir(self):
1006        """Get the name of the root directory"""
1007        return self.user_settings.local_root_dir
1008
1009    def go_to_root_dir(self, root_dir):
1010        """Go to the root directory"""
1011
1012        # Go to the root directory.
1013        logging.debug(f"LocalWebSite.go_to_root_dir():  \tchdir to root directory:  {root_dir:s}")
1014        os.chdir(root_dir)
1015
1016        # Read it back.
1017        self.root_dir = os.getcwd()
1018        logging.debug(f"LocalWebSite.go_to_root_dir():  \tgetcwd root directory:  {self.root_dir:s}")
1019
1020    def one_level_down(self, d):
1021        """List all files and subdirectories in the current directory, dir.  For files, collect file info
1022        such as time, date and size."""
1023
1024        directories = []
1025        files = []
1026
1027        # Change to current directory.
1028        os.chdir(d)
1029
1030        # List all subdirectories and files.
1031        dir_list = os.listdir(d)
1032
1033        if dir_list:
1034            for line in dir_list:
1035                # Add the full path prefix from the root.
1036                name = self.append_root_dir(d, line)
1037                logging.debug(f"LocalWebSite.one_level_down():  \tlocal dir or file {name:s}")
1038
1039                # Is it a directory or a file?
1040                if os.path.isdir(name):
1041                    directories.append(name)
1042                elif os.path.isfile(name):
1043                    # First assemble the file information of name, time/date and size into a list.
1044                    # Can index it like an array.  For example,
1045                    # file_info = 
1046                    #   [ '/WebDesign/EquationImages/equation001.png',  -- The file name.
1047                    #      1,                                           -- Enum type FileType.FILE = 1.
1048                    #      datetime.datetime(2010, 2, 3, 17, 15),       -- UTC encoded in a date/time class.
1049                    #      4675]                                        -- File size in bytes.
1050                    file_info = [name,
1051                                 FileType.FILE,
1052                                 self.get_file_date_time(name),
1053                                 self.get_file_size(name)]
1054                    files.append(file_info)
1055
1056        # Sort the names into order.
1057        if directories:
1058            directories.sort()
1059        if files:
1060            files.sort()
1061
1062        return directories, files
1063
1064    @staticmethod
1065    def get_file_date_time(file_name):
1066        """Get a local file time and date in UTC."""
1067
1068        file_epoch_time = os.path.getmtime(file_name)
1069        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
1070        # Create a datetime class from the UTC year, month, day, hour, minute, seconds.
1071        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])
1072        return d
1073
1074    @staticmethod
1075    def get_file_size(file_name):
1076        """Get file size in bytes."""
1077        return os.path.getsize(file_name)
1078
1079    @staticmethod
1080    def clean_up_temp_file(temp_file_name, file_name, changed):
1081        """Remove the original file, rename the temporary file name to the original name.
1082        If there are no changes, just remove the temporary file.
1083        """
1084
1085        if changed:
1086            # Remove the old file now that we have the rewritten file.
1087            try:
1088                os.remove(file_name)
1089                logging.debug(f"Changes were made.  Removed original file {file_name:s}")
1090            except OSError as detail:
1091                logging.error(f"Cannot remove old file {file_name:s}: {str(detail):s}.  Need to remove it manually.")
1092
1093            # Rename the new file to the old file name.
1094            try:
1095                os.rename(temp_file_name, file_name)
1096                logging.debug(f"Renamed temp file {temp_file_name:s} to original file {file_name:s}")
1097            except OSError as detail:
1098                logging.error(f"Cannot rename temporary file {temp_file_name:s} to old file name {file_name:s}: {str(detail):s}.  Need to rename manually")
1099        else:
1100            # No changes?  Remove the temporary file.
1101            try:
1102                os.remove(temp_file_name)
1103                logging.debug(f"No changes were made.  Removed temporary file {temp_file_name:s}")
1104            except OSError as detail:
1105                logging.error(f"Cannot remove temporary file {temp_file_name:s}: {str(detail):s}.  Need to remove it manually.")
1106        return
1107
1108    @staticmethod
1109    def process_lines_of_file(in_file_name, out_file_name, process_line_function_list=None):
1110        """
1111        Process each line of a file with a list of functions.  Create a new temporary file.
1112
1113        The default list is None which means make an exact copy.
1114        """
1115
1116        # Assume no changes.
1117        changed = False
1118
1119        # Open both input and output files for processing.  Check if we cannot do it.
1120        fin = None
1121        try:
1122            fin = open(in_file_name, "r")
1123        except IOError as detail:
1124            logging.error(f"process_lines_of_file():  \tCannot open file {in_file_name:s} for reading:  {str(detail):s} Aborting...")
1125            if fin is not None:
1126                fin.close()
1127            raise UpdateWebException("Internal error for processing a file.  See the log file for details.  Aborting... ") from detail
1128        fout = None
1129        try:
1130            fout = open(out_file_name, "w")
1131        except IOError as detail:
1132            logging.error(f"process_lines_of_file():  \tCannot open file {out_file_name:s} for writing:  {str(detail):s} Aborting...")
1133            if fout is not None:
1134                fout.close()
1135            raise UpdateWebException("Internal error for processing a file.  See the log file for details.  Aborting... ") from detail
1136
1137        # Read each line of the file, aborting if there is a read error.
1138        try:
1139            line = fin.readline()
1140
1141            # Rewrite the next line of the file using all the rewrite functions.
1142            while line:
1143                original_line = line
1144                # If we have one or more rewrite functions...
1145                if process_line_function_list is not None:
                    # ...apply each rewrite function to the line, one after the other in order.
                    for process_line_function in process_line_function_list:
                        if process_line_function is not None:
                            line = process_line_function(line)
1150
1151                if original_line != line:
1152                    logging.debug(f"Rewrote the line:    >>>{original_line:s}<<< into >>>{line:s}<<< for file {in_file_name:s}")
1153                    changed = True
1154
1155                fout.write(line)
1156
1157                line = fin.readline()
1158
1159            fin.close()
1160            fout.close()
1161        except IOError as detail:
1162            logging.error(f"File I/O error during reading/writing file {in_file_name:s} in process_lines_of_file: {str(detail):s}  Aborting...")
1163            raise UpdateWebException("Internal error for processing a file.  See the log file for details.  Aborting... ") from detail
1164
1165        if changed:
1166            logging.debug(f"process_lines_of_file():  \tRewrote original file {in_file_name:s}."
1167                          f"Changes are in temporary copy {out_file_name:s}")
1168
1169        # Return True if any lines were changed.
1170        return changed
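        # Illustrative usage sketch (hypothetical file names):  copy index.html into a temporary file,
        # applying the copyright and email rewrites to each line in order,
        #
        #     changed = self.process_lines_of_file("index.html", "index.html.tmp",
        #                                          [self.rewrite_copyright_line, self.rewrite_email_address_line])
        #
        # With process_line_function_list=None the file is copied verbatim and changed stays False.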
1171
1172    def clean(self):
1173        """Scan through all directories and files in the local on disk website and clean them up."""
1174
1175        num_source_files_changed = 0
1176        num_source_files_syntax_highlighted = 0
1177
1178        logging.debug("Cleaning up the local web page.")
1179
1180        if self.directories is None or self.files is None:
1181            logging.error("Web site has no directories or files.  Aborting...")
1182            raise UpdateWebException("Internal error for cleaning up the local web site.  See the log file for details.  Aborting... ")
1183
1184        for d in self.directories:
1185
1186            if self.is_temp_dir(d):
1187                # Add the full path prefix from the root.
1188                name = self.append_root_dir(self.get_root_dir(), d)
1189                try:
1190                    logging.debug(f"Removing temp dir {self.root_dir:s} recursively")
1191                    shutil.rmtree(name)
1192                except OSError as detail:
1193                    logging.error(f"Cannot remove temp dir {name:s}: {str(detail):s}")
1194
1195        for f in self.files:
1196            # Add the full path prefix from the root.
1197            full_file_name = self.append_root_dir(
1198                self.get_root_dir(), f[self.user_settings.FILE_NAME])
1199
1200            # Remove all temporary files.
1201            if self.is_temp_file(f):
1202                try:
1203                    logging.debug(f"Removing temp file {full_file_name:s}")
1204                    os.remove(full_file_name)
1205                except OSError as detail:
1206                    logging.error(f"Cannot remove temp dir {full_file_name:s}: {str(detail):s}")
1207
1208            # Update source code files.
1209            if self.is_source_or_hypertext_file(f):
1210                changed = self.rewrite_source_file(full_file_name)
1211                if changed:
1212                    num_source_files_changed += 1
1213                    logging.debug(f"Rewrote source code file {self.root_dir:s}")
1214
            # Generate a syntax highlighted code listing.
            # Give it the same time and date as the original code so that only files with recent changes get uploaded to the remote server.
1217            if self.is_file_to_syntax_highlight(f):
1218                # syntax_highlighted_file_name = self.create_syntax_highlighted_code_listing(full_file_name, dry_run=True)
1219                syntax_highlighted_file_name = self.create_syntax_highlighted_code_listing(full_file_name)
                if syntax_highlighted_file_name is not None:
                    logging.debug(f"Generated a syntax highlighted source listing file {syntax_highlighted_file_name:s} for the file {full_file_name:s}")
                    num_source_files_syntax_highlighted += 1
                else:
                    logging.debug(f"Failed to generate a syntax highlighted source listing file for {full_file_name:s}")
1225
1226        logging.debug(f"Number of source files rewritten = {num_source_files_changed:10d}")
1227        logging.debug(f"Number of source files syntax highlighted = {num_source_files_syntax_highlighted:10d}")
1228
1229    def is_temp_file(self, file_info):
1230        """Identify a file name as a temporary file"""
1231
1232        file_name = file_info[self.user_settings.FILE_NAME]
1233
        # Suffixes and names of temporary files to be deleted.
1235        pat = self.user_settings.TEMP_FILE_SUFFIXES
1236        match = pat.search(file_name)
1237        # Remove any files containing twiddles anywhere in the name.
1238        if match or file_name.find(self.user_settings.VIM_TEMP_FILE_EXT) >= 0:
1239            return True
1240
1241        return False
1242
1243    def is_temp_dir(self, dir_name):
1244        """Identify a name as a temporary directory."""
1245
1246        p = self.user_settings.TEMP_DIR_SUFFIX
1247        return p.search(dir_name)
1248
1249    def is_source_or_hypertext_file(self, file_info):
1250        """ Check if the file name is a source file or a hypertext file."""
1251
1252        file_name = file_info[self.user_settings.FILE_NAME]
1253        p1 = self.user_settings.SOURCE_FILE_PATTERN
1254        p2 = self.user_settings.HYPERTEXT_FILE_PATTERN
1255        if p1.search(file_name) or p2.search(file_name):
1256            return True
1257        else:
1258            return False
1259
1260    def is_file_to_syntax_highlight(self, file_info):
1261        """Check if this file type should have a syntax highlighted source listing."""
1262
1263        # Take apart the file name.
1264        full_file_name = file_info[self.user_settings.FILE_NAME]
1265        file_name = Path(full_file_name).name
1266
1267        p = self.user_settings.FILE_TO_HIGHLIGHT_PATTERN
1268        if p.search(file_name):
1269            return True
1270        else:
1271            return False
1272
1273    def rewrite_substring(self, line):
1274        """Rewrite a line containing a pattern of your choice"""
1275
1276        # Start with the original unchanged line.
1277        rewritten_line = line
1278
1279        # Do the replacements in order from first to last.
1280        for match_replace_tuple in self.user_settings.STRING_REPLACEMENT_LIST:
1281            # Get the next pattern match replacement string tuple.
1282            [pat, rep_string] = match_replace_tuple
1283            # Does it match?  Then do string substitution, else leave the line unchanged.
1284            match = pat.search(rewritten_line)
1285            if match:
1286                # Now we have these cases:
1287                #     -No capture variables at all, but just a straightforward pattern match followed by a string substitution.
1288                #     -One or more capture variable names in the pattern (?P<varname> ... ) along with the same corresponding match group names in replacement string \\g<varname> ... 
1289                #      If pat.sub() finds any inconsistency here such as the capture variable names not matching the group names, it will throw an exception.
1290                try:
1291                    sub = pat.sub(rep_string, rewritten_line)
1292                    rewritten_line = sub
1293                except IndexError as detail:
1294                    logging.error(f"ERROR: {str(detail):s}.  Did not find a capture variable name in the pattern (?P<varname> ... ) along with its corresponding match group name in replacement string \\g<varname> in updateweb.yaml.    Did not rewrite the line |{rewritten_line:s}|")
1295 
1296        return rewritten_line
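        # Illustrative sketch of a named-group rewrite (an assumed pattern, not one from updateweb.yaml):
        #
        #     pat = re.compile(r"(?P<scheme>http)://(?P<host>[\w.]+)")
        #     pat.sub(r"\g<scheme>s://\g<host>", "see http://example.com")   # -> 'see https://example.com'
        #
        # If a \g<name> in the replacement string has no matching (?P<name> ... ) group in the pattern,
        # pat.sub() raises the exception which the except clause above guards against.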
1297
1298    def rewrite_email_address_line(self, line):
1299        """Rewrite lines containing old email addresses."""
1300
1301        # Search for the old email address.
1302        pat = self.user_settings.OLD_EMAIL_ADDRESS
1303        match = pat.search(line)
1304
1305        # Replace the old address with my new email address.
1306        if match:
1307            new_address = self.user_settings.NEW_EMAIL_ADDRESS
1308            sub = pat.sub(new_address, line)
1309            line = sub
1310
1311        return line
1312
1313    def rewrite_copyright_line(self, line):
1314        """Rewrite copyright lines if they are out of date."""
1315
1316        # Match the lines,
1317        #     Copyright (C) nnnn-mmmm by Sean Erik O'Connor.
1318        #     Copyright &copy; nnnn-mmmm by Sean Erik O'Connor.
1319        # and pull out the old year and save it.
1320        pat = self.user_settings.COPYRIGHT_LINE
1321        match = pat.search(line)
1322
1323        # Found a match.
1324        if match:
1325            old_year = int(match.group('old_year'))
1326
1327            # Replace the old year with the current year.
1328            # We matched and extracted the old copyright symbol into the variable
1329            # 'symbol' using the pattern syntax (?P<symbol> \(C\) | &copy;)
1330            # We now insert it back by placing the special syntax \g<symbol>
1331            # into the replacement string.
1332            if old_year < WebSite.get_current_year():
1333                new_copyright = r"Copyright \g<symbol> \g<old_year>-" + str(WebSite.get_current_year()) + " by Sean Erik"
1334                sub = pat.sub(new_copyright, line)
1335                line = sub
1336        return line
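        # Illustrative sketch of the year bump (an assumed pattern, not the exact COPYRIGHT_LINE in updateweb.yaml):
        #
        #     pat = re.compile(r"Copyright (?P<symbol>\(C\)|&copy;) (?P<old_year>\d{4})-\d{4}")
        #     pat.sub(r"Copyright \g<symbol> \g<old_year>-2025", "Copyright (C) 2007-2024 by Sean Erik O'Connor.")
        #     # -> 'Copyright (C) 2007-2025 by Sean Erik O'Connor.'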
1337
1338    def rewrite_last_update_line(self, line):
1339        """Rewrite the Last Updated line if the year is out of date."""
1340
1341        # Match the last updated line and pull out the year.
1342        #      last updated 01 Jan 25.
1343        p = self.user_settings.LAST_UPDATED_LINE
1344        m = p.search(line)
1345
1346        if m:
1347            last_update_year = int(m.group('year'))
1348
1349            # Convert to four digit years.
1350            if last_update_year > 90:
1351                last_update_year += 1900
1352            else:
1353                last_update_year += 2000
1354
1355            # If the year is old, rewrite to "01 Jan <current year>".
1356            if last_update_year < WebSite.get_current_year():
1357                two_digit_year = self.user_settings.TWO_DIGIT_YEAR_FORMAT % self.get_current_two_digit_year()
1358                sub = p.sub('last updated 01 Jan ' + two_digit_year, line)
1359                line = sub
1360
1361        return line
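        # For example, assuming TWO_DIGIT_YEAR_FORMAT is a two-digit format such as "%02d":
        # a trailing year of 99 becomes 1999 and 25 becomes 2025, so in 2025 the line
        # "last updated 01 Jan 99" would be rewritten to "last updated 01 Jan 25".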
1362
1363    def rewrite_source_file(self, file_name):
1364        """Rewrite copyright lines, last updated lines, etc."""
1365        changed = False
1366
1367        # Create a new temporary file name for the rewritten file.
1368        temp_file_name = file_name + self.user_settings.TEMP_FILE_EXT
1369
1370        # Apply changes to all lines of the temporary file.  Apply change functions in
1371        # the sequence listed.
1372        if self.process_lines_of_file(file_name, temp_file_name,
1373                                      [self.rewrite_copyright_line,
1374                                       self.rewrite_last_update_line,
1375                                       self.rewrite_email_address_line,
1376                                       self.rewrite_substring]):
1377            logging.debug(f"Changed (rewritten) source file {file_name:s}")
1378            changed = True
1379
1380        # Rename the temporary file to the original file name.  If no changes, just delete the temp file.
1381        self.clean_up_temp_file(temp_file_name, file_name, changed)
1382
1383        return changed
1384
1385    @staticmethod
1386    def create_syntax_highlighted_code_listing(source_file_name, **kwargs):
1387        """Create a syntax highlighted source listing for the file and return its name.  Return None if there is an error.
1388        Keep the same date/time as the original file."""
1389
        # kwargs is a dictionary of optional keyword arguments, e.g. dry_run=True.
        dry_run = bool(kwargs.get('dry_run', False))
1400
1401        # Take apart the file name.
1402        file_name_without_extension = Path(source_file_name).stem
1403        file_extension = Path(source_file_name).suffix
1404
        # Append ".html" to the source code file name.  This will be the syntax highlighted code listing.
1406        syntax_highlighted_file_name = f"{source_file_name}.html"
1407
1408        # In the special case of Jupyter notebooks, use the Jupyter to HTML converter.
1409        if file_extension == ".ipynb":
1410            if dry_run:
1411                logging.debug(f"Dry run only:  don't generate the syntax highlighted file {syntax_highlighted_file_name:s}")
1412                return None
1413            # Python manual recommends using the run() command instead of Popen().  See https://docs.python.org/3/library/subprocess.html#subprocess.run
1414            try:
1415                shell_command = f"jupyter nbconvert {source_file_name} --to html --output {syntax_highlighted_file_name}"
                # Throw an exception if we can't run the process.
                # Capture standard output and standard error and send them to /dev/null so they don't print to the command line when running this script.
                # Since the shell command is a single string, pass it directly and use shell=True in the run() command.
                subprocess.run(shell_command, shell=True, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
1420            except subprocess.CalledProcessError as detail: 
1421                logging.error(f"Cannot convert the Jupyter file {source_file_name:s} to a syntax highlighted file: {str(detail):s}  Aborting...")
1422                return None
1423        # Otherwise, use the Pygments syntax highlighter.
1424        else:
1425            # First choose the language lexer from the file name itself if there's no extension.
1426            # Dotted file names are treated as the entire file name.
1427            match file_name_without_extension:
1428                case "makefile":
1429                    lexer = MakefileLexer()
1430                case ".bash_profile"|".bashrc"|".bash_logout":
1431                    lexer = BashLexer()
1432                case ".vimrc":
1433                    lexer = VimLexer()
1434                case ".gitignore_global" | ".gitignore" | ".gitconfig":
1435                    lexer = OutputLexer() # No formatting.
1436                case _:
1437                    # Choose the language lexer from the file extension.  Web stuff first, then programming languages.
1438                    match file_extension:
1439                        case ".html":
1440                            lexer = HtmlLexer()
1441                        case ".css":
1442                            lexer = CssLexer()
1443                        case ".js":
1444                            lexer = JavascriptLexer()
1445                        case ".sh":
1446                            lexer = BashLexer()
1447                        case ".py":
1448                            lexer = PythonLexer()
1449                        case ".c" | ".h":
1450                            lexer = CLexer()
1451                        case ".hpp" | ".cpp":
1452                            lexer = CppLexer()
1453                        case ".lsp":
1454                            lexer = CommonLispLexer()
1455                        case ".for" | ".FOR" | ".f":
1456                            lexer = FortranFixedLexer()  # Fixed format FORTRAN, not FORTRAN 90.
1457                        case ".txt" | ".dat":            # Generic data file;  no formatting.
1458                            lexer = OutputLexer()
1459                        case ".tex":
1460                            lexer = TexLexer()           # LaTeX, TeX, or related files.
1461                        case ".m":
1462                            lexer = MatlabLexer()
1463                        case ".yaml":
1464                            lexer = YamlLexer()
1465                        case _:
1466                            logging.error(f"Can't find a lexer for file {source_file_name}.  Cannot generate a syntax highlighted source listing.  Aborting...")
1467                            return None
1468
1469            # Read the source code file into a single string.
1470            try:
1471                with open(source_file_name, 'r') as fp:
1472                    source_file_string = fp.read()
            except OSError as detail:
                logging.error(f"Cannot read the source code file {source_file_name:s} for syntax highlighting: {str(detail):s}  Aborting...")
                return None
1475
1476            # Top level Pygments function generates the HTML for the highlighted code.
1477            highlighted_html_source_file_string = highlight(source_file_string, lexer, HtmlFormatter(linenos="inline"))
1478
1479            # The style sheet is always the same for all languages.
1480            style_sheet = HtmlFormatter().get_style_defs('.highlight')
1481
1482            # Write out the syntax colored file.
1483            if dry_run:
1484                logging.debug(f"Dry run only:  don't generate the syntax highlighted file {syntax_highlighted_file_name:s}")
1485                return None
1486            else:
1487                try:
1488                    # Write out the highlighted code listing in HTML with CSS style sheet attached.
1489                    with open(syntax_highlighted_file_name, 'w') as fp:
1490                        fp.write(UserSettings.BASIC_HTML_BEGIN)
1491                        fp.write(style_sheet)
1492                        fp.write(UserSettings.BASIC_HTML_MIDDLE)
1493                        fp.write(highlighted_html_source_file_string)
1494                        fp.write(UserSettings.BASIC_HTML_END)
1495                except OSError as detail:
1496                    logging.error(f"Cannot write the syntax highlighted file {syntax_highlighted_file_name:s}: {str(detail):s}  Aborting...")
1497        # ------- end Pygments syntax highlighter
1498
1499        # Set the syntax highlighted code file to the same modification and access time and date as the source file.
1500        file_stat = os.stat(source_file_name)
1501        os.utime(syntax_highlighted_file_name, (file_stat[stat.ST_ATIME], file_stat[stat.ST_MTIME]))
1502
        # Are the original source and the syntax highlighted code the same date and time?
        dates_and_times_source_file_name             = LocalWebSite.get_file_date_time(source_file_name)
        dates_and_times_syntax_highlighted_file_name = LocalWebSite.get_file_date_time(syntax_highlighted_file_name)
1506        if dates_and_times_source_file_name != dates_and_times_syntax_highlighted_file_name:
1507            logging.error(f"Source code and syntax highlighted source don't have the same times.  source time = {dates_and_times_source_file_name.ctime():s} syntax highlighted time = {dates_and_times_syntax_highlighted_file_name.ctime():s} Aborting...")
1508            return None
1509
1510        logging.debug(f"Generated a syntax highlighted listing {syntax_highlighted_file_name:s} for the source code file {source_file_name:s} with the same time and date = {dates_and_times_source_file_name.ctime():s}")
1511        return syntax_highlighted_file_name
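        # Minimal Pygments sketch of the highlighting step used above:
        #
        #     from pygments import highlight
        #     from pygments.lexers import PythonLexer
        #     from pygments.formatters import HtmlFormatter
        #
        #     html = highlight("print('hello')", PythonLexer(), HtmlFormatter(linenos="inline"))
        #     css  = HtmlFormatter().get_style_defs('.highlight')
        #
        # The HTML fragment and the CSS style sheet are then wrapped in the BASIC_HTML_* boilerplate and written out.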
1512
1513# ----------------------------------------------------------------------------
1514#   Subclass which knows about the remote web site.
1515# ----------------------------------------------------------------------------
1516
1517class RemoteWebSite(WebSite):
1518    """Walk the remote web directory on a web server down from the root.
1519       Use FTP commands:
1520           https://en.wikipedia.org/wiki/List_of_FTP_commands
1521       Use the Python ftp library:
1522           https://docs.python.org/3/library/ftplib.html
1523    """
1524
1525    def __init__(self, user_settings):
1526        """Connect to FTP server and list all files and directories."""
1527
1528        # Root directory of FTP server.
1529        self.root_dir = user_settings.FTP_ROOT_NAME
1530        logging.debug(f"Set the remote web site ftp root dir = {self.root_dir:s}")
1531
1532        # Connect to FTP server and log in.
1533        try:
1534            # Turn on for troubleshooting ftp on the remote server.
1535            # self.ftp.set_debuglevel( 2 )
1536            # print( f"\nTrying ftp login to server name = {user_settings.SERVER_NAME} user name = {user_settings.USER_NAME} password =  {user_settings.PASSWORD_NAME}\n")
1537            self.ftp = ftplib.FTP(user_settings.SERVER_NAME)
1538            self.ftp.login(user_settings.USER_NAME, user_settings.PASSWORD_NAME)
1539        # Catch all exceptions with the parent class Exception:  all built-in,
1540        # non-system-exiting exceptions are derived from this class.
1541        except Exception as detail:
1542            # Extract the string message from the exception class with str().
1543            logging.error(f"Remote web site cannot login to ftp server: {str(detail):s}  Aborting...")
1544            raise UpdateWebException("Problem accessing remote web site.  See the log file for details.  Aborting... ") from detail
1545        else:
1546            logging.debug("Remote web site ftp login succeeded.")
1547
1548        logging.debug(f"Remote web site ftp welcome message {self.ftp.getwelcome():s}")
1549
1550        # Initialize the superclass.
1551        WebSite.__init__(self, user_settings)
1552
1553    def go_to_root_dir(self, root_dir):
1554        """Go to the root directory"""
1555
1556        try:
1557            # Go to the root directory.
1558            self.ftp.cwd(root_dir)
1559            logging.debug(f"ftp root directory (requested) = {self.root_dir:s}")
1560
1561            # Read it back.
1562            self.root_dir = self.ftp.pwd()
1563            logging.debug(f"ftp root directory (read back from server): {self.root_dir:s}")
1564
1565        except Exception as detail:
1566            logging.error(f"go_to_root_dir(): \tCannot ftp cwd or pwd root dir {root_dir:s} {str(detail):s} Aborting...")
1567            raise UpdateWebException("Problem accessing remote web site.  See the log file for details.  Aborting... ") from detail
1568
1569    def get_root_dir(self):
1570        """Get the root directory name"""
1571
1572        return self.root_dir
1573
1574    def finish(self):
1575        """Quit remote web site"""
1576        logging.debug(f"Finished with WebSite object of class {type(self)}")
1577        try:
1578            self.ftp.quit()
1579        except Exception as detail:
1580            logging.error(f"Cannot ftp quit: {str(detail):s}")
1581
1582    def one_level_down(self, d):
1583        """List files and directories in a subdirectory using ftp"""
1584
1585        directories = []
1586        files = []
1587
1588        try:
1589            # ftp listing from current dir.
1590            logging.debug(f"RemoteWebSite.one_level_down():  \tftp cwd: {d:s}")
1591            self.ftp.cwd(d)
1592            dir_list = []
1593
1594            # Use the nonstandard -a option in LIST to show all the hidden .* files.
1595            # But now we have the problem that . and .. (the UNIX current and parent directories) will be in the ftp list of directories.
1596            # Note the second argument requires a callback function.
1597            self.ftp.retrlines('LIST -a', dir_list.append)
1598
1599        except Exception as detail:
1600            logging.error(f"one_level_down(): \tCannot ftp cwd or ftp LIST dir {d:s}:  {str(detail):s} Aborting...")
1601            raise UpdateWebException("Problem accessing remote web site.  See the log file for details.  Aborting... ") from detail
1602
1603        for line in dir_list:
1604            logging.debug(f"RemoteWebSite.one_level_down():  \tftp LIST: {line:s}")
1605
1606            # Line should at least have the minimum FTP information.
1607            if len(line) >= self.user_settings.MIN_FTP_LINE_LENGTH:
1608                # Parse the FTP LIST and put the pieces into file_info.
1609                file_info = self.parse_ftp_list(line)
1610                logging.debug(f"RemoteWebSite.one_level_down():  \tftp parsed file information: {file_info[self.user_settings.FILE_NAME]:s}")
1611
1612                # Skip over the UNIX hidden files for current and parent directories . and ..  Also skip over any NULL file names.
1613                if file_info[self.user_settings.FILE_NAME] == "" or file_info[self.user_settings.FILE_NAME] == "." or file_info[self.user_settings.FILE_NAME] == "..":
1614                    logging.debug(f"RemoteWebSite.one_level_down():  \tftp skipping the file name: {file_info[self.user_settings.FILE_NAME]:s}")
1615                    pass
1616                # For a directory, prefix the full path prefix from the root to the directory name and add to the directory list.
1617                elif file_info[self.user_settings.FILE_TYPE] == FileType.DIRECTORY:
1618                    dirname = self.append_root_dir( d, file_info[self.user_settings.FILE_NAME])
1619                    logging.debug(f"RemoteWebSite.one_level_down():  \tftp dir (full path): {dirname:s}")
1620                    directories.append(dirname)
1621                # For a file:  Add the full path prefix from the root to the file name.
1622                else:
1623                    file_info[self.user_settings.FILE_NAME] = self.append_root_dir( d, file_info[self.user_settings.FILE_NAME])
1624                    logging.debug(f"RemoteWebSite.one_level_down():  \tftp file (full path):\
1625                        {file_info[self.user_settings.FILE_NAME]:s}")
1626                    files.append(file_info)
1627            else:
1628                logging.error(f"RemoteWebSite.one_level_down():  \tFTP LIST line is too short:  {line:s}")
1629
1630        directories.sort()
1631        files.sort()
1632
1633        return directories, files
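        # The retrlines() callback just collects the raw listing lines, e.g.
        #
        #     dir_list = []
        #     self.ftp.retrlines('LIST -a', dir_list.append)
        #     # dir_list might now hold lines such as
        #     #     'drwxr-xr-x    2 1000       free             4096 Jul 18 16:36 ReadingList'
        #
        # Each line is then parsed by parse_ftp_list() below.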
1634
1635    def modtime(self, f):
1636        """Get the modification time of a file via ftp.  Return 0 if ftp cannot get it."""
1637        modtime = 0
1638
1639        try:
1640            response = self.ftp.sendcmd('MDTM ' + f)
            # MDTM returns the last modified time of the file in the format
            #     "213 YYYYMMDDhhmmss"
            # where MM is 01 to 12, DD is 01 to 31, hh is 00 to 23, mm is 00 to 59, and ss is 00 to 59.
            # The error responses are 550 if the info is not available, and 500 or 501 if the command cannot
            # be parsed.
1646            if response[:3] == '213':
1647                modtime = response[4:]
1648        except ftplib.error_perm as detail:
1649            logging.error(f"Cannot get file modification time from the ftp server: {str(detail):s} Aborting...")
1650            modtime = 0
1651
1652        return modtime
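        # For reference, a raw MDTM reply could be converted to a datetime like this
        # (assuming the server returns exactly "213 YYYYMMDDhhmmss"):
        #
        #     response = '213 20250101063000'
        #     datetime.datetime.strptime(response[4:], "%Y%m%d%H%M%S")   # -> datetime(2025, 1, 1, 6, 30)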
1653
1654    def parse_ftp_list(self, line):
1655        """Parse the ftp file listing and return file name, datetime and file size.
1656
1657           An FTP LIST command will give output which looks like this for a file:
1658
1659               -rw-r--r--    1 1000       free             4084 Jul 18 16:55 sparkCoil.png
1660
1661           and for a directory:
1662
1663                drwxr-xr-x    2 1000       free             4096 Jul 18 16:36 ReadingList
1664
1665           FTP uses UTC for its listings; the conversion to local time is done by the OS.
1666           We can have problems on New Year's Eve.  For example, the local file date/time is
1667
1668              Mon Jan  1 06:23:12 2018
1669
1670           But the remote file date/time from FTP listing doesn't show a year even though we
1671           know it was written to the server in 2017.
1672
1673               Mon Dec 31 03:02:00
1674
1675           So we default the remote file year to current year 2018 and get
1676
1677               Mon Dec 31 03:02:00 2018
1678
1679           Now we think that the remote file is newer by 363.860278 days.
1680        """
1681
1682        # Find out if we've a directory or a file.
1683        if line[0] == 'd':
1684            dir_or_file = FileType.DIRECTORY
1685        else:
1686            dir_or_file = FileType.FILE
1687
1688        pattern = self.user_settings.FTP_LISTING
1689
1690        # Sensible defaults.
1691        filesize = 0
1692        filename = ""
1693        # Default the time to midnight.
1694        hour = 0
1695        minute = 0
1696        seconds = 0
1697        # Default the date to Jan 1 of the current year.
1698        month = 1
1699        day = 1
1700        year = WebSite.get_current_year()
1701
1702        # Extract time and date from the ftp listing.
1703        match = pattern.search(line)
1704
1705        if match:
1706            filesize = int(match.group('bytes'))
1707            month = self.user_settings.monthToNumber[match.group('mon')]
1708            day = int(match.group('day'))
1709
1710            # Remote file listing contains the year.  The FTP listing will omit the hour and minute.
1711            if match.group('year'):
1712                year = int(match.group('year'))
1713                logging.debug(f"ftp has year = {year} but is probably missing hour and minute")
1714            else:
1715                # Remote file listing omits the year.  Default the year to the current UTC time year.
1716                # That may be incorrect (see comments above).
1717                year = WebSite.get_current_year()
1718                logging.debug(f"ftp is missing the year;  use the current year = {year}")
1719
1720            # If the FTP listing has the hour and minute, it will omit the year.
1721            if match.group('hour') and match.group('min'):
1722                hour = int(match.group('hour'))
1723                minute = int(match.group('min'))
1724                logging.debug(f"ftp has hour = {hour} and minute = {minute} so is probably missing the year")
1725
1726            filename = match.group('filename')
1727
1728        # Package up the time and date nicely.
1729        # Note if we didn't get any matches, we'll default the remote date and
1730        # time to Jan 1 midnight of the current year.
1731        d = datetime.datetime(year, month, day, hour, minute, seconds)
1732
1733        return [filename, dir_or_file, d, filesize]
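        # For the sample file line in the docstring above,
        #
        #     '-rw-r--r--    1 1000       free             4084 Jul 18 16:55 sparkCoil.png'
        #
        # this roughly parses into
        #
        #     ['sparkCoil.png', FileType.FILE, datetime.datetime(<current year>, 7, 18, 16, 55), 4084]
        #
        # assuming the FTP_LISTING pattern captures the groups bytes, mon, day, hour, min, year and filename.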
1734
1735# ----------------------------------------------------------------------------
1736#  Class for synchronizing local and remote web sites.
1737# ----------------------------------------------------------------------------
1738
1739class UpdateWeb(object):
1740    """Given previously scanned local and remote directories, update the remote website."""
1741
1742    def __init__(
1743            self,
1744            user_settings,
1745            local_directory_list,
1746            local_file_info,
1747            remote_directory_list,
1748            remote_file_info):
1749        """Connect to remote site.  Accept previously scanned local and remote files and directories."""
1750
1751        # Initialize from args.
1752        self.user_settings = user_settings
1753        self.local_directory_list = local_directory_list
1754        self.remote_directory_list = remote_directory_list
1755        self.local_file_info = local_file_info
1756        self.remote_file_info = remote_file_info
1757
1758        # Initialize defaults.
1759        self.local_files_list = []
1760        self.remote_files_list = []
1761        self.local_file_to_size = {}
1762        self.local_file_to_date_time = {}
1763        self.remote_file_to_date_time = {}
1764        self.local_only_dirs = []
1765        self.local_only_files = []
1766        self.remote_only_dirs = []
1767        self.remote_only_files = []
1768        self.common_files = []
1769
1770        # Connect to FTP server and log in.
1771        try:
1772            self.ftp = ftplib.FTP(self.user_settings.SERVER_NAME)
1773            self.ftp.login(self.user_settings.USER_NAME, self.user_settings.PASSWORD_NAME)
1774        except Exception as detail:
1775            logging.error(f"Cannot login to ftp server: {str(detail):s} Aborting...")
1776            raise UpdateWebException("Problem accessing remote web site.  See the log file for details.  Aborting... ") from detail
1777        else:
1778            logging.debug("ftp login succeeded.")
1779
1780        logging.debug(f"ftp server welcome message:  {self.ftp.getwelcome():s}")
1781
1782        # Local root directory.
1783        self.local_root_dir = self.user_settings.local_root_dir
1784        logging.debug(f"Local root directory: {self.local_root_dir:s}")
1785
1786        # Root directory of FTP server.
1787        self.ftp_root_dir = self.user_settings.FTP_ROOT_NAME
1788        logging.debug(f"ftp root directory (requested) = {self.ftp_root_dir:s}")
1789
        # Transform KB string to integer bytes.  e.g. "200" => 204800
1791        self.file_size_limit = int(self.user_settings.FILE_SIZE_LIMIT_NAME) * 1024
1792
1793        try:
1794            # Go to the root directory.
1795            self.ftp.cwd(self.ftp_root_dir)
1796
1797            # Read it back.
1798            self.ftp_root_dir = self.ftp.pwd()
1799            logging.debug(f"ftp root directory (read back from server): {self.ftp_root_dir:s}")
1800        except Exception as detail:
1801            logging.error(f"UpdateWeb(): \tCannot ftp cwd or ftp LIST dir {self.ftp_root_dir:s} {str(detail):s} Aborting...")
1802
1803    def append_root_dir(self, root_dir, name):
1804        """Append the root directory to a path"""
1805
1806        # e.g. root = /, and name = Art/foo.txt yields /Art/foo.txt
1807        # but root = /Sean, and name = Art/foo.txt yields /Sean/Art/foo.txt
1808        if root_dir == self.user_settings.DEFAULT_ROOT_DIR:
1809            return root_dir + name
1810        else:
1811            return root_dir + "/" + name
1812
1813    def file_info(self):
1814        """Create lists of file names from the file information.  Also create dictionaries which map file names onto
1815        dates, times, and sizes."""
1816
1817        # Extract file names.
1818        self.local_files_list = [
1819            file_info[self.user_settings.FILE_NAME] for file_info in self.local_file_info]
1820        self.remote_files_list = [
1821            file_info[self.user_settings.FILE_NAME] for file_info in self.remote_file_info]
1822
1823        # Use a dictionary comprehension to create key/value pairs, 
1824        #     (file name, file date/time)
1825        # which map file names onto date/time.
1826        self.local_file_to_date_time = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_DATE_TIME] for file_info in self.local_file_info}
1827        self.remote_file_to_date_time = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_DATE_TIME] for file_info in self.remote_file_info}
1828
1829        # Dictionary comprehension creates a mapping of local file names onto file sizes.
1830        self.local_file_to_size = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_SIZE] for file_info in self.local_file_info}
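        # For instance, a single local file_info entry of the (assumed) form
        #
        #     ["Art/foo.txt", FileType.FILE, datetime.datetime(2025, 1, 1, 12, 0), 1024]
        #
        # contributes the pair "Art/foo.txt" -> datetime(2025, 1, 1, 12, 0) to local_file_to_date_time
        # and the pair "Art/foo.txt" -> 1024 to local_file_to_size.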
1831
1832    def update(self):
1833        """Scan through the local website, cleaning it up.
1834        Go to remote website on my servers and synchronize all files."""
1835
1836        self.file_info()
1837
1838        # Which files and directories are different.
1839        self.changes()
1840
        # Synchronize the remote web site with the local web site.
1842        self.synchronize()
1843
1844    def changes(self):
1845        """Find the set of different directories and files on local and remote."""
1846
1847        # Add all directories which are only on local to the dictionary.
1848        dir_to_type = {
1849            d: FileType.ON_LOCAL_ONLY for d in self.local_directory_list}
1850
1851        # Scan through all remote directories, adding those only on remote or
1852        # on both.
1853        for d in self.remote_directory_list:
1854            if d in dir_to_type:
1855                dir_to_type[d] = FileType.ON_BOTH_LOCAL_AND_REMOTE
1856            else:
1857                dir_to_type[d] = FileType.ON_REMOTE_ONLY
1858
1859        # Add all files which are only on local to the dictionary.
1860        file_to_type = {
1861            f: FileType.ON_LOCAL_ONLY for f in self.local_files_list}
1862
1863        # Scan through all remote files, adding those only on remote or on
1864        # both.
1865        for f in self.remote_files_list:
1866            if f in file_to_type:
1867                file_to_type[f] = FileType.ON_BOTH_LOCAL_AND_REMOTE
1868            else:
1869                file_to_type[f] = FileType.ON_REMOTE_ONLY
1870
1871        logging.debug("Raw dictionary dump of directories")
1872        for k, v in dir_to_type.items():
1873            logging.debug(f"\t dir:  {str(k):s}  type: {str(v):s}")
1874
1875        logging.debug("Raw dictionary dump of files")
1876        for k, v in file_to_type.items():
1877            logging.debug(f"\t file: {str(k):s}  type: {str(v):s}")
1878
1879        # List of directories only on local.  Keep the ordering.
1880        self.local_only_dirs = [
1881            d for d in self.local_directory_list if dir_to_type[d] == FileType.ON_LOCAL_ONLY]
1882
1883        # List of directories only on remote.  Keep the ordering.
1884        self.remote_only_dirs = [
1885            d for d in self.remote_directory_list if dir_to_type[d] == FileType.ON_REMOTE_ONLY]
1886
1887        # We don't care about common directories, only their changed files, if
1888        # any.
1889
1890        # List of files only on local.  Keep the ordering.
1891        self.local_only_files = [
1892            f for f in self.local_files_list if file_to_type[f] == FileType.ON_LOCAL_ONLY]
1893
1894        # List of files only on remote.  Keep the ordering.
1895        self.remote_only_files = [
1896            f for f in self.remote_files_list if file_to_type[f] == FileType.ON_REMOTE_ONLY]
1897
1898        # List of common files on both local and remote.  Keep the ordering.
1899        self.common_files = [
1900            f for f in self.local_files_list if file_to_type[f] == FileType.ON_BOTH_LOCAL_AND_REMOTE]
1901
1902        logging.debug("*** Directories only on local ******************************")
1903        for d in self.local_only_dirs:
1904            logging.debug(f"\t {d:s}")
1905
1906        logging.debug("*** Directories only on remote ******************************")
1907        for d in self.remote_only_dirs:
1908            logging.debug(f"\t {d:s}")
1909
1910        logging.debug("*** Files only on local ******************************")
1911        for f in self.local_only_files:
1912            logging.debug(f"\t {f:s}")
1913
1914        logging.debug("*** Files only on remote ******************************")
1915        for f in self.remote_only_files:
1916            logging.debug(f"\t {f:s}")
1917
1918        logging.debug("*** Common files ******************************")
1919        for f in self.common_files:
1920            logging.debug(f"name {f:s}")
1921            logging.debug(f"\tlocal time {self.local_file_to_date_time[f].ctime():s}")
1922            logging.debug(f"\tremote time {self.remote_file_to_date_time[f].ctime():s}")
1923
1924    def synchronize(self):
1925        """Synchronize files and subdirectories in the remote directory with the local directory."""
1926
1927        # If we have the same files in local and remote, compare their times
1928        # and dates.
1929        for f in self.common_files:
1930            local_file_time = self.local_file_to_date_time[f]
1931            remote_file_time = self.remote_file_to_date_time[f]
1932
1933            # What's the time difference?
1934            time_delta = remote_file_time - local_file_time
1935            # How much difference, either earlier or later?
1936            seconds_different = abs(time_delta.total_seconds())
1937            minutes_different = seconds_different / 60.0
1938            hours_different = minutes_different / 60.0
1939            days_different = hours_different / 24.0
1940
1941            # Assume no upload initially.
1942            upload_to_host = False
1943
1944            logging.debug(f"Common file:  {f:s}.")
1945
1946            # Remote file time is newer.
            # Note that some relative file paths can be quite long (around 200 characters), for example:
            #     Mathematics/AbstractAlgebra/PrimitivePolynomials/Project/Build/PrimpolyXCode/Primpoly/Primpoly.xcodeproj/project.xcworkspace/xcuserdata/seanoconnor.xcuserdatad/UserInterfaceState.xcuserstate
1949
1950            if remote_file_time > local_file_time:
                # Remote file time is MUCH newer:  suspect the time is out of joint on the server, so upload the local file to be safe.
1952                if minutes_different >= self.user_settings.MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD:
1953                    logging.error(f"Remote file {f:s} is MUCH newer by {minutes_different:8.1f} minutes [which exceeds the threshold = {self.user_settings.MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD} minutes]. Upload the file to be safe.")
1954                    logging.error(f"\tlocal time {local_file_time.ctime():s}")
1955                    logging.error(f"\tremote time {remote_file_time.ctime():s}")
1956
1957                    # Set the local file to the current time.
1958                    full_file_name = self.append_root_dir(
1959                        self.local_root_dir, f)
1960                    if os.path.exists(full_file_name):
1961                        # Change the access and modify times of the file to the current time.
1962                        os.utime(full_file_name, None)
1963                        logging.error(f"Touching local file {full_file_name:s} to make it the current time")
1964
1965                    upload_to_host = True
1966                # Remote file time is newer, but not by much.  Let's just assume a slight time mismatch on the server.  Don't upload.
1967                else:
1968                    logging.warning(f"Remote file {f:s} is only SLIGHTLY newer by {seconds_different:8.1f} seconds.  Probably just inaccurate time/date on the server.  Wait -- don't upload the file yet.")
1969                    logging.warning(f"\tlocal time {local_file_time.ctime():s}")
1970                    logging.warning(f"\tremote time {remote_file_time.ctime():s}")
1971                    upload_to_host = False
1972
1973            # Local file time is newer.
1974            elif local_file_time > remote_file_time:
                # Local file time is newer than the remote file by more than the threshold.  So we are pretty sure the local file really got changed vs the server file.
                if minutes_different >= self.user_settings.MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD:
                    logging.warning(f"Local file {f:20s} is newer by {minutes_different:8.1f} minutes [which exceeds the threshold = {self.user_settings.MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD} minutes].  Uploading to remote server.")
1978                    logging.warning(f"\tlocal time {local_file_time.ctime():s}")
1979                    logging.warning(f"\tremote time {remote_file_time.ctime():s}")
1980                    upload_to_host = True
1981                else:
1982                    logging.warning(f"Local file {f:20s} is BARELY newer by {seconds_different:8.1f} seconds.  Probably just inaccurate time/date on the server.  Wait -- don't upload the file yet.")
1983                    logging.warning(f"\tlocal time {local_file_time.ctime():s}")
1984                    logging.warning(f"\tremote time {remote_file_time.ctime():s}")
1985                    upload_to_host = False
1986
1987            # Cancel the upload if the file is too big for the server.
1988            size = self.local_file_to_size[f]
1989            if size >= self.file_size_limit:
1990                logging.error(f"upload():  Skipping upload of file {f:s} of size {size:d}; too large for server, limit is {self.file_size_limit:d} bytes")
1991                upload_to_host = False
1992
1993            # Finally do the file upload.
1994            if upload_to_host:
1995                logging.debug(f"Uploading changed file {f:s}")
1996                # Suppress newline to keep the message to the console more compact.  Flush output buffer, so we can see the message right away.
1997                print(f"Uploading changed file {f:s}...  ", end='', flush=True)
1998                self.upload(f)
1999
2000        # Remote directory is not in local.  Delete it.
2001        for d in self.remote_only_dirs:
2002            logging.debug(f"Deleting remote only directory {d:s}")
2003            print(f"Deleting remote only directory {d:s}...  ", end='', flush=True)
2004            self.rmdir(d)
2005
2006        # Local directory missing on remote.  Create it.
2007        # Due to breadth first order scan, we'll create parent directories
2008        # before child directories.
2009        for d in self.local_only_dirs:
2010            logging.debug(f"Only on local.  Creating new remote dir {d:s}.")
2011            print(f"Creating new remote directory {d:s}...  ", end='', flush=True)
2012            self.mkdir(d)
2013
2014        # Local file missing on remote.  Upload it.
2015        for f in self.local_only_files:
2016            logging.debug(f"Local only file.  Uploading {f:s} to remote.")
2017
2018            #  But cancel the upload if the file is too big for the server.
2019            size = self.local_file_to_size[f]
2020            if size >= self.file_size_limit:
2021                logging.error(f"upload():  Skipping upload of file {f:s} of size {size:d};"
2022                              f" too large for server, limit is {self.file_size_limit:d} bytes")
2023            else:
2024                logging.debug(f"Uploading new file {f:s}")
2025                print(f"Uploading new file {f:s}...  ", end='', flush=True)
2026                self.upload(f)
2027
2028        # Remote contains a file not present on the local.  Delete the file.
2029        for f in self.remote_only_files:
2030            logging.debug(f"Remote only file.  Deleting remote file {f:s}.")
2031            print(f"Deleting remote file {f:s}...  ", end='', flush=True)
2032            self.del_remote(f)
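        # The time comparison above is plain datetime arithmetic, e.g.
        #
        #     delta = datetime.datetime(2025, 1, 1, 12, 0) - datetime.datetime(2025, 1, 1, 11, 0)
        #     abs(delta.total_seconds()) / 60.0   # -> 60.0 minutes
        #
        # which is then compared against the MINUTES_NEWER_* thresholds from the user settings.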
2033
2034    def del_remote(self, relative_file_path):
2035        """Delete a file using ftp."""
2036
2037        logging.debug(f"del_remote():  \trelative file path name: {relative_file_path:s}")
2038
2039        # Parse the relative file path into file name and relative directory.
2040        relative_dir, file_name = os.path.split(relative_file_path)
2041        logging.debug(f"del_remote():  \tfile name: {file_name:s}")
2042        logging.debug(f"del_remote():  \trelative dir: {relative_dir:s}")
2043        logging.debug(f"del_remote():  \tremote root dir: {self.ftp_root_dir:s}")
2044
2045        try:
2046            # Add the remote root path and go to the remote directory.
2047            remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
2048            logging.debug(f"del_remote():  \tftp cd remote dir: {remote_dir:s}")
2049            self.ftp.cwd(remote_dir)
2050        except Exception as detail:
2051            logging.error(f"del_remote():  \tCannot ftp chdir: {str(detail):s}  Skipping...")
2052        else:
2053            try:
2054                logging.debug(f"del_remote():  \tftp rm: {file_name:s}")
2055
2056                # Don't remove zero length file names.
2057                if len(file_name) > 0:
2058                    self.ftp.delete(file_name)
2059                else:
2060                    logging.warning( "fdel_remote():  skipping ftp delete;  file NAME {file_name:s} had zero length")
2061            except Exception as detail:
2062                logging.error(f"del_remote():  \tCannot ftp rm: {str(detail):s}")
2063
2064    def mkdir(self, relative_dir):
2065        """Create new remote directory using ftp."""
2066
2067        logging.debug(f"mkdir():  \trelative dir path name: {relative_dir:s}")
2068        logging.debug(f"mkdir():  \tremote root dir: {self.ftp_root_dir:s}")
2069
2070        # Parse the relative dir path into prefix dir and suffix dir.
2071        path, d = os.path.split(relative_dir)
2072        logging.debug(f"mkdir():  \tremote prefix dir: {path:s}")
2073        logging.debug(f"mkdir():  \tremote dir:  {d:s}")
2074
2075        try:
2076            # Add the remote root path and go to the remote directory.
2077            remote_dir = self.append_root_dir(self.ftp_root_dir, path)
2078            logging.debug(f"mkdir():  \tftp cd remote dir: {remote_dir:s}")
2079            self.ftp.cwd(remote_dir)
2080        except Exception as detail:
2081            logging.error(f"mkdir():  \tCannot ftp chrdir: {str(detail):s}  Skipping...")
2082        else:
2083            try:
2084                logging.debug(f"mkdir():  \tftp mkd: {d:s}")
2085                self.ftp.mkd(d)
2086            except Exception as detail:
2087                logging.error(f"mkdir():  \tCannot ftp mkdir: {str(detail):s}")
2088
2089    def rmdir(self, relative_dir):
2090        """Delete an empty directory using ftp."""
2091
2092        logging.debug(f"rmdir():  \tintermediate dir path name: {relative_dir:s}")
2093        logging.debug(f"rmdir():  \tremote root dir: {self.ftp_root_dir:s}")
2094
2095        # Parse the relative dir path into prefix dir and suffix dir.
2096        path, d = os.path.split(relative_dir)
2097        logging.debug(f"rmdir():  \tremote prefix dir: {path:s}")
2098        logging.debug(f"rmdir():  \tremote dir:  {d:s}")
2099
2100        try:
2101            # Add the remote root path and go to the remote directory.
2102            remote_dir = self.append_root_dir(self.ftp_root_dir, path)
2103            logging.debug(f"rmdir():  \tftp cd remote dir: {remote_dir:s}")
2104            self.ftp.cwd(remote_dir)
2105        except Exception as detail:
2106            logging.error(f"rmdir():  \tCannot ftp chdir: {str(detail):s}  Skipping...")
2107        else:
2108            try:
2109                logging.debug(f"rmdir():  \tftp rmd: {d:s}")
2110                self.ftp.rmd(d)
2111            except Exception as detail:
2112                logging.error(f"rmdir():  \tCannot ftp rmdir dir {d:s}: {str(detail):s}.  Directory is probably not empty.  Do a manual delete.")
2113
2114    def download(self, relative_file_path):
2115        """Download a binary file using ftp."""
2116
2117        logging.debug(f"download():  \tfile name: {relative_file_path:s}")
2118
2119        # Parse the relative file path into file name and relative directory.
2120        relative_dir, file_name = os.path.split(relative_file_path)
2121        logging.debug(f"download():  \tfile name: {file_name:s}")
2122        logging.debug(f"download():  \trelative dir: {relative_dir:s}")
2123        logging.debug(f"download():  \troot dir: {self.ftp_root_dir:s}")
2124
2125        # Add the remote root path and go to the remote directory.
2126        remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
2127        logging.debug(f"download():  \tftp cd remote dir: {remote_dir:s}")
2128
2129        try:
2130            self.ftp.cwd(remote_dir)
2131        except Exception as detail:
2132            logging.error(f"download():  \tCannot ftp chdir: {str(detail):s}  Skipping...")
2133        else:
2134            # Add the local root path to get the local file name.
2135            # Open local binary file to write into.
2136            local_file_name = self.append_root_dir(
2137                self.local_root_dir, relative_file_path)
2138            logging.debug(f"download():  \topen local file name: {local_file_name:s}")
2139            try:
2140                f = open(local_file_name, "wb")
2141                try:
                    # Call f.write() on each block of the binary file.
                    # Note:  the actual download transfer is currently disabled.
                    # self.ftp.retrbinary("RETR " + file_name, f.write)
                    pass
2145                except Exception as detail:
2146                    logging.error(f"download():  \tCannot cannot ftp retrbinary: {str(detail):s}")
2147                f.close()
2148            except IOError as detail:
2149                logging.error(f"download():  \tCannot open local file {local_file_name:s} for reading:  {str(detail):s}")
2150
2151    def upload(self, relative_file_path):
2152        """Upload  a binary file using ftp."""
2153
2154        logging.debug(f"upload():  \trelative file path name: {relative_file_path:s}")
2155
2156        # Parse the relative file path into file name and relative directory.
2157        relative_dir, file_name = os.path.split(relative_file_path)
2158        logging.debug(f"upload():  \tfile name: {file_name:s}")
2159        logging.debug(f"upload():  \trelative dir: {relative_dir:s}")
2160        logging.debug(f"upload():  \tremote root dir: {self.ftp_root_dir:s}")
2161
2162        # Add the remote root path and go to the remote directory.
2163        remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
2164        logging.debug(f"upload():  \tftp cd remote dir: {remote_dir:s}")
2165
2166        try:
2167            self.ftp.cwd(remote_dir)
2168        except Exception as detail:
2169            logging.error(f"upload():  \tCannot ftp chdir: {str(detail):s}  Skipping...")
2170        else:
2171            # Add the local root path to get the local file name.
2172            # Open local binary file to read from.
2173            local_file_name = self.append_root_dir(
2174                self.local_root_dir, relative_file_path)
2175            logging.debug(f"upload():  \topen local file name: {local_file_name:s}")
2176
2177            try:
2178                f = open(local_file_name, "rb")
2179                try:
2180                    # f.read() is called on each block of the binary file until
2181                    # EOF.
2182                    logging.debug(f"upload():  \tftp STOR file {file_name:s}")
2183                    self.ftp.storbinary("STOR " + file_name, f)
2184                except Exception as detail:
2185                    logging.error(f"upload():  \tCannot ftp storbinary: {str(detail):s}")
2186                f.close()
2187            except IOError as detail:
2188                logging.error(f"upload():  \tCannot open local file {local_file_name:s} for reading:  {str(detail):s}")
2189
2190    def finish(self):
2191        """Log out of an ftp session"""
2192        logging.debug(f"Finished with UpdateWeb object of class {type(self)}")
2193        try:
2194            self.ftp.quit()
2195        except Exception as detail:
2196            logging.error(f"Cannot ftp quit because {str(detail):s}")
2197
2198# ----------------------------------------------------------------------------
2199#  Main function
2200# ----------------------------------------------------------------------------
2201
2202def main(raw_args=None):
2203    """Main program.  Clean up and update my website."""
2204
2205    # Print the obligatory legal notice.
2206    print("""
    updateweb Version 7.4 - A Python utility program which maintains my web site.
2208    Copyright (C) 2007-2025 by Sean Erik O'Connor.  All Rights Reserved.
2209
2210    It deletes temporary files, rewrites old copyright lines and email address
2211    lines in source files, then synchronizes all changes to my web sites.
2212
2213    updateweb comes with ABSOLUTELY NO WARRANTY; for details see the
2214    GNU General Public License.  This is free software, and you are welcome
2215    to redistribute it under certain conditions; see the GNU General Public
2216    License for details.
2217    """)
2218
2219    # Put ALL the main code into a try block!
2220    try:
2221        # ---------------------------------------------------------------------
2222        #  Load default settings and start logging.
2223        # ---------------------------------------------------------------------
2224
2225        # Default user settings.
2226        user_settings = UserSettings()
2227
2228        print( f"Running main( {raw_args} ) Python version\
2229               {sys.version_info[0]:d}.{sys.version_info[1]:d}.{sys.version_info[2]:d}\
2230               local web directory\
2231               {user_settings.local_root_dir}\n")
2232        # Get command line options such as --verbose.  Pass them back as flags in
2233        # user_settings.
2234        CommandLineSettings(user_settings, raw_args)
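        # CommandLineSettings parses the command line options (such as --verbose) and records
        # them as boolean attributes (VERBOSE, CLEAN, UNITTEST, MATHJAX) on user_settings,
        # which the code below checks.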
2235
2236        # Load all unit test functions named test_* from UnitTest class, run the tests and exit.
2237        if user_settings.UNITTEST:
2238            suite = unittest.TestLoader().loadTestsFromTestCase(UnitTest)
2239            unittest.TextTestRunner(verbosity=2).run(suite)
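            # For reference, this is roughly equivalent to running the tests from the command
            # line (assuming this file is importable as the module updateweb; see the
            # __main__ docstring at the bottom of this file):
            #     python -m unittest -v updateweb.UnitTest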
2240            # We are done!
2241            print("  ...done!", flush=True)
2242            return
2243
2244        # Start logging to file.
2245        if user_settings.VERBOSE:
2246            # Turn on logging for DEBUG and higher:  DEBUG, INFO, WARNING, ERROR, CRITICAL messages.
2247            loglevel = logging.DEBUG
2248        else:
2249            # Turn on logging for WARNING and higher:  WARNING, ERROR and CRITICAL messages.
2250            loglevel = logging.WARNING
2251
2252        # Pick the log file name on the host.
2253        if user_settings.CLEAN:
2254            user_settings.LOGFILENAME = "/private/logLocal.txt"
2255        else:
2256            user_settings.LOGFILENAME = "/private/logRemote.txt"
2257
2258        # Default is to skip processing or uploading MathJax files in /mathjax to the server.
2259        if not user_settings.MATHJAX:
2260            user_settings.DIR_TO_SKIP += "|mathjax"
2261        else:
            mathJaxPostUploadingAdvice = \
                [ "Processing and uploading new or changed mathjax files.",
                  "If you are loading MathJax for the first time, don't forget to upload the file .htaccess manually using FileZilla or another ftp client.",
                  "FTP won't delete remote directories which are nonempty.  You might have to run this program several times to delete all subdirectories before the parent directory can be deleted.  Or you can delete them manually with FTP.",
                  "If using FileZilla for manual deletion, change your FreeServer settings:  Files->Site Manager->Transfer Settings->Limit number of simultaneous connections->Check the box.  This avoids ERROR 421 Too many connections.",
                  "\n"
                ]
            # Print the advice to the console and echo it to the debug log.
            for advice in mathJaxPostUploadingAdvice:
                print(advice, flush=True)
                logging.debug(advice)
2279
2280        # Configure the logging and start it.
        logging.basicConfig(level=loglevel,
                            format='%(asctime)s %(levelname)-8s %(message)s',
                            datefmt='%a, %d %b %Y %H:%M:%S',
                            filename=user_settings.local_root_dir + user_settings.LOGFILENAME,
                            filemode='w')
        logging.debug("********** Begin logging")
2283
2284        # ---------------------------------------------------------------------
2285        #  Scan the local website, finding out all files and directories.
2286        # ---------------------------------------------------------------------
2287
2288        # Suppress newline to keep the message to the console more compact.  Flush output buffer, so we can see the message right away.
2289        print(f"Scanning the local web site from the root dir = {user_settings.local_root_dir}...  ", end='', flush=True)
2290        logging.debug(f"========================== Scanning the local web site from the root dir = {user_settings.local_root_dir}")
2291
2292        local = LocalWebSite(user_settings)
2293        local.scan()
2294
2295        # ---------------------------------------------------------------------
2296        # Clean up local website.
2297        # ---------------------------------------------------------------------
2298
2299        # Clean up the directory by rewriting source code and hypertext and removing temporary files.
2300        print("Cleaning local web site...  ", end='', flush=True)
2301        logging.debug("========================== Cleaning the local web site")
2302        local.clean()
2303
2304        # We are done with the first scan of the local web site and will dispose of it.
2305        local.finish()
2306        del local
2307
2308        # ---------------------------------------------------------------------
2309        #  Rescan the local website since there will be changes to source
2310        #  files from the clean up stage.
2311        # ---------------------------------------------------------------------
2312
2313        print(f"Rescan the local web site from root dir = {user_settings.local_root_dir}...  ", end='', flush=True)
2314        logging.debug(f"========================== Re-Scan the local web site from root dir = {user_settings.local_root_dir}")
2315
2316        local = LocalWebSite(user_settings)
2317
2318        local.scan()
2319
2320        # ---------------------------------------------------------------------
2321        #  List all the local directories and files and their sizes.
2322        # ---------------------------------------------------------------------
2323
2324        # Local website directories.
2325        local_directory_list = local.directories
2326        logging.debug("********** List of all the Local Directories")
2327        for d in local_directory_list:
2328            logging.debug(f"\t {d:s}")
2329
        # Generate lists of the local website filenames only, and their sizes in bytes.
2331        local_files_name_size_pairs = [[file_info[user_settings.FILE_NAME], file_info[user_settings.FILE_SIZE]] for file_info in local.files]
2332        total_number_of_files = len( local_files_name_size_pairs )
2333        logging.debug(f"********** List of all the Local Files from largest to smallest.  There are {total_number_of_files:15d} files.")
2334        local_files_name_size_pairs = sorted(local_files_name_size_pairs, key=lambda name_size: name_size[1], reverse=True)
2335
2336        # Local website filenames only, and their dates and times.
2337        local_file_datetime_pairs = [[file_info[user_settings.FILE_NAME],file_info[user_settings.FILE_DATE_TIME]] for file_info in local.files]
2338        logging.debug(f"********** List of all Local Files Showing Their Date and Time")
2339        for file_datetime_pair in local_file_datetime_pairs:
2340            logging.debug(f"\t {file_datetime_pair[1].ctime():s} UTC {file_datetime_pair[0]:s}")
2341
2342        # Total number of bytes in the local files.
2343        total_number_of_bytes = 0
2344        for file_size_pair in local_files_name_size_pairs:
2345            logging.debug(f"\t {file_size_pair[1]:10d} bytes {file_size_pair[0]:s}")
2346            total_number_of_bytes += file_size_pair[1]
2347        logging.debug(f"********** Total local file size = {total_number_of_bytes:10d} bytes = {total_number_of_bytes/(1024 ** 2):10.2f} MB (not counting skipped files and directories)")
2348
2349        local.finish()
2350
2351        if user_settings.CLEAN:
2352            logging.debug("========================== Done with local file and directory cleanup...")
2353            del local
2354            print("...done!", flush=True)
2355            return
2356
2357        # ---------------------------------------------------------------------
2358        #  Scan the remote hosted web site.
2359        # ---------------------------------------------------------------------
2360
2361        print("Scanning remote web site...  ", end='', flush=True)
2362        logging.debug("========================== Scanning the remote web site...")
2363
2364        # Pick which website to update.
2365        logging.debug("Connecting to primary remote site.")
2366        remote = RemoteWebSite(user_settings)
2367        remote.scan()
2368        remote.finish()
2369
2370        # ---------------------------------------------------------------------
2371        #  List all the remote server directories and files and their sizes.
2372        # ---------------------------------------------------------------------
2373
2374        remote_directory_list = remote.directories
2375        logging.debug("********** Remote Directories")
2376        for d in remote_directory_list:
2377            logging.debug(f"\t {d:s}")
2378
        # Remote website filenames only, and their sizes in bytes.
2380        remote_files_name_size_list = [[file_info[user_settings.FILE_NAME], file_info[user_settings.FILE_SIZE]] for file_info in remote.files]
2381        total_number_of_files = len( remote_files_name_size_list )
2382        logging.debug(f"********** Remote Files [num files = {total_number_of_files:15d}]")
2383        remote_files_name_size_list = sorted(remote_files_name_size_list, key=lambda name_size: name_size[1], reverse=True)
2384        total_number_of_bytes = 0
2385        for file_size in remote_files_name_size_list:
2386            logging.debug(f"\t {file_size[1]:10d} bytes {file_size[0]:s}")
2387            total_number_of_bytes += file_size[1]
2388        logging.debug(f"\tTotal file size on remote (not counting skipped files and directories) = {total_number_of_bytes:10d} bytes = {total_number_of_bytes/(1024 ** 2):10.2f} MB")
2389
2390        # ---------------------------------------------------------------------
2391        # Synchronize the local and remote web sites.
2392        # ---------------------------------------------------------------------
2393
2394        print("Synchronizing remote and local web sites...  ", end='', flush=True)
2395        logging.debug("========================= Synchronizing remote and local web sites...")
2396
2397        # Primary website.
2398        logging.debug("Connecting to primary remote site for synchronization.")
2399        sync = UpdateWeb(user_settings,
2400                         local.directories,
2401                         local.files,
2402                         remote.directories,
2403                         remote.files)
2404
2405        sync.update()
2406        sync.finish()
2407
2408        del sync
2409        del remote
2410        del local
2411        print("...done!", flush=True)
2412
2413    except UpdateWebException as detail:
2414        logging.error(f"Couldn't update the web directory:  {str(detail):s}.  Aborting...")
2415
2416    except RecursionError as detail:
2417        logging.error(f"Walking the directory tree became too deep for Python's recursion stack depth of {sys.getrecursionlimit():d} You can increase it with sys.setrecursionlimit(limit) {str(detail):s}.  Aborting...")
2418
2419if __name__ == '__main__':
2420    """Python executes all code in this file.  Finally, we come here.  
2421
2422    * If we are executing this file as a standalone Python script, 
2423      the name of the current module is set to __main__ and thus we'll call the main() function.
2424
2425    * But if we are importing this code as a module, and calling it from another script, we will do this instead:
2426
2427        import updateweb
2428        updateweb.main(["--test"])"""
2429
2430    main()