1#!/usr/bin/env python3
   2# ============================================================================
   3#
   4# NAME
   5#
   6#     updateweb.py
   7#
   8# DESCRIPTION
   9#
  10#     Python script which updates my web sites.
  11#
  12#     It does miscellaneous cleanup on my local copy of the web site on disk,
  13#     including updating copyright information, then synchronizes the local
  14#     copy to my remote server web sites using FTP.
  15#
  16# USAGE
  17#
  18#     It's best to use the associated makefile.
  19#     But you can call this Python utility from the command line,
  20#
  21#     $ python updateweb.py          Clean up my local copy, then use it
  22#                                    to update my remote web server site.
  23#                                    Log warnings and errors.
  24#     $ python updateweb.py -v       Same, but log debug messages also.
  25#     $ python updateweb.py -c       Clean up my local copy only.
  26#     $ python updateweb.py -t       Run unit tests only.
  27#     $ python updateweb.py -m       Upload MathJax files (only need to do this once).
  28#
  29#     We get username and password information from the file PARAMETERS_FILE.
  30#
  31#     Logs are written to the files,
  32#
  33#         logLocal.txt       Local web site cleanup log.
  34#         logRemote.txt      Remote web server update log.
  35#
  36# AUTHOR
  37#
  38#     Sean E. O'Connor        23 Aug 2007  Version 1.0 released.
  39#
  40# LEGAL
  41#
  42#     updateweb.py Version 7.1 - A Python utility program which maintains my web site.
  43#     Copyright (C) 2007-2024 by Sean Erik O'Connor.  All Rights Reserved.
  44#
  45#     This program is free software: you can redistribute it and/or modify
  46#     it under the terms of the GNU General Public License as published by
  47#     the Free Software Foundation, either version 3 of the License, or
  48#     (at your option) any later version.
  49#
  50#     This program is distributed in the hope that it will be useful,
  51#     but WITHOUT ANY WARRANTY; without even the implied warranty of
  52#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  53#     GNU General Public License for more details.
  54#
  55#     You should have received a copy of the GNU General Public License
  56#     along with this program.  If not, see <http://www.gnu.org/licenses/>.
  57#
  58#     The author's address is seanerikoconnor!AT!gmail!DOT!com
  59#     with !DOT! replaced by . and the !AT! replaced by @
  60#
  61# NOTES
  62#
  63#    DOCUMENTATION
  64#
  65#    Python interpreter:               https://www.python.org/
  66#    Python tutorial and reference:    https://docs.python.org/lib/lib.html
  67#    Python debugger:                  https://docs.python.org/3/library/pdb.html
  68#    Python regular expression howto:  https://docs.python.org/3.7/howto/regex.html
  69#
  70# ============================================================================
  71
  72# ----------------------------------------------------------------------------
  73#  Load Python Packages
  74# ----------------------------------------------------------------------------
  75
  76# OS stuff
  77import sys
  78import os
  79import argparse
  80import shutil
  81from pathlib import Path
  82
  83# Regular expressions
  84import re
  85
  86# FTP stuff
  87import ftplib
  88
  89# Date and time
  90import time
  91import stat
  92import datetime
  93
  94# Logging
  95import logging
  96
  97# Unit testing
  98import unittest
  99
 100# Enumerated types (v3.4)
 101from enum import Enum
 102from typing import List, Any
 103
 104# YAML configuration files (a superset of JSON!)
 105import yaml 
 106# Recommended by https://www.cloudbees.com/blog/yaml-tutorial-everything-you-need-get-started
 107try:
 108    from yaml import CLoader as Loader
 109except ImportError:
 110    from yaml import Loader
 111
 112# Python syntax highlighter.  See https://pygments.org
 113from pygments import highlight
 114from pygments.lexers import HtmlLexer, CssLexer, JavascriptLexer, YamlLexer, MakefileLexer, BashLexer, VimLexer, TexLexer
 115from pygments.lexers import PythonLexer, CppLexer, CLexer, CommonLispLexer, FortranFixedLexer, MatlabLexer, OutputLexer
 116from pygments.formatters import HtmlFormatter
 117
 118
 119# ----------------------------------------------------------------------------
 120#  Custom Top Level Exceptions.
 121# ----------------------------------------------------------------------------
 122
 123class UpdateWebException(Exception):
 124    """Something went wrong at a deep level when searching local files, searching remote files, or trying to sync local and remote, and we could not recover.
 125       Derive from Exception as recommended by Python manual"""
 126    pass
 127
 128# ----------------------------------------------------------------------------
 129#  User settings.
 130# ----------------------------------------------------------------------------
 131
 132class TreeWalkSettings(Enum):
 133    """Enum types for how to walk the directory tree."""
 134    BREADTH_FIRST_SEARCH = 1
 135    DEPTH_FIRST_SEARCH = 2
 136
 137class FileType(Enum):
 138    """'Enum' types for properties of directories and files."""
 139    DIRECTORY = 0
 140    FILE = 1
 141    ON_LOCAL_ONLY = 2
 142    ON_REMOTE_ONLY = 3
 143    ON_BOTH_LOCAL_AND_REMOTE = 4
 144
class UserSettings:
    """Megatons of user selectable settings.

    Class-level constants hold logging flags, regular-expression patterns used
    to clean and update the local web site, FTP-listing parsing patterns, and
    the boilerplate HTML used when generating syntax-highlighted source
    listings.  Constructing an instance determines the local web site root
    directory, loads the private server settings from a YAML parameter file,
    and precompiles all the regular expressions.
    """
    # Logging control.
    LOGFILENAME = ""
    VERBOSE = False  # Verbose mode.  Prints out everything.
    CLEAN = False  # Clean the local website only.
    UNITTEST = False  # Run a unit test of a function.
    MATHJAX = False  # Process and upload MathJax files to server.

    # When diving into the MathJax directory, web walking the deep directories
    # may exceed Python's default recursion limit of 1000.
    # NOTE(review): this executes once at class-definition time and raises the
    # process-wide recursion limit as a side effect of importing this module.
    RECURSION_DEPTH = 5000
    sys.setrecursionlimit(RECURSION_DEPTH)

    # Fields in the file information (file_info) structure.
    # For example, file_info = 
    #   [ '/WebDesign/EquationImages/equation001.png',  -- The file name.
    #      1,                                           -- Enum type: Is it a file? dir? on local? on remote? on both?
    #      datetime.datetime(2010, 2, 3, 17, 15),       -- UTC encoded in a datetime class.
    #      4675]                                        -- File size in bytes.
    FILE_NAME = 0
    FILE_TYPE = 1
    FILE_DATE_TIME = 2
    FILE_SIZE = 3

    # Server settings.  The None placeholders below are filled in by
    # get_server_settings() from the private YAML file.
    SERVER_SETTINGS_FILE_NAME = "/private/updateweb.yaml"
    SERVER_NAME = None
    USER_NAME = None
    PASSWORD_NAME = None
    FTP_ROOT_NAME = None
    FILE_SIZE_LIMIT_NAME = None

    # Map month names onto numbers.
    monthToNumber = {
        'Jan': 1,
        'Feb': 2,
        'Mar': 3,
        'Apr': 4,
        'May': 5,
        'Jun': 6,
        'Jul': 7,
        'Aug': 8,
        'Sep': 9,
        'Oct': 10,
        'Nov': 11,
        'Dec': 12}

    # List of directories to skip over when processing or uploading the web page.
    # Some are private but most are directories of temporary files.
    # They will be listed as WARNING in the log.
    # Examples:
    #     My private admin settings directory.
    #     Git or SVN local admin directories.
    #     Compiled build directories from XCode.
    #     PyCharm build directories.
    #     Python cache directories.
    #     Jupyter checkpoint directories.
    #     XCode temporary file crap.
    DIR_TO_SKIP = "private|.git|.github|.svn|build|XCodeOutput|Debug|Release|PyCharm|.idea|__pycache__|.ipynb_checkpoints|ModuleCache.noindex|SymbolCache.noindex|Primpoly-[a-z]"

    # List of files to skip when processing or uploading to the web page.
    # They will be listed as WARNING in the log.
    # Examples:
    #     MathJax yml file.
    #     .htaccess (because it doesn't show up on the output of ftp LIST, so we must upload manually)
    FILE_TO_SKIP = ".travis.yml|.svnignore|.htaccess"

    # Suffixes for temporary files which will be deleted during the cleanup
    # phase.
    TEMP_FILE_SUFFIXES = r"""        # Use Python raw strings.
        \.                           # Match the dot in the file name.
                                     # Now begin matching the file name suffix.
                                     # (?: non-capturing match for the regex inside the parentheses,
                                     #   i.e. matching string cannot be retrieved later.
                                     # Now match any of the following file extensions:
        (?: o   | obj | lib |        #     Object files generated by C, C++, etc compilers
                              pyc |  #     Object file generated by the Python compiler
                  ilk | pdb | sup |  #     Temp files from VC++ compiler
            idb | ncb | opt | plg |  #     Temp files from VC++ compiler
            sbr | bsc | map | bce |  #     Temp files from VC++ compiler
            res | aps | dep | db  |  #     Temp files from VC++ compiler
                              jbf |  #     Paintshop Pro
                      class | jar |  #     Java compiler
                              fas |  #     CLISP compiler
                        swp | swo |  #     Vim editor
                        toc | aux |  #     TeX auxilliary files (not .synctex.gz or .log)
          DS_Store  | _\.DS_Store |  #     macOS finder folder settings.
                       _\.Trashes |  #     macOS recycle bin
        gdb_history)                 #     GDB history
        $                            #     Now we should see only the end of line.
        """

    # Special case:  Vim temporary files contain a twiddle anywhere in the
    # name.
    VIM_TEMP_FILE_EXT = "~"

    # Suffixes for temporary directories which should be deleted during the
    # cleanup phase.
    TEMP_DIR_SUFFIX = r"""           # Use Python raw strings.
        (?: Debug | Release |        # C++ compiler
           ipch   | \.vs    |        # Temp directories from VC++ compiler
        \.Trashes | \.Trash)         # macOS recycle bin
        $
        """

    # File extension for an internally created temporary file.
    TEMP_FILE_EXT = ".new"

    # Identify source file types.
    HYPERTEXT_FILE_PATTERN = r"""  # Use Python raw strings.
        (\.                        # Match the filename suffix after the .
            (?: html | htm |       # HTML hypertext
                css)               # CSS style sheet
        $)                         # End of line.
    """

    SOURCE_FILE_PATTERN = r"""      # Use Python raw strings.
        (?: makefile$ |             # Any file called makefile is a source file.
                                    # Note the $ at the end so we don't reprocess .gitconfig.html -> .gitconfig.html.html
          .vimrc$ |                 # Vim script
          (.bashrc$ |               # Bash configuration files.
           .bash_profile$ |
           .bash_logout$) 
          |
          (.gitignore$ |             # Git configuration files.
           .gitignore_global$ | 
           .gitconfig$)
          |
          (\.                       # Match the filename suffix after the .
                                    # Now match any of these suffixes:
             (?: 
                  c | cpp | h | hpp |   #     C++ and C
                  js |                  #     Javascript
                  py |                  #     Python
                  lsp |                 #     LISP
                  m  |                  #     MATLAB
                  FOR | for | f |       #     FORTRAN
                  yaml |                #     YAML = JSON superset
                  tex |                 #     LaTeX
                  txt | dat |           #     Data files
                  sh)                   #     Bash
             $)                         # End of line.
         )
         """

    # Special case of certain HTML and CSS files for which we want to generate a syntax highlighted source code listing.
    SPECIAL_FILE_TO_HIGHLIGHT_PATTERN = r"""
        (?: ^life\.html$          | # We want a listing of this particular HTML file.
            ^index\.html$         | # I want to list my top level HTML file.  (There is only one file with this name at the top level web directory.)
            ^webPageDesign\.html$ | # and also this HTML example file, but no others.
            ^StyleSheet\.css$ )     # I want to list my style sheet.
        """

    # Files for which we want to generate a syntax highlighted source code listing.
    # Uses an f-string combined with a raw-string.
    FILE_TO_HIGHLIGHT_PATTERN = fr"""
        (?: {SPECIAL_FILE_TO_HIGHLIGHT_PATTERN} | 
            {SOURCE_FILE_PATTERN} )
        """

    # Update my email address.
    # This is tricky:  Prevent matching and updating the name within this
    # Python source file by using the character class brackets.
    OLD_EMAIL_ADDRESS = r"""
        artificer\!AT\!sean[e]rikoconnor\!DOT\!freeservers\!DOT\!com
        """
    NEW_EMAIL_ADDRESS = "seanerikoconnor!AT!gmail!DOT!com"

    # List of patterns to match, match groups to pull out of the old string, new strings to generate from these two items.  
    # Read patterns and strings from the updateweb.yaml file.
    STRING_REPLACEMENT_LIST = []
    # Pairs of test strings and their correct match/replacements.
    STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST = []

    # Change all old software version lines of the form
    #      Primpoly Version nnnn.nnnn
    # to the new software version.
    # Note that since we are using raw strings leading and trailing whitespace
    # is ignored in both pattern and replacement.
    CURRENT_SOFTWARE_VERSION = r"""
        Primpoly
        \s+
        Version
        \s+
        ([0-9]+)   # The two part version number NNN.nnn
        \.
        ([0-9]+)
        """
    NEW_SOFTWARE_VERSION = r"""
        Primpoly Version 16.3
        """

    # Match a copyright line.  Then extract the copyright symbol which can be
    # ascii (C) or HTML &copy; and extract the old year.
    TWO_DIGIT_YEAR_FORMAT = "%02d"
    COPYRIGHT_LINE = r"""
        Copyright                       # Copyright.
        \s+                             # One or more spaces.
        (?P<symbol> \(C\) | &copy;)     # Match and extract the copyright symbol.
        \D+                             # Any non-digits.
        (?P<old_year>[0-9]+)            # Match and extract the old copyright year,
                                        # then place it into variable 'old_year'
        -                               # to
        ([0-9]+)                        # New copyright year.
        """

    # Match a line containing the words,
    #    last updated YY
    # and extract the two digit year YY.
    LAST_UPDATED_LINE = r"""
        last\s+         # Match the words "last updated"
        updated\s+
        \d+             # Day number
        \s+             # One or more blanks or tab(
        [A-Za-z]+       # Month
        \s+             # One or more blanks or tabs
        (?P<year>\d+)   # Two digit year.  Place it into the variable 'year'
        """

    # Web server root directory.
    DEFAULT_ROOT_DIR = "/"

    # The ftp listing occasionally shows a date newer than the actual date. 
    # On my server, it could be 6 months newer when we are near New Year's Day.  Typically the server file time is only a 1 or 2 minutes newer.
    # But if the remote file time is much newer, it might be an old file with a bad date/time.  
    # Upload the file to be safe.
    # How to see the time differences from the log if they are large:
    #     egrep -o "Remote file.*is MUCH newer.*days" logRemote.txt
    #     Remote file Finance/MortgageLoanDerivation.tex.html is MUCH newer[8.0 minutes] by 885753.0 seconds = 14762.5 minutes = 246.0 hours = 10.3 days
    # How to see the time differences from the log if they are small and we wait and NOT upload:
    #    egrep -o "Remote file.*is newer.*days" logRemote.txt
    #    Remote file error404.html is newer by    102.0 seconds =      1.7 minutes =      0.0 hours =      0.0 days
    #    Remote file index.html is newer by    113.0 seconds =      1.9 minutes =      0.0 hours =      0.0 days
    MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD = 8.0
    DAYS_NEWER_FOR_REMOTE_BEFORE_UPLOAD = (1.0 / 24.0) * (1.0 / 60.0) * MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD

    # Upload only if we are newer by more than a few minutes.  Allows for a little slop in time stamps on server or host.
    MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD = 3.0
    DAYS_NEWER_FOR_LOCAL_BEFORE_UPLOAD = (1.0 / 24.0) * (1.0 / 60.0) * MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD

    # An ftp list command line should be at least this many chars, or we'll
    # suspect an error.
    MIN_FTP_LINE_LENGTH = 7

    # Parse an ftp listing, extracting <bytes> <mon> <day> <hour> <min> <year> <filename>
    # ftp listings are generally similar to UNIX ls -l listings.
    #
    # Some examples:
    #
    # (1) Freeservers ftp listing,
    #
    #          0        1   2                3           4    5   6   7      8
    #     drwxr-xr-x    3 1000             1000         4096 Nov 18  2006 Electronics
    #     -rw-r--r--    1 1000             1000        21984 Jun  4 03:46 StyleSheet.css
    #     -rw-r--r--    1 1000             1000         2901 Sep 26 17:12 allclasses-frame.html
    #
    # (2) atspace ftp listing,
    #
    #     drwxr-xr-x    3  seanerikoconnor vusers         49 Apr  7  2006 Electronics
    #     -rw-r--r--    1  seanerikoconnor vusers      21984 Jun  4 04:03 StyleSheet.css
    #
    FTP_LISTING = r"""
        [drwx-]+            # Unix type file mode.
        \s+                 # One or more blanks or tabs.
        \d+                 # Number of links.
        \s+
        \w+                 # Owner.
        \s+
        \w+                 # Group.
        \s+
        (?P<bytes> \d+)     # File size in bytes, placed into the variable 'bytes'.
        \s+
        (?P<mon> \w+)       # Month modified, placed into the variable 'mon'.
        \s+
        (?P<day> \d+)       # Day modified, placed into the variable 'day'.
        \s+
        (
            (?P<hour> \d+)  # Hour modified, placed into the variable 'hour'.
            :
            (?P<min> \d+)   # Minute modified, placed into the variable 'min'.
            |
            (?P<year> \d+)  # If hours and minutes are absent (happens when year is not the current year),
                            # extract the year instead.
        )
        \s+
        (?P<filename> [A-Za-z0-9"'.\-_,~()=+#]+)    # Path and file name containing letters, numbers,
                                                    # and funny characters.  We must escape some of
                                                    # these characters with a backslash, \.
        """

    # HTML header up to the style sheet.
    BASIC_HTML_BEGIN = \
        """
        <!DOCTYPE html>
        <html lang="en-US">  <!-- Set language of this page to USA English. -->
        
        <head>
            <!-- This page uses Unicode characters. -->
            <meta charset="utf-8">
        
            <!-- Set viewport to actual device width.  Any other settings makes the web page initially appear zoomed-in on mobile devices. -->
            <meta name="viewport" content="width=device-width, initial-scale=1">
        
            <!-- Title appears in the web browser tab for this page.  The browser also uses it to bookmark this page. -->
            <title>Sean Erik O'Connor - Home Page and Free Mathematical Software.</title>
        
            <!-- Search engines will search using words in this description.  They will also display title in their search results. -->
            <meta name="description" content="Syntax Colored Source Code Listing">
        
            <!-- Some content management software uses the author's name. -->
            <meta name="author" content="Sean Erik O'Connor">
        
            <meta name="copyright" content="Copyright (C) 1986-2024 by Sean Erik O'Connor.  All Rights Reserved.">   
        
            <!-- Begin style sheet insertion -->
            <style>
                /* Default settings for all my main web pages. */
                body
                {
                    /* A wide sans-serif font is more readable on the web. */
                    font-family:            Verdana, Geneva, "Trebuchet MS", sans-serif ;
        
                    /* Set the body font size a little smaller than the user's default browser setting. */
                    font-size:              0.8em ; 
        
                    /* Black text is easier to read. */
                    color:                  black ;
        
                    /*  More vertical space between lines for more pleasant reading.  Use a unitless font height multiplier.  
                        Length and percentage percentage values can give scrunched text due to poor inheritance behavior. */
                    line-height:            1.7 ;
                }
        
                <!-- Now prepare to add the syntax coloring style sheet from Pygment -->
        """

    # After the style sheet and up to the start of the article in the body.
    BASIC_HTML_MIDDLE = \
        """
            </style>
        </head>
        
        <body>
            <article class="content">
        """

    # After the source code listing, finish the article, body and html document.
    BASIC_HTML_END = \
        """
            </article>
        </body>
        
        </html>
        """

    def __init__(self):
        """Set up the user settings.

        Determines the local web site root directory for this platform,
        loads the private server settings and string-replacement patterns
        from the YAML parameter file, then precompiles all the regular
        expressions above.

        Raises:
            UpdateWebException:  If the YAML settings file cannot be read
            or is malformed (propagated from get_server_settings).
        """

        self.local_root_dir = ""

        # Import the user settings from the parameter file.
        self.get_local_root_dir()
        self.get_server_settings()

        self.precompile_regular_expressions()

    def get_server_settings(self):
        """
        Read web account private settings from a secret offline parameter file.
        These also hold patterns to match and replace in all of our source pages.

        The YAML file must contain exactly two documents:  document 0 holds the
        FTP server credentials and limits; document 1 holds the pattern-match /
        replacement-string list and its test/verify string pairs.

        Raises:
            UpdateWebException:  If the file cannot be opened, or if it does
            not contain exactly two YAML documents.
        """

        # Private file which contains my account settings.
        settings_file_name = self.local_root_dir + self.SERVER_SETTINGS_FILE_NAME
        # Recommended by
        #  https://www.cloudbees.com/blog/yaml-tutorial-everything-you-need-get-started
        try:
            stream = open(settings_file_name, "r")
        except OSError as detail:
            logging.error(f"Cannot open the YAML file {settings_file_name:s}.  Unable to read the settings because: {str(detail):s}")
            # Rethrow the exception higher.
            raise UpdateWebException("Cannot load the settings.  See the log file for details.  Aborting... ") from detail
        # Read all the YAML documents in the file.
        # NOTE(review): the stream is never explicitly closed; a with-block
        # would be tidier, but the process is short-lived.
        yaml_contents = yaml.load_all(stream, Loader)
        yaml_document_list: list[Any] = []
        for yaml_doc in yaml_contents:
            yaml_document_list.append(yaml_doc)
        num_yaml_docs = len(yaml_document_list)
        if num_yaml_docs != 2:
            logging.error(f"Wrong number of YAML documents = {num_yaml_docs:3d} in the user settings file.  Aborting...")
            raise UpdateWebException("Cannot load the settings.  See the log file for details.  Aborting... ")

        # Load all the server settings.
        self.SERVER_NAME = yaml_document_list[0]['ftp_server_name']
        self.USER_NAME = yaml_document_list[0]['ftp_user_name']
        self.PASSWORD_NAME = yaml_document_list[0]['ftp_password']
        self.FTP_ROOT_NAME = yaml_document_list[0]['remote_directory']
        self.FILE_SIZE_LIMIT_NAME = int(yaml_document_list[0]['file_size_limit_Kb'])

        # Load all the tuples which contain patterns to match and the strings to replace, from document #1 in the YAML file.
        self.STRING_REPLACEMENT_LIST = []
        pat_rep_yaml_list = yaml_document_list[1]['pattern_match_replacement_string_list']
        for pat_rep in pat_rep_yaml_list:
            # Fetch the regular expression and compile it for speed.
            verbose_regex = pat_rep['pattern']
            pat = re.compile(verbose_regex, re.VERBOSE | re.IGNORECASE)
            # Since we use raw strings, we need to strip off leading and trailing whitespace.
            replacement_string = pat_rep['replacement_string'].strip().lstrip()
            self.STRING_REPLACEMENT_LIST.append([pat, replacement_string])

        # Load the test and verify strings.
        test_verify_strings_list = yaml_document_list[1]['test_verify_string_list']
        for test_verify_string in test_verify_strings_list:
            test_string = test_verify_string['test_string'].strip().lstrip()
            verify_string = test_verify_string['verify_string'].strip().lstrip()
            self.STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST.append([test_string,verify_string])

        print("  ...done!", flush=True)
        return

    def get_local_root_dir(self):
        """Get the local website root directory on this platform.

        Supports macOS, Linux, and Cygwin.  On any other platform
        self.local_root_dir is left unchanged (the empty string as set
        in __init__)."""

        # Each platform has a definite directory for the web page.
        local_web_dir_path = "/Desktop/Sean/WebSite"

        # My macOS laptop.
        if sys.platform.startswith('darwin'):
            self.local_root_dir = str(Path.home()) + local_web_dir_path
        # My CyberPower PC running Ubuntu Linux.
        elif sys.platform.startswith('linux') or sys.platform.startswith('cygwin'):
            self.local_root_dir = str(Path.home()) + local_web_dir_path
        return

    def precompile_regular_expressions(self):
        """For speed precompile the regular expression search patterns.

        Note:  this rebinds the class-level pattern *strings* to compiled
        regular-expression objects on this instance."""
        self.COPYRIGHT_LINE            = re.compile(self.COPYRIGHT_LINE,            re.VERBOSE | re.IGNORECASE)
        self.CURRENT_SOFTWARE_VERSION  = re.compile(self.CURRENT_SOFTWARE_VERSION,  re.VERBOSE | re.IGNORECASE)
        self.FTP_LISTING               = re.compile(self.FTP_LISTING,               re.VERBOSE | re.IGNORECASE)
        self.TEMP_FILE_SUFFIXES        = re.compile(self.TEMP_FILE_SUFFIXES,        re.VERBOSE | re.IGNORECASE)
        self.TEMP_DIR_SUFFIX           = re.compile(self.TEMP_DIR_SUFFIX,           re.VERBOSE)
        self.SOURCE_FILE_PATTERN       = re.compile(self.SOURCE_FILE_PATTERN,       re.VERBOSE)
        self.HYPERTEXT_FILE_PATTERN    = re.compile(self.HYPERTEXT_FILE_PATTERN,    re.VERBOSE)
        self.OLD_EMAIL_ADDRESS         = re.compile(self.OLD_EMAIL_ADDRESS,         re.VERBOSE | re.IGNORECASE)
        self.FILE_TO_HIGHLIGHT_PATTERN = re.compile(self.FILE_TO_HIGHLIGHT_PATTERN, re.VERBOSE)
        self.LAST_UPDATED_LINE         = re.compile(self.LAST_UPDATED_LINE,         re.VERBOSE | re.IGNORECASE)
 591
 592# ----------------------------------------------------------------------------
 593#  Unit test individual functions.
 594# ----------------------------------------------------------------------------
 595
 596class UnitTest(unittest.TestCase):
 597    """Initialize the UnitTest class."""
 598    def setUp(self):
 599        self.user_settings = UserSettings()
 600        self.user_settings.get_local_root_dir()
 601
 602    def tearDown(self):
 603        """Clean up the UnitTest class."""
 604        self.user_settings = None
 605
 606    def test_copyright_updating(self):
 607        """Test copyright line updating to the current year."""
 608        # Prevent web cleaning from rewriting strings by splitting them up and concatenating them.
 609        line_before_update = "Copyright (C) 19" + "99-20" + "20" + " by Sean Erik O'Connor.  All Rights Reserved. Copyright &copy; 1999-2024 by Sean Erik O'Connor"
 610        line_after_update_actual = "Copyright (C) 1999-2024 by Sean Erik O'Connor.  All Rights Reserved. Copyright &copy; 1999-2024 by Sean Erik O'Connor"
 611        pat = self.user_settings.COPYRIGHT_LINE
 612        match = pat.search(line_before_update)
 613
 614        if match:
 615            old_year = int(match.group('old_year'))
 616            # Same as call to self.get_current_year():
 617            current_year = int(time.gmtime()[0])
 618            if old_year < current_year:
 619                # We matched and extracted the old copyright symbol into the variable
 620                # 'symbol' using the pattern syntax (?P<symbol> \(C\) | &copy;)
 621                # We now insert it back by placing the special syntax
 622                # \g<symbol> into the replacement string.
 623                new_copyright = r"Copyright \g<symbol> \g<old_year>-" + \
 624                                str(current_year)
 625                line_after_update_computed = pat.sub(new_copyright, line_before_update)
 626                self.assertEqual(
 627                    line_after_update_actual,
 628                    line_after_update_computed,
 629                    f"newline = |{line_after_update_actual:s}| line_after_update_computed = |{line_after_update_computed:s}|")
 630            else:
 631                self.fail()
 632        else:
 633            self.fail()
 634
 635    def test_update_software_version(self):
 636        """Test updating to a new version of Primpoly."""
 637        # Prevent web cleaning from rewriting strings by splitting them up and concatenating them.
 638        old_version_line = "|     Primpoly Version 00." + "0 - A Program for Computing Primitive Polynomials.|"
 639        new_version_line = "|     Primpoly Version 16." + "3 - A Program for Computing Primitive Polynomials.|"
 640        pat = self.user_settings.CURRENT_SOFTWARE_VERSION
 641        match = pat.search(old_version_line)
 642        if match:
 643            # Note that since we are using raw strings leading and trailing
 644            # whitespace is ignored.
 645            new_version = self.user_settings.NEW_SOFTWARE_VERSION.lstrip().strip()
 646            updated_version_line = pat.sub(new_version, old_version_line)
 647            self.assertEqual(updated_version_line, new_version_line, f"updated version line = {updated_version_line:s} new line = {new_version_line:s}")
 648        else:
 649            self.fail()
 650
 651    def test_extract_filename_from_ftp_listing(self):
 652        """Test parsing an FTP listing."""
 653        ftp_line = "-rw-r--r--    1 1000             1000         2901 Sep 26 17:12 allclasses-frame.html"
 654        extracted_file_name = "allclasses-frame.html"
 655        pat = self.user_settings.FTP_LISTING
 656        match = pat.search(ftp_line)
 657        if match:
 658            filename = match.group('filename')
 659            self.assertEqual(
 660                filename,
 661                extracted_file_name,
 662                f"ftp_line = {ftp_line:s} extracted file name = {extracted_file_name:s}")
 663        else:
 664            self.fail()
 665
 666    def test_get_file_time_and_date(self):
 667        """Test getting a file time and date."""
 668        # Point to an old file.
 669        file_name = "./Images/home.png"
 670        full_file_name = self.user_settings.local_root_dir + '/' + file_name
 671        # Get the UTC time.
 672        file_epoch_time = os.path.getmtime(full_file_name)
 673        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
 674        # Create a datetime object for the file.
 675        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class;  year, month, day, hour, minute, seconds.
 676        # Check if the file time matches what we would see if we did ls -l <file_name>
 677        computed = f"file {file_name:s} datetime {d.ctime():s}"
 678        actual = "file ./Images/home.png datetime Thu Jul 18 16:55:44 2024"
 679        self.assertEqual(computed, actual)
 680
 681    def test_set_file_time_and_date(self):
 682        """Test setting a file time and date."""
 683        file_name = "./Images/home.png"
 684        full_file_name = self.user_settings.local_root_dir + '/' + file_name
 685        # Create a temporary file in the same directory.
 686        temp_file_name = "temporal.tmp"
 687        full_temp_file_name = self.user_settings.local_root_dir + temp_file_name
 688        try:
 689            with open(full_temp_file_name, 'w') as fp:
 690                fp.write("The End of Eternity")
 691        except OSError as detail:
 692            logging.error(f"Cannot open or write to the file {full_temp_file_name:s}: {str(detail):s}  Aborting...")
 693            raise UpdateWebException("Failed the unit test for setting time and date of a file.  See the log file for details.  Aborting...") from detail
 694        # Get the old file time.  Set the temporary file to the same time.
 695        file_stat = os.stat(full_file_name)
 696        os.utime(full_temp_file_name, (file_stat[stat.ST_ATIME], file_stat[stat.ST_MTIME]))
 697        # What is the temporary file's time now?
 698        file_epoch_time = os.path.getmtime(full_temp_file_name)
 699        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
 700        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class;  year, month, day, hour, minute, seconds.
 701        # Is the temporary file time set properly?
 702        computed = f"file {file_name:s} datetime {d.ctime():s}"
 703        actual = "file ./Images/home.png datetime Thu Jul 18 16:55:44 2024"
 704        self.assertEqual(computed, actual)
 705        os.remove(full_temp_file_name)
 706
 707    def test_difference_of_time_and_date(self):
 708        """Test a date difference calculation."""
 709        file_name = "./Images/home.png"
 710        full_file_name = self.user_settings.local_root_dir + '/' + file_name
 711        # Get the UTC time.
 712        file_epoch_time = os.path.getmtime(full_file_name)
 713        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
 714        # Create a datetime object for the file.
 715        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class;  year, month, day, hour, minute, seconds.
 716        # Slightly change the date and time by adding 1 minute.
 717        d2 = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5]+1)  # year, month, day, hour, minute, second
 718        time_delta = d2 - d
 719        seconds_different = time_delta.total_seconds()
 720        minutes_different = seconds_different / 60.0
 721        hours_different = minutes_different / 60.0
 722        days_different = hours_different / 24.0
 723        computed = f"difference {days_different:8.5f} days, {hours_different:8.5f} hours {minutes_different:8.5f} minutes, {seconds_different:8.5f} seconds"
 724        actual = "difference  0.00001 days,  0.00028 hours  0.01667 minutes,  1.00000 seconds"
 725        self.assertEqual(computed, actual)
 726
 727    def test_pattern_match_dir_to_skip(self):
 728        """Test if skipping certain named directories is recoginizing the dir names."""
 729        dir_skip = "Primpoly-cswhfrwgwdikgzfdpiorbeaiennz"
 730        pat = re.compile(self.user_settings.DIR_TO_SKIP)
 731        if pat.search(dir_skip):
 732            self.assertTrue(True)
 733        else:
 734            self.assertTrue(False)
 735
 736    def test_file_name_to_syntax_highlight(self):
 737        """Test if syntax highlighting recognizes file names to highlight."""
 738        file_name1 = "Computer/hello.lsp"
 739        file_name2 = "Computer/life.html"
 740        p = self.user_settings.FILE_TO_HIGHLIGHT_PATTERN
 741        if p.search(Path(file_name1).name) and p.search(Path(file_name2).name):
 742            self.assertTrue(True)
 743        else:
 744            self.assertTrue(False)
 745
 746    def test_user_settings(self):
 747        """Test whether user settings are correctly initialized."""
 748        computed = f"File size limit = {int(self.user_settings.FILE_SIZE_LIMIT_NAME):d} K"
 749        actual = "File size limit = 50000 K"
 750        self.assertEqual(computed, actual, "File size limit settings are incorrect.")
 751
 752    def test_check_replace_substring(self,debug=True):
 753        """Test the substring pattern match and replacement functions which use the list of match/replace pairs in the YAML file.
 754           For troubleshooting, turn on debug.
 755        """
 756        test_verify_pairs = self.user_settings.STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST
 757        # Iterate over all test strings.
 758        for pair in test_verify_pairs:
 759            [test_string, verify_string] = pair
 760            if debug:
 761                print( f">>>>>>> next test string   = {test_string}")
 762                print( f">>>>>>> next verify string = {verify_string}")
 763            # Iterate over all patterns and replacements.
 764            for match_replace_tuple in self.user_settings.STRING_REPLACEMENT_LIST:
 765                [pat, rep_string] = match_replace_tuple
 766                print( f"\t-------> next pattern = {pat}") 
 767                print( f"\t-------> next replacement = {rep_string}") 
 768                match = pat.search(test_string)
 769                # The pattern match succeeds.
 770                if match:
 771                    try:
 772                        sub = pat.sub(rep_string, test_string)
 773                        # String replacement succeeds for this pattern/replace pair iteration.
 774                        if debug:
 775                            print( f"\t\t.......> match and replace: {test_string} ---> {sub}")
 776                        test_string = sub
 777                    except IndexError as detail:
 778                        print(f"\t\t.......> Caught an exception: {str(detail):s}.  Replacement failed.")
 779                        if debug:
 780                            self.assertTrue(False)
 781                elif debug:
 782                    print( f"\t\t.......> no match for pattern = {pat} in test string = {test_string}")
 783                # No match, so go on to the next pattern and don't change test_string.
 784            # Done with all pattern/replace on test string.
 785            # Check this test string in the list.
 786            self.assertEqual(test_string, verify_string, f"\ntest_string = |{test_string:s}|\nverify_string = |{verify_string:s}|\n")
 787            if debug:
 788                print( f"\t******* DONE with all pattern matches and replacements on this test/verify string pair.\n" )
 789
 790# ----------------------------------------------------------------------------
 791#  Command line options.
 792# ----------------------------------------------------------------------------
 793
 794class CommandLineSettings(object):
 795    """Get the command line options."""
 796
 797    def __init__(self, user_settings, raw_args=None):
 798        """Get command line options"""
 799        command_line_parser = argparse.ArgumentParser(
 800            description="updateweb options")
 801
 802        # Log all changes, not just warnings and errors.
 803        command_line_parser.add_argument(
 804            "-v",
 805            "--verbose",
 806            help="Turn on verbose mode to log everything",
 807            action="store_true")
 808
 809        # Clean up the local website only.
 810        command_line_parser.add_argument(
 811            "-c",
 812            "--clean",
 813            help="Do a cleanup on the local web site only.",
 814            action="store_true")
 815
 816        # Clean up the local website only.
 817        command_line_parser.add_argument(
 818            "-m",
 819            "--mathjax",
 820            help="""ALSO upload mathjax directory.\
 821            Do this if you have a new version of MathJax or if you've reset your server and deleted the /mathjax remote directory.\
 822            You'll need to do git restore on any altered files in the local branch and git clean -f to remove any newly created files.\
 823            NOTE:  If you did reset your server and delete all files, run the command    find . -name '*.*' -exec touch {} \\;    from the web page root directory.\
 824            Also run   find . -name '*' -exec touch {} \\;    This will ensure accurate times on the server.""",
 825            action="store_true")
 826
 827        # Run unit tests only.
 828        command_line_parser.add_argument("-t", "--test",
 829                                         help="Run unit tests.",
 830                                         action="store_true")
 831
 832        args = command_line_parser.parse_args(raw_args)
 833
 834        if args.verbose:
 835            user_settings.VERBOSE = True
 836        if args.clean:
 837            user_settings.CLEAN = True
 838        if args.test:
 839            user_settings.UNITTEST = True
 840        if args.mathjax:
 841            user_settings.MATHJAX = True
 842
 843# ----------------------------------------------------------------------------
 844#  Base class which describes my web site overall.
 845# ----------------------------------------------------------------------------
 846
 847class WebSite(object):
 848    """
 849    Abstract class used for analyzing both local and remote (ftp server) websites.
 850    Contains the web-walking functions which traverse the directory structures and files.
 851    These will be overloaded in the subclasses with differently specialized methods for either walking a disk drive directory with ls commands or else walking a remote directory with FTP commands.
 852    Child classes may define additional functions which only they need.
 853    """
 854
 855    def __init__(self, settings):
 856        """Set up root directories"""
 857
 858        # Import the user settings.
 859        self.user_settings = settings
 860
 861        # Queue keeps track of directories not yet processed.
 862        self.queue = []
 863
 864        # List of all directories traversed.
 865        self.directories = []
 866
 867        # List of files traversed, with file information.
 868        self.files = []
 869
 870        # Find out the root directory and go there.
 871        self.root_dir = self.get_root_dir()
 872        self.go_to_root_dir(self.root_dir)
 873
 874    @staticmethod
 875    def get_current_year():
 876        """Get the current year."""
 877        return int(time.gmtime()[0])
 878
 879    @staticmethod
 880    def get_current_two_digit_year():
 881        """Get the last two digits of the current year."""
 882        return WebSite.get_current_year() % 100
 883
 884    @staticmethod
 885    def is_file_info_type(file_info):
 886        """Check if we have a file information structure or merely a simple file name."""
 887        try:
 888            if isinstance(file_info, list):
 889                return True
 890            elif isinstance(file_info, str):
 891                return False
 892            else:
 893                logging.error("is_file_info_type found a bad type.  Aborting...")
 894                raise UpdateWebException("Internal error for file type.  See the log file for details.  Aborting... ")
 895        except TypeError as detail:
 896            logging.error(f"is_file_info_type found a bad type {str(detail):s}.  Aborting...")
 897            raise UpdateWebException("Internal error for file type.  See the log file for details.  Aborting... ") from detail
 898
 899    def get_root_dir(self):
 900        """Subclass:  Put code here to get the root directory"""
 901        return ""
 902
 903    def go_to_root_dir(self, root_dir):
 904        """Subclass:  Put code here to go to the root directory"""
 905        pass  # Pythons's do-nothing statement.
 906
 907    def one_level_down(self, d):
 908        """Subclass:  Fill in with a method which returns a list of the
 909        directories and files immediately beneath dir"""
 910        return [], []
 911
 912    def walk(self, d, type_of_tree_search=TreeWalkSettings.BREADTH_FIRST_SEARCH):
 913        """Walk a directory in either depth first or breadth first order.  BFS is the default."""
 914
 915        # Get all subfiles and subdirectories off this node.
 916        subdirectories, subfiles = self.one_level_down(d)
 917
 918        # Add all the subfiles in order.
 919        for f in subfiles:
 920
 921            name = self.strip_root(f)
 922            logging.debug(f"Webwalking:  Adding file {name[self.user_settings.FILE_NAME]:s} to list.")
 923
 924            # Some files are private so skip them from consideration.
 925            pat = re.compile(self.user_settings.FILE_TO_SKIP)
 926
 927            if pat.search(name[self.user_settings.FILE_NAME]):
 928                logging.warning(
 929                    f"Webwalking:  Skipping private file {name[self.user_settings.FILE_NAME]:s}")
 930            # Don't upload the log file due to file locking problems.
 931            elif name[self.user_settings.FILE_NAME].find(self.user_settings.LOGFILENAME) >= 0:
 932                logging.debug(f"Webwalking:  Skipping log file {name[self.user_settings.FILE_NAME]:s}")
 933            # File size limit on some servers.
 934            else:
 935                self.files.append(name)
 936
 937        # Queue up the subdirectories.
 938        for d in subdirectories:
 939            # Some directories are private such as .git or just temporary file
 940            # caches so skip them from consideration.
 941            pat = re.compile(self.user_settings.DIR_TO_SKIP)
 942            if pat.search(d):
 943                logging.warning(f"Webwalking:  Skipping private dir {d:s}")
 944            else:
 945                logging.debug(f"Webwalking:  Pushing dir {d:s} on the queue.")
 946                self.queue.append(d)
 947
 948        # Search through the directories.
 949        while len(self.queue) > 0:
 950            # For breadth first search, remove from beginning of queue.
 951            if type_of_tree_search == TreeWalkSettings.BREADTH_FIRST_SEARCH:
 952                d = self.queue.pop(0)
 953
 954            # For depth first search, remove from end of queue.
 955            elif type_of_tree_search == TreeWalkSettings.DEPTH_FIRST_SEARCH:
 956                d = self.queue.pop()
 957            else:
 958                d = self.queue.pop(0)
 959
 960            name = self.strip_root(d)
 961            logging.debug(f"Webwalking:  Adding relative directory {name:s} to list, full path = {d:s}.")
 962            self.directories.append(name)
 963
 964            self.walk(d)
 965
 966    def strip_root(self, file_info):
 967        """Return a path, but strip off the root directory"""
 968
 969        root = self.root_dir
 970
 971        # Extract the file name.
 972        if self.is_file_info_type(file_info):
 973            name = file_info[self.user_settings.FILE_NAME]
 974        else:
 975            name = file_info
 976
 977        # e.g. root = / and name = /Art/foo.txt yields stripped_path = Art/foo.txt
 978        # but root = /Sean and name = /Sean/Art/foo.txt yields stripped_path =
 979        # Art/foo.txt
 980        lenroot = len(root)
 981        if root == self.user_settings.DEFAULT_ROOT_DIR:
 982            pass
 983        else:
 984            lenroot = lenroot + 1
 985
 986        stripped_path = name[lenroot:]
 987
 988        if self.is_file_info_type(file_info):
 989            # Update the file name only.
 990            return [stripped_path,
 991                    file_info[self.user_settings.FILE_TYPE],
 992                    file_info[self.user_settings.FILE_DATE_TIME],
 993                    file_info[self.user_settings.FILE_SIZE]]
 994        else:
 995            return stripped_path
 996
 997    def append_root_dir(self, root_dir, name):
 998        """Append the root directory to a path"""
 999
1000        # e.g. root = /, and name = Art/foo.txt yields /Art/foo.txt
1001        # but root = /Sean, and name = Art/foo.txt yields /Sean/Art/foo.txt
1002        if root_dir == self.user_settings.DEFAULT_ROOT_DIR:
1003            return root_dir + name
1004        else:
1005            return root_dir + "/" + name
1006
1007    def scan(self):
1008        """Scan the directory tree recursively from the root"""
1009        logging.debug(f"Webwalking:  Beginning recursive directory scan from root directory {self.root_dir:s}")
1010        self.walk(self.root_dir)
1011
1012    def modtime(self, f):
1013        """Subclass:  Get file modification time"""
1014        pass
1015
1016    def finish(self):
1017        """Quit web site"""
1018        logging.debug(f"Finished with WebSite object of class {type(self)}")
1019        pass
1020
1021# ----------------------------------------------------------------------------
1022#  Subclass which knows about the local web site on disk.
1023# ----------------------------------------------------------------------------
1024
1025class LocalWebSite(WebSite):
1026    """Walk the local web directory on local disk down from the root.
1027    Clean up temporary files and do other cleanup work."""
1028
1029    def __init__(self, settings):
1030        """Go to web page root and list all files and directories."""
1031
1032        # Initialize the parent class.
1033        WebSite.__init__(self, settings)
1034
1035        self.root_dir = self.get_root_dir()
1036        logging.debug(f"LocalWebSite.__init__():  \tRoot directory: {self.root_dir:s}")
1037
1038    def get_root_dir(self):
1039        """Get the name of the root directory"""
1040        return self.user_settings.local_root_dir
1041
1042    def go_to_root_dir(self, root_dir):
1043        """Go to the root directory"""
1044
1045        # Go to the root directory.
1046        logging.debug(f"LocalWebSite.go_to_root_dir():  \tchdir to root directory:  {root_dir:s}")
1047        os.chdir(root_dir)
1048
1049        # Read it back.
1050        self.root_dir = os.getcwd()
1051        logging.debug(f"LocalWebSite.go_to_root_dir():  \tgetcwd root directory:  {self.root_dir:s}")
1052
1053    def one_level_down(self, d):
1054        """List all files and subdirectories in the current directory, dir.  For files, collect file info
1055        such as time, date and size."""
1056
1057        directories = []
1058        files = []
1059
1060        # Change to current directory.
1061        os.chdir(d)
1062
1063        # List all subdirectories and files.
1064        dir_list = os.listdir(d)
1065
1066        if dir_list:
1067            for line in dir_list:
1068                # Add the full path prefix from the root.
1069                name = self.append_root_dir(d, line)
1070                logging.debug(f"LocalWebSite.one_level_down():  \tlocal dir or file {name:s}")
1071
1072                # Is it a directory or a file?
1073                if os.path.isdir(name):
1074                    directories.append(name)
1075                elif os.path.isfile(name):
1076                    # First assemble the file information of name, time/date and size into a list.
1077                    # Can index it like an array.  For example,
1078                    # file_info = 
1079                    #   [ '/WebDesign/EquationImages/equation001.png',  -- The file name.
1080                    #      1,                                           -- Enum type FileType.FILE = 1.
1081                    #      datetime.datetime(2010, 2, 3, 17, 15),       -- UTC encoded in a date/time class.
1082                    #      4675]                                        -- File size in bytes.
1083                    file_info = [name,
1084                                 FileType.FILE,
1085                                 self.get_file_date_time(name),
1086                                 self.get_file_size(name)]
1087                    files.append(file_info)
1088
1089        # Sort the names into order.
1090        if directories:
1091            directories.sort()
1092        if files:
1093            files.sort()
1094
1095        return directories, files
1096
1097    @staticmethod
1098    def get_file_date_time(file_name):
1099        """Get a local file time and date in UTC."""
1100
1101        file_epoch_time = os.path.getmtime(file_name)
1102        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
1103        # Create a datetime class from the UTC year, month, day, hour, minute, seconds.
1104        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])
1105        return d
1106
1107    @staticmethod
1108    def get_file_size(file_name):
1109        """Get file size in bytes."""
1110        return os.path.getsize(file_name)
1111
1112    @staticmethod
1113    def clean_up_temp_file(temp_file_name, file_name, changed):
1114        """Remove the original file, rename the temporary file name to the original name.
1115        If there are no changes, just remove the temporary file.
1116        """
1117
1118        if changed:
1119            # Remove the old file now that we have the rewritten file.
1120            try:
1121                os.remove(file_name)
1122                logging.debug(f"Changes were made.  Removed original file {file_name:s}")
1123            except OSError as detail:
1124                logging.error(f"Cannot remove old file {file_name:s}: {str(detail):s}.  Need to remove it manually.")
1125
1126            # Rename the new file to the old file name.
1127            try:
1128                os.rename(temp_file_name, file_name)
1129                logging.debug(f"Renamed temp file {temp_file_name:s} to original file {file_name:s}")
1130            except OSError as detail:
1131                logging.error(f"Cannot rename temporary file {temp_file_name:s} to old file name {file_name:s}: {str(detail):s}.  Need to rename manually")
1132        else:
1133            # No changes?  Remove the temporary file.
1134            try:
1135                os.remove(temp_file_name)
1136                logging.debug(f"No changes were made.  Removed temporary file {temp_file_name:s}")
1137            except OSError as detail:
1138                logging.error(f"Cannot remove temporary file {temp_file_name:s}: {str(detail):s}.  Need to remove it manually.")
1139        return
1140
1141    @staticmethod
1142    def process_lines_of_file(in_file_name, out_file_name, process_line_function_list=None):
1143        """
1144        Process each line of a file with a list of functions.  Create a new temporary file.
1145
1146        The default list is None which means make an exact copy.
1147        """
1148
1149        # Assume no changes.
1150        changed = False
1151
1152        # Open both input and output files for processing.  Check if we cannot do it.
1153        fin = None
1154        try:
1155            fin = open(in_file_name, "r")
1156        except IOError as detail:
1157            logging.error(f"process_lines_of_file():  \tCannot open file {in_file_name:s} for reading:  {str(detail):s} Aborting...")
1158            if fin is not None:
1159                fin.close()
1160            raise UpdateWebException("Internal error for processing a file.  See the log file for details.  Aborting... ") from detail
1161        fout = None
1162        try:
1163            fout = open(out_file_name, "w")
1164        except IOError as detail:
1165            logging.error(f"process_lines_of_file():  \tCannot open file {out_file_name:s} for writing:  {str(detail):s} Aborting...")
1166            if fout is not None:
1167                fout.close()
1168            raise UpdateWebException("Internal error for processing a file.  See the log file for details.  Aborting... ") from detail
1169
1170        # Read each line of the file, aborting if there is a read error.
1171        try:
1172            line = fin.readline()
1173
1174            # Rewrite the next line of the file using all the rewrite functions.
1175            while line:
1176                original_line = line
1177                # If we have one or more rewrite functions...
1178                if process_line_function_list is not None:
1179                    # ...apply each rewrite functions to the line, one after the other in order.
1180                    for processLineFunction in process_line_function_list:
1181                        if processLineFunction is not None:
1182                            line = processLineFunction(line)
1183
1184                if original_line != line:
1185                    logging.debug("Rewrote the line" + ":" + f"|{original_line:s}|" + "into" + ":" + f"|{line:s}| for file" + ":" + f"{in_file_name:s}")
1186                    changed = True
1187
1188                fout.write(line)
1189
1190                line = fin.readline()
1191
1192            fin.close()
1193            fout.close()
1194        except IOError as detail:
1195            logging.error(f"File I/O error during reading/writing file {in_file_name:s} in process_lines_of_file: {str(detail):s}  Aborting...")
1196            raise UpdateWebException("Internal error for processing a file.  See the log file for details.  Aborting... ") from detail
1197
1198        if changed:
1199            logging.debug(f"process_lines_of_file():  \tRewrote original file {in_file_name:s}."
1200                          f"Changes are in temporary copy {out_file_name:s}")
1201
1202        # Return True if any lines were changed.
1203        return changed
1204
1205    def clean(self):
1206        """Scan through all directories and files in the local on disk website and clean them up."""
1207
1208        num_source_files_changed = 0
1209        num_source_files_syntax_highlighted = 0
1210
1211        logging.debug("Cleaning up the local web page.")
1212
1213        if self.directories is None or self.files is None:
1214            logging.error("Web site has no directories or files.  Aborting...")
1215            raise UpdateWebException("Internal error for cleaning up the local web site.  See the log file for details.  Aborting... ")
1216
1217        for d in self.directories:
1218
1219            if self.is_temp_dir(d):
1220                # Add the full path prefix from the root.
1221                name = self.append_root_dir(self.get_root_dir(), d)
1222                try:
1223                    logging.debug(f"Removing temp dir {self.root_dir:s} recursively")
1224                    shutil.rmtree(name)
1225                except OSError as detail:
1226                    logging.error(f"Cannot remove temp dir {name:s}: {str(detail):s}")
1227
1228        for f in self.files:
1229            # Add the full path prefix from the root.
1230            full_file_name = self.append_root_dir(
1231                self.get_root_dir(), f[self.user_settings.FILE_NAME])
1232
1233            # Remove all temporary files.
1234            if self.is_temp_file(f):
1235                try:
1236                    logging.debug(f"Removing temp file {full_file_name:s}")
1237                    os.remove(full_file_name)
1238                except OSError as detail:
1239                    logging.error(f"Cannot remove temp dir {full_file_name:s}: {str(detail):s}")
1240
1241            # Update source code files.
1242            if self.is_source_or_hypertext_file(f):
1243                changed = self.rewrite_source_file(full_file_name)
1244                if changed:
1245                    num_source_files_changed += 1
1246                    logging.debug(f"Rewrote source code file {self.root_dir:s}")
1247
1248            # Generate a  syntax highlighted code listing.  
1249            # Make it the same time and date as the original code.  Then, only if there are recent changes, we will update the remote server.
1250            if self.is_file_to_syntax_highlight(f):
1251                # full_file_name_highlighted = self.create_syntax_highlighted_code_listing(full_file_name, dry_run=True)
1252                full_file_name_highlighted = self.create_syntax_highlighted_code_listing(full_file_name)
1253                if full_file_name_highlighted is not None:
1254                    logging.debug(f"Generated a syntax highlighted source listing file {full_file_name_highlighted:s} for the file {full_file_name:s}")
1255                else:
1256                    logging.debug(f"Failed to generate a syntax highlighted source listing file for {full_file_name:s}")
1257                num_source_files_syntax_highlighted += 1
1258
1259        logging.debug(f"Number of source files rewritten = {num_source_files_changed:10d}")
1260        logging.debug(f"Number of source files syntax highlighted = {num_source_files_syntax_highlighted:10d}")
1261
1262    def is_temp_file(self, file_info):
1263        """Identify a file name as a temporary file"""
1264
1265        file_name = file_info[self.user_settings.FILE_NAME]
1266
1267        # Suffixes and names for temporary files be deleted.
1268        pat = self.user_settings.TEMP_FILE_SUFFIXES
1269        match = pat.search(file_name)
1270        # Remove any files containing twiddles anywhere in the name.
1271        if match or file_name.find(self.user_settings.VIM_TEMP_FILE_EXT) >= 0:
1272            return True
1273
1274        return False
1275
1276    def is_temp_dir(self, dir_name):
1277        """Identify a name as a temporary directory."""
1278
1279        p = self.user_settings.TEMP_DIR_SUFFIX
1280        return p.search(dir_name)
1281
1282    def is_source_or_hypertext_file(self, file_info):
1283        """ Check if the file name is a source file or a hypertext file."""
1284
1285        file_name = file_info[self.user_settings.FILE_NAME]
1286        p1 = self.user_settings.SOURCE_FILE_PATTERN
1287        p2 = self.user_settings.HYPERTEXT_FILE_PATTERN
1288        if p1.search(file_name) or p2.search(file_name):
1289            return True
1290        else:
1291            return False
1292
1293    def is_file_to_syntax_highlight(self, file_info):
1294        """Check if this file type should have a syntax highlighted source listing."""
1295
1296        # Take apart the file name.
1297        full_file_name = file_info[self.user_settings.FILE_NAME]
1298        file_name = Path(full_file_name).name
1299
1300        p = self.user_settings.FILE_TO_HIGHLIGHT_PATTERN
1301        if p.search(file_name):
1302            return True
1303        else:
1304            return False
1305
1306    def rewrite_substring(self, line):
1307        """Rewrite a line containing a pattern of your choice"""
1308
1309        # Start with the original unchanged line.
1310        rewritten_line = line
1311
1312        # Do the replacements in order from first to last.
1313        for match_replace_tuple in self.user_settings.STRING_REPLACEMENT_LIST:
1314            # Get the next pattern match replacement string tuple.
1315            [pat, rep_string] = match_replace_tuple
1316            # Does it match?  Then do string substitution, else leave the line unchanged.
1317            match = pat.search(rewritten_line)
1318            if match:
1319                # Now we have these cases:
1320                #     -No capture variables at all, but just a straightforward pattern match followed by a string substitution.
1321                #     -One or more capture variable names in the pattern (?P<varname> ... ) along with the same corresponding match group names in replacement string \\g<varname> ... 
1322                #      If pat.sub() finds any inconsistency here such as the capture variable names not matching the group names, it will throw an exception.
1323                try:
1324                    sub = pat.sub(rep_string, rewritten_line)
1325                    rewritten_line = sub
1326                except IndexError as detail:
1327                    logging.error(f"ERROR: {str(detail):s}.  Did not find a capture variable name in the pattern (?P<varname> ... ) along with its corresponding match group name in replacement string \\g<varname> in updateweb.yaml.    Did not rewrite the line |{rewritten_line:s}|")
1328 
1329        return rewritten_line
1330
1331    def rewrite_email_address_line(self, line):
1332        """Rewrite lines containing old email addresses."""
1333
1334        # Search for the old email address.
1335        pat = self.user_settings.OLD_EMAIL_ADDRESS
1336        match = pat.search(line)
1337
1338        # Replace the old address with my new email address.
1339        if match:
1340            new_address = self.user_settings.NEW_EMAIL_ADDRESS
1341            sub = pat.sub(new_address, line)
1342            line = sub
1343
1344        return line
1345
1346    def rewrite_version_line(self, line):
1347        """Rewrite lines containing the current version of software."""
1348
1349        # Search for the current version.
1350        pat = self.user_settings.CURRENT_SOFTWARE_VERSION
1351        match = pat.search(line)
1352
1353        # Replace with the new version.
1354        if match:
1355            # Note that since we are using raw strings leading and trailing
1356            # whitespace is ignored.
1357            new_version = self.user_settings.NEW_SOFTWARE_VERSION.lstrip().strip()
1358            sub = pat.sub(new_version, line)
1359            line = sub
1360
1361        return line
1362
1363    def rewrite_copyright_line(self, line):
1364        """Rewrite copyright lines if they are out of date."""
1365
1366        # Match the lines,
1367        #     Copyright (C) nnnn-mmmm by Sean Erik O'Connor.
1368        #     Copyright &copy; nnnn-mmmm by Sean Erik O'Connor.
1369        # and pull out the old year and save it.
1370        pat = self.user_settings.COPYRIGHT_LINE
1371        match = pat.search(line)
1372
1373        # Found a match.
1374        if match:
1375            old_year = int(match.group('old_year'))
1376
1377            # Replace the old year with the current year.
1378            # We matched and extracted the old copyright symbol into the variable
1379            # 'symbol' using the pattern syntax (?P<symbol> \(C\) | &copy;)
1380            # We now insert it back by placing the special syntax \g<symbol>
1381            # into the replacement string.
1382            if old_year < WebSite.get_current_year():
1383                new_copyright = r"Copyright \g<symbol> \g<old_year>-" + \
1384                                str(WebSite.get_current_year())
1385                sub = pat.sub(new_copyright, line)
1386                line = sub
1387        return line
1388
1389    def rewrite_last_update_line(self, line):
1390        """Rewrite the Last Updated line if the year is out of date."""
1391
1392        # Match the last updated line and pull out the year.
1393        #      last updated 01 Jan 24.
1394        p = self.user_settings.LAST_UPDATED_LINE
1395        m = p.search(line)
1396
1397        if m:
1398            last_update_year = int(m.group('year'))
1399
1400            # Convert to four digit years.
1401            if last_update_year > 90:
1402                last_update_year += 1900
1403            else:
1404                last_update_year += 2000
1405
1406            # If the year is old, rewrite to "01 Jan <current year>".
1407            if last_update_year < WebSite.get_current_year():
1408                two_digit_year = self.user_settings.TWO_DIGIT_YEAR_FORMAT % self.get_current_two_digit_year()
1409                sub = p.sub('last updated 01 Jan ' + two_digit_year, line)
1410                line = sub
1411
1412        return line
1413
1414    def rewrite_source_file(self, file_name):
1415        """Rewrite copyright lines, last updated lines, etc."""
1416        changed = False
1417
1418        # Create a new temporary file name for the rewritten file.
1419        temp_file_name = file_name + self.user_settings.TEMP_FILE_EXT
1420
1421        # Apply changes to all lines of the temporary file.  Apply change functions in
1422        # the sequence listed.
1423        if self.process_lines_of_file(file_name, temp_file_name,
1424                                      [self.rewrite_copyright_line,
1425                                       self.rewrite_last_update_line,
1426                                       self.rewrite_email_address_line,
1427                                       self.rewrite_substring,
1428                                       self.rewrite_version_line]):
1429            logging.debug(f"Changed (rewritten) source file {file_name:s}")
1430            changed = True
1431
1432        # Rename the temporary file to the original file name.  If no changes, just delete the temp file.
1433        self.clean_up_temp_file(temp_file_name, file_name, changed)
1434
1435        return changed
1436
1437    @staticmethod
1438    def create_syntax_highlighted_code_listing(full_file_name, **kwargs):
1439        """Create a syntax highlighted source listing for the file and return its name.  Return None if there is an error.
1440        Keep the same date/time as the original file."""
1441
1442        # kwargs is a dictionary for key, value in kwargs.items():
1443        # for key, value in kwargs.items():
1444        #    if key in kwargs:
1445        #        print( f"kwargs:" )
1446        #        print( f"  key   = |{key}|")
1447        #        print( f"  value = |{value}|" )
1448        dry_run_value = kwargs.get('dry_run') 
1449        dry_run = False
1450        if dry_run_value is not None and dry_run_value is True:
1451            dry_run = True
1452
1453        # Take apart the file name.
1454        file_name_without_extension = Path(full_file_name).stem
1455        file_extension = Path(full_file_name).suffix
1456
1457        # Append *.html to the source code file name.  This will be the syntax highlighted code listing.
1458        full_file_name_highlighted = f"{full_file_name}.html"
1459
1460        # First choose the language lexer from the file name itself if there's no extension.
1461        # Dotted file names are treated as the entire file name.
1462        match file_name_without_extension:
1463            case "makefile":
1464                lexer = MakefileLexer()
1465            case ".bash_profile"|".bashrc"|".bash_logout":
1466                lexer = BashLexer()
1467            case ".vimrc":
1468                lexer = VimLexer()
1469            case ".gitignore_global" | ".gitignore" | ".gitconfig":
1470                lexer = OutputLexer() # No formatting.
1471            case _:
1472                # Choose the language lexer from the file extension.  Web stuff first, then programming languages.
1473                match file_extension:
1474                    case ".html":
1475                        lexer = HtmlLexer()
1476                    case ".css":
1477                        lexer = CssLexer()
1478                    case ".js":
1479                        lexer = JavascriptLexer()
1480                    case ".sh":
1481                        lexer = BashLexer()
1482                    case ".py":
1483                        lexer = PythonLexer()
1484                    case ".c" | ".h":
1485                        lexer = CLexer()
1486                    case ".hpp" | ".cpp":
1487                        lexer = CppLexer()
1488                    case ".lsp":
1489                        lexer = CommonLispLexer()
1490                    case ".for" | ".FOR" | ".f":
1491                        lexer = FortranFixedLexer()  # Fixed format FORTRAN, not FORTRAN 90.
1492                    case ".txt" | ".dat":            # Generic data file;  no formatting.
1493                        lexer = OutputLexer()
1494                    case ".tex":
1495                        lexer = TexLexer()           # LaTeX, TeX, or related files.
1496                    case ".m":
1497                        lexer = MatlabLexer()
1498                    case ".yaml":
1499                        lexer = YamlLexer()
1500                    case _:
1501                        logging.error(f"Can't find a lexer for file {full_file_name}.  Cannot generate a syntax highlighted source listing.  Aborting...")
1502                        return None
1503
1504        # Read the source code file into a single string.
1505        try:
1506            with open(full_file_name, 'r') as fp:
1507                source_file_string = fp.read()
1508        except OSError as detail:
1509            logging.error(f"Cannot read the source code file {full_file_name:s} for syntax highlighting: {str(detail):s}  Aborting...")
1510
1511        # Top level Pygments function generates the HTML for the highlighted code.
1512        highlighted_html_source_file_string = highlight(source_file_string, lexer, HtmlFormatter(linenos="inline"))
1513
1514        # The style sheet is always the same for all languages.
1515        style_sheet = HtmlFormatter().get_style_defs('.highlight')
1516
1517        # Write out the syntax colored file.
1518        if dry_run:
1519            logging.debug(f"Dry run only:  don't generate the syntax highlighted file {full_file_name_highlighted:s}")
1520            return None
1521        else:
1522            try:
1523                # Write out the highlighted code listing in HTML with CSS style sheet attached.
1524                with open(full_file_name_highlighted, 'w') as fp:
1525                    fp.write(UserSettings.BASIC_HTML_BEGIN)
1526                    fp.write(style_sheet)
1527                    fp.write(UserSettings.BASIC_HTML_MIDDLE)
1528                    fp.write(highlighted_html_source_file_string)
1529                    fp.write(UserSettings.BASIC_HTML_END)
1530            except OSError as detail:
1531                logging.error(f"Cannot write the syntax highlighted file {full_file_name_highlighted:s}: {str(detail):s}  Aborting...")
1532
1533            # Set the listing file to the same modification and access time and date as the source file.
1534            file_stat = os.stat(full_file_name)
1535            os.utime(full_file_name_highlighted, (file_stat[stat.ST_ATIME], file_stat[stat.ST_MTIME]))
1536
1537            # What is the listing file time now?
1538            file_epoch_time = os.path.getmtime(full_file_name_highlighted)
1539            file_time_utc = time.gmtime(file_epoch_time)[0: 6]
1540            d_list = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class;  year, month, day, hour, minute, seconds.
1541
1542            # Source file and listing should be the same time.
1543            file_epoch_time = os.path.getmtime(full_file_name)
1544            file_time_utc = time.gmtime(file_epoch_time)[0: 6]
1545            d_source = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class;  year, month, day, hour, minute, seconds.
1546            logging.debug(f"Generated a syntax highlighted listing {full_file_name_highlighted:s} with same time as source file {full_file_name:s}.")
1547            logging.debug(f"\tsource  file time {d_source.ctime():s}")
1548            logging.debug(f"\tlisting file time {d_list.ctime():s}")
1549            return full_file_name_highlighted
1550
1551# ----------------------------------------------------------------------------
1552#   Subclass which knows about the remote web site.
1553# ----------------------------------------------------------------------------
1554
class RemoteWebSite(WebSite):
    """Walk the remote web directory on a web server down from the root.
       Use FTP commands:
           https://en.wikipedia.org/wiki/List_of_FTP_commands
       Use the Python ftp library:
           https://docs.python.org/3/library/ftplib.html
    """

    def __init__(self, settings, server, user, password, ftproot):
        """Connect to the FTP server, log in, and list all files and directories.

        Args:
            settings:  User settings object holding patterns and constants.
            server:    FTP server host name.
            user:      FTP account user name.
            password:  FTP account password.
            ftproot:   Root directory of the web site on the FTP server.

        Raises:
            UpdateWebException:  If we cannot log in to the FTP server.
        """

        # Root directory of FTP server.
        self.root_dir = ftproot
        logging.debug(f"Requesting remote web site ftp root dir {self.root_dir:s}")

        # Connect to FTP server and log in.
        try:
            # self.ftp.set_debuglevel( 2 )
            self.ftp = ftplib.FTP(server)
            self.ftp.login(user, password)
        # Catch all exceptions with the parent class Exception:  all built-in,
        # non-system-exiting exceptions are derived from this class.
        except Exception as detail:
            # Extract the string message from the exception class with str().
            logging.error(f"Remote web site cannot login to ftp server: {str(detail):s}  Aborting...")
            raise UpdateWebException("Problem accessing remote web site.  See the log file for details.  Aborting... ") from detail
        else:
            logging.debug("Remote web site ftp login succeeded.")

        logging.debug(f"Remote web site ftp welcome message {self.ftp.getwelcome():s}")

        # Initialize the superclass, which performs the remote directory walk.
        WebSite.__init__(self, settings)

    def go_to_root_dir(self, root_dir):
        """Go to the root directory on the FTP server.

        Raises:
            UpdateWebException:  If the FTP cwd or pwd command fails.
        """

        try:
            # Go to the root directory.
            self.ftp.cwd(root_dir)
            logging.debug(f"ftp root directory (requested) = {self.root_dir:s}")

            # Read it back from the server to get the canonical form.
            self.root_dir = self.ftp.pwd()
            logging.debug(f"ftp root directory (read back from server): {self.root_dir:s}")

        except Exception as detail:
            logging.error(f"go_to_root_dir(): \tCannot ftp cwd or pwd root dir {root_dir:s} {str(detail):s} Aborting...")
            raise UpdateWebException("Problem accessing remote web site.  See the log file for details.  Aborting... ") from detail

    def get_root_dir(self):
        """Get the root directory name on the FTP server."""

        return self.root_dir

    def finish(self):
        """Quit the FTP session to the remote web site.  Best effort:  a failure is logged, not raised."""
        logging.debug(f"Finished with WebSite object of class {type(self)}")
        try:
            self.ftp.quit()
        except Exception as detail:
            logging.error(f"Cannot ftp quit: {str(detail):s}")

    def one_level_down(self, d):
        """List files and directories in a subdirectory using ftp.

        Args:
            d:  Remote directory, full path from the FTP root.

        Returns:
            (directories, files):  Sorted list of full-path subdirectory names and
            sorted list of parsed file information lists.

        Raises:
            UpdateWebException:  If the FTP cwd or LIST command fails.
        """

        directories = []
        files = []

        try:
            # ftp listing from current dir.
            logging.debug(f"RemoteWebSite.one_level_down():  \tftp cwd: {d:s}")
            self.ftp.cwd(d)
            dir_list = []

            # Use the nonstandard -a option in LIST to show all the hidden .* files.
            # But now we have the problem that . and .. (the UNIX current and parent directories) will be in the ftp list of directories.
            # Note the second argument requires a callback function.
            self.ftp.retrlines('LIST -a', dir_list.append)

        except Exception as detail:
            logging.error(f"one_level_down(): \tCannot ftp cwd or ftp LIST dir {d:s}:  {str(detail):s} Aborting...")
            raise UpdateWebException("Problem accessing remote web site.  See the log file for details.  Aborting... ") from detail

        for line in dir_list:
            logging.debug(f"RemoteWebSite.one_level_down():  \tftp LIST: {line:s}")

            # Line should at least have the minimum FTP information, else skip it.
            if len(line) < self.user_settings.MIN_FTP_LINE_LENGTH:
                logging.error(f"RemoteWebSite.one_level_down():  \tFTP LIST line is too short:  {line:s}")
                continue

            # Parse the FTP LIST and put the pieces into file_info.
            file_info = self.parse_ftp_list(line)
            logging.debug(f"RemoteWebSite.one_level_down():  \tftp parsed file information: {file_info[self.user_settings.FILE_NAME]:s}")

            # Skip over the UNIX hidden files for current and parent directories . and ..  Also skip over any NULL file names.
            if file_info[self.user_settings.FILE_NAME] in ("", ".", ".."):
                logging.debug(f"RemoteWebSite.one_level_down():  \tftp skipping the file name: {file_info[self.user_settings.FILE_NAME]:s}")
            # For a directory, prefix the full path prefix from the root to the directory name and add to the directory list.
            elif file_info[self.user_settings.FILE_TYPE] == FileType.DIRECTORY:
                dirname = self.append_root_dir(d, file_info[self.user_settings.FILE_NAME])
                logging.debug(f"RemoteWebSite.one_level_down():  \tftp dir (full path): {dirname:s}")
                directories.append(dirname)
            # For a file:  Add the full path prefix from the root to the file name.
            else:
                file_info[self.user_settings.FILE_NAME] = self.append_root_dir(d, file_info[self.user_settings.FILE_NAME])
                # Bug fix:  this log message was previously split across source lines with a
                # backslash continuation inside the f-string, embedding spurious whitespace.
                logging.debug(f"RemoteWebSite.one_level_down():  \tftp file (full path): {file_info[self.user_settings.FILE_NAME]:s}")
                files.append(file_info)

        directories.sort()
        files.sort()

        return directories, files

    def modtime(self, f):
        """Get the modification time of a file via ftp.  Return 0 if ftp cannot get it.

        Args:
            f:  Remote file name.

        Returns:
            The time stamp string YYYYMMDDhhmmss from the MDTM reply on success,
            or the integer 0 on failure.
        """
        modtime = 0

        try:
            response = self.ftp.sendcmd('MDTM ' + f)
            # MDTM returns the last modified time of the file in the format
            # "213 YYYYMMDDhhmmss \r\n <error-response>
            # MM is 01 to 12, DD is 01 to 31, hh is 00 to 23, mm is 00 to 59, ss is 0 to 59.
            # error-response is 550 for info not available, and 500 or 501 if command cannot
            # be parsed.
            if response[:3] == '213':
                modtime = response[4:]
        except ftplib.error_perm as detail:
            logging.error(f"Cannot get file modification time from the ftp server: {str(detail):s} Aborting...")
            modtime = 0

        return modtime

    def parse_ftp_list(self, line):
        """Parse the ftp file listing and return file name, datetime and file size.

           An FTP LIST command will give output which looks like this for a file:

               -rw-r--r--    1 1000       free             4084 Jul 18 16:55 sparkCoil.png

           and for a directory:

                drwxr-xr-x    2 1000       free             4096 Jul 18 16:36 ReadingList

           FTP uses UTC for its listings; the conversion to local time is done by the OS.
           We can have problems on New Year's Eve.  For example, the local file date/time is

              Mon Jan  1 06:23:12 2018

           But the remote file date/time from FTP listing doesn't show a year even though we
           know it was written to the server in 2017.

               Mon Dec 31 03:02:00

           So we default the remote file year to current year 2018 and get

               Mon Dec 31 03:02:00 2018

           Now we think that the remote file is newer by 363.860278 days.
        """

        # Find out if we've a directory or a file.
        if line[0] == 'd':
            dir_or_file = FileType.DIRECTORY
        else:
            dir_or_file = FileType.FILE

        pattern = self.user_settings.FTP_LISTING

        # Sensible defaults.
        filesize = 0
        filename = ""
        # Default the time to midnight.
        hour = 0
        minute = 0
        seconds = 0
        # Default the date to Jan 1 of the current year.
        month = 1
        day = 1
        year = WebSite.get_current_year()

        # Extract time and date from the ftp listing.
        match = pattern.search(line)

        if match:
            filesize = int(match.group('bytes'))
            month = self.user_settings.monthToNumber[match.group('mon')]
            day = int(match.group('day'))

            # Remote file listing contains the year.  The FTP listing will omit the hour and minute.
            if match.group('year'):
                year = int(match.group('year'))
                logging.debug(f"ftp has year = {year} but is probably missing hour and minute")
            else:
                # Remote file listing omits the year.  Default the year to the current UTC time year.
                # That may be incorrect (see comments above).
                year = WebSite.get_current_year()
                logging.debug(f"ftp is missing the year;  use the current year = {year}")

            # If the FTP listing has the hour and minute, it will omit the year.
            if match.group('hour') and match.group('min'):
                hour = int(match.group('hour'))
                minute = int(match.group('min'))
                logging.debug(f"ftp has hour = {hour} and minute = {minute} so is probably missing the year")

            filename = match.group('filename')

        # Package up the time and date nicely.
        # Note if we didn't get any matches, we'll default the remote date and
        # time to Jan 1 midnight of the current year.
        d = datetime.datetime(year, month, day, hour, minute, seconds)

        return [filename, dir_or_file, d, filesize]
1770
1771# ----------------------------------------------------------------------------
1772#  Class for synchronizing local and remote web sites.
1773# ----------------------------------------------------------------------------
1774
1775class UpdateWeb(object):
1776    """Given previously scanned local and remote directories, update the remote website."""
1777
    def __init__(
            self,
            settings,
            server,
            user,
            password,
            ftproot,
            file_size_limit,
            local_directory_list,
            local_file_info,
            remote_directory_list,
            remote_file_info):
        """Connect to remote site.  Accept previously scanned local and remote files and directories.

        Args:
            settings:               User settings object holding constants and patterns.
            server:                 FTP server host name.
            user:                   FTP account user name.
            password:               FTP account password.
            ftproot:                Root directory of the web site on the FTP server.
            file_size_limit:        Upload size limit in KB (converted to bytes below).
            local_directory_list:   Directories found by the local web site walk.
            local_file_info:        File information lists from the local web site walk.
            remote_directory_list:  Directories found by the remote web site walk.
            remote_file_info:       File information lists from the remote web site walk.

        Raises:
            UpdateWebException:  If we cannot log in to the FTP server.
        """

        self.user_settings = settings

        # Results computed later by file_info() and changes().
        self.local_files_list = []
        self.remote_files_list = []
        self.local_file_to_size = {}
        self.local_file_to_date_time = {}
        self.remote_file_to_date_time = {}
        self.local_only_dirs = []
        self.local_only_files = []
        self.remote_only_dirs = []
        self.remote_only_files = []
        self.common_files = []

        # Connect to FTP server and log in.
        try:
            self.ftp = ftplib.FTP(server)
            self.ftp.login(user, password)
        except Exception as detail:
            logging.error(f"Cannot login to ftp server: {str(detail):s} Aborting...")
            raise UpdateWebException("Problem accessing remote web site.  See the log file for details.  Aborting... ") from detail
        else:
            logging.debug("ftp login succeeded.")

        logging.debug(f"ftp server welcome message:  {self.ftp.getwelcome():s}")

        # Local root directory.
        self.local_root_dir = self.user_settings.local_root_dir
        logging.debug(f"Local root directory: {self.local_root_dir:s}")

        # Root directory of FTP server.
        self.ftp_root_dir = ftproot
        logging.debug(f"ftp root directory (requested) = {self.ftp_root_dir:s}")

        # Transform the KB string limit to integer bytes.  e.g. "200" KB => 204800 bytes.
        self.file_size_limit = int(file_size_limit) * 1024

        try:
            # Go to the root directory.
            self.ftp.cwd(self.ftp_root_dir)

            # Read it back from the server to get the canonical form.
            self.ftp_root_dir = self.ftp.pwd()
            logging.debug(f"ftp root directory (read back from server): {self.ftp_root_dir:s}")
        except Exception as detail:
            # NOTE(review):  unlike the login failure above, a cwd/pwd failure here is
            # logged but not re-raised — confirm this best-effort behavior is intentional.
            logging.error(f"UpdateWeb(): \tCannot ftp cwd or ftp LIST dir {self.ftp_root_dir:s} {str(detail):s} Aborting...")

        # Previously scanned directory and file information from both sites.
        self.local_directory_list = local_directory_list
        self.remote_directory_list = remote_directory_list
        self.local_file_info = local_file_info
        self.remote_file_info = remote_file_info
1842
1843    def append_root_dir(self, root_dir, name):
1844        """Append the root directory to a path"""
1845
1846        # e.g. root = /, and name = Art/foo.txt yields /Art/foo.txt
1847        # but root = /Sean, and name = Art/foo.txt yields /Sean/Art/foo.txt
1848        if root_dir == self.user_settings.DEFAULT_ROOT_DIR:
1849            return root_dir + name
1850        else:
1851            return root_dir + "/" + name
1852
1853    def file_info(self):
1854        """Create lists of file names from the file information.  Also create dictionaries which map file names onto
1855        dates, times, and sizes."""
1856
1857        # Extract file names.
1858        self.local_files_list = [
1859            file_info[self.user_settings.FILE_NAME] for file_info in self.local_file_info]
1860        self.remote_files_list = [
1861            file_info[self.user_settings.FILE_NAME] for file_info in self.remote_file_info]
1862
1863        # Use a dictionary comprehension to create key/value pairs, 
1864        #     (file name, file date/time)
1865        # which map file names onto date/time.
1866        self.local_file_to_date_time = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_DATE_TIME] for file_info in self.local_file_info}
1867        self.remote_file_to_date_time = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_DATE_TIME] for file_info in self.remote_file_info}
1868
1869        # Dictionary comprehension creates a mapping of local file names onto file sizes.
1870        self.local_file_to_size = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_SIZE] for file_info in self.local_file_info}
1871
1872    def update(self):
1873        """Scan through the local website, cleaning it up.
1874        Go to remote website on my servers and synchronize all files."""
1875
1876        self.file_info()
1877
1878        # Which files and directories are different.
1879        self.changes()
1880
1881        # Synchronize with the local web site.
1882        self.synchronize()
1883
1884    def changes(self):
1885        """Find the set of different directories and files on local and remote."""
1886
1887        # Add all directories which are only on local to the dictionary.
1888        dir_to_type = {
1889            d: FileType.ON_LOCAL_ONLY for d in self.local_directory_list}
1890
1891        # Scan through all remote directories, adding those only on remote or
1892        # on both.
1893        for d in self.remote_directory_list:
1894            if d in dir_to_type:
1895                dir_to_type[d] = FileType.ON_BOTH_LOCAL_AND_REMOTE
1896            else:
1897                dir_to_type[d] = FileType.ON_REMOTE_ONLY
1898
1899        # Add all files which are only on local to the dictionary.
1900        file_to_type = {
1901            f: FileType.ON_LOCAL_ONLY for f in self.local_files_list}
1902
1903        # Scan through all remote files, adding those only on remote or on
1904        # both.
1905        for f in self.remote_files_list:
1906            if f in file_to_type:
1907                file_to_type[f] = FileType.ON_BOTH_LOCAL_AND_REMOTE
1908            else:
1909                file_to_type[f] = FileType.ON_REMOTE_ONLY
1910
1911        logging.debug("Raw dictionary dump of directories")
1912        for k, v in dir_to_type.items():
1913            logging.debug(f"\t dir:  {str(k):s}  type: {str(v):s}")
1914
1915        logging.debug("Raw dictionary dump of files")
1916        for k, v in file_to_type.items():
1917            logging.debug(f"\t file: {str(k):s}  type: {str(v):s}")
1918
1919        # List of directories only on local.  Keep the ordering.
1920        self.local_only_dirs = [
1921            d for d in self.local_directory_list if dir_to_type[d] == FileType.ON_LOCAL_ONLY]
1922
1923        # List of directories only on remote.  Keep the ordering.
1924        self.remote_only_dirs = [
1925            d for d in self.remote_directory_list if dir_to_type[d] == FileType.ON_REMOTE_ONLY]
1926
1927        # We don't care about common directories, only their changed files, if
1928        # any.
1929
1930        # List of files only on local.  Keep the ordering.
1931        self.local_only_files = [
1932            f for f in self.local_files_list if file_to_type[f] == FileType.ON_LOCAL_ONLY]
1933
1934        # List of files only on remote.  Keep the ordering.
1935        self.remote_only_files = [
1936            f for f in self.remote_files_list if file_to_type[f] == FileType.ON_REMOTE_ONLY]
1937
1938        # List of common files on both local and remote.  Keep the ordering.
1939        self.common_files = [
1940            f for f in self.local_files_list if file_to_type[f] == FileType.ON_BOTH_LOCAL_AND_REMOTE]
1941
1942        logging.debug("*** Directories only on local ******************************")
1943        for d in self.local_only_dirs:
1944            logging.debug(f"\t {d:s}")
1945
1946        logging.debug("*** Directories only on remote ******************************")
1947        for d in self.remote_only_dirs:
1948            logging.debug(f"\t {d:s}")
1949
1950        logging.debug("*** Files only on local ******************************")
1951        for f in self.local_only_files:
1952            logging.debug(f"\t {f:s}")
1953
1954        logging.debug("*** Files only on remote ******************************")
1955        for f in self.remote_only_files:
1956            logging.debug(f"\t {f:s}")
1957
1958        logging.debug("*** Common files ******************************")
1959        for f in self.common_files:
1960            logging.debug(f"name {f:s}")
1961            logging.debug(f"\tlocal time {self.local_file_to_date_time[f].ctime():s}")
1962            logging.debug(f"\tremote time {self.remote_file_to_date_time[f].ctime():s}")
1963
1964    def synchronize(self):
1965        """Synchronize files and subdirectories in the remote directory with the local directory."""
1966
1967        # If we have the same files in local and remote, compare their times
1968        # and dates.
1969        for f in self.common_files:
1970            local_file_time = self.local_file_to_date_time[f]
1971            remote_file_time = self.remote_file_to_date_time[f]
1972
1973            # What's the time difference?
1974            time_delta = remote_file_time - local_file_time
1975            # How much difference, either earlier or later?
1976            seconds_different = abs(time_delta.total_seconds())
1977            minutes_different = seconds_different / 60.0
1978            hours_different = minutes_different / 60.0
1979            days_different = hours_different / 24.0
1980
1981            # Assume no upload initially.
1982            upload_to_host = False
1983
1984            logging.debug(f"Common file:  {f:s}.")
1985
1986            # Remote file time is newer.
1987            # Allow 200 characters
1988            # Mathematics/AbstractAlgebra/PrimitivePolynomials/Project/Build/PrimpolyXCode/Primpoly/Primpoly.xcodeproj/project.xcworkspace/xcuserdata/seanoconnor.xcuserdatad/UserInterfaceState.xcuserstate
1989
1990            if remote_file_time > local_file_time:
1991                # Remote file time is MUCH newer:  suspect time is out of joint on the server, so upload local local file to be safe.
1992                if minutes_different >= self.user_settings.MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD:
1993                    logging.error(f"Remote file {f:s} is MUCH newer[more than {self.user_settings.MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD} minutes] by {seconds_different:8.1f} seconds = {minutes_different:8.1f} minutes = {hours_different:8.1f} hours = {days_different:8.1f} days.  Upload the file to be safe.")
1994                    logging.error(f"\tlocal time {local_file_time.ctime():s}")
1995                    logging.error(f"\tremote time {remote_file_time.ctime():s}")
1996
1997                    # Set the local file to the current time.
1998                    full_file_name = self.append_root_dir(
1999                        self.local_root_dir, f)
2000                    if os.path.exists(full_file_name):
2001                        # Change the access and modify times of the file to the current time.
2002                        os.utime(full_file_name, None)
2003                        logging.error(f"Touching local file {full_file_name:s} to make it the current time")
2004
2005                    upload_to_host = True
2006                # Remote file time is newer, but not by much.  Let's just assume a slight time mismatch on the server.  Don't upload.
2007                else:
2008                    logging.error(f"Remote file {f:s} is only SLIGHTLY newer by {seconds_different:8.1f} seconds = {minutes_different:8.1f} minutes = {hours_different:8.1f} hours = {days_different:8.1f} days.  Probably just inaccurate time/date on the server.  Wait -- don't upload the file yet.")
2009                    logging.error(f"\tlocal time {local_file_time.ctime():s}")
2010                    logging.error(f"\tremote time {remote_file_time.ctime():s}")
2011                    upload_to_host = False
2012
2013            # Local file time is newer.
2014            elif local_file_time > remote_file_time:
2015                # Local file time slightly newer than the remote file.  So we are pretty sure the local file really got changed vs the server file.
2016                if days_different >= self.user_settings.DAYS_NEWER_FOR_LOCAL_BEFORE_UPLOAD:
2017                    logging.warning(f"Local file {f:20s} is SLIGHTLY newer [more than {self.user_settings.DAYS_NEWER_FOR_LOCAL_BEFORE_UPLOAD} days] by {seconds_different:8.1f} seconds = {minutes_different:8.1f} minutes = {hours_different:8.1f} hours = {days_different:8.1f} days.  Preparing for upload.")
2018                    logging.error(f"\tlocal time {local_file_time.ctime():s}")
2019                    logging.error(f"\tremote time {remote_file_time.ctime():s}")
2020                    upload_to_host = True
2021                else:
2022                    logging.debug(f"Local file {f:20s} is BARELY newer by {seconds_different:8.1f} seconds = {minutes_different:8.1f} minutes = {hours_different:8.1f} hours = {days_different:8.1f} days.  Probably just inaccurate time/date on the server.  Wait -- don't upload the file yet.")
2023                    logging.error(f"\tlocal time {local_file_time.ctime():s}")
2024                    logging.error(f"\tremote time {remote_file_time.ctime():s}")
2025                    upload_to_host = False
2026
2027            # Cancel the upload if the file is too big for the server.
2028            size = self.local_file_to_size[f]
2029            if size >= self.file_size_limit:
2030                logging.error(f"upload():  Skipping upload of file {f:s} of size {size:d}; too large for server, limit is {self.file_size_limit:d} bytes")
2031                upload_to_host = False
2032
2033            # Finally do the file upload.
2034            if upload_to_host:
2035                logging.debug(f"Uploading changed file {f:s}")
2036                # Suppress newline to keep the message to the console more compact.  Flush output buffer, so we can see the message right away.
2037                print(f"Uploading changed file {f:s}...  ", end='', flush=True)
2038                self.upload(f)
2039
2040        # Remote directory is not in local.  Delete it.
2041        for d in self.remote_only_dirs:
2042            logging.debug(f"Deleting remote only directory {d:s}")
2043            print(f"Deleting remote only directory {d:s}...  ", end='', flush=True)
2044            self.rmdir(d)
2045
2046        # Local directory missing on remote.  Create it.
2047        # Due to breadth first order scan, we'll create parent directories
2048        # before child directories.
2049        for d in self.local_only_dirs:
2050            logging.debug(f"Only on local.  Creating new remote dir {d:s}.")
2051            print(f"Creating new remote directory {d:s}...  ", end='', flush=True)
2052            self.mkdir(d)
2053
2054        # Local file missing on remote.  Upload it.
2055        for f in self.local_only_files:
2056            logging.debug(f"Local only file.  Uploading {f:s} to remote.")
2057
2058            #  But cancel the upload if the file is too big for the server.
2059            size = self.local_file_to_size[f]
2060            if size >= self.file_size_limit:
2061                logging.error(f"upload():  Skipping upload of file {f:s} of size {size:d};"
2062                              f" too large for server, limit is {self.file_size_limit:d} bytes")
2063            else:
2064                logging.debug(f"Uploading new file {f:s}")
2065                print(f"Uploading new file {f:s}...  ", end='', flush=True)
2066                self.upload(f)
2067
2068        # Remote contains a file not present on the local.  Delete the file.
2069        for f in self.remote_only_files:
2070            logging.debug(f"Remote only file.  Deleting remote file {f:s}.")
2071            print(f"Deleting remote file {f:s}...  ", end='', flush=True)
2072            self.del_remote(f)
2073
2074    def del_remote(self, relative_file_path):
2075        """Delete a file using ftp."""
2076
2077        logging.debug(f"del_remote():  \trelative file path name: {relative_file_path:s}")
2078
2079        # Parse the relative file path into file name and relative directory.
2080        relative_dir, file_name = os.path.split(relative_file_path)
2081        logging.debug(f"del_remote():  \tfile name: {file_name:s}")
2082        logging.debug(f"del_remote():  \trelative dir: {relative_dir:s}")
2083        logging.debug(f"del_remote():  \tremote root dir: {self.ftp_root_dir:s}")
2084
2085        try:
2086            # Add the remote root path and go to the remote directory.
2087            remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
2088            logging.debug(f"del_remote():  \tftp cd remote dir: {remote_dir:s}")
2089            self.ftp.cwd(remote_dir)
2090        except Exception as detail:
2091            logging.error(f"del_remote():  \tCannot ftp chdir: {str(detail):s}  Skipping...")
2092        else:
2093            try:
2094                logging.debug(f"del_remote():  \tftp rm: {file_name:s}")
2095
2096                # Don't remove zero length file names.
2097                if len(file_name) > 0:
2098                    self.ftp.delete(file_name)
2099                else:
2100                    logging.warning(
2101                        "fdel_remote():  skipping ftp delete;  file NAME {file_name:s} had zero length")
2102            except Exception as detail:
2103                logging.error(f"del_remote():  \tCannot ftp rm: {str(detail):s}")
2104
2105    def mkdir(self, relative_dir):
2106        """Create new remote directory using ftp."""
2107
2108        logging.debug(f"mkdir():  \trelative dir path name: {relative_dir:s}")
2109        logging.debug(f"mkdir():  \tremote root dir: {self.ftp_root_dir:s}")
2110
2111        # Parse the relative dir path into prefix dir and suffix dir.
2112        path, d = os.path.split(relative_dir)
2113        logging.debug(f"mkdir():  \tremote prefix dir: {path:s}")
2114        logging.debug(f"mkdir():  \tremote dir:  {d:s}")
2115
2116        try:
2117            # Add the remote root path and go to the remote directory.
2118            remote_dir = self.append_root_dir(self.ftp_root_dir, path)
2119            logging.debug(f"mkdir():  \tftp cd remote dir: {remote_dir:s}")
2120            self.ftp.cwd(remote_dir)
2121        except Exception as detail:
2122            logging.error(f"mkdir():  \tCannot ftp chrdir: {str(detail):s}  Skipping...")
2123        else:
2124            try:
2125                logging.debug(f"mkdir():  \tftp mkd: {d:s}")
2126                self.ftp.mkd(d)
2127            except Exception as detail:
2128                logging.error(f"mkdir():  \tCannot ftp mkdir: {str(detail):s}")
2129
2130    def rmdir(self, relative_dir):
2131        """Delete an empty directory using ftp."""
2132
2133        logging.debug(f"rmdir():  \tintermediate dir path name: {relative_dir:s}")
2134        logging.debug(f"rmdir():  \tremote root dir: {self.ftp_root_dir:s}")
2135
2136        # Parse the relative dir path into prefix dir and suffix dir.
2137        path, d = os.path.split(relative_dir)
2138        logging.debug(f"rmdir():  \tremote prefix dir: {path:s}")
2139        logging.debug(f"rmdir():  \tremote dir:  {d:s}")
2140
2141        try:
2142            # Add the remote root path and go to the remote directory.
2143            remote_dir = self.append_root_dir(self.ftp_root_dir, path)
2144            logging.debug(f"rmdir():  \tftp cd remote dir: {remote_dir:s}")
2145            self.ftp.cwd(remote_dir)
2146        except Exception as detail:
2147            logging.error(f"rmdir():  \tCannot ftp chdir: {str(detail):s}  Skipping...")
2148        else:
2149            try:
2150                logging.debug(f"rmdir():  \tftp rmd: {d:s}")
2151                self.ftp.rmd(d)
2152            except Exception as detail:
2153                logging.error(f"rmdir():  \tCannot ftp rmdir dir {d:s}: {str(detail):s}.  Directory is probably not empty.  Do a manual delete.")
2154
2155    def download(self, relative_file_path):
2156        """Download a binary file using ftp."""
2157
2158        logging.debug(f"download():  \tfile name: {relative_file_path:s}")
2159
2160        # Parse the relative file path into file name and relative directory.
2161        relative_dir, file_name = os.path.split(relative_file_path)
2162        logging.debug(f"download():  \tfile name: {file_name:s}")
2163        logging.debug(f"download():  \trelative dir: {relative_dir:s}")
2164        logging.debug(f"download():  \troot dir: {self.ftp_root_dir:s}")
2165
2166        # Add the remote root path and go to the remote directory.
2167        remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
2168        logging.debug(f"download():  \tftp cd remote dir: {remote_dir:s}")
2169
2170        try:
2171            self.ftp.cwd(remote_dir)
2172        except Exception as detail:
2173            logging.error(f"download():  \tCannot ftp chdir: {str(detail):s}  Skipping...")
2174        else:
2175            # Add the local root path to get the local file name.
2176            # Open local binary file to write into.
2177            local_file_name = self.append_root_dir(
2178                self.local_root_dir, relative_file_path)
2179            logging.debug(f"download():  \topen local file name: {local_file_name:s}")
2180            try:
2181                f = open(local_file_name, "wb")
2182                try:
2183                    # Calls f.write() on each block of the binary file.
2184                    # ftp.retrbinary( "RETR " + file_name, f.write )
2185                    pass
2186                except Exception as detail:
2187                    logging.error(f"download():  \tCannot cannot ftp retrbinary: {str(detail):s}")
2188                f.close()
2189            except IOError as detail:
2190                logging.error(f"download():  \tCannot open local file {local_file_name:s} for reading:  {str(detail):s}")
2191
2192    def upload(self, relative_file_path):
2193        """Upload  a binary file using ftp."""
2194
2195        logging.debug(f"upload():  \trelative file path name: {relative_file_path:s}")
2196
2197        # Parse the relative file path into file name and relative directory.
2198        relative_dir, file_name = os.path.split(relative_file_path)
2199        logging.debug(f"upload():  \tfile name: {file_name:s}")
2200        logging.debug(f"upload():  \trelative dir: {relative_dir:s}")
2201        logging.debug(f"upload():  \tremote root dir: {self.ftp_root_dir:s}")
2202
2203        # Add the remote root path and go to the remote directory.
2204        remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
2205        logging.debug(f"upload():  \tftp cd remote dir: {remote_dir:s}")
2206
2207        try:
2208            self.ftp.cwd(remote_dir)
2209        except Exception as detail:
2210            logging.error(f"upload():  \tCannot ftp chdir: {str(detail):s}  Skipping...")
2211        else:
2212            # Add the local root path to get the local file name.
2213            # Open local binary file to read from.
2214            local_file_name = self.append_root_dir(
2215                self.local_root_dir, relative_file_path)
2216            logging.debug(f"upload():  \topen local file name: {local_file_name:s}")
2217
2218            try:
2219                f = open(local_file_name, "rb")
2220                try:
2221                    # f.read() is called on each block of the binary file until
2222                    # EOF.
2223                    logging.debug(f"upload():  \tftp STOR file {file_name:s}")
2224                    self.ftp.storbinary("STOR " + file_name, f)
2225                except Exception as detail:
2226                    logging.error(f"upload():  \tCannot ftp storbinary: {str(detail):s}")
2227                f.close()
2228            except IOError as detail:
2229                logging.error(f"upload():  \tCannot open local file {local_file_name:s} for reading:  {str(detail):s}")
2230
2231    def finish(self):
2232        """Log out of an ftp session"""
2233        logging.debug(f"Finished with UpdateWeb object of class {type(self)}")
2234        try:
2235            self.ftp.quit()
2236        except Exception as detail:
2237            logging.error(f"Cannot ftp quit because {str(detail):s}")
2238
2239# ----------------------------------------------------------------------------
2240#  Main function
2241# ----------------------------------------------------------------------------
2242
2243def main(raw_args=None):
2244    """Main program.  Clean up and update my website."""
2245
2246    # Print the obligatory legal notice.
2247    print("""
2248    updateweb Version 7.1 - A Python utility program which maintains my web site.
2249    Copyright (C) 2007-2024 by Sean Erik O'Connor.  All Rights Reserved.
2250
2251    It deletes temporary files, rewrites old copyright lines and email address
2252    lines in source files, then synchronizes all changes to my web sites.
2253
2254    updateweb comes with ABSOLUTELY NO WARRANTY; for details see the
2255    GNU General Public License.  This is free software, and you are welcome
2256    to redistribute it under certain conditions; see the GNU General Public
2257    License for details.
2258    """)
2259
2260    # Put ALL the main code into a try block!
2261    try:
2262        # ---------------------------------------------------------------------
2263        #  Load default settings and start logging.
2264        # ---------------------------------------------------------------------
2265
2266        # Default user settings.
2267        user_settings = UserSettings()
2268
2269        print( f"Running main( {raw_args} ) Python version\
2270               {sys.version_info[0]:d}.{sys.version_info[1]:d}.{sys.version_info[2]:d}\
2271               local web directory\
2272               {user_settings.local_root_dir}\n")
2273        # Get command line options such as --verbose.  Pass them back as flags in
2274        # user_settings.
2275        CommandLineSettings(user_settings, raw_args)
2276
2277        # Load all unit test functions named test_* from UnitTest class, run the tests and exit.
2278        if user_settings.UNITTEST:
2279            suite = unittest.TestLoader().loadTestsFromTestCase(UnitTest)
2280            unittest.TextTestRunner(verbosity=2).run(suite)
2281            # We are done!
2282            print("  ...done!", flush=True)
2283            return
2284
2285        # Start logging to file.  Verbose turns on logging for
2286        # DEBUG, INFO, WARNING, ERROR, and CRITICAL levels,
2287        # otherwise we log only WARNING, ERROR, and CRITICAL levels.
2288        if user_settings.VERBOSE:
2289            loglevel = logging.DEBUG
2290        else:
2291            loglevel = logging.WARNING
2292
2293        # Pick the log file name on the host.
2294        if user_settings.CLEAN:
2295            user_settings.LOGFILENAME = "/private/logLocal.txt"
2296        else:
2297            user_settings.LOGFILENAME = "/private/logRemote.txt"
2298
2299        # Default is to skip processing or uploading MathJax files in /mathjax to the server.
2300        if not user_settings.MATHJAX:
2301            user_settings.DIR_TO_SKIP += "|mathjax"
2302        else:
2303            print(f"Processing and uploading mathjax files.  git restore any changed files and git clean -f to remove extra files...  ", end='', flush=True)
2304            print(f"Since you are loading MathJax for the first time --- Also don't forget to upload .htaccess manually.  If using FileZilla, change your FreeServer settings: Files->Site Manager->Transfer Settings->Limit number of simultaneous connections->Check the box.  This avoids ERROR 421 Too many connections...  ", end='', flush=True)
2305            logging.debug(f"Processing and uploading mathjax files.  git restore any changed files and git clean -f to remove extra files.")
2306            logging.debug(f"Since you are loading MathJax for the first time --- Also don't forget to upload .htaccess manually.  If using FileZilla, change your FreeServer settings: Files->Site Manager->Transfer Settings->Limit number of simultaneous connections->Check the box.  This avoids ERROR 421 Too many connections...")
2307
2308        logging.basicConfig(
2309            level=loglevel,
2310            format='%(asctime)s %(levelname)-8s %(message)s',
2311            datefmt='%a, %d %b %Y %H:%M:%S',
2312            filename=user_settings.local_root_dir + user_settings.LOGFILENAME,
2313            filemode='w')
2314
2315        logging.debug("********** Begin logging") 
2316
2317        # ---------------------------------------------------------------------
2318        #  Scan the local website, finding out all files and directories.
2319        # ---------------------------------------------------------------------
2320
2321        # Suppress newline to keep the message to the console more compact.  Flush output buffer, so we can see the message right away.
2322        print(f"Scanning the local web site from the root dir = {user_settings.local_root_dir}...  ", end='', flush=True)
2323        logging.debug(f"========================== Scanning the local web site from the root dir = {user_settings.local_root_dir}")
2324
2325        local = LocalWebSite(user_settings)
2326        local.scan()
2327
2328        # ---------------------------------------------------------------------
2329        # Clean up local website.
2330        # ---------------------------------------------------------------------
2331
2332        # Clean up the directory by rewriting source code and hypertext and removing temporary files.
2333        print("Cleaning local web site...  ", end='', flush=True)
2334        logging.debug("========================== Cleaning the local web site")
2335        local.clean()
2336
2337        # We are done with the first scan of the local web site and will dispose of it.
2338        local.finish()
2339        del local
2340
2341        # ---------------------------------------------------------------------
2342        #  Rescan the local website since there will be changes to source
2343        #  files from the clean up stage.
2344        # ---------------------------------------------------------------------
2345
2346        print(f"Rescan the local web site from root dir = {user_settings.local_root_dir}", end='', flush=True)
2347        logging.debug(f"========================== Re-Scan the local web site from root dir = {user_settings.local_root_dir}")
2348
2349        local = LocalWebSite(user_settings)
2350
2351        local.scan()
2352
2353        # ---------------------------------------------------------------------
2354        #  List all the local directories and files and their sizes.
2355        # ---------------------------------------------------------------------
2356
2357        # Local website directories.
2358        local_directory_list = local.directories
2359        logging.debug("********** List of all the Local Directories")
2360        for d in local_directory_list:
2361            logging.debug(f"\t {d:s}")
2362
2363        # Generate lists of the local website filenames only, and their sizes in bytes.
2364        local_files_name_size_pairs = [[file_info[user_settings.FILE_NAME], file_info[user_settings.FILE_SIZE]] for file_info in local.files]
2365        total_number_of_files = len( local_files_name_size_pairs )
2366        logging.debug(f"********** List of all the Local Files from largest to smallest.  There are {total_number_of_files:15d} files.")
2367        local_files_name_size_pairs = sorted(local_files_name_size_pairs, key=lambda name_size: name_size[1], reverse=True)
2368
2369        # Local website filenames only, and their dates and times.
2370        local_file_datetime_pairs = [[file_info[user_settings.FILE_NAME],file_info[user_settings.FILE_DATE_TIME]] for file_info in local.files]
2371        logging.debug(f"********** List of all Local Files Showing Their Date and Time")
2372        for file_datetime_pair in local_file_datetime_pairs:
2373            logging.debug(f"\t {file_datetime_pair[1].ctime():s} UTC {file_datetime_pair[0]:s}")
2374
2375        # Total number of bytes in the local files.
2376        total_number_of_bytes = 0
2377        for file_size_pair in local_files_name_size_pairs:
2378            logging.debug(f"\t {file_size_pair[1]:10d} bytes {file_size_pair[0]:s}")
2379            total_number_of_bytes += file_size_pair[1]
2380        logging.debug(f"********** Total local file size = {total_number_of_bytes:10d} bytes = {total_number_of_bytes/(1024 ** 2):10.2f} MB (not counting skipped files and directories)")
2381
2382        local.finish()
2383
2384        if user_settings.CLEAN:
2385            logging.debug("========================== Done with local file and directory cleanup...")
2386            del local
2387            print("...done!", flush=True)
2388            return
2389
2390        # ---------------------------------------------------------------------
2391        #  Scan the remote hosted web site.
2392        # ---------------------------------------------------------------------
2393
2394        print("Scanning remote web site...", end='', flush=True)
2395        logging.debug("========================== Scanning the remote web site...")
2396
2397        # Pick which website to update.
2398        logging.debug("Connecting to primary remote site.")
2399        remote = RemoteWebSite(user_settings,
2400                               user_settings.SERVER_NAME,
2401                               user_settings.USER_NAME,
2402                               user_settings.PASSWORD_NAME,
2403                               user_settings.FTP_ROOT_NAME)
2404        remote.scan()
2405        remote.finish()
2406
2407        # ---------------------------------------------------------------------
2408        #  List all the remote server directories and files and their sizes.
2409        # ---------------------------------------------------------------------
2410
2411        remote_directory_list = remote.directories
2412        logging.debug("********** Remote Directories")
2413        for d in remote_directory_list:
2414            logging.debug(f"\t {d:s}")
2415
2416        # Local website filenames only, and their sizes in bytes.
2417        remote_files_name_size_list = [[file_info[user_settings.FILE_NAME], file_info[user_settings.FILE_SIZE]] for file_info in remote.files]
2418        total_number_of_files = len( remote_files_name_size_list )
2419        logging.debug(f"********** Remote Files [num files = {total_number_of_files:15d}]")
2420        remote_files_name_size_list = sorted(remote_files_name_size_list, key=lambda name_size: name_size[1], reverse=True)
2421        total_number_of_bytes = 0
2422        for file_size in remote_files_name_size_list:
2423            logging.debug(f"\t {file_size[1]:10d} bytes {file_size[0]:s}")
2424            total_number_of_bytes += file_size[1]
2425        logging.debug(f"\tTotal file size on remote (not counting skipped files and directories) = {total_number_of_bytes:10d} bytes = {total_number_of_bytes/(1024 ** 2):10.2f} MB")
2426
2427        # ---------------------------------------------------------------------
2428        # Synchronize the local and remote web sites.
2429        # ---------------------------------------------------------------------
2430
2431        print("Synchronizing remote and local web sites...", end='', flush=True)
2432        logging.debug("========================= Synchronizing remote and local web sites...")
2433
2434        # Primary website.
2435        logging.debug("Connecting to primary remote site for synchronization.")
2436        sync = UpdateWeb(user_settings,
2437                         user_settings.SERVER_NAME,
2438                         user_settings.USER_NAME,
2439                         user_settings.PASSWORD_NAME,
2440                         user_settings.FTP_ROOT_NAME,
2441                         user_settings.FILE_SIZE_LIMIT_NAME,
2442                         local.directories,
2443                         local.files,
2444                         remote.directories,
2445                         remote.files)
2446
2447        sync.update()
2448        sync.finish()
2449
2450        del sync
2451        del remote
2452        del local
2453        print("...done!", flush=True)
2454
2455    except UpdateWebException as detail:
2456        logging.error(f"Couldn't update the web directory:  {str(detail):s}.  Aborting...")
2457
2458    except RecursionError as detail:
2459        logging.error(f"Walking the directory tree became too deep for Python's recursion stack depth of {sys.getrecursionlimit():d} You can increase it with sys.setrecursionlimit(limit) {str(detail):s}.  Aborting...")
2460
2461if __name__ == '__main__':
2462    """Python executes all code in this file.  Finally, we come here.  
2463
2464    * If we are executing this file as a standalone Python script, 
2465      the name of the current module is set to __main__ and thus we'll call the main() function.
2466
2467    * But if we are importing this code as a module, and calling it from another script, we will do this instead:
2468
2469        import updateweb
2470        updateweb.main(["--test"])"""
2471
2472    main()