1#!/usr/bin/env python3
   2# ============================================================================
   3#
   4# NAME
   5#
   6#     updateweb.py
   7#
   8# DESCRIPTION
   9#
  10#     Python script which updates my web sites.
  11#
  12#     It does miscellaneous cleanup on my local copy of the web site on disk,
  13#     including updating copyright information, then synchronizes the local
  14#     copy to my remote server web sites using FTP.
  15#
  16# USAGE
  17#
  18#     It's best to use the associated makefile.
  19#     But you can call this Python utility from the command line,
  20#
  21#     $ python updateweb.py          Clean up my local copy, then use it
  22#                                    to update my remote web server site.
  23#                                    Log warnings and errors.
  24#     $ python updateweb.py -v       Same, but log debug messages also.
  25#     $ python updateweb.py -c       Clean up my local copy only.
  26#     $ python updateweb.py -t       Run unit tests only.
  27#     $ python updateweb.py -m       Upload MathJax files (only need to do this once).
  28#
#     We get username and password information from the private YAML settings file (see SERVER_SETTINGS_FILE_NAME).
  30#
  31#     Logs are written to the files,
  32#
  33#         logLocal.txt       Local web site cleanup log.
  34#         logRemote.txt      Remote web server update log.
  35#
  36# AUTHOR
  37#
  38#     Sean E. O'Connor        23 Aug 2007  Version 1.0 released.
  39#
  40# LEGAL
  41#
  42#     updateweb.py Version 7.1 - A Python utility program which maintains my web site.
  43#     Copyright (C) 2007-2024 by Sean Erik O'Connor.  All Rights Reserved.
  44#
  45#     This program is free software: you can redistribute it and/or modify
  46#     it under the terms of the GNU General Public License as published by
  47#     the Free Software Foundation, either version 3 of the License, or
  48#     (at your option) any later version.
  49#
  50#     This program is distributed in the hope that it will be useful,
  51#     but WITHOUT ANY WARRANTY; without even the implied warranty of
  52#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  53#     GNU General Public License for more details.
  54#
  55#     You should have received a copy of the GNU General Public License
  56#     along with this program.  If not, see <http://www.gnu.org/licenses/>.
  57#
  58#     The author's address is seanerikoconnor!AT!gmail!DOT!com
  59#     with !DOT! replaced by . and the !AT! replaced by @
  60#
  61# NOTES
  62#
  63#    DOCUMENTATION
  64#
  65#    Python interpreter:               https://www.python.org/
  66#    Python tutorial and reference:    https://docs.python.org/lib/lib.html
  67#    Python debugger:                  https://docs.python.org/3/library/pdb.html
  68#    Python regular expression howto:  https://docs.python.org/3.7/howto/regex.html
  69#
  70# ============================================================================
  71
  72# ----------------------------------------------------------------------------
  73#  Load Python Packages
  74# ----------------------------------------------------------------------------
  75
  76# OS stuff
  77import sys
  78import os
  79import argparse
  80import shutil
  81from pathlib import Path
  82
  83# Regular expressions
  84import re
  85
  86# FTP stuff
  87import ftplib
  88
  89# Date and time
  90import time
  91import stat
  92import datetime
  93
  94# Logging
  95import logging
  96
  97# Unit testing
  98import unittest
  99
 100# Enumerated types (v3.4)
 101from enum import Enum
 102from typing import List, Any
 103
 104# YAML configuration files (a superset of JSON!)
 105import yaml 
 106# Recommended by https://www.cloudbees.com/blog/yaml-tutorial-everything-you-need-get-started
 107try:
 108    from yaml import CLoader as Loader
 109except ImportError:
 110    from yaml import Loader
 111
 112# Python syntax highlighter.  See https://pygments.org
 113from pygments import highlight
 114from pygments.lexers import HtmlLexer, CssLexer, JavascriptLexer, YamlLexer, MakefileLexer, BashLexer, VimLexer, TexLexer
 115from pygments.lexers import PythonLexer, CppLexer, CLexer, CommonLispLexer, FortranFixedLexer, MatlabLexer, OutputLexer
 116from pygments.formatters import HtmlFormatter
 117
 118
 119# ----------------------------------------------------------------------------
 120#  Custom Top Level Exceptions.
 121# ----------------------------------------------------------------------------
 122
 123class UpdateWebException(Exception):
 124    """Something went wrong at a deep level when searching local files, searching remote files, or trying to sync local and remote, and we could not recover.
 125       Derive from Exception as recommended by Python manual"""
 126    pass
 127
 128# ----------------------------------------------------------------------------
 129#  User settings.
 130# ----------------------------------------------------------------------------
 131
 132class TreeWalkSettings(Enum):
 133    """Enum types for how to walk the directory tree."""
 134    BREADTH_FIRST_SEARCH = 1
 135    DEPTH_FIRST_SEARCH = 2
 136
 137class FileType(Enum):
 138    """'Enum' types for properties of directories and files."""
 139    DIRECTORY = 0
 140    FILE = 1
 141    ON_LOCAL_ONLY = 2
 142    ON_REMOTE_ONLY = 3
 143    ON_BOTH_LOCAL_AND_REMOTE = 4
 144
 145class UserSettings:
 146    """Megatons of user selectable settings."""
 147    # Logging control.
 148    LOGFILENAME = ""
 149    VERBOSE = False  # Verbose mode.  Prints out everything.
 150    CLEAN = False  # Clean the local website only.
 151    UNITTEST = False  # Run a unit test of a function.
 152    MATHJAX = False  # Process and upload MathJax files to server.
 153
 154    # When diving into the MathJax directory, web walking the deep directories
 155    # may exceed Python's default recursion limit of 1000.
 156    RECURSION_DEPTH = 5000
 157    sys.setrecursionlimit(RECURSION_DEPTH)
 158
 159    # Fields in the file information (file_info) structure.
 160    # For example, file_info = 
 161    #   [ '/WebDesign/EquationImages/equation001.png',  -- The file name.
 162    #      1,                                           -- Enum type: Is it a file? dir? on local? on remote? on both?
 163    #      datetime.datetime(2010, 2, 3, 17, 15),       -- UTC encoded in a datetime class.
 164    #      4675]                                        -- File size in bytes.
 165    FILE_NAME = 0
 166    FILE_TYPE = 1
 167    FILE_DATE_TIME = 2
 168    FILE_SIZE = 3
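
    # For example, an illustrative sketch (not executed here) of indexing a file_info list with these constants:
    #
    #     file_info = ['/WebDesign/EquationImages/equation001.png', FileType.FILE, datetime.datetime(2010, 2, 3, 17, 15), 4675]
    #     file_info[UserSettings.FILE_NAME]       # '/WebDesign/EquationImages/equation001.png'
    #     file_info[UserSettings.FILE_DATE_TIME]  # datetime.datetime(2010, 2, 3, 17, 15)
    #     file_info[UserSettings.FILE_SIZE]       # 4675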
 169
 170    # Server settings.
 171    SERVER_SETTINGS_FILE_NAME = "/private/updateweb.yaml"
 172    SERVER_NAME = None
 173    USER_NAME = None
 174    PASSWORD_NAME = None
 175    FTP_ROOT_NAME = None
 176    FILE_SIZE_LIMIT_NAME = None
 177
 178    # Map month names onto numbers.
 179    monthToNumber = {
 180        'Jan': 1,
 181        'Feb': 2,
 182        'Mar': 3,
 183        'Apr': 4,
 184        'May': 5,
 185        'Jun': 6,
 186        'Jul': 7,
 187        'Aug': 8,
 188        'Sep': 9,
 189        'Oct': 10,
 190        'Nov': 11,
 191        'Dec': 12}
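
    # For example, an illustrative sketch (not executed here) of using this map on an ftp listing month:
    #
    #     month = UserSettings.monthToNumber['Sep']        # 9
    #     d = datetime.datetime(2006, month, 26, 17, 12)   # Combine with the other ftp listing fields.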
 192
 193    # List of directories to skip over when processing or uploading the web page.
    # Some are private, but most are directories of temporary files.
 195    # They will be listed as WARNING in the log.
 196    # Examples:
 197    #     My private admin settings directory.
 198    #     Git or SVN local admin directories.
    #     Compiled build directories from XCode.
 200    #     PyCharm build directories.
 201    #     Python cache directories.
 202    #     Jupyter checkpoint directories.
 203    #     XCode temporary file crap.
 204    DIR_TO_SKIP = "private|.git|.github|.svn|build|XCodeOutput|Debug|Release|PyCharm|.idea|__pycache__|.ipynb_checkpoints|ModuleCache.noindex|SymbolCache.noindex|Primpoly-[a-z]"
 205
 206    # List of files to skip when processing or uploading to the web page.
 207    # They will be listed as WARNING in the log.
 208    # Examples:
 209    #     MathJax yml file.
 210    #     .htaccess (because it doesn't show up on the output of ftp LIST, so we must upload manually)
 211    FILE_TO_SKIP = ".travis.yml|.svnignore|.htaccess"
 212
 213    # File extension for text files.
 214    TEXT_FILE_EXT = ".txt"
 215
 216    # Suffixes for temporary files which will be deleted during the cleanup
 217    # phase.
 218    TEMP_FILE_SUFFIXES = r"""        # Use Python raw strings.
 219        \.                           # Match the dot in the file name.
 220                                     # Now begin matching the file name suffix.
 221                                     # (?: non-capturing match for the regex inside the parentheses,
 222                                     #   i.e. matching string cannot be retrieved later.
 223                                     # Now match any of the following file extensions:
 224        (?: o   | obj | lib | exe |  #     Object files generated by C, C++, etc compilers
 225                              pyc |  #     Object file generated by the Python compiler
 226                  ilk | pdb | sup |  #     Temp files from VC++ compiler
 227            idb | ncb | opt | plg |  #     Temp files from VC++ compiler
 228            sbr | bsc | map | bce |  #     Temp files from VC++ compiler
 229            res | aps | dep | db  |  #     Temp files from VC++ compiler
 230                              jbf |  #     Paintshop Pro
 231                      class | jar |  #     Java compiler
 232                              log |  #     WS_FTP
 233                              fas |  #     CLISP compiler
 234                        swp | swo |  #     Vim editor
                              aux |  #     TeX auxiliary files.
 236          DS_Store  | _\.DS_Store |  #     macOS finder folder settings.
 237                       _\.Trashes |  #     macOS recycle bin
 238        gdb_history)                 #     GDB history
 239        $                            #     Now we should see only the end of line.
 240        """
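
    # A small usage sketch (assumes the pattern has already been compiled by precompile_regular_expressions below):
    #
    #     user_settings.TEMP_FILE_SUFFIXES.search("PrimpolyC.o")    # Matches:  compiler object file.
    #     user_settings.TEMP_FILE_SUFFIXES.search("module.pyc")     # Matches:  Python bytecode file.
    #     user_settings.TEMP_FILE_SUFFIXES.search("updateweb.py")   # No match:  .py source files are kept.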
 241
    # Special case:  Vim temporary files contain a tilde (~) anywhere in the name.
    VIM_TEMP_FILE_EXT = "~"
 245
 246    # Suffixes for temporary directories which should be deleted during the
 247    # cleanup phase.
 248    TEMP_DIR_SUFFIX = r"""           # Use Python raw strings.
 249        (?: Debug | Release |        # C++ compiler
 250           ipch   | \.vs    |        # Temp directories from VC++ compiler
 251        \.Trashes | \.Trash)         # macOS recycle bin
 252        $
 253        """
 254
 255    # File extension for an internally created temporary file.
 256    TEMP_FILE_EXT = ".new"
 257
 258    # Identify source file types.
 259    HYPERTEXT_FILE_PATTERN = r"""  # Use Python raw strings.
 260        (\.                        # Match the filename suffix after the .
 261            (?: html | htm |       # HTML hypertext
 262                css)               # CSS style sheet
 263        $)                         # End of line.
 264    """
 265
 266    SOURCE_FILE_PATTERN = r"""      # Use Python raw strings.
 267        (?: makefile$ |             # Any file called makefile is a source file.
 268                                    # Note the $ at the end so we don't reprocess .gitconfig.html -> .gitconfig.html.html
          \.vimrc$ |                # Vim script
          (\.bashrc$ |              # Bash configuration files.
           \.bash_profile$ |
           \.bash_logout$)
          |
          (\.gitignore$ |           # Git configuration files.
           \.gitignore_global$ |
           \.gitconfig$)
 277          |
 278          (\.                       # Match the filename suffix after the .
 279                                    # Now match any of these suffixes:
 280             (?: 
 281                  c | cpp | h | hpp |   #     C++ and C
 282                  js |                  #     Javascript
 283                  py |                  #     Python
 284                  lsp |                 #     LISP
 285                  m  |                  #     MATLAB
 286                  FOR | for | f |       #     FORTRAN
 287                  yaml |                #     YAML = JSON superset
 288                  tex |                 #     LaTeX
 289                  txt | dat |           #     Data files
 290                  sh)                   #     Bash
 291             $)                         # End of line.
 292         )
 293         """
 294
 295    # Special case of certain HTML and CSS files for which we want to generate a syntax highlighted source code listing.
 296    SPECIAL_FILE_TO_HIGHLIGHT_PATTERN = r"""
 297        (?: ^life\.html$          | # We want a listing of this particular HTML file.
 298            ^index\.html$         | # I want to list my top level HTML file.  (There is only one file with this name at the top level web directory.)
 299            ^webPageDesign\.html$ | # and also this HTML example file, but no others.
 300            ^StyleSheet\.css$ )     # I want to list my style sheet.
 301        """
 302
 303    # Files for which we want to generate a syntax highlighted source code listing.
 304    # Uses an f-string combined with a raw-string.
 305    FILE_TO_HIGHLIGHT_PATTERN = fr"""
 306        (?: {SPECIAL_FILE_TO_HIGHLIGHT_PATTERN} | 
 307            {SOURCE_FILE_PATTERN} )
 308        """
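
    # A small usage sketch (assumes the pattern has already been compiled by precompile_regular_expressions below);
    # the match is done against the bare file name, as in the unit tests:
    #
    #     user_settings.FILE_TO_HIGHLIGHT_PATTERN.search(Path("Computer/hello.lsp").name)   # Matches:  LISP source file.
    #     user_settings.FILE_TO_HIGHLIGHT_PATTERN.search(Path("Computer/life.html").name)   # Matches:  special-cased HTML file.
    #     user_settings.FILE_TO_HIGHLIGHT_PATTERN.search(Path("Images/photo.png").name)     # No match:  not a source file.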
 309
 310    # Update my email address.
    # This is tricky:  Prevent matching and updating the name within this
    # Python source file by using the character class brackets.
 313    OLD_EMAIL_ADDRESS = r"""
 314        artificer\!AT\!sean[e]rikoconnor\!DOT\!freeservers\!DOT\!com
 315        """
 316    NEW_EMAIL_ADDRESS = "seanerikoconnor!AT!gmail!DOT!com"
 317
 318    # List of patterns to match, match groups to pull out of the old string, new strings to generate from these two items.  
 319    # Read patterns and strings from the updateweb.yaml file.
 320    STRING_REPLACEMENT_LIST = []
 321    # Pairs of test strings and their correct match/replacements.
 322    STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST = []
 323
 324    # Change all old software version lines of the form
 325    #      Primpoly Version nnnn.nnnn
 326    # to the new software version.
    # Note:  the pattern is compiled in VERBOSE mode, so whitespace inside it is ignored,
    # and leading/trailing whitespace gets stripped from the replacement string before use.
 329    CURRENT_SOFTWARE_VERSION = r"""
 330        Primpoly
 331        \s+
 332        Version
 333        \s+
 334        ([0-9]+)   # The two part version number NNN.nnn
 335        \.
 336        ([0-9]+)
 337        """
 338    NEW_SOFTWARE_VERSION = r"""
 339        Primpoly Version 16.3
 340        """
 341
 342    # Match a copyright line.  Then extract the copyright symbol which can be
 343    # ascii (C) or HTML &copy; and extract the old year.
 344    TWO_DIGIT_YEAR_FORMAT = "%02d"
 345    COPYRIGHT_LINE = r"""
 346        Copyright                       # Copyright.
 347        \s+                             # One or more spaces.
 348        (?P<symbol> \(C\) | &copy;)     # Match and extract the copyright symbol.
 349        \D+                             # Any non-digits.
 350        (?P<old_year>[0-9]+)            # Match and extract the old copyright year,
 351                                        # then place it into variable 'old_year'
 352        -                               # to
 353        ([0-9]+)                        # New copyright year.
 354        """
 355
 356    # Match a line containing the words,
 357    #    last updated YY
 358    # and extract the two digit year YY.
 359    LAST_UPDATED_LINE = r"""
 360        last\s+         # Match the words "last updated"
 361        updated\s+
 362        \d+             # Day number
        \s+             # One or more blanks or tabs.
 364        [A-Za-z]+       # Month
 365        \s+             # One or more blanks or tabs
 366        (?P<year>\d+)   # Two digit year.  Place it into the variable 'year'
 367        """
 368
 369    # Web server root directory.
 370    DEFAULT_ROOT_DIR = "/"
 371
 372    # The ftp listing occasionally shows a date newer than the actual date. 
    # On my server, it could be 6 months newer when we are near New Year's Day.  Typically the server file time is only 1 or 2 minutes newer.
 374    # But if the remote file time is much newer, it might be an old file with a bad date/time.  
 375    # Upload the file to be safe.
 376    # How to see the time differences from the log if they are large:
 377    #     egrep -o "Remote file.*is MUCH newer.*days" logRemote.txt
 378    #     Remote file Finance/MortgageLoanDerivation.tex.html is MUCH newer[8.0 minutes] by 885753.0 seconds = 14762.5 minutes = 246.0 hours = 10.3 days
 379    # How to see the time differences from the log if they are small and we wait and NOT upload:
 380    #    egrep -o "Remote file.*is newer.*days" logRemote.txt
 381    #    Remote file error404.html is newer by    102.0 seconds =      1.7 minutes =      0.0 hours =      0.0 days
 382    #    Remote file index.html is newer by    113.0 seconds =      1.9 minutes =      0.0 hours =      0.0 days
 383    MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD = 8.0
 384    DAYS_NEWER_FOR_REMOTE_BEFORE_UPLOAD = (1.0 / 24.0) * (1.0 / 60.0) * MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD
 385
 386    # Upload only if we are newer by more than a few minutes.  Allows for a little slop in time stamps on server or host.
 387    MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD = 3.0
 388    DAYS_NEWER_FOR_LOCAL_BEFORE_UPLOAD = (1.0 / 24.0) * (1.0 / 60.0) * MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD
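
    # Worked out, these thresholds are small fractions of a day:
    #     8.0 minutes * (1 hour / 60 minutes) * (1 day / 24 hours) = 8.0 / 1440  ~  0.00556 days
    #     3.0 minutes * (1 hour / 60 minutes) * (1 day / 24 hours) = 3.0 / 1440  ~  0.00208 days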
 389
    # An ftp list command line should be at least this many characters, or
    # we'll suspect an error.
 392    MIN_FTP_LINE_LENGTH = 7
 393
 394    # Parse an ftp listing, extracting <bytes> <mon> <day> <hour> <min> <year> <filename>
 395    # ftp listings are generally similar to UNIX ls -l listings.
 396    #
 397    # Some examples:
 398    #
 399    # (1) Freeservers ftp listing,
 400    #
 401    #          0        1   2                3           4    5   6   7      8
 402    #     drwxr-xr-x    3 1000             1000         4096 Nov 18  2006 Electronics
 403    #     -rw-r--r--    1 1000             1000        21984 Jun  4 03:46 StyleSheet.css
 404    #     -rw-r--r--    1 1000             1000         2901 Sep 26 17:12 allclasses-frame.html
 405    #
 406    # (2) atspace ftp listing,
 407    #
 408    #     drwxr-xr-x    3  seanerikoconnor vusers         49 Apr  7  2006 Electronics
 409    #     -rw-r--r--    1  seanerikoconnor vusers      21984 Jun  4 04:03 StyleSheet.css
 410    #
 411    FTP_LISTING = r"""
 412        [drwx-]+            # Unix type file mode.
 413        \s+                 # One or more blanks or tabs.
 414        \d+                 # Number of links.
 415        \s+
 416        \w+                 # Owner.
 417        \s+
 418        \w+                 # Group.
 419        \s+
 420        (?P<bytes> \d+)     # File size in bytes, placed into the variable 'bytes'.
 421        \s+
 422        (?P<mon> \w+)       # Month modified, placed into the variable 'mon'.
 423        \s+
 424        (?P<day> \d+)       # Day modified, placed into the variable 'day'.
 425        \s+
 426        (
 427            (?P<hour> \d+)  # Hour modified, placed into the variable 'hour'.
 428            :
 429            (?P<min> \d+)   # Minute modified, placed into the variable 'min'.
 430            |
 431            (?P<year> \d+)  # If hours and minutes are absent (happens when year is not the current year),
 432                            # extract the year instead.
 433        )
 434        \s+
 435        (?P<filename> [A-Za-z0-9"'.\-_,~()=+#]+)    # Path and file name containing letters, numbers,
 436                                                    # and funny characters.  We must escape some of
 437                                                    # these characters with a backslash, \.
 438        """
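
    # A small parsing sketch (assumes the pattern has already been compiled by precompile_regular_expressions below),
    # applied to the second atspace sample line shown above:
    #
    #     line = "-rw-r--r--    1  seanerikoconnor vusers      21984 Jun  4 04:03 StyleSheet.css"
    #     match = user_settings.FTP_LISTING.search(line)
    #     match.group('bytes', 'mon', 'day', 'hour', 'min', 'filename')
    #     # ('21984', 'Jun', '4', '04', '03', 'StyleSheet.css');  group('year') is None because a time was listed.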
 439
 440    # HTML header up to the style sheet.
 441    BASIC_HTML_BEGIN = \
 442        """
 443        <!DOCTYPE html>
 444        <html lang="en-US">  <!-- Set language of this page to USA English. -->
 445        
 446        <head>
 447            <!-- This page uses Unicode characters. -->
 448            <meta charset="utf-8">
 449        
            <!-- Set viewport to actual device width.  Any other setting makes the web page initially appear zoomed-in on mobile devices. -->
 451            <meta name="viewport" content="width=device-width, initial-scale=1">
 452        
 453            <!-- Title appears in the web browser tab for this page.  The browser also uses it to bookmark this page. -->
 454            <title>Sean Erik O'Connor - Home Page and Free Mathematical Software.</title>
 455        
 456            <!-- Search engines will search using words in this description.  They will also display title in their search results. -->
 457            <meta name="description" content="Syntax Colored Source Code Listing">
 458        
 459            <!-- Some content management software uses the author's name. -->
 460            <meta name="author" content="Sean Erik O'Connor">
 461        
 462            <meta name="copyright" content="Copyright (C) 1986-2024 by Sean Erik O'Connor.  All Rights Reserved.">   
 463        
 464            <!-- Begin style sheet insertion -->
 465            <style>
 466                /* Default settings for all my main web pages. */
 467                body
 468                {
 469                    /* A wide sans-serif font is more readable on the web. */
 470                    font-family:            Verdana, Geneva, "Trebuchet MS", sans-serif ;
 471        
 472                    /* Set the body font size a little smaller than the user's default browser setting. */
 473                    font-size:              0.8em ; 
 474        
 475                    /* Black text is easier to read. */
 476                    color:                  black ;
 477        
 478                    /*  More vertical space between lines for more pleasant reading.  Use a unitless font height multiplier.  
                        Length and percentage values can give scrunched text due to poor inheritance behavior. */
 480                    line-height:            1.7 ;
 481                }
 482        
                /* Now prepare to add the syntax coloring style sheet from Pygments. */
 484        """
 485
 486    # After the style sheet and up to the start of the article in the body.
 487    BASIC_HTML_MIDDLE = \
 488        """
 489            </style>
 490        </head>
 491        
 492        <body>
 493            <article class="content">
 494        """
 495
 496    # After the source code listing, finish the article, body and html document.
 497    BASIC_HTML_END = \
 498        """
 499            </article>
 500        </body>
 501        
 502        </html>
 503        """
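
    # A minimal sketch (not the production flow) of how these three fragments and Pygments could be
    # assembled into one syntax highlighted listing page.  Here source_text is a placeholder for the
    # contents of the file being listed:
    #
    #     formatter = HtmlFormatter()
    #     page = (UserSettings.BASIC_HTML_BEGIN
    #             + formatter.get_style_defs('.highlight')              # Pygments CSS rules go inside <style>.
    #             + UserSettings.BASIC_HTML_MIDDLE
    #             + highlight(source_text, PythonLexer(), formatter)    # Pick the lexer to match the file type.
    #             + UserSettings.BASIC_HTML_END)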
 504
 505    def __init__(self):
 506        """Set up the user settings."""
 507
 508        self.local_root_dir = ""
 509
 510        # Import the user settings from the parameter file.
 511        self.get_local_root_dir()
 512        self.get_server_settings()
 513
 514        self.precompile_regular_expressions()
 515
 516    def get_server_settings(self):
 517        """
 518        Read web account private settings from a secret offline parameter file.
 519        These also hold patterns to match and replace in all of our source pages.
 520        """
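        # For reference, the settings file is expected to contain two YAML documents separated by "---".
        # A hypothetical layout (the key names come from the code below; every value here is a placeholder):
        #
        #     ftp_server_name: ftp.example.com
        #     ftp_user_name: myusername
        #     ftp_password: mypassword
        #     remote_directory: /
        #     file_size_limit_Kb: 50000
        #     ---
        #     pattern_match_replacement_string_list:
        #         - pattern: oldtext
        #           replacement_string: newtext
        #     test_verify_string_list:
        #         - test_string: some oldtext here
        #           verify_string: some newtext here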
 521
 522        # Private file which contains my account settings.
 523        settings_file_name = self.local_root_dir + self.SERVER_SETTINGS_FILE_NAME
 524        # Recommended by
 525        #  https://www.cloudbees.com/blog/yaml-tutorial-everything-you-need-get-started
 526        try:
 527            stream = open(settings_file_name, "r")
 528        except OSError as detail:
 529            logging.error(f"Cannot open the YAML file {settings_file_name:s}.  Unable to read the settings because: {str(detail):s}")
 530            # Rethrow the exception higher.
 531            raise UpdateWebException("Cannot load the settings.  See the log file for details.  Aborting... ") from detail
 532        # Read all the YAML documents in the file.
 533        yaml_contents = yaml.load_all(stream, Loader)
 534        yaml_document_list: list[Any] = []
 535        for yaml_doc in yaml_contents:
 536            yaml_document_list.append(yaml_doc)
 537        num_yaml_docs = len(yaml_document_list)
 538        if num_yaml_docs != 2:
 539            logging.error(f"Wrong number of YAML documents = {num_yaml_docs:3d} in the user settings file.  Aborting...")
 540            raise UpdateWebException("Cannot load the settings.  See the log file for details.  Aborting... ")
 541
 542        # Load all the server settings.
 543        self.SERVER_NAME = yaml_document_list[0]['ftp_server_name']
 544        self.USER_NAME = yaml_document_list[0]['ftp_user_name']
 545        self.PASSWORD_NAME = yaml_document_list[0]['ftp_password']
 546        self.FTP_ROOT_NAME = yaml_document_list[0]['remote_directory']
 547        self.FILE_SIZE_LIMIT_NAME = int(yaml_document_list[0]['file_size_limit_Kb'])
 548
 549        # Load all the tuples which contain patterns to match and the strings to replace, from document #1 in the YAML file.
 550        self.STRING_REPLACEMENT_LIST = []
 551        pat_rep_yaml_list = yaml_document_list[1]['pattern_match_replacement_string_list']
 552        for pat_rep in pat_rep_yaml_list:
 553            # Fetch the regular expression and compile it for speed.
 554            verbose_regex = pat_rep['pattern']
 555            pat = re.compile(verbose_regex, re.VERBOSE | re.IGNORECASE)
 556            # Since we use raw strings, we need to strip off leading and trailing whitespace.
            replacement_string = pat_rep['replacement_string'].strip()
 558            self.STRING_REPLACEMENT_LIST.append([pat, replacement_string])
 559
 560        # Load the test and verify strings.
 561        test_verify_strings_list = yaml_document_list[1]['test_verify_string_list']
 562        for test_verify_string in test_verify_strings_list:
            test_string = test_verify_string['test_string'].strip()
            verify_string = test_verify_string['verify_string'].strip()
 565            self.STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST.append([test_string,verify_string])
 566
 567        print("  ...done!", flush=True)
 568        return
 569
 570    def get_local_root_dir(self):
 571        """Get the local website root directory on this platform."""
 572
 573        # Each platform has a definite directory for the web page.
 574        local_web_dir_path = "/Desktop/Sean/WebSite"
 575
 576        if sys.platform.startswith('darwin'):
 577            self.local_root_dir = str(Path.home()) + local_web_dir_path
        # My Cyberpower PC running Ubuntu Linux.
 579        elif sys.platform.startswith('linux') or sys.platform.startswith('cygwin'):
 580            self.local_root_dir = str(Path.home()) + local_web_dir_path
 581        return
 582
 583    def precompile_regular_expressions(self):
 584        """For speed precompile the regular expression search patterns."""
 585        self.COPYRIGHT_LINE            = re.compile(self.COPYRIGHT_LINE,            re.VERBOSE | re.IGNORECASE)
 586        self.CURRENT_SOFTWARE_VERSION  = re.compile(self.CURRENT_SOFTWARE_VERSION,  re.VERBOSE | re.IGNORECASE)
 587        self.FTP_LISTING               = re.compile(self.FTP_LISTING,               re.VERBOSE | re.IGNORECASE)
 588        self.TEMP_FILE_SUFFIXES        = re.compile(self.TEMP_FILE_SUFFIXES,        re.VERBOSE | re.IGNORECASE)
 589        self.TEMP_DIR_SUFFIX           = re.compile(self.TEMP_DIR_SUFFIX,           re.VERBOSE)
 590        self.SOURCE_FILE_PATTERN       = re.compile(self.SOURCE_FILE_PATTERN,       re.VERBOSE)
 591        self.HYPERTEXT_FILE_PATTERN    = re.compile(self.HYPERTEXT_FILE_PATTERN,    re.VERBOSE)
 592        self.OLD_EMAIL_ADDRESS         = re.compile(self.OLD_EMAIL_ADDRESS,         re.VERBOSE | re.IGNORECASE)
 593        self.FILE_TO_HIGHLIGHT_PATTERN = re.compile(self.FILE_TO_HIGHLIGHT_PATTERN, re.VERBOSE)
 594        self.LAST_UPDATED_LINE         = re.compile(self.LAST_UPDATED_LINE,         re.VERBOSE | re.IGNORECASE)
 595
 596# ----------------------------------------------------------------------------
 597#  Unit test individual functions.
 598# ----------------------------------------------------------------------------
 599
class UnitTest(unittest.TestCase):
    """Unit test the individual functions."""

    def setUp(self):
        """Initialize the UnitTest class."""
 603        self.user_settings = UserSettings()
 604        self.user_settings.get_local_root_dir()
 605
 606    def tearDown(self):
 607        """Clean up the UnitTest class."""
 608        self.user_settings = None
 609
 610    def test_copyright_updating(self):
 611        """Test copyright line updating to the current year."""
 612        # Prevent web cleaning from rewriting strings by splitting them up and concatenating them.
 613        line_before_update = "Copyright (C) 19" + "99-20" + "20" + " by Sean Erik O'Connor.  All Rights Reserved. Copyright &copy; 1999-2024 by Sean Erik O'Connor"
 614        line_after_update_actual = "Copyright (C) 1999-2024 by Sean Erik O'Connor.  All Rights Reserved. Copyright &copy; 1999-2024 by Sean Erik O'Connor"
 615        pat = self.user_settings.COPYRIGHT_LINE
 616        match = pat.search(line_before_update)
 617
 618        if match:
 619            old_year = int(match.group('old_year'))
            # Same as a call to WebSite.get_current_year():
 621            current_year = int(time.gmtime()[0])
 622            if old_year < current_year:
 623                # We matched and extracted the old copyright symbol into the variable
 624                # 'symbol' using the pattern syntax (?P<symbol> \(C\) | &copy;)
 625                # We now insert it back by placing the special syntax
 626                # \g<symbol> into the replacement string.
 627                new_copyright = r"Copyright \g<symbol> \g<old_year>-" + \
 628                                str(current_year)
 629                line_after_update_computed = pat.sub(new_copyright, line_before_update)
 630                self.assertEqual(
 631                    line_after_update_actual,
 632                    line_after_update_computed,
 633                    f"newline = |{line_after_update_actual:s}| line_after_update_computed = |{line_after_update_computed:s}|")
 634            else:
 635                self.fail()
 636        else:
 637            self.fail()
 638
 639    def test_update_software_version(self):
 640        """Test updating to a new version of Primpoly."""
 641        # Prevent web cleaning from rewriting strings by splitting them up and concatenating them.
 642        old_version_line = "|     Primpoly Version 00." + "0 - A Program for Computing Primitive Polynomials.|"
 643        new_version_line = "|     Primpoly Version 16." + "3 - A Program for Computing Primitive Polynomials.|"
 644        pat = self.user_settings.CURRENT_SOFTWARE_VERSION
 645        match = pat.search(old_version_line)
 646        if match:
            # Strip the leading and trailing whitespace from the replacement string before use.
            new_version = self.user_settings.NEW_SOFTWARE_VERSION.strip()
 650            updated_version_line = pat.sub(new_version, old_version_line)
 651            self.assertEqual(updated_version_line, new_version_line, f"updated version line = {updated_version_line:s} new line = {new_version_line:s}")
 652        else:
 653            self.fail()
 654
 655    def test_extract_filename_from_ftp_listing(self):
 656        """Test parsing an FTP listing."""
 657        ftp_line = "-rw-r--r--    1 1000             1000         2901 Sep 26 17:12 allclasses-frame.html"
 658        extracted_file_name = "allclasses-frame.html"
 659        pat = self.user_settings.FTP_LISTING
 660        match = pat.search(ftp_line)
 661        if match:
 662            filename = match.group('filename')
 663            self.assertEqual(
 664                filename,
 665                extracted_file_name,
 666                f"ftp_line = {ftp_line:s} extracted file name = {extracted_file_name:s}")
 667        else:
 668            self.fail()
 669
 670    def test_get_file_time_and_date(self):
 671        """Test getting a file time and date."""
 672        # Point to an old file.
 673        file_name = "./Images/home.png"
 674        full_file_name = self.user_settings.local_root_dir + '/' + file_name
 675        # Get the UTC time.
 676        file_epoch_time = os.path.getmtime(full_file_name)
 677        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
 678        # Create a datetime object for the file.
 679        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class;  year, month, day, hour, minute, seconds.
 680        # Check if the file time matches what we would see if we did ls -l <file_name>
 681        computed = f"file {file_name:s} datetime {d.ctime():s}"
 682        actual = "file ./Images/home.png datetime Thu Jul 18 16:55:44 2024"
 683        self.assertEqual(computed, actual)
 684
 685    def test_set_file_time_and_date(self):
 686        """Test setting a file time and date."""
 687        file_name = "./Images/home.png"
 688        full_file_name = self.user_settings.local_root_dir + '/' + file_name
 689        # Create a temporary file in the same directory.
 690        temp_file_name = "temporal.tmp"
        full_temp_file_name = self.user_settings.local_root_dir + '/' + temp_file_name
 692        try:
 693            with open(full_temp_file_name, 'w') as fp:
 694                fp.write("The End of Eternity")
 695        except OSError as detail:
 696            logging.error(f"Cannot open or write to the file {full_temp_file_name:s}: {str(detail):s}  Aborting...")
 697            raise UpdateWebException("Failed the unit test for setting time and date of a file.  See the log file for details.  Aborting...") from detail
 698        # Get the old file time.  Set the temporary file to the same time.
 699        file_stat = os.stat(full_file_name)
 700        os.utime(full_temp_file_name, (file_stat[stat.ST_ATIME], file_stat[stat.ST_MTIME]))
 701        # What is the temporary file's time now?
 702        file_epoch_time = os.path.getmtime(full_temp_file_name)
 703        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
 704        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class;  year, month, day, hour, minute, seconds.
 705        # Is the temporary file time set properly?
 706        computed = f"file {file_name:s} datetime {d.ctime():s}"
 707        actual = "file ./Images/home.png datetime Thu Jul 18 16:55:44 2024"
 708        self.assertEqual(computed, actual)
 709        os.remove(full_temp_file_name)
 710
 711    def test_difference_of_time_and_date(self):
 712        """Test a date difference calculation."""
 713        file_name = "./Images/home.png"
 714        full_file_name = self.user_settings.local_root_dir + '/' + file_name
 715        # Get the UTC time.
 716        file_epoch_time = os.path.getmtime(full_file_name)
 717        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
 718        # Create a datetime object for the file.
 719        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class;  year, month, day, hour, minute, seconds.
 720        # Slightly change the date and time by adding 1 minute.
 721        d2 = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5]+1)  # year, month, day, hour, minute, second
 722        time_delta = d2 - d
 723        seconds_different = time_delta.total_seconds()
 724        minutes_different = seconds_different / 60.0
 725        hours_different = minutes_different / 60.0
 726        days_different = hours_different / 24.0
 727        computed = f"difference {days_different:8.5f} days, {hours_different:8.5f} hours {minutes_different:8.5f} minutes, {seconds_different:8.5f} seconds"
 728        actual = "difference  0.00001 days,  0.00028 hours  0.01667 minutes,  1.00000 seconds"
 729        self.assertEqual(computed, actual)
 730
 731    def test_pattern_match_dir_to_skip(self):
        """Test that the pattern for skipping certain directories recognizes the directory names."""
 733        dir_skip = "Primpoly-cswhfrwgwdikgzfdpiorbeaiennz"
 734        pat = re.compile(self.user_settings.DIR_TO_SKIP)
        self.assertTrue(pat.search(dir_skip))
 739
 740    def test_file_name_to_syntax_highlight(self):
 741        """Test if syntax highlighting recognizes file names to highlight."""
 742        file_name1 = "Computer/hello.lsp"
 743        file_name2 = "Computer/life.html"
 744        p = self.user_settings.FILE_TO_HIGHLIGHT_PATTERN
        self.assertTrue(p.search(Path(file_name1).name) and p.search(Path(file_name2).name))
 749
 750    def test_user_settings(self):
 751        """Test whether user settings are correctly initialized."""
 752        computed = f"File size limit = {int(self.user_settings.FILE_SIZE_LIMIT_NAME):d} K"
 753        actual = "File size limit = 50000 K"
 754        self.assertEqual(computed, actual, "File size limit settings are incorrect.")
 755
    def test_check_replace_substring(self, debug=False):
 757        """Test the substring pattern match and replacement functions which use the list of match/replace pairs in the YAML file.
 758           For troubleshooting, turn on debug.
 759        """
 760        test_verify_pairs = self.user_settings.STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST
 761        # Iterate over all test strings.
 762        for pair in test_verify_pairs:
 763            [test_string, verify_string] = pair
 764            if debug:
 765                print( f"\n>>>>>>> next test/verify string pair = \n\t{pair}")
 766            # Iterate over all patterns and replacements.
 767            for match_replace_tuple in self.user_settings.STRING_REPLACEMENT_LIST:
 768                [pat, rep_string] = match_replace_tuple
 769                match = pat.search(test_string)
 770                # The pattern match succeeds.
 771                if match:
 772                    try:
 773                        sub = pat.sub(rep_string, test_string)
 774                        # String replacement succeeds for this pattern/replace pair iteration.
 775                        if debug:
 776                            print( f">>>>>>> apply match and replace: \n\t{test_string} ---> {sub}")
 777                        test_string = sub
 778                    except IndexError as detail:
 779                        print(f"Caught an exception: {str(detail):s}.  Replacement failed.")
 780                        if debug:
 781                            self.assertTrue(False)
 782                elif debug:
 783                    print( f">>>>>>> match failed for pattern \n\t{pat} \nwhen applied to string \n\t{test_string}")
 784                # No match, so go on to the next pattern and don't change test_string.
 785            # Done with all pattern/replace on test string.
 786            # Check this test string in the list.
 787            self.assertEqual(test_string, verify_string, f"\ntest_string = |{test_string:s}|\nverify_string = |{verify_string:s}|\n")
 788
 789# ----------------------------------------------------------------------------
 790#  Command line options.
 791# ----------------------------------------------------------------------------
 792
 793class CommandLineSettings(object):
 794    """Get the command line options."""
 795
 796    def __init__(self, user_settings, raw_args=None):
        """Parse the command line options and update the user settings accordingly."""
 798        command_line_parser = argparse.ArgumentParser(
 799            description="updateweb options")
 800
 801        # Log all changes, not just warnings and errors.
 802        command_line_parser.add_argument(
 803            "-v",
 804            "--verbose",
 805            help="Turn on verbose mode to log everything",
 806            action="store_true")
 807
 808        # Clean up the local website only.
 809        command_line_parser.add_argument(
 810            "-c",
 811            "--clean",
 812            help="Do a cleanup on the local web site only.",
 813            action="store_true")
 814
        # Also upload the MathJax directory.
 816        command_line_parser.add_argument(
 817            "-m",
 818            "--mathjax",
 819            help="""ALSO upload mathjax directory.\
 820            Do this if you have a new version of MathJax or if you've reset your server and deleted the /mathjax remote directory.\
 821            You'll need to do git restore on any altered files in the local branch and git clean -f to remove any newly created files.\
 822            NOTE:  If you did reset your server and delete all files, run the command    find . -name '*.*' -exec touch {} \\;    from the web page root directory.\
 823            Also run   find . -name '*' -exec touch {} \\;    This will ensure accurate times on the server.""",
 824            action="store_true")
 825
 826        # Run unit tests only.
 827        command_line_parser.add_argument("-t", "--test",
 828                                         help="Run unit tests.",
 829                                         action="store_true")
 830
 831        args = command_line_parser.parse_args(raw_args)
 832
 833        if args.verbose:
 834            user_settings.VERBOSE = True
 835        if args.clean:
 836            user_settings.CLEAN = True
 837        if args.test:
 838            user_settings.UNITTEST = True
 839        if args.mathjax:
 840            user_settings.MATHJAX = True
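
# A minimal usage sketch (hypothetical, not executed anywhere in this script):  the raw_args parameter
# lets the options be parsed from an explicit list, which is convenient for testing.
#
#     user_settings = UserSettings()
#     CommandLineSettings(user_settings, raw_args=["-v", "-c"])
#     # Now user_settings.VERBOSE and user_settings.CLEAN are both True.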
 841
 842# ----------------------------------------------------------------------------
 843#  Base class which describes my web site overall.
 844# ----------------------------------------------------------------------------
 845
 846class WebSite(object):
 847    """
 848    Abstract class used for analyzing both local and remote (ftp server) websites.
 849    Contains the web-walking functions which traverse the directory structures and files.
    These will be overridden in the subclasses with specialized methods, either for walking a local disk directory or for walking a remote directory with FTP commands.
 851    Child classes may define additional functions which only they need.
 852    """
 853
 854    def __init__(self, settings):
 855        """Set up root directories"""
 856
 857        # Import the user settings.
 858        self.user_settings = settings
 859
 860        # Queue keeps track of directories not yet processed.
 861        self.queue = []
 862
 863        # List of all directories traversed.
 864        self.directories = []
 865
 866        # List of files traversed, with file information.
 867        self.files = []
 868
 869        # Find out the root directory and go there.
 870        self.root_dir = self.get_root_dir()
 871        self.go_to_root_dir(self.root_dir)
 872
 873    @staticmethod
 874    def get_current_year():
 875        """Get the current year."""
 876        return int(time.gmtime()[0])
 877
 878    @staticmethod
 879    def get_current_two_digit_year():
 880        """Get the last two digits of the current year."""
 881        return WebSite.get_current_year() % 100
 882
 883    @staticmethod
 884    def is_file_info_type(file_info):
 885        """Check if we have a file information structure or merely a simple file name."""
 886        try:
 887            if isinstance(file_info, list):
 888                return True
 889            elif isinstance(file_info, str):
 890                return False
 891            else:
 892                logging.error("is_file_info_type found a bad type.  Aborting...")
 893                raise UpdateWebException("Internal error for file type.  See the log file for details.  Aborting... ")
 894        except TypeError as detail:
 895            logging.error(f"is_file_info_type found a bad type {str(detail):s}.  Aborting...")
 896            raise UpdateWebException("Internal error for file type.  See the log file for details.  Aborting... ") from detail
 897
 898    def get_root_dir(self):
 899        """Subclass:  Put code here to get the root directory"""
 900        return ""
 901
 902    def go_to_root_dir(self, root_dir):
 903        """Subclass:  Put code here to go to the root directory"""
        pass  # Python's do-nothing statement.
 905
 906    def one_level_down(self, d):
        """Subclass:  Fill in with a method which returns the lists of
        directories and files immediately beneath the directory d."""
 909        return [], []
 910
 911    def walk(self, d, type_of_tree_search=TreeWalkSettings.BREADTH_FIRST_SEARCH):
 912        """Walk a directory in either depth first or breadth first order.  BFS is the default."""
 913
 914        # Get all subfiles and subdirectories off this node.
 915        subdirectories, subfiles = self.one_level_down(d)
 916
 917        # Add all the subfiles in order.
 918        for f in subfiles:
 919
 920            name = self.strip_root(f)
 921            logging.debug(f"Webwalking:  Adding file {name[self.user_settings.FILE_NAME]:s} to list.")
 922
 923            # Some files are private so skip them from consideration.
 924            pat = re.compile(self.user_settings.FILE_TO_SKIP)
 925
 926            if pat.search(name[self.user_settings.FILE_NAME]):
 927                logging.warning(
 928                    f"Webwalking:  Skipping private file {name[self.user_settings.FILE_NAME]:s}")
 929            # Don't upload the log file due to file locking problems.
 930            elif name[self.user_settings.FILE_NAME].find(self.user_settings.LOGFILENAME) >= 0:
 931                logging.debug(f"Webwalking:  Skipping log file {name[self.user_settings.FILE_NAME]:s}")
            # Otherwise keep the file.  (Note that some servers impose a file size limit.)
 933            else:
 934                self.files.append(name)
 935
 936        # Queue up the subdirectories.
 937        for d in subdirectories:
 938            # Some directories are private such as .git or just temporary file
 939            # caches so skip them from consideration.
 940            pat = re.compile(self.user_settings.DIR_TO_SKIP)
 941            if pat.search(d):
 942                logging.warning(f"Webwalking:  Skipping private dir {d:s}")
 943            else:
 944                logging.debug(f"Webwalking:  Pushing dir {d:s} on the queue.")
 945                self.queue.append(d)
 946
 947        # Search through the directories.
 948        while len(self.queue) > 0:
 949            # For breadth first search, remove from beginning of queue.
 950            if type_of_tree_search == TreeWalkSettings.BREADTH_FIRST_SEARCH:
 951                d = self.queue.pop(0)
 952
 953            # For depth first search, remove from end of queue.
 954            elif type_of_tree_search == TreeWalkSettings.DEPTH_FIRST_SEARCH:
 955                d = self.queue.pop()
 956            else:
 957                d = self.queue.pop(0)
 958
 959            name = self.strip_root(d)
 960            logging.debug(f"Webwalking:  Adding relative directory {name:s} to list, full path = {d:s}.")
 961            self.directories.append(name)
 962
 963            self.walk(d)
 964
 965    def strip_root(self, file_info):
 966        """Return a path, but strip off the root directory"""
 967
 968        root = self.root_dir
 969
 970        # Extract the file name.
 971        if self.is_file_info_type(file_info):
 972            name = file_info[self.user_settings.FILE_NAME]
 973        else:
 974            name = file_info
 975
 976        # e.g. root = / and name = /Art/foo.txt yields stripped_path = Art/foo.txt
 977        # but root = /Sean and name = /Sean/Art/foo.txt yields stripped_path =
 978        # Art/foo.txt
 979        lenroot = len(root)
 980        if root == self.user_settings.DEFAULT_ROOT_DIR:
 981            pass
 982        else:
 983            lenroot = lenroot + 1
 984
 985        stripped_path = name[lenroot:]
 986
 987        if self.is_file_info_type(file_info):
 988            # Update the file name only.
 989            return [stripped_path,
 990                    file_info[self.user_settings.FILE_TYPE],
 991                    file_info[self.user_settings.FILE_DATE_TIME],
 992                    file_info[self.user_settings.FILE_SIZE]]
 993        else:
 994            return stripped_path
 995
 996    def append_root_dir(self, root_dir, name):
 997        """Append the root directory to a path"""
 998
 999        # e.g. root = /, and name = Art/foo.txt yields /Art/foo.txt
1000        # but root = /Sean, and name = Art/foo.txt yields /Sean/Art/foo.txt
1001        if root_dir == self.user_settings.DEFAULT_ROOT_DIR:
1002            return root_dir + name
1003        else:
1004            return root_dir + "/" + name
1005
1006    def scan(self):
1007        """Scan the directory tree recursively from the root"""
1008        logging.debug(f"Webwalking:  Beginning recursive directory scan from root directory {self.root_dir:s}")
1009        self.walk(self.root_dir)
1010
1011    def modtime(self, f):
1012        """Subclass:  Get file modification time"""
1013        pass
1014
1015    def finish(self):
1016        """Quit web site"""
1017        logging.debug(f"Finished with WebSite object of class {type(self)}")
1018        pass
1019
1020# ----------------------------------------------------------------------------
1021#  Subclass which knows about the local web site on disk.
1022# ----------------------------------------------------------------------------
1023
1024class LocalWebSite(WebSite):
1025    """Walk the local web directory on local disk down from the root.
1026    Clean up temporary files and do other cleanup work."""
1027
1028    def __init__(self, settings):
1029        """Go to web page root and list all files and directories."""
1030
1031        # Initialize the parent class.
1032        WebSite.__init__(self, settings)
1033
1034        self.root_dir = self.get_root_dir()
1035        logging.debug(f"LocalWebSite.__init__():  \tRoot directory: {self.root_dir:s}")
1036
1037    def get_root_dir(self):
1038        """Get the name of the root directory"""
1039        return self.user_settings.local_root_dir
1040
1041    def go_to_root_dir(self, root_dir):
1042        """Go to the root directory"""
1043
1044        # Go to the root directory.
1045        logging.debug(f"LocalWebSite.go_to_root_dir():  \tchdir to root directory:  {root_dir:s}")
1046        os.chdir(root_dir)
1047
1048        # Read it back.
1049        self.root_dir = os.getcwd()
1050        logging.debug(f"LocalWebSite.go_to_root_dir():  \tgetcwd root directory:  {self.root_dir:s}")
1051
1052    def one_level_down(self, d):
        """List all files and subdirectories in the current directory d.  For files, collect file info
        such as time, date and size."""
1055
1056        directories = []
1057        files = []
1058
1059        # Change to current directory.
1060        os.chdir(d)
1061
1062        # List all subdirectories and files.
1063        dir_list = os.listdir(d)
1064
1065        if dir_list:
1066            for line in dir_list:
1067                # Add the full path prefix from the root.
1068                name = self.append_root_dir(d, line)
1069                logging.debug(f"LocalWebSite.one_level_down():  \tlocal dir or file {name:s}")
1070
1071                # Is it a directory or a file?
1072                if os.path.isdir(name):
1073                    directories.append(name)
1074                elif os.path.isfile(name):
1075                    # First assemble the file information of name, time/date and size into a list.
1076                    # Can index it like an array.  For example,
1077                    # file_info = 
1078                    #   [ '/WebDesign/EquationImages/equation001.png',  -- The file name.
1079                    #      1,                                           -- Enum type FileType.FILE = 1.
1080                    #      datetime.datetime(2010, 2, 3, 17, 15),       -- UTC encoded in a date/time class.
1081                    #      4675]                                        -- File size in bytes.
1082                    file_info = [name,
1083                                 FileType.FILE,
1084                                 self.get_file_date_time(name),
1085                                 self.get_file_size(name)]
1086                    files.append(file_info)
1087
1088        # Sort the names into order.
1089        if directories:
1090            directories.sort()
1091        if files:
1092            files.sort()
1093
1094        return directories, files
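
    # For example (paths and values are illustrative only), a call such as
    #     one_level_down('/Users/sean/Desktop/Sean/WebSite/Art')
    # might return
    #     directories = ['/Users/sean/Desktop/Sean/WebSite/Art/Sketches']
    #     files       = [['/Users/sean/Desktop/Sean/WebSite/Art/notes.txt', FileType.FILE, datetime.datetime(2010, 2, 3, 17, 15), 4675]]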
1095
1096    @staticmethod
1097    def get_file_date_time(file_name):
1098        """Get a local file time and date in UTC."""
1099
1100        file_epoch_time = os.path.getmtime(file_name)
1101        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
1102        # Create a datetime class from the UTC year, month, day, hour, minute, seconds.
1103        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])
1104        return d
1105
1106    @staticmethod
1107    def get_file_size(file_name):
1108        """Get file size in bytes."""
1109        return os.path.getsize(file_name)
1110
1111    def copy_to_text_file(self, file_name):
1112        """Make a copy of a file with a .txt extension"""
1113
1114        # Remove the old copy with the text file extension.
1115        copy_file_name = file_name + self.user_settings.TEXT_FILE_EXT
1116        try:
1117            os.remove(copy_file_name)
1118        except OSError as detail:
1119            logging.error(f"Cannot remove old text file copy {copy_file_name:s}: {str(detail):s}")
1120
1121        # Create the new copy, which is an exact duplicate.
1122        self.process_lines_of_file(file_name, copy_file_name)
1123
1124        # Make the new copy have the same modification and access time and date as the original
1125        # since it is just an exact copy.
1126        # That way we won't upload copies with newer times constantly, just because they look as
1127        # though they've been recently modified.
1128        file_stat = os.stat(file_name)
1129        os.utime(copy_file_name, (file_stat[stat.ST_ATIME], file_stat[stat.ST_MTIME]))
1130        logging.debug(f"Reset file time to original time for copy {copy_file_name:s}")
1131
1132    @staticmethod
1133    def clean_up_temp_file(temp_file_name, file_name, changed):
1134        """Remove the original file, rename the temporary file name to the original name.
1135        If there are no changes, just remove the temporary file.
1136        """
1137
1138        if changed:
1139            # Remove the old file now that we have the rewritten file.
1140            try:
1141                os.remove(file_name)
1142                logging.debug(f"Changes were made.  Removed original file {file_name:s}")
1143            except OSError as detail:
1144                logging.error(f"Cannot remove old file {file_name:s}: {str(detail):s}.  Need to remove it manually.")
1145
1146            # Rename the new file to the old file name.
1147            try:
1148                os.rename(temp_file_name, file_name)
1149                logging.debug(f"Renamed temp file {temp_file_name:s} to original file {file_name:s}")
1150            except OSError as detail:
1151                logging.error(f"Cannot rename temporary file {temp_file_name:s} to old file name {file_name:s}: {str(detail):s}.  Need to rename manually")
1152        else:
1153            # No changes?  Remove the temporary file.
1154            try:
1155                os.remove(temp_file_name)
1156                logging.debug(f"No changes were made.  Removed temporary file {temp_file_name:s}")
1157            except OSError as detail:
1158                logging.error(f"Cannot remove temporary file {temp_file_name:s}: {str(detail):s}.  Need to remove it manually.")
1159        return
1160
1161    @staticmethod
1162    def process_lines_of_file(in_file_name, out_file_name, process_line_function_list=None):
1163        """
1164        Process each line of a file with a list of functions.  Create a new temporary file.
1165
1166        The default list is None which means make an exact copy.
1167        """
1168
1169        # Assume no changes.
1170        changed = False
1171
1172        # Open both input and output files for processing.  Check if we cannot do it.
1173        fin = None
1174        try:
1175            fin = open(in_file_name, "r")
1176        except IOError as detail:
1177            logging.error(f"process_lines_of_file():  \tCannot open file {in_file_name:s} for reading:  {str(detail):s} Aborting...")
1178            if fin is not None:
1179                fin.close()
1180            raise UpdateWebException("Internal error for processing a file.  See the log file for details.  Aborting... ") from detail
1181        fout = None
1182        try:
1183            fout = open(out_file_name, "w")
1184        except IOError as detail:
1185            logging.error(f"process_lines_of_file():  \tCannot open file {out_file_name:s} for writing:  {str(detail):s} Aborting...")
1186            if fout is not None:
1187                fout.close()
1188            raise UpdateWebException("Internal error for processing a file.  See the log file for details.  Aborting... ") from detail
1189
1190        # Read each line of the file, aborting if there is a read error.
1191        try:
1192            line = fin.readline()
1193
1194            # Rewrite the next line of the file using all the rewrite functions.
1195            while line:
1196                original_line = line
1197                # If we have one or more rewrite functions...
1198                if process_line_function_list is not None:
                    # ...apply each rewrite function to the line, one after the other in order.
1200                    for processLineFunction in process_line_function_list:
1201                        if processLineFunction is not None:
1202                            line = processLineFunction(line)
1203
1204                if original_line != line:
1205                    logging.debug("Rewrote the line" + ":" + f"|{original_line:s}|" + "into" + ":" + f"|{line:s}| for file" + ":" + f"{in_file_name:s}")
1206                    changed = True
1207
1208                fout.write(line)
1209
1210                line = fin.readline()
1211
1212            fin.close()
1213            fout.close()
1214        except IOError as detail:
1215            logging.error(f"File I/O error during reading/writing file {in_file_name:s} in process_lines_of_file: {str(detail):s}  Aborting...")
1216            raise UpdateWebException("Internal error for processing a file.  See the log file for details.  Aborting... ") from detail
1217
1218        if changed:
1219            logging.debug(f"process_lines_of_file():  \tRewrote original file {in_file_name:s}."
1220                          f"Changes are in temporary copy {out_file_name:s}")
1221
1222        # Return True if any lines were changed.
1223        return changed
1224
1225    def clean(self):
1226        """Scan through all directories and files in the local on disk website and clean them up."""
1227
1228        num_source_files_changed = 0
1229        num_source_files_syntax_highlighted = 0
1230
1231        logging.debug("Cleaning up the local web page.")
1232
1233        if self.directories is None or self.files is None:
1234            logging.error("Web site has no directories or files.  Aborting...")
1235            raise UpdateWebException("Internal error for cleaning up the local web site.  See the log file for details.  Aborting... ")
1236
1237        for d in self.directories:
1238
1239            if self.is_temp_dir(d):
1240                # Add the full path prefix from the root.
1241                name = self.append_root_dir(self.get_root_dir(), d)
1242                try:
1243                    logging.debug(f"Removing temp dir {self.root_dir:s} recursively")
1244                    shutil.rmtree(name)
1245                except OSError as detail:
1246                    logging.error(f"Cannot remove temp dir {name:s}: {str(detail):s}")
1247
1248        for f in self.files:
1249            # Add the full path prefix from the root.
1250            full_file_name = self.append_root_dir(
1251                self.get_root_dir(), f[self.user_settings.FILE_NAME])
1252
1253            # Remove all temporary files.
1254            if self.is_temp_file(f):
1255                try:
1256                    logging.debug(f"Removing temp file {full_file_name:s}")
1257                    os.remove(full_file_name)
1258                except OSError as detail:
1259                    logging.error(f"Cannot remove temp dir {full_file_name:s}: {str(detail):s}")
1260
1261            # Update source code files.
1262            if self.is_source_or_hypertext_file(f):
1263                changed = self.rewrite_source_file(full_file_name)
1264                if changed:
1265                    num_source_files_changed += 1
1266                    logging.debug(f"Rewrote source code file {self.root_dir:s}")
1267
            # Generate a syntax highlighted code listing.
1269            # Make it the same time and date as the original code.  Then, only if there are recent changes, we will update the remote server.
1270            if self.is_file_to_syntax_highlight(f):
1271                # full_file_name_highlighted = self.create_syntax_highlighted_code_listing(full_file_name, dry_run=True)
1272                full_file_name_highlighted = self.create_syntax_highlighted_code_listing(full_file_name)
                if full_file_name_highlighted is not None:
                    logging.debug(f"Generated a syntax highlighted source listing file {full_file_name_highlighted:s} for the file {full_file_name:s}")
                    num_source_files_syntax_highlighted += 1
                else:
                    logging.debug(f"Failed to generate a syntax highlighted source listing file for {full_file_name:s}")
1278
1279        logging.debug(f"Number of source files rewritten = {num_source_files_changed:10d}")
1280        logging.debug(f"Number of source files syntax highlighted = {num_source_files_syntax_highlighted:10d}")
1281
1282    def is_temp_file(self, file_info):
1283        """Identify a file name as a temporary file"""
1284
1285        file_name = file_info[self.user_settings.FILE_NAME]
1286
        # Suffixes and names for temporary files to be deleted.
1288        pat = self.user_settings.TEMP_FILE_SUFFIXES
1289        match = pat.search(file_name)
        # Remove any files containing twiddles (~) anywhere in the name.
1291        if match or file_name.find(self.user_settings.VIM_TEMP_FILE_EXT) >= 0:
1292            return True
1293
1294        return False
1295
1296    def is_temp_dir(self, dir_name):
1297        """Identify a name as a temporary directory."""
1298
1299        p = self.user_settings.TEMP_DIR_SUFFIX
1300        return p.search(dir_name)
1301
1302    def is_source_or_hypertext_file(self, file_info):
1303        """ Check if the file name is a source file or a hypertext file."""
1304
1305        file_name = file_info[self.user_settings.FILE_NAME]
1306        p1 = self.user_settings.SOURCE_FILE_PATTERN
1307        p2 = self.user_settings.HYPERTEXT_FILE_PATTERN
1308        if p1.search(file_name) or p2.search(file_name):
1309            return True
1310        else:
1311            return False
1312
1313    def is_file_to_syntax_highlight(self, file_info):
1314        """Check if this file type should have a syntax highlighted source listing."""
1315
1316        # Take apart the file name.
1317        full_file_name = file_info[self.user_settings.FILE_NAME]
1318        file_name = Path(full_file_name).name
1319
1320        p = self.user_settings.FILE_TO_HIGHLIGHT_PATTERN
1321        if p.search(file_name):
1322            return True
1323        else:
1324            return False
1325
1326    def rewrite_substring(self, line):
1327        """Rewrite a line containing a pattern of your choice"""
1328
1329        # Start with the original unchanged line.
1330        rewritten_line = line
1331
1332        # Do the replacements in order from first to last.
1333        for match_replace_tuple in self.user_settings.STRING_REPLACEMENT_LIST:
1334            # Get the next pattern match replacement string tuple.
1335            [pat, rep_string] = match_replace_tuple
1336            # Does it match?  Then do string substitution, else leave the line unchanged.
1337            match = pat.search(rewritten_line)
1338            if match:
                # Now we have these cases:
                #     -No capture variables at all:  just a straightforward pattern match followed by a string substitution.
                #     -One or more capture variable names in the pattern (?P<varname> ... ) along with the corresponding match group names \g<varname> in the replacement string.
                #      If pat.sub() finds any inconsistency here, such as a capture variable name not matching a group name, it will throw an exception.
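                #      For example, a hypothetical entry in STRING_REPLACEMENT_LIST might pair the pattern
                #          (?P<prefix>Primpoly Version )\d+\.\d+
                #      with the replacement string
                #          \g<prefix>7.1
                #      which would rewrite "Primpoly Version 6.9" into "Primpoly Version 7.1".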
1343                try:
1344                    sub = pat.sub(rep_string, rewritten_line)
1345                    rewritten_line = sub
1346                except IndexError as detail:
1347                    logging.error(f"ERROR: {str(detail):s}.  Did not find a capture variable name in the pattern (?P<varname> ... ) along with its corresponding match group name in replacement string \\g<varname> in updateweb.yaml.    Did not rewrite the line |{rewritten_line:s}|")
1348 
1349        return rewritten_line
1350
1351    def rewrite_email_address_line(self, line):
1352        """Rewrite lines containing old email addresses."""
1353
1354        # Search for the old email address.
1355        pat = self.user_settings.OLD_EMAIL_ADDRESS
1356        match = pat.search(line)
1357
1358        # Replace the old address with my new email address.
1359        if match:
1360            new_address = self.user_settings.NEW_EMAIL_ADDRESS
1361            sub = pat.sub(new_address, line)
1362            line = sub
1363
1364        return line
1365
1366    def rewrite_version_line(self, line):
1367        """Rewrite lines containing the current version of software."""
1368
1369        # Search for the current version.
1370        pat = self.user_settings.CURRENT_SOFTWARE_VERSION
1371        match = pat.search(line)
1372
1373        # Replace with the new version.
1374        if match:
            # Strip any leading and trailing whitespace from the new version string.
            new_version = self.user_settings.NEW_SOFTWARE_VERSION.strip()
1378            sub = pat.sub(new_version, line)
1379            line = sub
1380
1381        return line
1382
1383    def rewrite_copyright_line(self, line):
1384        """Rewrite copyright lines if they are out of date."""
1385
1386        # Match the lines,
1387        #     Copyright (C) nnnn-mmmm by Sean Erik O'Connor.
1388        #     Copyright &copy; nnnn-mmmm by Sean Erik O'Connor.
1389        # and pull out the old year and save it.
1390        pat = self.user_settings.COPYRIGHT_LINE
1391        match = pat.search(line)
1392
1393        # Found a match.
1394        if match:
1395            old_year = int(match.group('old_year'))
1396
1397            # Replace the old year with the current year.
1398            # We matched and extracted the old copyright symbol into the variable
1399            # 'symbol' using the pattern syntax (?P<symbol> \(C\) | &copy;)
1400            # We now insert it back by placing the special syntax \g<symbol>
1401            # into the replacement string.
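            # For example, assuming the current year is 2024 and the pattern matched the old year range,
            #     Copyright (C) 2007-2019
            # would be rewritten as
            #     Copyright (C) 2007-2024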
1402            if old_year < WebSite.get_current_year():
1403                new_copyright = r"Copyright \g<symbol> \g<old_year>-" + \
1404                                str(WebSite.get_current_year())
1405                sub = pat.sub(new_copyright, line)
1406                line = sub
1407        return line
1408
1409    def rewrite_last_update_line(self, line):
1410        """Rewrite the Last Updated line if the year is out of date."""
1411
1412        # Match the last updated line and pull out the year.
1413        #      last updated 01 Jan 24.
1414        p = self.user_settings.LAST_UPDATED_LINE
1415        m = p.search(line)
1416
1417        if m:
1418            last_update_year = int(m.group('year'))
1419
1420            # Convert to four digit years.
1421            if last_update_year > 90:
1422                last_update_year += 1900
1423            else:
1424                last_update_year += 2000
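            # For example, a two-digit year of 98 becomes 1998, while 07 becomes 2007.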
1425
1426            # If the year is old, rewrite to "01 Jan <current year>".
1427            if last_update_year < WebSite.get_current_year():
1428                two_digit_year = self.user_settings.TWO_DIGIT_YEAR_FORMAT % self.get_current_two_digit_year()
1429                sub = p.sub('last updated 01 Jan ' + two_digit_year, line)
1430                line = sub
1431
1432        return line
1433
1434    def rewrite_source_file(self, file_name):
1435        """Rewrite copyright lines, last updated lines, etc."""
1436        changed = False
1437
1438        # Create a new temporary file name for the rewritten file.
1439        temp_file_name = file_name + self.user_settings.TEMP_FILE_EXT
1440
1441        # Apply changes to all lines of the temporary file.  Apply change functions in
1442        # the sequence listed.
1443        if self.process_lines_of_file(file_name, temp_file_name,
1444                                      [self.rewrite_copyright_line,
1445                                       self.rewrite_last_update_line,
1446                                       self.rewrite_email_address_line,
1447                                       self.rewrite_substring,
1448                                       self.rewrite_version_line]):
1449            logging.debug(f"Changed (rewritten) source file {file_name:s}")
1450            changed = True
1451
1452        # Rename the temporary file to the original file name.  If no changes, just delete the temp file.
1453        self.clean_up_temp_file(temp_file_name, file_name, changed)
1454
1455        return changed
1456
1457    @staticmethod
1458    def create_syntax_highlighted_code_listing(full_file_name, **kwargs):
1459        """Create a syntax highlighted source listing for the file and return its name.  Return None if there is an error.
1460        Keep the same date/time as the original file."""
1461
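        # A typical call (dry_run is the only keyword argument used here), for example,
        #     self.create_syntax_highlighted_code_listing(full_file_name, dry_run=True)
        # skips writing the .html listing file and returns None.
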
        # Extract the optional dry_run keyword argument;  default to False if it isn't supplied.
        dry_run = kwargs.get('dry_run', False) is True
1472
1473        # Take apart the file name.
1474        file_name_without_extension = Path(full_file_name).stem
1475        file_extension = Path(full_file_name).suffix
1476
1477        # Append *.html to the source code file name.  This will be the syntax highlighted code listing.
1478        full_file_name_highlighted = f"{full_file_name}.html"
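        # For example, updateweb.py would produce the listing file name updateweb.py.html.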
1479
1480        # First choose the language lexer from the file name itself if there's no extension.
1481        # Dotted file names are treated as the entire file name.
1482        match file_name_without_extension:
1483            case "makefile":
1484                lexer = MakefileLexer()
1485            case ".bash_profile"|".bashrc"|".bash_logout":
1486                lexer = BashLexer()
1487            case ".vimrc":
1488                lexer = VimLexer()
1489            case ".gitignore_global" | ".gitignore" | ".gitconfig":
1490                lexer = OutputLexer() # No formatting.
1491            case _:
1492                # Choose the language lexer from the file extension.  Web stuff first, then programming languages.
1493                match file_extension:
1494                    case ".html":
1495                        lexer = HtmlLexer()
1496                    case ".css":
1497                        lexer = CssLexer()
1498                    case ".js":
1499                        lexer = JavascriptLexer()
1500                    case ".sh":
1501                        lexer = BashLexer()
1502                    case ".py":
1503                        lexer = PythonLexer()
1504                    case ".c" | ".h":
1505                        lexer = CLexer()
1506                    case ".hpp" | ".cpp":
1507                        lexer = CppLexer()
1508                    case ".lsp":
1509                        lexer = CommonLispLexer()
1510                    case ".for" | ".FOR" | ".f":
1511                        lexer = FortranFixedLexer()  # Fixed format FORTRAN, not FORTRAN 90.
1512                    case ".txt" | ".dat":            # Generic data file;  no formatting.
1513                        lexer = OutputLexer()
1514                    case ".tex":
1515                        lexer = TexLexer()           # LaTeX, TeX, or related files.
1516                    case ".m":
1517                        lexer = MatlabLexer()
1518                    case ".yaml":
1519                        lexer = YamlLexer()
1520                    case _:
1521                        logging.error(f"Can't find a lexer for file {full_file_name}.  Cannot generate a syntax highlighted source listing.  Aborting...")
1522                        return None
1523
1524        # Read the source code file into a single string.
1525        try:
1526            with open(full_file_name, 'r') as fp:
1527                source_file_string = fp.read()
        except OSError as detail:
            logging.error(f"Cannot read the source code file {full_file_name:s} for syntax highlighting: {str(detail):s}  Aborting...")
            return None
1530
1531        # Top level Pygments function generates the HTML for the highlighted code.
1532        highlighted_html_source_file_string = highlight(source_file_string, lexer, HtmlFormatter(linenos="inline"))
1533
1534        # The style sheet is always the same for all languages.
1535        style_sheet = HtmlFormatter().get_style_defs('.highlight')
1536
1537        # Write out the syntax colored file.
1538        if dry_run:
1539            logging.debug(f"Dry run only:  don't generate the syntax highlighted file {full_file_name_highlighted:s}")
1540            return None
1541        else:
1542            try:
1543                # Write out the highlighted code listing in HTML with CSS style sheet attached.
1544                with open(full_file_name_highlighted, 'w') as fp:
1545                    fp.write(UserSettings.BASIC_HTML_BEGIN)
1546                    fp.write(style_sheet)
1547                    fp.write(UserSettings.BASIC_HTML_MIDDLE)
1548                    fp.write(highlighted_html_source_file_string)
1549                    fp.write(UserSettings.BASIC_HTML_END)
            except OSError as detail:
                logging.error(f"Cannot write the syntax highlighted file {full_file_name_highlighted:s}: {str(detail):s}  Aborting...")
                return None
1552
1553            # Set the listing file to the same modification and access time and date as the source file.
1554            file_stat = os.stat(full_file_name)
1555            os.utime(full_file_name_highlighted, (file_stat[stat.ST_ATIME], file_stat[stat.ST_MTIME]))
1556
1557            # What is the listing file time now?
1558            file_epoch_time = os.path.getmtime(full_file_name_highlighted)
1559            file_time_utc = time.gmtime(file_epoch_time)[0: 6]
1560            d_list = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class;  year, month, day, hour, minute, seconds.
1561
1562            # Source file and listing should be the same time.
1563            file_epoch_time = os.path.getmtime(full_file_name)
1564            file_time_utc = time.gmtime(file_epoch_time)[0: 6]
1565            d_source = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class;  year, month, day, hour, minute, seconds.
1566            logging.debug(f"Generated a syntax highlighted listing {full_file_name_highlighted:s} with same time as source file {full_file_name:s}.")
1567            logging.debug(f"\tsource  file time {d_source.ctime():s}")
1568            logging.debug(f"\tlisting file time {d_list.ctime():s}")
1569            return full_file_name_highlighted
1570
1571# ----------------------------------------------------------------------------
1572#   Subclass which knows about the remote web site.
1573# ----------------------------------------------------------------------------
1574
1575class RemoteWebSite(WebSite):
1576    """Walk the remote web directory on a web server down from the root.
1577       Use FTP commands:
1578           https://en.wikipedia.org/wiki/List_of_FTP_commands
1579       Use the Python ftp library:
1580           https://docs.python.org/3/library/ftplib.html
1581    """
1582
1583    def __init__(self, settings, server, user, password, ftproot):
1584        """Connect to FTP server and list all files and directories."""
1585
1586        # Root directory of FTP server.
1587        self.root_dir = ftproot
1588        logging.debug(f"Requesting remote web site ftp root dir {self.root_dir:s}")
1589
1590        # Connect to FTP server and log in.
1591        try:
1592            # self.ftp.set_debuglevel( 2 )
1593            self.ftp = ftplib.FTP(server)
1594            self.ftp.login(user, password)
1595        # Catch all exceptions with the parent class Exception:  all built-in,
1596        # non-system-exiting exceptions are derived from this class.
1597        except Exception as detail:
1598            # Extract the string message from the exception class with str().
1599            logging.error(f"Remote web site cannot login to ftp server: {str(detail):s}  Aborting...")
1600            raise UpdateWebException("Problem accessing remote web site.  See the log file for details.  Aborting... ") from detail
1601        else:
1602            logging.debug("Remote web site ftp login succeeded.")
1603
1604        logging.debug(f"Remote web site ftp welcome message {self.ftp.getwelcome():s}")
1605
1606        # Initialize the superclass.
1607        WebSite.__init__(self, settings)
1608
1609    def go_to_root_dir(self, root_dir):
1610        """Go to the root directory"""
1611
1612        try:
1613            # Go to the root directory.
1614            self.ftp.cwd(root_dir)
1615            logging.debug(f"ftp root directory (requested) = {self.root_dir:s}")
1616
1617            # Read it back.
1618            self.root_dir = self.ftp.pwd()
1619            logging.debug(f"ftp root directory (read back from server): {self.root_dir:s}")
1620
1621        except Exception as detail:
1622            logging.error(f"go_to_root_dir(): \tCannot ftp cwd or pwd root dir {root_dir:s} {str(detail):s} Aborting...")
1623            raise UpdateWebException("Problem accessing remote web site.  See the log file for details.  Aborting... ") from detail
1624
1625    def get_root_dir(self):
1626        """Get the root directory name"""
1627
1628        return self.root_dir
1629
1630    def finish(self):
1631        """Quit remote web site"""
1632        logging.debug(f"Finished with WebSite object of class {type(self)}")
1633        try:
1634            self.ftp.quit()
1635        except Exception as detail:
1636            logging.error(f"Cannot ftp quit: {str(detail):s}")
1637
1638    def one_level_down(self, d):
1639        """List files and directories in a subdirectory using ftp"""
1640
1641        directories = []
1642        files = []
1643
1644        try:
1645            # ftp listing from current dir.
1646            logging.debug(f"RemoteWebSite.one_level_down():  \tftp cwd: {d:s}")
1647            self.ftp.cwd(d)
1648            dir_list = []
1649
1650            # Use the nonstandard -a option in LIST to show all the hidden .* files.
1651            # But now we have the problem that . and .. (the UNIX current and parent directories) will be in the ftp list of directories.
1652            # Note the second argument requires a callback function.
1653            self.ftp.retrlines('LIST -a', dir_list.append)
1654
1655        except Exception as detail:
1656            logging.error(f"one_level_down(): \tCannot ftp cwd or ftp LIST dir {d:s}:  {str(detail):s} Aborting...")
1657            raise UpdateWebException("Problem accessing remote web site.  See the log file for details.  Aborting... ") from detail
1658
1659        for line in dir_list:
1660            logging.debug(f"RemoteWebSite.one_level_down():  \tftp LIST: {line:s}")
1661
1662            # Line should at least have the minimum FTP information.
1663            if len(line) >= self.user_settings.MIN_FTP_LINE_LENGTH:
1664                # Parse the FTP LIST and put the pieces into file_info.
1665                file_info = self.parse_ftp_list(line)
1666                logging.debug(f"RemoteWebSite.one_level_down():  \tftp parsed file information: {file_info[self.user_settings.FILE_NAME]:s}")
1667
1668                # Skip over the UNIX hidden files for current and parent directories . and ..  Also skip over any NULL file names.
1669                if file_info[self.user_settings.FILE_NAME] == "" or file_info[self.user_settings.FILE_NAME] == "." or file_info[self.user_settings.FILE_NAME] == "..":
1670                    logging.debug(f"RemoteWebSite.one_level_down():  \tftp skipping the file name: {file_info[self.user_settings.FILE_NAME]:s}")
1672                # For a directory, prefix the full path prefix from the root to the directory name and add to the directory list.
1673                elif file_info[self.user_settings.FILE_TYPE] == FileType.DIRECTORY:
1674                    dirname = self.append_root_dir( d, file_info[self.user_settings.FILE_NAME])
1675                    logging.debug(f"RemoteWebSite.one_level_down():  \tftp dir (full path): {dirname:s}")
1676                    directories.append(dirname)
1677                # For a file:  Add the full path prefix from the root to the file name.
1678                else:
1679                    file_info[self.user_settings.FILE_NAME] = self.append_root_dir( d, file_info[self.user_settings.FILE_NAME])
1680                    logging.debug(f"RemoteWebSite.one_level_down():  \tftp file (full path):\
1681                        {file_info[self.user_settings.FILE_NAME]:s}")
1682                    files.append(file_info)
1683            else:
1684                logging.error(f"RemoteWebSite.one_level_down():  \tFTP LIST line is too short:  {line:s}")
1685
1686        directories.sort()
1687        files.sort()
1688
1689        return directories, files
1690
1691    def modtime(self, f):
1692        """Get the modification time of a file via ftp.  Return 0 if ftp cannot get it."""
1693        modtime = 0
1694
1695        try:
1696            response = self.ftp.sendcmd('MDTM ' + f)
1697            # MDTM returns the last modified time of the file in the format
1698            # "213 YYYYMMDDhhmmss \r\n <error-response>
1699            # MM is 01 to 12, DD is 01 to 31, hh is 00 to 23, mm is 00 to 59, ss is 0 to 59.
1700            # error-response is 550 for info not available, and 500 or 501 if command cannot
1701            # be parsed.
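            # For example, a successful reply might look like
            #     213 20240101120000
            # from which we keep the timestamp string 20240101120000.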
1702            if response[:3] == '213':
1703                modtime = response[4:]
1704        except ftplib.error_perm as detail:
1705            logging.error(f"Cannot get file modification time from the ftp server: {str(detail):s} Aborting...")
1706            modtime = 0
1707
1708        return modtime
1709
1710    def parse_ftp_list(self, line):
1711        """Parse the ftp file listing and return file name, datetime and file size.
1712
1713           An FTP LIST command will give output which looks like this for a file:
1714
1715               -rw-r--r--    1 1000       free             4084 Jul 18 16:55 sparkCoil.png
1716
1717           and for a directory:
1718
1719                drwxr-xr-x    2 1000       free             4096 Jul 18 16:36 ReadingList
1720
1721           FTP uses UTC for its listings; the conversion to local time is done by the OS.
1722           We can have problems on New Year's Eve.  For example, the local file date/time is
1723
1724              Mon Jan  1 06:23:12 2018
1725
1726           But the remote file date/time from FTP listing doesn't show a year even though we
1727           know it was written to the server in 2017.
1728
1729               Mon Dec 31 03:02:00
1730
1731           So we default the remote file year to current year 2018 and get
1732
1733               Mon Dec 31 03:02:00 2018
1734
1735           Now we think that the remote file is newer by 363.860278 days.
1736        """
1737
1738        # Find out if we've a directory or a file.
1739        if line[0] == 'd':
1740            dir_or_file = FileType.DIRECTORY
1741        else:
1742            dir_or_file = FileType.FILE
1743
1744        pattern = self.user_settings.FTP_LISTING
1745
1746        # Sensible defaults.
1747        filesize = 0
1748        filename = ""
1749        # Default the time to midnight.
1750        hour = 0
1751        minute = 0
1752        seconds = 0
1753        # Default the date to Jan 1 of the current year.
1754        month = 1
1755        day = 1
1756        year = WebSite.get_current_year()
1757
1758        # Extract time and date from the ftp listing.
1759        match = pattern.search(line)
1760
1761        if match:
1762            filesize = int(match.group('bytes'))
1763            month = self.user_settings.monthToNumber[match.group('mon')]
1764            day = int(match.group('day'))
1765
1766            # Remote file listing contains the year.  The FTP listing will omit the hour and minute.
1767            if match.group('year'):
1768                year = int(match.group('year'))
1769                logging.debug(f"ftp has year = {year} but is probably missing hour and minute")
1770            else:
1771                # Remote file listing omits the year.  Default the year to the current UTC time year.
1772                # That may be incorrect (see comments above).
1773                year = WebSite.get_current_year()
1774                logging.debug(f"ftp is missing the year;  use the current year = {year}")
1775
1776            # If the FTP listing has the hour and minute, it will omit the year.
1777            if match.group('hour') and match.group('min'):
1778                hour = int(match.group('hour'))
1779                minute = int(match.group('min'))
1780                logging.debug(f"ftp has hour = {hour} and minute = {minute} so is probably missing the year")
1781
1782            filename = match.group('filename')
1783
1784        # Package up the time and date nicely.
1785        # Note if we didn't get any matches, we'll default the remote date and
1786        # time to Jan 1 midnight of the current year.
1787        d = datetime.datetime(year, month, day, hour, minute, seconds)
1788
1789        return [filename, dir_or_file, d, filesize]
1790
1791# ----------------------------------------------------------------------------
1792#  Class for synchronizing local and remote web sites.
1793# ----------------------------------------------------------------------------
1794
1795class UpdateWeb(object):
1796    """Given previously scanned local and remote directories, update the remote website."""
1797
1798    def __init__(
1799            self,
1800            settings,
1801            server,
1802            user,
1803            password,
1804            ftproot,
1805            file_size_limit,
1806            local_directory_list,
1807            local_file_info,
1808            remote_directory_list,
1809            remote_file_info):
1810        """Connect to remote site.  Accept previously scanned local and remote files and directories."""
1811
1812        self.user_settings = settings
1813
1814        self.local_files_list = []
1815        self.remote_files_list = []
1816        self.local_file_to_size = {}
1817        self.local_file_to_date_time = {}
1818        self.remote_file_to_date_time = {}
1819        self.local_only_dirs = []
1820        self.local_only_files = []
1821        self.remote_only_dirs = []
1822        self.remote_only_files = []
1823        self.common_files = []
1824
1825        # Connect to FTP server and log in.
1826        try:
1827            self.ftp = ftplib.FTP(server)
1828            self.ftp.login(user, password)
1829        except Exception as detail:
1830            logging.error(f"Cannot login to ftp server: {str(detail):s} Aborting...")
1831            raise UpdateWebException("Problem accessing remote web site.  See the log file for details.  Aborting... ") from detail
1832        else:
1833            logging.debug("ftp login succeeded.")
1834
1835        logging.debug(f"ftp server welcome message:  {self.ftp.getwelcome():s}")
1836
1837        # Local root directory.
1838        self.local_root_dir = self.user_settings.local_root_dir
1839        logging.debug(f"Local root directory: {self.local_root_dir:s}")
1840
1841        # Root directory of FTP server.
1842        self.ftp_root_dir = ftproot
1843        logging.debug(f"ftp root directory (requested) = {self.ftp_root_dir:s}")
1844
        # Transform KB string to integer bytes.  e.g. "200" => 204800
1846        self.file_size_limit = int(file_size_limit) * 1024
1847
1848        try:
1849            # Go to the root directory.
1850            self.ftp.cwd(self.ftp_root_dir)
1851
1852            # Read it back.
1853            self.ftp_root_dir = self.ftp.pwd()
1854            logging.debug(f"ftp root directory (read back from server): {self.ftp_root_dir:s}")
1855        except Exception as detail:
1856            logging.error(f"UpdateWeb(): \tCannot ftp cwd or ftp LIST dir {self.ftp_root_dir:s} {str(detail):s} Aborting...")
1857
1858        self.local_directory_list = local_directory_list
1859        self.remote_directory_list = remote_directory_list
1860        self.local_file_info = local_file_info
1861        self.remote_file_info = remote_file_info
1862
1863    def append_root_dir(self, root_dir, name):
1864        """Append the root directory to a path"""
1865
1866        # e.g. root = /, and name = Art/foo.txt yields /Art/foo.txt
1867        # but root = /Sean, and name = Art/foo.txt yields /Sean/Art/foo.txt
1868        if root_dir == self.user_settings.DEFAULT_ROOT_DIR:
1869            return root_dir + name
1870        else:
1871            return root_dir + "/" + name
1872
1873    def file_info(self):
1874        """Create lists of file names from the file information.  Also create dictionaries which map file names onto
1875        dates, times, and sizes."""
1876
1877        # Extract file names.
1878        self.local_files_list = [
1879            file_info[self.user_settings.FILE_NAME] for file_info in self.local_file_info]
1880        self.remote_files_list = [
1881            file_info[self.user_settings.FILE_NAME] for file_info in self.remote_file_info]
1882
1883        # Use a dictionary comprehension to create key/value pairs, 
1884        #     (file name, file date/time)
1885        # which map file names onto date/time.
1886        self.local_file_to_date_time = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_DATE_TIME] for file_info in self.local_file_info}
1887        self.remote_file_to_date_time = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_DATE_TIME] for file_info in self.remote_file_info}
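        # For example (hypothetical entries), one of these maps might look like
        #     {'Art/foo.txt': datetime.datetime(2024, 1, 1, 12, 0, 0), ...}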
1888
1889        # Dictionary comprehension creates a mapping of local file names onto file sizes.
1890        self.local_file_to_size = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_SIZE] for file_info in self.local_file_info}
1891
1892    def update(self):
1893        """Scan through the local website, cleaning it up.
1894        Go to remote website on my servers and synchronize all files."""
1895
1896        self.file_info()
1897
1898        # Which files and directories are different.
1899        self.changes()
1900
1901        # Synchronize with the local web site.
1902        self.synchronize()
1903
1904    def changes(self):
1905        """Find the set of different directories and files on local and remote."""
1906
1907        # Add all directories which are only on local to the dictionary.
1908        dir_to_type = {
1909            d: FileType.ON_LOCAL_ONLY for d in self.local_directory_list}
1910
1911        # Scan through all remote directories, adding those only on remote or
1912        # on both.
1913        for d in self.remote_directory_list:
1914            if d in dir_to_type:
1915                dir_to_type[d] = FileType.ON_BOTH_LOCAL_AND_REMOTE
1916            else:
1917                dir_to_type[d] = FileType.ON_REMOTE_ONLY
1918
1919        # Add all files which are only on local to the dictionary.
1920        file_to_type = {
1921            f: FileType.ON_LOCAL_ONLY for f in self.local_files_list}
1922
1923        # Scan through all remote files, adding those only on remote or on
1924        # both.
1925        for f in self.remote_files_list:
1926            if f in file_to_type:
1927                file_to_type[f] = FileType.ON_BOTH_LOCAL_AND_REMOTE
1928            else:
1929                file_to_type[f] = FileType.ON_REMOTE_ONLY
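
        # At this point every file name maps to exactly one FileType, for example (hypothetical names),
        #     index.html -> ON_BOTH_LOCAL_AND_REMOTE, new.css -> ON_LOCAL_ONLY, stale.js -> ON_REMOTE_ONLY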
1930
1931        logging.debug("Raw dictionary dump of directories")
1932        for k, v in dir_to_type.items():
1933            logging.debug(f"\t dir:  {str(k):s}  type: {str(v):s}")
1934
1935        logging.debug("Raw dictionary dump of files")
1936        for k, v in file_to_type.items():
1937            logging.debug(f"\t file: {str(k):s}  type: {str(v):s}")
1938
1939        # List of directories only on local.  Keep the ordering.
1940        self.local_only_dirs = [
1941            d for d in self.local_directory_list if dir_to_type[d] == FileType.ON_LOCAL_ONLY]
1942
1943        # List of directories only on remote.  Keep the ordering.
1944        self.remote_only_dirs = [
1945            d for d in self.remote_directory_list if dir_to_type[d] == FileType.ON_REMOTE_ONLY]
1946
1947        # We don't care about common directories, only their changed files, if
1948        # any.
1949
1950        # List of files only on local.  Keep the ordering.
1951        self.local_only_files = [
1952            f for f in self.local_files_list if file_to_type[f] == FileType.ON_LOCAL_ONLY]
1953
1954        # List of files only on remote.  Keep the ordering.
1955        self.remote_only_files = [
1956            f for f in self.remote_files_list if file_to_type[f] == FileType.ON_REMOTE_ONLY]
1957
1958        # List of common files on both local and remote.  Keep the ordering.
1959        self.common_files = [
1960            f for f in self.local_files_list if file_to_type[f] == FileType.ON_BOTH_LOCAL_AND_REMOTE]
1961
1962        logging.debug("*** Directories only on local ******************************")
1963        for d in self.local_only_dirs:
1964            logging.debug(f"\t {d:s}")
1965
1966        logging.debug("*** Directories only on remote ******************************")
1967        for d in self.remote_only_dirs:
1968            logging.debug(f"\t {d:s}")
1969
1970        logging.debug("*** Files only on local ******************************")
1971        for f in self.local_only_files:
1972            logging.debug(f"\t {f:s}")
1973
1974        logging.debug("*** Files only on remote ******************************")
1975        for f in self.remote_only_files:
1976            logging.debug(f"\t {f:s}")
1977
1978        logging.debug("*** Common files ******************************")
1979        for f in self.common_files:
1980            logging.debug(f"name {f:s}")
1981            logging.debug(f"\tlocal time {self.local_file_to_date_time[f].ctime():s}")
1982            logging.debug(f"\tremote time {self.remote_file_to_date_time[f].ctime():s}")
1983
1984    def synchronize(self):
1985        """Synchronize files and subdirectories in the remote directory with the local directory."""
1986
1987        # If we have the same files in local and remote, compare their times
1988        # and dates.
1989        for f in self.common_files:
1990            local_file_time = self.local_file_to_date_time[f]
1991            remote_file_time = self.remote_file_to_date_time[f]
1992
1993            # What's the time difference?
1994            time_delta = remote_file_time - local_file_time
1995            # How much difference, either earlier or later?
1996            seconds_different = abs(time_delta.total_seconds())
1997            minutes_different = seconds_different / 60.0
1998            hours_different = minutes_different / 60.0
1999            days_different = hours_different / 24.0
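            # For example, a time difference of 7200 seconds is 120 minutes = 2 hours, or about 0.083 days.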
2000
2001            # Assume no upload initially.
2002            upload_to_host = False
2003
2004            logging.debug(f"Common file:  {f:s}.")
2005
2006            # Remote file time is newer.
            # File names in the log can be quite long, around 200 characters, for example,
            # Mathematics/AbstractAlgebra/PrimitivePolynomials/Project/Build/PrimpolyXCode/Primpoly/Primpoly.xcodeproj/project.xcworkspace/xcuserdata/seanoconnor.xcuserdatad/UserInterfaceState.xcuserstate
2009
2010            if remote_file_time > local_file_time:
                # Remote file time is MUCH newer:  suspect time is out of joint on the server, so upload the local file to be safe.
2012                if minutes_different >= self.user_settings.MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD:
2013                    logging.error(f"Remote file {f:s} is MUCH newer[more than {self.user_settings.MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD} minutes] by {seconds_different:8.1f} seconds = {minutes_different:8.1f} minutes = {hours_different:8.1f} hours = {days_different:8.1f} days.  Upload the file to be safe.")
2014                    logging.error(f"\tlocal time {local_file_time.ctime():s}")
2015                    logging.error(f"\tremote time {remote_file_time.ctime():s}")
2016
2017                    # Set the local file to the current time.
2018                    full_file_name = self.append_root_dir(
2019                        self.local_root_dir, f)
2020                    if os.path.exists(full_file_name):
2021                        # Change the access and modify times of the file to the current time.
2022                        os.utime(full_file_name, None)
2023                        logging.error(f"Touching local file {full_file_name:s} to make it the current time")
2024
2025                    upload_to_host = True
2026                # Remote file time is newer, but not by much.  Let's just assume a slight time mismatch on the server.  Don't upload.
2027                else:
2028                    logging.error(f"Remote file {f:s} is only SLIGHTLY newer by {seconds_different:8.1f} seconds = {minutes_different:8.1f} minutes = {hours_different:8.1f} hours = {days_different:8.1f} days.  Probably just inaccurate time/date on the server.  Wait -- don't upload the file yet.")
2029                    logging.error(f"\tlocal time {local_file_time.ctime():s}")
2030                    logging.error(f"\tremote time {remote_file_time.ctime():s}")
2031                    upload_to_host = False
2032
2033            # Local file time is newer.
2034            elif local_file_time > remote_file_time:
2035                # Local file time slightly newer than the remote file.  So we are pretty sure the local file really got changed vs the server file.
2036                if days_different >= self.user_settings.DAYS_NEWER_FOR_LOCAL_BEFORE_UPLOAD:
2037                    logging.warning(f"Local file {f:20s} is SLIGHTLY newer [more than {self.user_settings.DAYS_NEWER_FOR_LOCAL_BEFORE_UPLOAD} days] by {seconds_different:8.1f} seconds = {minutes_different:8.1f} minutes = {hours_different:8.1f} hours = {days_different:8.1f} days.  Preparing for upload.")
2038                    logging.error(f"\tlocal time {local_file_time.ctime():s}")
2039                    logging.error(f"\tremote time {remote_file_time.ctime():s}")
2040                    upload_to_host = True
2041                else:
2042                    logging.debug(f"Local file {f:20s} is BARELY newer by {seconds_different:8.1f} seconds = {minutes_different:8.1f} minutes = {hours_different:8.1f} hours = {days_different:8.1f} days.  Probably just inaccurate time/date on the server.  Wait -- don't upload the file yet.")
2043                    logging.error(f"\tlocal time {local_file_time.ctime():s}")
2044                    logging.error(f"\tremote time {remote_file_time.ctime():s}")
2045                    upload_to_host = False
2046
2047            # Cancel the upload if the file is too big for the server.
2048            size = self.local_file_to_size[f]
2049            if size >= self.file_size_limit:
2050                logging.error(f"upload():  Skipping upload of file {f:s} of size {size:d}; too large for server, limit is {self.file_size_limit:d} bytes")
2051                upload_to_host = False
2052
2053            # Finally do the file upload.
2054            if upload_to_host:
2055                logging.debug(f"Uploading changed file {f:s}")
2056                # Suppress newline to keep the message to the console more compact.  Flush output buffer, so we can see the message right away.
2057                print(f"Uploading changed file {f:s}...  ", end='', flush=True)
2058                self.upload(f)
2059
2060        # Remote directory is not in local.  Delete it.
2061        for d in self.remote_only_dirs:
2062            logging.debug(f"Deleting remote only directory {d:s}")
2063            print(f"Deleting remote only directory {d:s}...  ", end='', flush=True)
2064            self.rmdir(d)
2065
2066        # Local directory missing on remote.  Create it.
2067        # Due to breadth first order scan, we'll create parent directories
2068        # before child directories.
2069        for d in self.local_only_dirs:
2070            logging.debug(f"Only on local.  Creating new remote dir {d:s}.")
2071            print(f"Creating new remote directory {d:s}...  ", end='', flush=True)
2072            self.mkdir(d)
2073
2074        # Local file missing on remote.  Upload it.
2075        for f in self.local_only_files:
2076            logging.debug(f"Local only file.  Uploading {f:s} to remote.")
2077
2078            #  But cancel the upload if the file is too big for the server.
2079            size = self.local_file_to_size[f]
2080            if size >= self.file_size_limit:
2081                logging.error(f"upload():  Skipping upload of file {f:s} of size {size:d};"
2082                              f" too large for server, limit is {self.file_size_limit:d} bytes")
2083            else:
2084                logging.debug(f"Uploading new file {f:s}")
2085                print(f"Uploading new file {f:s}...  ", end='', flush=True)
2086                self.upload(f)
2087
2088        # Remote contains a file not present on the local.  Delete the file.
2089        for f in self.remote_only_files:
2090            logging.debug(f"Remote only file.  Deleting remote file {f:s}.")
2091            print(f"Deleting remote file {f:s}...  ", end='', flush=True)
2092            self.del_remote(f)
2093
2094    def del_remote(self, relative_file_path):
2095        """Delete a file using ftp."""
2096
2097        logging.debug(f"del_remote():  \trelative file path name: {relative_file_path:s}")
2098
2099        # Parse the relative file path into file name and relative directory.
2100        relative_dir, file_name = os.path.split(relative_file_path)
2101        logging.debug(f"del_remote():  \tfile name: {file_name:s}")
2102        logging.debug(f"del_remote():  \trelative dir: {relative_dir:s}")
2103        logging.debug(f"del_remote():  \tremote root dir: {self.ftp_root_dir:s}")
2104
2105        try:
2106            # Add the remote root path and go to the remote directory.
2107            remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
2108            logging.debug(f"del_remote():  \tftp cd remote dir: {remote_dir:s}")
2109            self.ftp.cwd(remote_dir)
2110        except Exception as detail:
2111            logging.error(f"del_remote():  \tCannot ftp chdir: {str(detail):s}  Skipping...")
2112        else:
2113            try:
2114                logging.debug(f"del_remote():  \tftp rm: {file_name:s}")
2115
2116                # Don't remove zero length file names.
2117                if len(file_name) > 0:
2118                    self.ftp.delete(file_name)
2119                else:
                    logging.warning(
                        f"del_remote():  skipping ftp delete;  file name {file_name:s} had zero length")
2122            except Exception as detail:
2123                logging.error(f"del_remote():  \tCannot ftp rm: {str(detail):s}")
2124
2125    def mkdir(self, relative_dir):
2126        """Create new remote directory using ftp."""
2127
2128        logging.debug(f"mkdir():  \trelative dir path name: {relative_dir:s}")
2129        logging.debug(f"mkdir():  \tremote root dir: {self.ftp_root_dir:s}")
2130
2131        # Parse the relative dir path into prefix dir and suffix dir.
2132        path, d = os.path.split(relative_dir)
2133        logging.debug(f"mkdir():  \tremote prefix dir: {path:s}")
2134        logging.debug(f"mkdir():  \tremote dir:  {d:s}")
2135
2136        try:
2137            # Add the remote root path and go to the remote directory.
2138            remote_dir = self.append_root_dir(self.ftp_root_dir, path)
2139            logging.debug(f"mkdir():  \tftp cd remote dir: {remote_dir:s}")
2140            self.ftp.cwd(remote_dir)
2141        except Exception as detail:
2142            logging.error(f"mkdir():  \tCannot ftp chrdir: {str(detail):s}  Skipping...")
2143        else:
2144            try:
2145                logging.debug(f"mkdir():  \tftp mkd: {d:s}")
2146                self.ftp.mkd(d)
2147            except Exception as detail:
2148                logging.error(f"mkdir():  \tCannot ftp mkdir: {str(detail):s}")
2149
2150    def rmdir(self, relative_dir):
2151        """Delete an empty directory using ftp."""
2152
2153        logging.debug(f"rmdir():  \tintermediate dir path name: {relative_dir:s}")
2154        logging.debug(f"rmdir():  \tremote root dir: {self.ftp_root_dir:s}")
2155
2156        # Parse the relative dir path into prefix dir and suffix dir.
2157        path, d = os.path.split(relative_dir)
2158        logging.debug(f"rmdir():  \tremote prefix dir: {path:s}")
2159        logging.debug(f"rmdir():  \tremote dir:  {d:s}")
2160
2161        try:
2162            # Add the remote root path and go to the remote directory.
2163            remote_dir = self.append_root_dir(self.ftp_root_dir, path)
2164            logging.debug(f"rmdir():  \tftp cd remote dir: {remote_dir:s}")
2165            self.ftp.cwd(remote_dir)
2166        except Exception as detail:
2167            logging.error(f"rmdir():  \tCannot ftp chdir: {str(detail):s}  Skipping...")
2168        else:
2169            try:
2170                logging.debug(f"rmdir():  \tftp rmd: {d:s}")
2171                self.ftp.rmd(d)
2172            except Exception as detail:
2173                logging.error(f"rmdir():  \tCannot ftp rmdir dir {d:s}: {str(detail):s}.  Directory is probably not empty.  Do a manual delete.")
2174
2175    def download(self, relative_file_path):
2176        """Download a binary file using ftp."""
2177
2178        logging.debug(f"download():  \tfile name: {relative_file_path:s}")
2179
2180        # Parse the relative file path into file name and relative directory.
2181        relative_dir, file_name = os.path.split(relative_file_path)
2182        logging.debug(f"download():  \tfile name: {file_name:s}")
2183        logging.debug(f"download():  \trelative dir: {relative_dir:s}")
2184        logging.debug(f"download():  \troot dir: {self.ftp_root_dir:s}")
2185
2186        # Add the remote root path and go to the remote directory.
2187        remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
2188        logging.debug(f"download():  \tftp cd remote dir: {remote_dir:s}")
2189
2190        try:
2191            self.ftp.cwd(remote_dir)
2192        except Exception as detail:
2193            logging.error(f"download():  \tCannot ftp chdir: {str(detail):s}  Skipping...")
2194        else:
2195            # Add the local root path to get the local file name.
2196            # Open local binary file to write into.
2197            local_file_name = self.append_root_dir(
2198                self.local_root_dir, relative_file_path)
2199            logging.debug(f"download():  \topen local file name: {local_file_name:s}")
2200            try:
2201                f = open(local_file_name, "wb")
2202                try:
                    # The actual transfer is currently disabled;  if enabled, this would call f.write() on each block of the binary file.
                    # self.ftp.retrbinary("RETR " + file_name, f.write)
2205                    pass
2206                except Exception as detail:
2207                    logging.error(f"download():  \tCannot cannot ftp retrbinary: {str(detail):s}")
2208                f.close()
2209            except IOError as detail:
2210                logging.error(f"download():  \tCannot open local file {local_file_name:s} for reading:  {str(detail):s}")
2211
2212    def upload(self, relative_file_path):
2213        """Upload  a binary file using ftp."""
2214
2215        logging.debug(f"upload():  \trelative file path name: {relative_file_path:s}")
2216
2217        # Parse the relative file path into file name and relative directory.
2218        relative_dir, file_name = os.path.split(relative_file_path)
2219        logging.debug(f"upload():  \tfile name: {file_name:s}")
2220        logging.debug(f"upload():  \trelative dir: {relative_dir:s}")
2221        logging.debug(f"upload():  \tremote root dir: {self.ftp_root_dir:s}")
2222
2223        # Add the remote root path and go to the remote directory.
2224        remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
2225        logging.debug(f"upload():  \tftp cd remote dir: {remote_dir:s}")
2226
2227        try:
2228            self.ftp.cwd(remote_dir)
2229        except Exception as detail:
2230            logging.error(f"upload():  \tCannot ftp chdir: {str(detail):s}  Skipping...")
2231        else:
2232            # Add the local root path to get the local file name.
2233            # Open local binary file to read from.
2234            local_file_name = self.append_root_dir(
2235                self.local_root_dir, relative_file_path)
2236            logging.debug(f"upload():  \topen local file name: {local_file_name:s}")
2237
2238            try:
2239                f = open(local_file_name, "rb")
2240                try:
2241                    # f.read() is called on each block of the binary file until
2242                    # EOF.
2243                    logging.debug(f"upload():  \tftp STOR file {file_name:s}")
2244                    self.ftp.storbinary("STOR " + file_name, f)
2245                except Exception as detail:
2246                    logging.error(f"upload():  \tCannot ftp storbinary: {str(detail):s}")
2247                f.close()
2248            except IOError as detail:
2249                logging.error(f"upload():  \tCannot open local file {local_file_name:s} for reading:  {str(detail):s}")
2250
2251    def finish(self):
2252        """Log out of an ftp session"""
2253        logging.debug(f"Finished with UpdateWeb object of class {type(self)}")
2254        try:
2255            self.ftp.quit()
2256        except Exception as detail:
2257            logging.error(f"Cannot ftp quit because {str(detail):s}")

# ----------------------------------------------------------------------------
#  Main function
# ----------------------------------------------------------------------------

def main(raw_args=None):
    """Main program.  Clean up and update my website."""

    # Print the obligatory legal notice.
    print("""
    updateweb Version 7.1 - A Python utility program which maintains my web site.
    Copyright (C) 2007-2024 by Sean Erik O'Connor.  All Rights Reserved.

    It deletes temporary files, rewrites old copyright lines and email address
    lines in source files, then synchronizes all changes to my web sites.

    updateweb comes with ABSOLUTELY NO WARRANTY; for details see the
    GNU General Public License.  This is free software, and you are welcome
    to redistribute it under certain conditions; see the GNU General Public
    License for details.
    """)

    # Put ALL the main code into a try block!
    try:
        # ---------------------------------------------------------------------
        #  Load default settings and start logging.
        # ---------------------------------------------------------------------

        # Default user settings.
        user_settings = UserSettings()

        print(f"Running main( {raw_args} ) Python version "
              f"{sys.version_info[0]:d}.{sys.version_info[1]:d}.{sys.version_info[2]:d} "
              f"local web directory {user_settings.local_root_dir}\n")
        # Get command line options such as --verbose.  Pass them back as flags in
        # user_settings.
        CommandLineSettings(user_settings, raw_args)

        # Load all unit test functions named test_* from UnitTest class, run the tests and exit.
        if user_settings.UNITTEST:
            suite = unittest.TestLoader().loadTestsFromTestCase(UnitTest)
            unittest.TextTestRunner(verbosity=2).run(suite)
            # We are done!
            print("  ...done!", flush=True)
            return
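
        # A single test method could equivalently be run with the standard unittest
        # command line runner, e.g. (the test method name here is illustrative only):
        #
        #     python -m unittest updateweb.UnitTest.test_some_method -v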

        # Start logging to file.  Verbose turns on logging for
        # DEBUG, INFO, WARNING, ERROR, and CRITICAL levels,
        # otherwise we log only WARNING, ERROR, and CRITICAL levels.
        if user_settings.VERBOSE:
            loglevel = logging.DEBUG
        else:
            loglevel = logging.WARNING

        # Pick the log file name on the host.
        if user_settings.CLEAN:
            user_settings.LOGFILENAME = "/private/logLocal.txt"
        else:
            user_settings.LOGFILENAME = "/private/logRemote.txt"

        # Configure logging before the first logging call below;  a premature
        # logging.debug() would install a default stderr handler and this file
        # handler would then never be attached.
        logging.basicConfig(
            level=loglevel,
            format='%(asctime)s %(levelname)-8s %(message)s',
            datefmt='%a, %d %b %Y %H:%M:%S',
            filename=user_settings.local_root_dir + user_settings.LOGFILENAME,
            filemode='w')

        # Default is to skip processing or uploading MathJax files in /mathjax to the server.
        if not user_settings.MATHJAX:
            user_settings.DIR_TO_SKIP += "|mathjax"
        else:
            print("Processing and uploading mathjax files.  git restore any changed files and git clean -f to remove extra files...  ", end='', flush=True)
            print("Since you are loading MathJax for the first time --- Also don't forget to upload .htaccess manually.  If using FileZilla, change your FreeServer settings: Files->Site Manager->Transfer Settings->Limit number of simultaneous connections->Check the box.  This avoids ERROR 421 Too many connections...  ", end='', flush=True)
            logging.debug("Processing and uploading mathjax files.  git restore any changed files and git clean -f to remove extra files.")
            logging.debug("Since you are loading MathJax for the first time --- Also don't forget to upload .htaccess manually.  If using FileZilla, change your FreeServer settings: Files->Site Manager->Transfer Settings->Limit number of simultaneous connections->Check the box.  This avoids ERROR 421 Too many connections...")
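
        # With the format and datefmt strings passed to logging.basicConfig() above,
        # a typical line in the log file looks like (timestamp illustrative):
        #
        #     Mon, 01 Jan 2024 12:00:00 DEBUG    ********** Begin logging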

        logging.debug("********** Begin logging")

        # ---------------------------------------------------------------------
        #  Scan the local website, finding out all files and directories.
        # ---------------------------------------------------------------------

        # Suppress newline to keep the message to the console more compact.  Flush output buffer, so we can see the message right away.
        print(f"Scanning the local web site from the root dir = {user_settings.local_root_dir}...  ", end='', flush=True)
        logging.debug(f"========================== Scanning the local web site from the root dir = {user_settings.local_root_dir}")

        local = LocalWebSite(user_settings)
        local.scan()

        # ---------------------------------------------------------------------
        # Clean up local website.
        # ---------------------------------------------------------------------

        # Clean up the directory by rewriting source code and hypertext and removing temporary files.
        print("Cleaning local web site...  ", end='', flush=True)
        logging.debug("========================== Cleaning the local web site")
        local.clean()

        # We are done with the first scan of the local web site and will dispose of it.
        local.finish()
        del local

        # ---------------------------------------------------------------------
        #  Rescan the local website since there will be changes to source
        #  files from the clean up stage.
        # ---------------------------------------------------------------------

        print(f"Rescanning the local web site from root dir = {user_settings.local_root_dir}...  ", end='', flush=True)
        logging.debug(f"========================== Re-Scan the local web site from root dir = {user_settings.local_root_dir}")

        local = LocalWebSite(user_settings)

        local.scan()

        # ---------------------------------------------------------------------
        #  List all the local directories and files and their sizes.
        # ---------------------------------------------------------------------

        # Local website directories.
        local_directory_list = local.directories
        logging.debug("********** List of all the Local Directories")
        for d in local_directory_list:
            logging.debug(f"\t {d:s}")

        # Generate lists of the local website filenames only, and their sizes in bytes.
        local_files_name_size_pairs = [[file_info[user_settings.FILE_NAME], file_info[user_settings.FILE_SIZE]] for file_info in local.files]
        total_number_of_files = len(local_files_name_size_pairs)
        logging.debug(f"********** List of all the Local Files from largest to smallest.  There are {total_number_of_files:15d} files.")
        local_files_name_size_pairs = sorted(local_files_name_size_pairs, key=lambda name_size: name_size[1], reverse=True)

        # Local website filenames only, and their dates and times.
        local_file_datetime_pairs = [[file_info[user_settings.FILE_NAME], file_info[user_settings.FILE_DATE_TIME]] for file_info in local.files]
        logging.debug("********** List of all Local Files Showing Their Date and Time")
        for file_datetime_pair in local_file_datetime_pairs:
            logging.debug(f"\t {file_datetime_pair[1].ctime():s} UTC {file_datetime_pair[0]:s}")

        # Total number of bytes in the local files.
        total_number_of_bytes = 0
        for file_size_pair in local_files_name_size_pairs:
            logging.debug(f"\t {file_size_pair[1]:10d} bytes {file_size_pair[0]:s}")
            total_number_of_bytes += file_size_pair[1]
        logging.debug(f"********** Total local file size = {total_number_of_bytes:10d} bytes = {total_number_of_bytes/(1024 ** 2):10.2f} MB (not counting skipped files and directories)")
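
        # The loop above doubles as a per-file size listing in the log; if only the
        # total were needed, it could be computed directly, for example:
        #
        #     total_number_of_bytes = sum(size for _, size in local_files_name_size_pairs)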

        local.finish()

        if user_settings.CLEAN:
            logging.debug("========================== Done with local file and directory cleanup...")
            del local
            print("...done!", flush=True)
            return

        # ---------------------------------------------------------------------
        #  Scan the remote hosted web site.
        # ---------------------------------------------------------------------

        print("Scanning remote web site...", end='', flush=True)
        logging.debug("========================== Scanning the remote web site...")

        # Pick which website to update.
        logging.debug("Connecting to primary remote site.")
        remote = RemoteWebSite(user_settings,
                               user_settings.SERVER_NAME,
                               user_settings.USER_NAME,
                               user_settings.PASSWORD_NAME,
                               user_settings.FTP_ROOT_NAME)
        remote.scan()
        remote.finish()

        # ---------------------------------------------------------------------
        #  List all the remote server directories and files and their sizes.
        # ---------------------------------------------------------------------

        remote_directory_list = remote.directories
        logging.debug("********** Remote Directories")
        for d in remote_directory_list:
            logging.debug(f"\t {d:s}")

        # Remote website filenames only, and their sizes in bytes.
        remote_files_name_size_list = [[file_info[user_settings.FILE_NAME], file_info[user_settings.FILE_SIZE]] for file_info in remote.files]
        total_number_of_files = len(remote_files_name_size_list)
        logging.debug(f"********** Remote Files [num files = {total_number_of_files:15d}]")
        remote_files_name_size_list = sorted(remote_files_name_size_list, key=lambda name_size: name_size[1], reverse=True)
        total_number_of_bytes = 0
        for file_size_pair in remote_files_name_size_list:
            logging.debug(f"\t {file_size_pair[1]:10d} bytes {file_size_pair[0]:s}")
            total_number_of_bytes += file_size_pair[1]
        logging.debug(f"\tTotal file size on remote (not counting skipped files and directories) = {total_number_of_bytes:10d} bytes = {total_number_of_bytes/(1024 ** 2):10.2f} MB")
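
        # For intuition, the comparison a synchronizer makes can be sketched with set
        # differences over file names (this is only an illustration, not the actual
        # logic of UpdateWeb.update(), which is defined earlier in this file):
        #
        #     local_names = {name for name, _ in local_files_name_size_pairs}
        #     remote_names = {name for name, _ in remote_files_name_size_list}
        #     files_missing_on_remote = local_names - remote_names
        #     files_missing_on_local = remote_names - local_names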

        # ---------------------------------------------------------------------
        # Synchronize the local and remote web sites.
        # ---------------------------------------------------------------------

        print("Synchronizing remote and local web sites...", end='', flush=True)
        logging.debug("========================= Synchronizing remote and local web sites...")

        # Primary website.
        logging.debug("Connecting to primary remote site for synchronization.")
        sync = UpdateWeb(user_settings,
                         user_settings.SERVER_NAME,
                         user_settings.USER_NAME,
                         user_settings.PASSWORD_NAME,
                         user_settings.FTP_ROOT_NAME,
                         user_settings.FILE_SIZE_LIMIT_NAME,
                         local.directories,
                         local.files,
                         remote.directories,
                         remote.files)

        sync.update()
        sync.finish()

        del sync
        del remote
        del local
        print("...done!", flush=True)

    except UpdateWebException as detail:
        logging.error(f"Couldn't update the web directory:  {str(detail):s}.  Aborting...")

    except RecursionError as detail:
        logging.error(f"Walking the directory tree became too deep for Python's recursion stack depth of {sys.getrecursionlimit():d}.  You can increase it with sys.setrecursionlimit(limit).  {str(detail):s}.  Aborting...")
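
        # If this RecursionError ever occurs, the limit could be raised near the top
        # of main() before the directory walk, e.g. (the value 10000 is only an example):
        #
        #     sys.setrecursionlimit(10000)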

if __name__ == '__main__':
    """Python executes all code in this file.  Finally, we come here.

    * If we are executing this file as a standalone Python script,
      the name of the current module is set to __main__ and thus we'll call the main() function.

    * But if we are importing this code as a module, and calling it from another script, we will do this instead:

        import updateweb
        updateweb.main(["--test"])"""

    main()