1#!/usr/bin/env python3
   2# ============================================================================
   3#
   4# NAME
   5#
   6#     updateweb.py
   7#
   8# DESCRIPTION
   9#
  10#     Python script which updates my web sites.
  11#
  12#     It does miscellaneous cleanup on my local copy of the web site on disk,
  13#     including updating copyright information, then synchronizes the local
  14#     copy to my remote server web sites using FTP.
  15#
  16# USAGE
  17#
  18#     It's best to use the associated makefile.
  19#     But you can call this Python utility from the command line,
  20#
  21#     $ python updateweb.py          Clean up my local copy, then use it
  22#                                    to update my remote web server site.
  23#                                    Log warnings and errors.
  24#     $ python updateweb.py -v       Same, but log debug messages also.
  25#     $ python updateweb.py -c       Clean up my local copy only.
  26#     $ python updateweb.py -t       Run unit tests only.
  27#     $ python updateweb.py -m       Upload MathJax files (only need to do this once).
  28#
#     We get username and password information from the private YAML settings file named by SERVER_SETTINGS_FILE_NAME.
  30#
  31#     Logs are written to the files,
  32#
  33#         logLocal.txt       Local web site cleanup log.
  34#         logRemote.txt      Remote web server update log.
  35#
  36# AUTHOR
  37#
  38#     Sean E. O'Connor        23 Aug 2007  Version 1.0 released.
  39#
  40# LEGAL
  41#
  42#     updateweb.py Version 7.3 - A Python utility program which maintains my web site.
  43#     Copyright (C) 2007-2025 by Sean Erik O'Connor.  All Rights Reserved.
  44#
  45#     This program is free software: you can redistribute it and/or modify
  46#     it under the terms of the GNU General Public License as published by
  47#     the Free Software Foundation, either version 3 of the License, or
  48#     (at your option) any later version.
  49#
  50#     This program is distributed in the hope that it will be useful,
  51#     but WITHOUT ANY WARRANTY; without even the implied warranty of
  52#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  53#     GNU General Public License for more details.
  54#
  55#     You should have received a copy of the GNU General Public License
  56#     along with this program.  If not, see <http://www.gnu.org/licenses/>.
  57#
  58#     The author's address is seanerikoconnor!AT!gmail!DOT!com
  59#     with !DOT! replaced by . and the !AT! replaced by @
  60#
  61# NOTES
  62#
  63#    DOCUMENTATION
  64#
  65#    Python interpreter:               https://www.python.org/
  66#    Python tutorial and reference:    https://docs.python.org/lib/lib.html
  67#    Python debugger:                  https://docs.python.org/3/library/pdb.html
  68#    Python regular expression howto:  https://docs.python.org/3.7/howto/regex.html
  69#
  70# ============================================================================
  71
  72# ----------------------------------------------------------------------------
  73#  Load Python Packages
  74# ----------------------------------------------------------------------------
  75
  76# OS stuff
  77import sys
  78import os
  79import argparse
  80import subprocess
  81import shutil
  82from pathlib import Path
  83
  84# Regular expressions
  85import re
  86
  87# FTP stuff
  88import ftplib
  89
  90# Date and time
  91import time
  92import stat
  93import datetime
  94
  95# Logging
  96import logging
  97
  98# Unit testing
  99import unittest
 100
 101# Enumerated types (v3.4)
 102from enum import Enum
 103from typing import List, Any
 104
 105# YAML configuration files (a superset of JSON!)
 106import yaml 
 107# Recommended by https://www.cloudbees.com/blog/yaml-tutorial-everything-you-need-get-started
 108try:
 109    from yaml import CLoader as Loader
 110except ImportError:
 111    from yaml import Loader
 112
 113# Python syntax highlighter.  See https://pygments.org
 114from pygments import highlight
 115from pygments.lexers import HtmlLexer, CssLexer, JavascriptLexer, YamlLexer, MakefileLexer, BashLexer, VimLexer, TexLexer
 116from pygments.lexers import PythonLexer, CppLexer, CLexer, CommonLispLexer, FortranFixedLexer, MatlabLexer, OutputLexer
 117from pygments.formatters import HtmlFormatter
 118
 119
 120# ----------------------------------------------------------------------------
 121#  Custom Top Level Exceptions.
 122# ----------------------------------------------------------------------------
 123
 124class UpdateWebException(Exception):
 125    """Something went wrong at a deep level when searching local files, searching remote files, or trying to sync local and remote, and we could not recover.
       Derive from Exception as recommended by the Python manual."""
 127    pass
 128
 129# ----------------------------------------------------------------------------
 130#  User settings.
 131# ----------------------------------------------------------------------------
 132
 133class TreeWalkSettings(Enum):
 134    """Enum types for how to walk the directory tree."""
 135    BREADTH_FIRST_SEARCH = 1
 136    DEPTH_FIRST_SEARCH = 2
 137
 138class FileType(Enum):
 139    """'Enum' types for properties of directories and files."""
 140    DIRECTORY = 0
 141    FILE = 1
 142    ON_LOCAL_ONLY = 2
 143    ON_REMOTE_ONLY = 3
 144    ON_BOTH_LOCAL_AND_REMOTE = 4
 145
 146class UserSettings:
 147    """Megatons of user selectable settings."""
 148    # Logging control.
 149    LOGFILENAME = ""
 150    VERBOSE = False  # Verbose mode.  Prints out everything.
 151    CLEAN = False  # Clean the local website only.
 152    UNITTEST = False  # Run a unit test of a function.
 153    MATHJAX = False  # Process and upload MathJax files to server.
 154
 155    # When diving into the MathJax directory, web walking the deep directories
 156    # may exceed Python's default recursion limit of 1000.
 157    RECURSION_DEPTH = 5000
 158    sys.setrecursionlimit(RECURSION_DEPTH)
 159
 160    # Fields in the file information (file_info) structure.
 161    # For example, file_info = 
 162    #   [ '/WebDesign/EquationImages/equation001.png',  -- The file name.
 163    #      1,                                           -- Enum type: Is it a file? dir? on local? on remote? on both?
 164    #      datetime.datetime(2010, 2, 3, 17, 15),       -- UTC encoded in a datetime class.
 165    #      4675]                                        -- File size in bytes.
 166    FILE_NAME = 0
 167    FILE_TYPE = 1
 168    FILE_DATE_TIME = 2
 169    FILE_SIZE = 3
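    # So, for example, file_info[UserSettings.FILE_NAME] picks out the file name and
    # file_info[UserSettings.FILE_SIZE] picks out the size in bytes of the example above.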
 170
 171    # Server settings.
 172    SERVER_SETTINGS_FILE_NAME = "/private/updateweb.yaml"
 173    SERVER_NAME = None
 174    USER_NAME = None
 175    PASSWORD_NAME = None
 176    FTP_ROOT_NAME = None
 177    FILE_SIZE_LIMIT_NAME = None
 178
 179    # Map month names onto numbers.
 180    monthToNumber = {
 181        'Jan': 1,
 182        'Feb': 2,
 183        'Mar': 3,
 184        'Apr': 4,
 185        'May': 5,
 186        'Jun': 6,
 187        'Jul': 7,
 188        'Aug': 8,
 189        'Sep': 9,
 190        'Oct': 10,
 191        'Nov': 11,
 192        'Dec': 12}
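    # Example lookup (illustrative only):  UserSettings.monthToNumber['Sep']  -->  9.
    # This is how a month name pulled out of an ftp listing can be turned into a month number.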
 193
 194    # List of directories to skip over when processing or uploading the web page.
    # Some are private, but most are directories of temporary files.
 196    # They will be listed as WARNING in the log.
 197    # Examples:
 198    #     My private admin settings directory.
 199    #     Git or SVN local admin directories.
    #     Compile build directories from XCode.
 201    #     PyCharm build directories.
 202    #     Python cache directories.
 203    #     Jupyter checkpoint directories.
 204    #     XCode temporary file crap.
 205    DIR_TO_SKIP = "private|.git|.github|.svn|build|XCodeOutput|Debug|Release|PyCharm|.idea|__pycache__|.ipynb_checkpoints|ModuleCache.noindex|SymbolCache.noindex|Primpoly-[a-z]"
 206
 207    # List of files to skip when processing or uploading to the web page.
 208    # They will be listed as WARNING in the log.
 209    # Examples:
 210    #     MathJax yml file.
 211    #     .htaccess (because it doesn't show up on the output of ftp LIST, so we must upload manually)
 212    FILE_TO_SKIP = ".travis.yml|.svnignore|.htaccess"
 213
 214    # Suffixes for temporary files which will be deleted during the cleanup
 215    # phase.
 216    TEMP_FILE_SUFFIXES = r"""        # Use Python raw strings.
 217        \.                           # Match the dot in the file name.
 218                                     # Now begin matching the file name suffix.
 219                                     # (?: non-capturing match for the regex inside the parentheses,
 220                                     #   i.e. matching string cannot be retrieved later.
 221                                     # Now match any of the following file extensions:
 222        (?: o   | obj | lib |        #     Object files generated by C, C++, etc compilers
 223                              pyc |  #     Object file generated by the Python compiler
 224                  ilk | pdb | sup |  #     Temp files from VC++ compiler
 225            idb | ncb | opt | plg |  #     Temp files from VC++ compiler
 226            sbr | bsc | map | bce |  #     Temp files from VC++ compiler
 227            res | aps | dep | db  |  #     Temp files from VC++ compiler
 228                              jbf |  #     Paintshop Pro
 229                      class | jar |  #     Java compiler
 230                              fas |  #     CLISP compiler
 231                        swp | swo |  #     Vim editor
                        toc | aux |  #     TeX auxiliary files (not .synctex.gz or .log)
 233          DS_Store  | _\.DS_Store |  #     macOS finder folder settings.
 234                       _\.Trashes |  #     macOS recycle bin
 235        gdb_history)                 #     GDB history
 236        $                            #     Now we should see only the end of line.
 237        """
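    # A quick illustration of what the pattern above is meant to catch once it has been compiled with
    # re.VERBOSE | re.IGNORECASE in precompile_regular_expressions() below (hypothetical file names):
    #     "module.pyc", "main.obj", "paper.aux"  --  match, so they are deleted during cleanup.
    #     "README.txt", "photo.png"              --  no match, so they are kept.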
 238
 239    # Special case:  Vim temporary files contain a twiddle anywhere in the
 240    # name.
 241    VIM_TEMP_FILE_EXT = "~"
 242
 243    # Suffixes for temporary directories which should be deleted during the
 244    # cleanup phase.
 245    TEMP_DIR_SUFFIX = r"""           # Use Python raw strings.
 246        (?: Debug | Release |        # C++ compiler
 247           ipch   | \.vs    |        # Temp directories from VC++ compiler
 248        \.Trashes | \.Trash)         # macOS recycle bin
 249        $
 250        """
 251
 252    # File extension for an internally created temporary file.
 253    TEMP_FILE_EXT = ".new"
 254
 255    # Identify source file types.
 256    HYPERTEXT_FILE_PATTERN = r"""  # Use Python raw strings.
 257        (\.                        # Match the filename suffix after the .
 258            (?: html | htm |       # HTML hypertext
 259                css)               # CSS style sheet
 260        $)                         # End of line.
 261    """
 262
 263    SOURCE_FILE_PATTERN = r"""      # Use Python raw strings.
 264        (?: makefile$ |             # Any file called makefile is a source file.
 265                                    # Note the $ at the end so we don't reprocess .gitconfig.html -> .gitconfig.html.html
          \.vimrc$ |                # Vim script
          (\.bashrc$ |              # Bash configuration files.
           \.bash_profile$ |
           \.bash_logout$)
          |
          (\.gitignore$ |           # Git configuration files.
           \.gitignore_global$ |
           \.gitconfig$)
 274          |
 275          (\.                       # Match the filename suffix after the .
 276                                    # Now match any of these suffixes:
 277             (?: 
 278                  c | cpp | h | hpp |   #     C++ and C
 279                  js |                  #     Javascript
 280                  py |                  #     Python
 281                  lsp |                 #     LISP
 282                  ipynb |               #     Jupyter notebook
 283                  m  |                  #     MATLAB
 284                  FOR | for | f |       #     FORTRAN
 285                  yaml |                #     YAML = JSON superset
 286                  tex |                 #     LaTeX
 287                  txt | dat |           #     Data files
 288                  sh)                   #     Bash
 289             $)                         # End of line.
 290         )
 291         """
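    # Once compiled in verbose mode, this pattern is intended to match source file names such as
    # "makefile", ".vimrc", "hello.cpp" or "analysis.ipynb", but not "photo.png".
    # Plain web page files are matched separately by HYPERTEXT_FILE_PATTERN above.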
 292
 293    # Special case of certain HTML and CSS files for which we want to generate a syntax highlighted source code listing.
 294    SPECIAL_FILE_TO_HIGHLIGHT_PATTERN = r"""
 295        (?: ^life\.html$          | # We want a listing of this particular HTML file.
 296            ^index\.html$         | # I want to list my top level HTML file.  (There is only one file with this name at the top level web directory.)
 297            ^webPageDesign\.html$ | # and also this HTML example file, but no others.
 298            ^StyleSheet\.css$ )     # I want to list my style sheet.
 299        """
 300
 301    # Files for which we want to generate a syntax highlighted source code listing.
 302    # Uses an f-string combined with a raw-string.
 303    FILE_TO_HIGHLIGHT_PATTERN = fr"""
 304        (?: {SPECIAL_FILE_TO_HIGHLIGHT_PATTERN} | 
 305            {SOURCE_FILE_PATTERN} )
 306        """
 307
 308    # Update my email address.
    # This is tricky:  Prevent matching and updating the name within this
 310    # Python source file by using the character class brackets.
 311    OLD_EMAIL_ADDRESS = r"""
 312        artificer\!AT\!sean[e]rikoconnor\!DOT\!freeservers\!DOT\!com
 313        """
 314    NEW_EMAIL_ADDRESS = "seanerikoconnor!AT!gmail!DOT!com"
 315
 316    # List of patterns to match, match groups to pull out of the old string, new strings to generate from these two items.  
 317    # Read patterns and strings from the updateweb.yaml file.
 318    STRING_REPLACEMENT_LIST = []
 319    # Pairs of test strings and their correct match/replacements.
 320    STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST = []
 321
 322    # Match a copyright line like this:
 323    #     Copyright (C) 1999-2025 by Sean Erik O&#39;Connor.  All Rights Reserved.
 324    # Extract the copyright symbol which can be ascii (C) or HTML &copy; and extract the old year.
 325    TWO_DIGIT_YEAR_FORMAT = "%02d"
 326    COPYRIGHT_LINE = r"""
 327        Copyright                       # Copyright.
 328        \s+                             # One or more spaces.
 329        (?P<symbol> \(C\) | &copy;)     # Match and extract the copyright symbol.
 330        \D+                             # Any non-digits.
 331        (?P<old_year>[0-9]+)            # Match and extract the old copyright year, place it into variable 'old_year'
 332        -                               # hyphen
 333        ([0-9]+)                        # New copyright year.
 334        \s+                             # One or more spaces.
 335        by\s+Sean\sErik                 # Start of my name.  This way we don't rewrite somebody else's copyright notice.
 336        """
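    # Illustrative use, mirroring the unit test further below:  after the pattern is compiled, the named
    # groups are substituted back into a replacement string to bump the year range up to the current year:
    #     COPYRIGHT_LINE.sub(r"Copyright \g<symbol> \g<old_year>-2025 by Sean Erik", line)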
 337
 338    # Match a line containing the words,
 339    #    last updated YY
 340    # and extract the two digit year YY.
 341    LAST_UPDATED_LINE = r"""
 342        last\s+         # Match the words "last updated"
 343        updated\s+
 344        \d+             # Day number
        \s+             # One or more blanks or tabs
 346        [A-Za-z]+       # Month
 347        \s+             # One or more blanks or tabs
 348        (?P<year>\d+)   # Two digit year.  Place it into the variable 'year'
 349        """
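    # For example, a line containing "last updated 01 Jan 25" places "25" into the match group 'year'.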
 350
 351    # Web server root directory.
 352    DEFAULT_ROOT_DIR = "/"
 353
 354    # The ftp listing occasionally shows a date newer than the actual date. 
    # On my server, it could be 6 months newer when we are near New Year's Day.  Typically the server file time is only 1 or 2 minutes newer.
 356    # But if the remote file time is much newer, it might be an old file with a bad date/time.  
 357    # Upload the file to be safe.
 358    # How to see the time differences from the log if they are large:
 359    #     egrep -o "Remote file.*is MUCH newer.*days" logRemote.txt
 360    #     Remote file Finance/MortgageLoanDerivation.tex.html is MUCH newer[8.0 minutes] by 885753.0 seconds = 14762.5 minutes = 246.0 hours = 10.3 days
 361    # How to see the time differences from the log if they are small and we wait and NOT upload:
 362    #    egrep -o "Remote file.*is newer.*days" logRemote.txt
 363    #    Remote file error404.html is newer by    102.0 seconds =      1.7 minutes =      0.0 hours =      0.0 days
 364    #    Remote file index.html is newer by    113.0 seconds =      1.9 minutes =      0.0 hours =      0.0 days
 365    MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD = 8.0
 366
 367    # Upload only if we are newer by more than a few minutes.  Allows for a little slop in time stamps on server or host.
 368    MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD = 3.0
 369
    # An ftp listing line should be at least this many characters, or we'll
    # suspect an error.
 372    MIN_FTP_LINE_LENGTH = 7
 373
 374    # Parse an ftp listing, extracting <bytes> <mon> <day> <hour> <min> <year> <filename>
 375    # ftp listings are generally similar to UNIX ls -l listings.
 376    #
 377    # Some examples:
 378    #
 379    # (1) Freeservers ftp listing,
 380    #
 381    #          0        1   2                3           4    5   6   7      8
 382    #     drwxr-xr-x    3 1000             1000         4096 Nov 18  2006 Electronics
 383    #     -rw-r--r--    1 1000             1000        21984 Jun  4 03:46 StyleSheet.css
 384    #     -rw-r--r--    1 1000             1000         2901 Sep 26 17:12 allclasses-frame.html
 385    #
 386    # (2) atspace ftp listing,
 387    #
 388    #     drwxr-xr-x    3  seanerikoconnor vusers         49 Apr  7  2006 Electronics
 389    #     -rw-r--r--    1  seanerikoconnor vusers      21984 Jun  4 04:03 StyleSheet.css
 390    #
 391    FTP_LISTING = r"""
 392        [drwx-]+            # Unix type file mode.
 393        \s+                 # One or more blanks or tabs.
 394        \d+                 # Number of links.
 395        \s+
 396        \w+                 # Owner.
 397        \s+
 398        \w+                 # Group.
 399        \s+
 400        (?P<bytes> \d+)     # File size in bytes, placed into the variable 'bytes'.
 401        \s+
 402        (?P<mon> \w+)       # Month modified, placed into the variable 'mon'.
 403        \s+
 404        (?P<day> \d+)       # Day modified, placed into the variable 'day'.
 405        \s+
 406        (
 407            (?P<hour> \d+)  # Hour modified, placed into the variable 'hour'.
 408            :
 409            (?P<min> \d+)   # Minute modified, placed into the variable 'min'.
 410            |
 411            (?P<year> \d+)  # If hours and minutes are absent (happens when year is not the current year),
 412                            # extract the year instead.
 413        )
 414        \s+
 415        (?P<filename> [A-Za-z0-9"'.\-_,~()=+#]+)    # Path and file name containing letters, numbers,
 416                                                    # and funny characters.  We must escape some of
 417                                                    # these characters with a backslash, \.
 418        """
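    # Illustrative use on a sample listing line like the ones above (after the pattern is compiled in verbose mode):
    #     m = FTP_LISTING.search("-rw-r--r--    1 1000   1000   21984 Jun  4 03:46 StyleSheet.css")
    #     m.group('bytes'), m.group('mon'), m.group('day'), m.group('hour'), m.group('min'), m.group('filename')
    #         -->  ('21984', 'Jun', '4', '03', '46', 'StyleSheet.css')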
 419
 420    # HTML header up to the style sheet.
 421    BASIC_HTML_BEGIN = \
 422        """
 423        <!DOCTYPE html>
 424        <html lang="en-US">  <!-- Set language of this page to USA English. -->
 425        
 426        <head>
 427            <!-- This page uses Unicode characters. -->
 428            <meta charset="utf-8">
 429        
 430            <!-- Set viewport to actual device width.  Any other settings makes the web page initially appear zoomed-in on mobile devices. -->
 431            <meta name="viewport" content="width=device-width, initial-scale=1">
 432        
 433            <!-- Title appears in the web browser tab for this page.  The browser also uses it to bookmark this page. -->
 434            <title>Sean Erik O'Connor - Home Page and Free Mathematical Software.</title>
 435        
 436            <!-- Search engines will search using words in this description.  They will also display title in their search results. -->
 437            <meta name="description" content="Syntax Colored Source Code Listing">
 438        
 439            <!-- Some content management software uses the author's name. -->
 440            <meta name="author" content="Sean Erik O'Connor">
 441        
 442            <meta name="copyright" content="Copyright (C) 1986-2025 by Sean Erik O'Connor.  All Rights Reserved.">   
 443        
 444            <!-- Begin style sheet insertion -->
 445            <style>
 446                /* Default settings for all my main web pages. */
 447                body
 448                {
 449                    /* A wide sans-serif font is more readable on the web. */
 450                    font-family:            Verdana, Geneva, "Trebuchet MS", sans-serif ;
 451        
 452                    /* Set the body font size a little smaller than the user's default browser setting. */
 453                    font-size:              0.8em ; 
 454        
 455                    /* Black text is easier to read. */
 456                    color:                  black ;
 457        
 458                    /*  More vertical space between lines for more pleasant reading.  Use a unitless font height multiplier.  
                        Length and percentage values can give scrunched text due to poor inheritance behavior. */
 460                    line-height:            1.7 ;
 461                }
 462        
                /* Now prepare to add the syntax coloring style sheet from Pygments. */
 464        """
 465
 466    # After the style sheet and up to the start of the article in the body.
 467    BASIC_HTML_MIDDLE = \
 468        """
 469            </style>
 470        </head>
 471        
 472        <body>
 473            <article class="content">
 474        """
 475
 476    # After the source code listing, finish the article, body and html document.
 477    BASIC_HTML_END = \
 478        """
 479            </article>
 480        </body>
 481        
 482        </html>
 483        """
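    # A rough sketch of how these three fragments are meant to fit around a Pygments-highlighted listing
    # (illustrative only;  'source_code' here is a placeholder string, not a variable defined in this class):
    #     page = (BASIC_HTML_BEGIN
    #             + HtmlFormatter().get_style_defs()                        # CSS rules for the syntax coloring.
    #             + BASIC_HTML_MIDDLE
    #             + highlight(source_code, PythonLexer(), HtmlFormatter())  # The colored listing itself.
    #             + BASIC_HTML_END)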
 484
 485    def __init__(self):
 486        """Set up the user settings."""
 487
 488        self.local_root_dir = ""
 489
 490        # Import the user settings from the parameter file.
 491        self.get_local_root_dir()
 492        self.get_server_settings()
 493
 494        self.precompile_regular_expressions()
 495
 496    def get_server_settings(self):
 497        """
 498        Read web account private settings from a secret offline parameter file.
 499        These also hold patterns to match and replace in all of our source pages.
 500        """
 501
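        # A minimal sketch of the YAML layout this method expects:  two documents separated by "---".
        # The key names below come from the parsing code that follows;  the values are hypothetical placeholders.
        #
        #     ftp_server_name: ftp.example.com
        #     ftp_user_name: myusername
        #     ftp_password: mypassword
        #     remote_directory: /
        #     file_size_limit_Kb: 50000
        #     ---
        #     pattern_match_replacement_string_list:
        #         - pattern: OldProductName           # A verbose regular expression to search for.
        #           replacement_string: NewProductName
        #     test_verify_string_list:
        #         - test_string: Download OldProductName today.
        #           verify_string: Download NewProductName today.
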
 502        # Private file which contains my account settings.
 503        settings_file_name = self.local_root_dir + self.SERVER_SETTINGS_FILE_NAME
 504        # Recommended by
 505        #  https://www.cloudbees.com/blog/yaml-tutorial-everything-you-need-get-started
 506        try:
 507            stream = open(settings_file_name, "r")
 508        except OSError as detail:
 509            logging.error(f"Cannot open the YAML file {settings_file_name:s}.  Unable to read the settings because: {str(detail):s}")
 510            # Rethrow the exception higher.
 511            raise UpdateWebException("Cannot load the settings.  See the log file for details.  Aborting... ") from detail
 512        # Read all the YAML documents in the file.
 513        yaml_contents = yaml.load_all(stream, Loader)
        yaml_document_list: list[Any] = []
        for yaml_doc in yaml_contents:
            yaml_document_list.append(yaml_doc)
        # All the documents are parsed now, so close the settings file.
        stream.close()
        num_yaml_docs = len(yaml_document_list)
 518        if num_yaml_docs != 2:
 519            logging.error(f"Wrong number of YAML documents = {num_yaml_docs:3d} in the user settings file.  Aborting...")
 520            raise UpdateWebException("Cannot load the settings.  See the log file for details.  Aborting... ")
 521
 522        # Load all the server settings.
 523        self.SERVER_NAME = yaml_document_list[0]['ftp_server_name']
 524        self.USER_NAME = yaml_document_list[0]['ftp_user_name']
 525        self.PASSWORD_NAME = yaml_document_list[0]['ftp_password']
 526        self.FTP_ROOT_NAME = yaml_document_list[0]['remote_directory']
 527        self.FILE_SIZE_LIMIT_NAME = int(yaml_document_list[0]['file_size_limit_Kb'])
 528
 529        # Load all the tuples which contain patterns to match and the strings to replace, from document #1 in the YAML file.
 530        self.STRING_REPLACEMENT_LIST = []
 531        pat_rep_yaml_list = yaml_document_list[1]['pattern_match_replacement_string_list']
 532        for pat_rep in pat_rep_yaml_list:
 533            # Fetch the regular expression and compile it for speed.
 534            verbose_regex = pat_rep['pattern']
 535            pat = re.compile(verbose_regex, re.VERBOSE | re.IGNORECASE)
 536            # Since we use raw strings, we need to strip off leading and trailing whitespace.
            replacement_string = pat_rep['replacement_string'].strip()
 538            self.STRING_REPLACEMENT_LIST.append([pat, replacement_string])
 539
 540        # Load the test and verify strings.
 541        test_verify_strings_list = yaml_document_list[1]['test_verify_string_list']
 542        for test_verify_string in test_verify_strings_list:
            test_string = test_verify_string['test_string'].strip()
            verify_string = test_verify_string['verify_string'].strip()
 545            self.STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST.append([test_string,verify_string])
 546
 547        print("  ...done!", flush=True)
 548        return
 549
 550    def get_local_root_dir(self):
 551        """Get the local website root directory on this platform."""
 552
 553        # Each platform has a definite directory for the web page.
 554        local_web_dir_path = "/Desktop/Sean/WebSite"
 555
 556        if sys.platform.startswith('darwin'):
 557            self.local_root_dir = str(Path.home()) + local_web_dir_path
 558        # My Cyperpower PC running Ubuntu Linux.
 559        elif sys.platform.startswith('linux') or sys.platform.startswith('cygwin'):
 560            self.local_root_dir = str(Path.home()) + local_web_dir_path
 561        return
 562
 563    def precompile_regular_expressions(self):
 564        """For speed precompile the regular expression search patterns."""
 565        self.COPYRIGHT_LINE            = re.compile(self.COPYRIGHT_LINE,            re.VERBOSE | re.IGNORECASE)
 566        self.FTP_LISTING               = re.compile(self.FTP_LISTING,               re.VERBOSE | re.IGNORECASE)
 567        self.TEMP_FILE_SUFFIXES        = re.compile(self.TEMP_FILE_SUFFIXES,        re.VERBOSE | re.IGNORECASE)
 568        self.TEMP_DIR_SUFFIX           = re.compile(self.TEMP_DIR_SUFFIX,           re.VERBOSE)
 569        self.SOURCE_FILE_PATTERN       = re.compile(self.SOURCE_FILE_PATTERN,       re.VERBOSE)
 570        self.HYPERTEXT_FILE_PATTERN    = re.compile(self.HYPERTEXT_FILE_PATTERN,    re.VERBOSE)
 571        self.OLD_EMAIL_ADDRESS         = re.compile(self.OLD_EMAIL_ADDRESS,         re.VERBOSE | re.IGNORECASE)
 572        self.FILE_TO_HIGHLIGHT_PATTERN = re.compile(self.FILE_TO_HIGHLIGHT_PATTERN, re.VERBOSE)
 573        self.LAST_UPDATED_LINE         = re.compile(self.LAST_UPDATED_LINE,         re.VERBOSE | re.IGNORECASE)
 574
 575# ----------------------------------------------------------------------------
 576#  Unit test individual functions.
 577# ----------------------------------------------------------------------------
 578
 579class UnitTest(unittest.TestCase):
 580    """Initialize the UnitTest class."""
 581    def setUp(self):
 582        self.user_settings = UserSettings()
 583        self.user_settings.get_local_root_dir()
 584
 585    def tearDown(self):
 586        """Clean up the UnitTest class."""
 587        self.user_settings = None
 588
 589    def test_copyright_updating(self):
 590        """Test copyright line updating to the current year."""
 591        # Prevent web cleaning from rewriting strings by splitting them up and concatenating them.
 592        line_before_update = "Copyright (C) 19" + "99-20" + "20" + " by Sean Erik O'Connor.  All Rights Reserved. Copyright &copy; 1999-2025 by Sean Erik O'Connor"
 593        line_after_update_actual = "Copyright (C) 1999-2025 by Sean Erik O'Connor.  All Rights Reserved. Copyright &copy; 1999-2025 by Sean Erik O'Connor"
 594        pat = self.user_settings.COPYRIGHT_LINE
 595        match = pat.search(line_before_update)
 596
 597        if match:
 598            old_year = int(match.group('old_year'))
            # Same as calling WebSite.get_current_year():
 600            current_year = int(time.gmtime()[0])
 601            if old_year < current_year:
 602                # We matched and extracted the old copyright symbol into the variable
 603                # 'symbol' using the pattern syntax (?P<symbol> \(C\) | &copy;)
 604                # We now insert it back by placing the special syntax
 605                # \g<symbol> into the replacement string.
 606                new_copyright = r"Copyright \g<symbol> \g<old_year>-" + str(current_year) + " by Sean Erik"
 607                line_after_update_computed = pat.sub(new_copyright, line_before_update)
 608                self.assertEqual(
 609                    line_after_update_actual,
 610                    line_after_update_computed,
 611                    f"newline = |{line_after_update_actual:s}| line_after_update_computed = |{line_after_update_computed:s}|")
 612            else:
 613                print( "old_year >= current_year" )
 614                self.fail()
 615        else:
 616            print( "no match for copyright pattern" )
 617            self.fail()
 618
 619    def test_extract_filename_from_ftp_listing(self):
 620        """Test parsing an FTP listing."""
 621        ftp_line = "-rw-r--r--    1 1000             1000         2901 Sep 26 17:12 allclasses-frame.html"
 622        extracted_file_name = "allclasses-frame.html"
 623        pat = self.user_settings.FTP_LISTING
 624        match = pat.search(ftp_line)
 625        if match:
 626            filename = match.group('filename')
 627            self.assertEqual(
 628                filename,
 629                extracted_file_name,
 630                f"ftp_line = {ftp_line:s} extracted file name = {extracted_file_name:s}")
 631        else:
 632            self.fail()
 633
 634    def test_get_file_time_and_date(self):
 635        """Test getting a file time and date."""
 636        # Point to an old file.
 637        file_name = "./Images/home.png"
 638        full_file_name = self.user_settings.local_root_dir + '/' + file_name
 639        # Get the UTC time.
 640        file_epoch_time = os.path.getmtime(full_file_name)
 641        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
 642        # Create a datetime object for the file.
 643        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class;  year, month, day, hour, minute, seconds.
 644        # Check if the file time matches what we would see if we did ls -l <file_name>
 645        computed = f"file {file_name:s} datetime {d.ctime():s}"
 646        actual = "file ./Images/home.png datetime Wed Jan  1 03:42:41 2025"
 647        self.assertEqual(computed, actual)
 648
 649    def test_set_file_time_and_date(self):
 650        """Test setting a file time and date."""
 651        file_name = "./Images/home.png"
 652        full_file_name = self.user_settings.local_root_dir + '/' + file_name
 653        # Create a temporary file in the same directory.
 654        temp_file_name = "temporal.tmp"
        full_temp_file_name = self.user_settings.local_root_dir + '/' + temp_file_name
 656        try:
 657            with open(full_temp_file_name, 'w') as fp:
 658                fp.write("The End of Eternity")
 659        except OSError as detail:
 660            logging.error(f"Cannot open or write to the file {full_temp_file_name:s}: {str(detail):s}  Aborting...")
 661            raise UpdateWebException("Failed the unit test for setting time and date of a file.  See the log file for details.  Aborting...") from detail
 662        # Get the old file time.  Set the temporary file to the same time.
 663        file_stat = os.stat(full_file_name)
 664        os.utime(full_temp_file_name, (file_stat[stat.ST_ATIME], file_stat[stat.ST_MTIME]))
 665        # What is the temporary file's time now?
 666        file_epoch_time = os.path.getmtime(full_temp_file_name)
 667        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
 668        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class;  year, month, day, hour, minute, seconds.
 669        # Is the temporary file time set properly?
 670        computed = f"file {file_name:s} datetime {d.ctime():s}"
 671        actual = "file ./Images/home.png datetime Wed Jan  1 03:42:41 2025"
 672        self.assertEqual(computed, actual)
 673        os.remove(full_temp_file_name)
 674
 675    def test_difference_of_time_and_date(self):
 676        """Test a date difference calculation."""
 677        file_name = "./Images/home.png"
 678        full_file_name = self.user_settings.local_root_dir + '/' + file_name
 679        # Get the UTC time.
 680        file_epoch_time = os.path.getmtime(full_file_name)
 681        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
 682        # Create a datetime object for the file.
 683        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class;  year, month, day, hour, minute, seconds.
 684        # Slightly change the date and time by adding 1 minute.
 685        d2 = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5]+1)  # year, month, day, hour, minute, second
 686        time_delta = d2 - d
 687        seconds_different = time_delta.total_seconds()
 688        minutes_different = seconds_different / 60.0
 689        hours_different = minutes_different / 60.0
 690        days_different = hours_different / 24.0
 691        computed = f"difference {days_different:8.5f} days, {hours_different:8.5f} hours {minutes_different:8.5f} minutes, {seconds_different:8.5f} seconds"
 692        actual = "difference  0.00001 days,  0.00028 hours  0.01667 minutes,  1.00000 seconds"
 693        self.assertEqual(computed, actual)
 694
 695    def test_pattern_match_dir_to_skip(self):
 696        """Test if skipping certain named directories is recoginizing the dir names."""
 697        dir_skip = "Primpoly-cswhfrwgwdikgzfdpiorbeaiennz"
 698        pat = re.compile(self.user_settings.DIR_TO_SKIP)
 699        if pat.search(dir_skip):
 700            self.assertTrue(True)
 701        else:
 702            self.assertTrue(False)
 703
 704    def test_file_name_to_syntax_highlight(self):
 705        """Test if syntax highlighting recognizes file names to highlight."""
 706        file_name1 = "Computer/hello.lsp"
 707        file_name2 = "Computer/life.html"
 708        p = self.user_settings.FILE_TO_HIGHLIGHT_PATTERN
 709        if p.search(Path(file_name1).name) and p.search(Path(file_name2).name):
 710            self.assertTrue(True)
 711        else:
 712            self.assertTrue(False)
 713
 714    def test_user_settings(self):
 715        """Test whether user settings are correctly initialized."""
 716        computed = f"File size limit = {int(self.user_settings.FILE_SIZE_LIMIT_NAME):d} K"
 717        actual = "File size limit = 50000 K"
 718        self.assertEqual(computed, actual, "File size limit settings are incorrect.")
 719
 720    def test_check_replace_substring(self,debug=True):
 721        """Test the substring pattern match and replacement functions which use the list of match/replace pairs in the YAML file.
 722           For troubleshooting, turn on debug.
 723        """
 724        test_verify_pairs = self.user_settings.STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST
 725        # Iterate over all test strings.
 726        for pair in test_verify_pairs:
 727            [test_string, verify_string] = pair
 728            if debug:
 729                print( f">>>>>>> next test string   = {test_string}")
 730                print( f">>>>>>> next verify string = {verify_string}")
 731            # Iterate over all patterns and replacements.
 732            for match_replace_tuple in self.user_settings.STRING_REPLACEMENT_LIST:
 733                [pat, rep_string] = match_replace_tuple
                if debug:
                    print( f"\t-------> next pattern = {pat}")
                    print( f"\t-------> next replacement = {rep_string}")
 736                match = pat.search(test_string)
 737                # The pattern match succeeds.
 738                if match:
 739                    try:
 740                        sub = pat.sub(rep_string, test_string)
 741                        # String replacement succeeds for this pattern/replace pair iteration.
 742                        if debug:
 743                            print( f"\t\t.......> match and replace: {test_string} ---> {sub}")
 744                        test_string = sub
 745                    except IndexError as detail:
 746                        print(f"\t\t.......> Caught an exception: {str(detail):s}.  Replacement failed.")
 747                        if debug:
 748                            self.assertTrue(False)
 749                elif debug:
 750                    print( f"\t\t.......> no match for pattern = {pat} in test string = {test_string}")
 751                # No match, so go on to the next pattern and don't change test_string.
 752            # Done with all pattern/replace on test string.
 753            # Check this test string in the list.
 754            self.assertEqual(test_string, verify_string, f"\ntest_string = |{test_string:s}|\nverify_string = |{verify_string:s}|\n")
 755            if debug:
 756                print( f"\t******* DONE with all pattern matches and replacements on this test/verify string pair.\n" )
 757
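    def test_append_root_dir(self):
        """Test joining the web server root directory to a relative path name.
           An extra illustrative test of the path logic documented in WebSite.append_root_dir();
           it is pure string manipulation and touches no files."""
        web_site = WebSite(self.user_settings)
        # The default root "/" is a special case:  no extra separator is inserted.
        self.assertEqual(web_site.append_root_dir("/", "Art/foo.txt"), "/Art/foo.txt")
        # Any other root directory gets a "/" separator before the relative path.
        self.assertEqual(web_site.append_root_dir("/Sean", "Art/foo.txt"), "/Sean/Art/foo.txt")
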
 758# ----------------------------------------------------------------------------
 759#  Command line options.
 760# ----------------------------------------------------------------------------
 761
 762class CommandLineSettings(object):
 763    """Get the command line options."""
 764
 765    def __init__(self, user_settings, raw_args=None):
 766        """Get command line options"""
 767        command_line_parser = argparse.ArgumentParser(
 768            description="updateweb options")
 769
 770        # Log all changes, not just warnings and errors.
 771        command_line_parser.add_argument(
 772            "-v",
 773            "--verbose",
 774            help="Turn on verbose mode to log everything",
 775            action="store_true")
 776
 777        # Clean up the local website only.
 778        command_line_parser.add_argument(
 779            "-c",
 780            "--clean",
 781            help="Do a cleanup on the local web site only.",
 782            action="store_true")
 783
        # Also upload the MathJax directory.
 785        command_line_parser.add_argument(
 786            "-m",
 787            "--mathjax",
 788            help="""ALSO upload mathjax directory.\
 789            Do this if you have a new version of MathJax or if you've reset your server and deleted the /mathjax remote directory.\
 790            You'll need to do git restore on any altered files in the local branch and git clean -f to remove any newly created files.\
 791            NOTE:  If you did reset your server and delete all files, run the command    find . -name '*.*' -exec touch {} \\;    from the web page root directory.\
 792            Also run   find . -name '*' -exec touch {} \\;    This will ensure accurate times on the server.""",
 793            action="store_true")
 794
 795        # Run unit tests only.
 796        command_line_parser.add_argument("-t", "--test",
 797                                         help="Run unit tests.",
 798                                         action="store_true")
 799
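        # Parse the command line.  When raw_args is None, argparse falls back to sys.argv as usual;
        # passing an explicit list instead is handy for driving the options from code, e.g. (hypothetical call):
        #     CommandLineSettings(user_settings, raw_args=["-c", "-v"])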
 800        args = command_line_parser.parse_args(raw_args)
 801
 802        if args.verbose:
 803            user_settings.VERBOSE = True
 804        if args.clean:
 805            user_settings.CLEAN = True
 806        if args.test:
 807            user_settings.UNITTEST = True
 808        if args.mathjax:
 809            user_settings.MATHJAX = True
 810
 811# ----------------------------------------------------------------------------
 812#  Base class which describes my web site overall.
 813# ----------------------------------------------------------------------------
 814
 815class WebSite(object):
 816    """
 817    Abstract class used for analyzing both local and remote (ftp server) websites.
 818    Contains the web-walking functions which traverse the directory structures and files.
    These are overridden in the subclasses with specialized methods:  one set walks a local disk directory with ls-style listings, the other walks a remote directory with FTP commands.
 820    Child classes may define additional functions which only they need.
 821    """
 822
 823    def __init__(self, settings):
 824        """Set up root directories"""
 825
 826        # Import the user settings.
 827        self.user_settings = settings
 828
 829        # Queue keeps track of directories not yet processed.
 830        self.queue = []
 831
 832        # List of all directories traversed.
 833        self.directories = []
 834
 835        # List of files traversed, with file information.
 836        self.files = []
 837
 838        # Find out the root directory and go there.
 839        self.root_dir = self.get_root_dir()
 840        self.go_to_root_dir(self.root_dir)
 841
    # The @staticmethod decorator below makes get_current_year a static method:  there is no self first argument, and you can call it without creating an
    # instance of this class.  Call it from anywhere, inside or outside the class, using WebSite.get_current_year().  (You could just create a global function instead.)
 844    @staticmethod
 845    def get_current_year():
 846        """Get the current year."""
 847        return int(time.gmtime()[0])
 848
 849    @staticmethod
 850    def get_current_two_digit_year():
 851        """Get the last two digits of the current year."""
 852        return WebSite.get_current_year() % 100
 853
 854    @staticmethod
 855    def is_file_info_type(file_info):
 856        """Check if we have a file information structure or merely a simple file name."""
 857        try:
 858            if isinstance(file_info, list):
 859                return True
 860            elif isinstance(file_info, str):
 861                return False
 862            else:
 863                logging.error("is_file_info_type found a bad type.  Aborting...")
 864                raise UpdateWebException("Internal error for file type.  See the log file for details.  Aborting... ")
 865        except TypeError as detail:
 866            logging.error(f"is_file_info_type found a bad type {str(detail):s}.  Aborting...")
 867            raise UpdateWebException("Internal error for file type.  See the log file for details.  Aborting... ") from detail
 868
 869    def get_root_dir(self):
 870        """Subclass:  Put code here to get the root directory"""
 871        return ""
 872
 873    def go_to_root_dir(self, root_dir):
 874        """Subclass:  Put code here to go to the root directory"""
        pass  # Python's do-nothing statement.
 876
 877    def one_level_down(self, d):
 878        """Subclass:  Fill in with a method which returns a list of the
        directories and files immediately beneath the directory d."""
 880        return [], []
 881
 882    def walk(self, d, type_of_tree_search=TreeWalkSettings.BREADTH_FIRST_SEARCH):
 883        """Walk a directory in either depth first or breadth first order.  BFS is the default."""
 884
 885        # Get all subfiles and subdirectories off this node.
 886        subdirectories, subfiles = self.one_level_down(d)
 887
 888        # Add all the subfiles in order.
 889        for f in subfiles:
 890
 891            name = self.strip_root(f)
 892            logging.debug(f"Webwalking:  Adding file {name[self.user_settings.FILE_NAME]:s} to list.")
 893
 894            # Some files are private so skip them from consideration.
 895            pat = re.compile(self.user_settings.FILE_TO_SKIP)
 896
 897            if pat.search(name[self.user_settings.FILE_NAME]):
 898                logging.warning(
 899                    f"Webwalking:  Skipping private file {name[self.user_settings.FILE_NAME]:s}")
 900            # Don't upload the log file due to file locking problems.
 901            elif name[self.user_settings.FILE_NAME].find(self.user_settings.LOGFILENAME) >= 0:
 902                logging.debug(f"Webwalking:  Skipping log file {name[self.user_settings.FILE_NAME]:s}")
 903            # File size limit on some servers.
 904            else:
 905                self.files.append(name)
 906
 907        # Queue up the subdirectories.
 908        for d in subdirectories:
 909            # Some directories are private such as .git or just temporary file
 910            # caches so skip them from consideration.
 911            pat = re.compile(self.user_settings.DIR_TO_SKIP)
 912            if pat.search(d):
 913                logging.warning(f"Webwalking:  Skipping private dir {d:s}")
 914            else:
 915                logging.debug(f"Webwalking:  Pushing dir {d:s} on the queue.")
 916                self.queue.append(d)
 917
 918        # Search through the directories.
 919        while len(self.queue) > 0:
 920            # For breadth first search, remove from beginning of queue.
 921            if type_of_tree_search == TreeWalkSettings.BREADTH_FIRST_SEARCH:
 922                d = self.queue.pop(0)
 923
 924            # For depth first search, remove from end of queue.
 925            elif type_of_tree_search == TreeWalkSettings.DEPTH_FIRST_SEARCH:
 926                d = self.queue.pop()
 927            else:
 928                d = self.queue.pop(0)
 929
 930            name = self.strip_root(d)
 931            logging.debug(f"Webwalking:  Adding relative directory {name:s} to list, full path = {d:s}.")
 932            self.directories.append(name)
 933
 934            self.walk(d)
 935
 936    def strip_root(self, file_info):
 937        """Return a path, but strip off the root directory"""
 938
 939        root = self.root_dir
 940
 941        # Extract the file name.
 942        if self.is_file_info_type(file_info):
 943            name = file_info[self.user_settings.FILE_NAME]
 944        else:
 945            name = file_info
 946
 947        # e.g. root = / and name = /Art/foo.txt yields stripped_path = Art/foo.txt
 948        # but root = /Sean and name = /Sean/Art/foo.txt yields stripped_path =
 949        # Art/foo.txt
 950        lenroot = len(root)
 951        if root == self.user_settings.DEFAULT_ROOT_DIR:
 952            pass
 953        else:
 954            lenroot = lenroot + 1
 955
 956        stripped_path = name[lenroot:]
 957
 958        if self.is_file_info_type(file_info):
 959            # Update the file name only.
 960            return [stripped_path,
 961                    file_info[self.user_settings.FILE_TYPE],
 962                    file_info[self.user_settings.FILE_DATE_TIME],
 963                    file_info[self.user_settings.FILE_SIZE]]
 964        else:
 965            return stripped_path
 966
 967    def append_root_dir(self, root_dir, name):
 968        """Append the root directory to a path"""
 969
 970        # e.g. root = /, and name = Art/foo.txt yields /Art/foo.txt
 971        # but root = /Sean, and name = Art/foo.txt yields /Sean/Art/foo.txt
 972        if root_dir == self.user_settings.DEFAULT_ROOT_DIR:
 973            return root_dir + name
 974        else:
 975            return root_dir + "/" + name
 976
 977    def scan(self):
 978        """Scan the directory tree recursively from the root"""
 979        logging.debug(f"Webwalking:  Beginning recursive directory scan from root directory {self.root_dir:s}")
 980        self.walk(self.root_dir)
 981
 982    def modtime(self, f):
 983        """Subclass:  Get file modification time"""
 984        pass
 985
 986    def finish(self):
 987        """Quit web site"""
 988        logging.debug(f"Finished with WebSite object of class {type(self)}")
 989        pass
 990
 991# ----------------------------------------------------------------------------
 992#  Subclass which knows about the local web site on disk.
 993# ----------------------------------------------------------------------------
 994
 995class LocalWebSite(WebSite):
 996    """Walk the local web directory on local disk down from the root.
 997    Clean up temporary files and do other cleanup work."""
 998
 999    def __init__(self, settings):
1000        """Go to web page root and list all files and directories."""
1001
1002        # Initialize the parent class.
1003        WebSite.__init__(self, settings)
1004
1005        self.root_dir = self.get_root_dir()
1006        logging.debug(f"LocalWebSite.__init__():  \tRoot directory: {self.root_dir:s}")
1007
1008    def get_root_dir(self):
1009        """Get the name of the root directory"""
1010        return self.user_settings.local_root_dir
1011
1012    def go_to_root_dir(self, root_dir):
1013        """Go to the root directory"""
1014
1015        # Go to the root directory.
1016        logging.debug(f"LocalWebSite.go_to_root_dir():  \tchdir to root directory:  {root_dir:s}")
1017        os.chdir(root_dir)
1018
1019        # Read it back.
1020        self.root_dir = os.getcwd()
1021        logging.debug(f"LocalWebSite.go_to_root_dir():  \tgetcwd root directory:  {self.root_dir:s}")
1022
1023    def one_level_down(self, d):
1024        """List all files and subdirectories in the current directory, dir.  For files, collect file info
1025        such as time, date and size."""
1026
1027        directories = []
1028        files = []
1029
1030        # Change to current directory.
1031        os.chdir(d)
1032
1033        # List all subdirectories and files.
1034        dir_list = os.listdir(d)
1035
1036        if dir_list:
1037            for line in dir_list:
1038                # Add the full path prefix from the root.
1039                name = self.append_root_dir(d, line)
1040                logging.debug(f"LocalWebSite.one_level_down():  \tlocal dir or file {name:s}")
1041
1042                # Is it a directory or a file?
1043                if os.path.isdir(name):
1044                    directories.append(name)
1045                elif os.path.isfile(name):
1046                    # First assemble the file information of name, time/date and size into a list.
1047                    # Can index it like an array.  For example,
1048                    # file_info = 
1049                    #   [ '/WebDesign/EquationImages/equation001.png',  -- The file name.
1050                    #      1,                                           -- Enum type FileType.FILE = 1.
1051                    #      datetime.datetime(2010, 2, 3, 17, 15),       -- UTC encoded in a date/time class.
1052                    #      4675]                                        -- File size in bytes.
1053                    file_info = [name,
1054                                 FileType.FILE,
1055                                 self.get_file_date_time(name),
1056                                 self.get_file_size(name)]
1057                    files.append(file_info)
1058
1059        # Sort the names into order.
1060        if directories:
1061            directories.sort()
1062        if files:
1063            files.sort()
1064
1065        return directories, files
1066
1067    @staticmethod
1068    def get_file_date_time(file_name):
1069        """Get a local file time and date in UTC."""
1070
1071        file_epoch_time = os.path.getmtime(file_name)
1072        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
1073        # Create a datetime class from the UTC year, month, day, hour, minute, seconds.
1074        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])
1075        return d
1076
1077    @staticmethod
1078    def get_file_size(file_name):
1079        """Get file size in bytes."""
1080        return os.path.getsize(file_name)
1081
1082    @staticmethod
1083    def clean_up_temp_file(temp_file_name, file_name, changed):
1084        """Remove the original file, rename the temporary file name to the original name.
1085        If there are no changes, just remove the temporary file.
1086        """
1087
1088        if changed:
1089            # Remove the old file now that we have the rewritten file.
1090            try:
1091                os.remove(file_name)
1092                logging.debug(f"Changes were made.  Removed original file {file_name:s}")
1093            except OSError as detail:
1094                logging.error(f"Cannot remove old file {file_name:s}: {str(detail):s}.  Need to remove it manually.")
1095
1096            # Rename the new file to the old file name.
1097            try:
1098                os.rename(temp_file_name, file_name)
1099                logging.debug(f"Renamed temp file {temp_file_name:s} to original file {file_name:s}")
1100            except OSError as detail:
1101                logging.error(f"Cannot rename temporary file {temp_file_name:s} to old file name {file_name:s}: {str(detail):s}.  Need to rename manually")
1102        else:
1103            # No changes?  Remove the temporary file.
1104            try:
1105                os.remove(temp_file_name)
1106                logging.debug(f"No changes were made.  Removed temporary file {temp_file_name:s}")
1107            except OSError as detail:
1108                logging.error(f"Cannot remove temporary file {temp_file_name:s}: {str(detail):s}.  Need to remove it manually.")
1109        return
1110
1111    @staticmethod
1112    def process_lines_of_file(in_file_name, out_file_name, process_line_function_list=None):
1113        """
1114        Process each line of a file with a list of functions.  Create a new temporary file.
1115
1116        The default list is None which means make an exact copy.
1117        """
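        # Illustrative call (hypothetical file names):  passing no rewrite functions makes a verbatim copy,
        # so 'changed' comes back False and the temporary copy can then be deleted by clean_up_temp_file():
        #     changed = LocalWebSite.process_lines_of_file("index.html", "index.html" + UserSettings.TEMP_FILE_EXT)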
1118
1119        # Assume no changes.
1120        changed = False
1121
1122        # Open both input and output files for processing.  Check if we cannot do it.
1123        fin = None
1124        try:
1125            fin = open(in_file_name, "r")
1126        except IOError as detail:
1127            logging.error(f"process_lines_of_file():  \tCannot open file {in_file_name:s} for reading:  {str(detail):s} Aborting...")
1128            if fin is not None:
1129                fin.close()
1130            raise UpdateWebException("Internal error for processing a file.  See the log file for details.  Aborting... ") from detail
1131        fout = None
1132        try:
1133            fout = open(out_file_name, "w")
1134        except IOError as detail:
1135            logging.error(f"process_lines_of_file():  \tCannot open file {out_file_name:s} for writing:  {str(detail):s} Aborting...")
1136            if fout is not None:
1137                fout.close()
1138            raise UpdateWebException("Internal error for processing a file.  See the log file for details.  Aborting... ") from detail
1139
1140        # Read each line of the file, aborting if there is a read error.
1141        try:
1142            line = fin.readline()
1143
1144            # Rewrite the next line of the file using all the rewrite functions.
1145            while line:
1146                original_line = line
1147                # If we have one or more rewrite functions...
1148                if process_line_function_list is not None:
                    # ...apply each rewrite function to the line, one after the other, in order.
1150                    for processLineFunction in process_line_function_list:
1151                        if processLineFunction is not None:
1152                            line = processLineFunction(line)
1153
1154                if original_line != line:
1155                    logging.debug(f"Rewrote the line:    >>>{original_line:s}<<< into >>>{line:s}<<< for file {in_file_name:s}")
1156                    changed = True
1157
1158                fout.write(line)
1159
1160                line = fin.readline()
1161
1162            fin.close()
1163            fout.close()
1164        except IOError as detail:
1165            logging.error(f"File I/O error during reading/writing file {in_file_name:s} in process_lines_of_file: {str(detail):s}  Aborting...")
1166            raise UpdateWebException("Internal error for processing a file.  See the log file for details.  Aborting... ") from detail
1167
1168        if changed:
            logging.debug(f"process_lines_of_file():  \tRewrote original file {in_file_name:s}.  "
                          f"Changes are in temporary copy {out_file_name:s}")
1171
1172        # Return True if any lines were changed.
1173        return changed
1174
1175    def clean(self):
1176        """Scan through all directories and files in the local on disk website and clean them up."""
1177
1178        num_source_files_changed = 0
1179        num_source_files_syntax_highlighted = 0
1180
1181        logging.debug("Cleaning up the local web page.")
1182
1183        if self.directories is None or self.files is None:
1184            logging.error("Web site has no directories or files.  Aborting...")
1185            raise UpdateWebException("Internal error for cleaning up the local web site.  See the log file for details.  Aborting... ")
1186
1187        for d in self.directories:
1188
1189            if self.is_temp_dir(d):
1190                # Add the full path prefix from the root.
1191                name = self.append_root_dir(self.get_root_dir(), d)
1192                try:
                    logging.debug(f"Removing temp dir {name:s} recursively")
1194                    shutil.rmtree(name)
1195                except OSError as detail:
1196                    logging.error(f"Cannot remove temp dir {name:s}: {str(detail):s}")
1197
1198        for f in self.files:
1199            # Add the full path prefix from the root.
1200            full_file_name = self.append_root_dir(
1201                self.get_root_dir(), f[self.user_settings.FILE_NAME])
1202
1203            # Remove all temporary files.
1204            if self.is_temp_file(f):
1205                try:
1206                    logging.debug(f"Removing temp file {full_file_name:s}")
1207                    os.remove(full_file_name)
1208                except OSError as detail:
                    logging.error(f"Cannot remove temp file {full_file_name:s}: {str(detail):s}")
1210
1211            # Update source code files.
1212            if self.is_source_or_hypertext_file(f):
1213                changed = self.rewrite_source_file(full_file_name)
1214                if changed:
1215                    num_source_files_changed += 1
                    logging.debug(f"Rewrote source code file {full_file_name:s}")
1217
            # Generate a syntax highlighted code listing.
            # Give it the same time and date as the original code so that only files with recent changes get uploaded to the remote server.
1220            if self.is_file_to_syntax_highlight(f):
1221                # syntax_highlighted_file_name = self.create_syntax_highlighted_code_listing(full_file_name, dry_run=True)
1222                syntax_highlighted_file_name = self.create_syntax_highlighted_code_listing(full_file_name)
                if syntax_highlighted_file_name is not None:
                    logging.debug(f"Generated a syntax highlighted source listing file {syntax_highlighted_file_name:s} for the file {full_file_name:s}")
                    num_source_files_syntax_highlighted += 1
                else:
                    logging.debug(f"Failed to generate a syntax highlighted source listing file for {full_file_name:s}")
1228
1229        logging.debug(f"Number of source files rewritten = {num_source_files_changed:10d}")
1230        logging.debug(f"Number of source files syntax highlighted = {num_source_files_syntax_highlighted:10d}")
1231
1232    def is_temp_file(self, file_info):
1233        """Identify a file name as a temporary file"""
1234
1235        file_name = file_info[self.user_settings.FILE_NAME]
1236
        # Suffixes and names for temporary files to be deleted.
1238        pat = self.user_settings.TEMP_FILE_SUFFIXES
1239        match = pat.search(file_name)
1240        # Remove any files containing twiddles anywhere in the name.
1241        if match or file_name.find(self.user_settings.VIM_TEMP_FILE_EXT) >= 0:
1242            return True
1243
1244        return False
1245
1246    def is_temp_dir(self, dir_name):
1247        """Identify a name as a temporary directory."""
1248
1249        p = self.user_settings.TEMP_DIR_SUFFIX
        return p.search(dir_name) is not None
1251
1252    def is_source_or_hypertext_file(self, file_info):
1253        """ Check if the file name is a source file or a hypertext file."""
1254
1255        file_name = file_info[self.user_settings.FILE_NAME]
1256        p1 = self.user_settings.SOURCE_FILE_PATTERN
1257        p2 = self.user_settings.HYPERTEXT_FILE_PATTERN
1258        if p1.search(file_name) or p2.search(file_name):
1259            return True
1260        else:
1261            return False
1262
1263    def is_file_to_syntax_highlight(self, file_info):
1264        """Check if this file type should have a syntax highlighted source listing."""
1265
1266        # Take apart the file name.
1267        full_file_name = file_info[self.user_settings.FILE_NAME]
1268        file_name = Path(full_file_name).name
1269
1270        p = self.user_settings.FILE_TO_HIGHLIGHT_PATTERN
1271        if p.search(file_name):
1272            return True
1273        else:
1274            return False
1275
    def rewrite_substring(self, line):
        """Rewrite a line containing any of the string replacement patterns given in the user settings."""
1278
1279        # Start with the original unchanged line.
1280        rewritten_line = line
1281
1282        # Do the replacements in order from first to last.
1283        for match_replace_tuple in self.user_settings.STRING_REPLACEMENT_LIST:
1284            # Get the next pattern match replacement string tuple.
1285            [pat, rep_string] = match_replace_tuple
1286            # Does it match?  Then do string substitution, else leave the line unchanged.
1287            match = pat.search(rewritten_line)
1288            if match:
                # Now we have these cases:
                #     - No capture variables at all:  just a straightforward pattern match followed by a string substitution.
                #     - One or more capture variable names in the pattern (?P<varname> ... ) along with the corresponding match group names \g<varname> in the replacement string.
                #       If pat.sub() finds any inconsistency, such as a capture variable name which doesn't match a group name, it will throw an exception.
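                # Illustrative example (hypothetical pattern and replacement;  the real list comes from updateweb.yaml):
                #     pat        = re.compile(r"(?P<scheme>http)://example\.com")
                #     rep_string = r"\g<scheme>s://example.com"
                #     pat.sub(rep_string, "see http://example.com")   yields   "see https://example.com"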
1293                try:
1294                    sub = pat.sub(rep_string, rewritten_line)
1295                    rewritten_line = sub
1296                except IndexError as detail:
1297                    logging.error(f"ERROR: {str(detail):s}.  Did not find a capture variable name in the pattern (?P<varname> ... ) along with its corresponding match group name in replacement string \\g<varname> in updateweb.yaml.    Did not rewrite the line |{rewritten_line:s}|")
1298 
1299        return rewritten_line
1300
1301    def rewrite_email_address_line(self, line):
1302        """Rewrite lines containing old email addresses."""
1303
1304        # Search for the old email address.
1305        pat = self.user_settings.OLD_EMAIL_ADDRESS
1306        match = pat.search(line)
1307
1308        # Replace the old address with my new email address.
1309        if match:
1310            new_address = self.user_settings.NEW_EMAIL_ADDRESS
1311            sub = pat.sub(new_address, line)
1312            line = sub
1313
1314        return line
1315
1316    def rewrite_copyright_line(self, line):
1317        """Rewrite copyright lines if they are out of date."""
1318
1319        # Match the lines,
1320        #     Copyright (C) nnnn-mmmm by Sean Erik O'Connor.
1321        #     Copyright &copy; nnnn-mmmm by Sean Erik O'Connor.
1322        # and pull out the old year and save it.
1323        pat = self.user_settings.COPYRIGHT_LINE
1324        match = pat.search(line)
1325
1326        # Found a match.
1327        if match:
1328            old_year = int(match.group('old_year'))
1329
1330            # Replace the old year with the current year.
1331            # We matched and extracted the old copyright symbol into the variable
1332            # 'symbol' using the pattern syntax (?P<symbol> \(C\) | &copy;)
1333            # We now insert it back by placing the special syntax \g<symbol>
1334            # into the replacement string.
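            # Illustrative example of this mechanism (hypothetical pattern;  the real COPYRIGHT_LINE regex is defined in the user settings):
            #     re.sub(r"Copyright (?P<symbol>\(C\)|&copy;) (?P<old_year>\d{4})-\d{4}",
            #            r"Copyright \g<symbol> \g<old_year>-2025",
            #            "Copyright (C) 2007-2024")   yields   "Copyright (C) 2007-2025"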
1335            if old_year < WebSite.get_current_year():
1336                new_copyright = r"Copyright \g<symbol> \g<old_year>-" + str(WebSite.get_current_year()) + " by Sean Erik"
1337                sub = pat.sub(new_copyright, line)
1338                line = sub
1339        return line
1340
1341    def rewrite_last_update_line(self, line):
1342        """Rewrite the Last Updated line if the year is out of date."""
1343
1344        # Match the last updated line and pull out the year.
1345        #      last updated 01 Jan 25.
1346        p = self.user_settings.LAST_UPDATED_LINE
1347        m = p.search(line)
1348
1349        if m:
1350            last_update_year = int(m.group('year'))
1351
1352            # Convert to four digit years.
1353            if last_update_year > 90:
1354                last_update_year += 1900
1355            else:
1356                last_update_year += 2000
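            # e.g. a two-digit year of 97 becomes 1997, while 07 and 25 become 2007 and 2025.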
1357
1358            # If the year is old, rewrite to "01 Jan <current year>".
1359            if last_update_year < WebSite.get_current_year():
1360                two_digit_year = self.user_settings.TWO_DIGIT_YEAR_FORMAT % self.get_current_two_digit_year()
1361                sub = p.sub('last updated 01 Jan ' + two_digit_year, line)
1362                line = sub
1363
1364        return line
1365
1366    def rewrite_source_file(self, file_name):
1367        """Rewrite copyright lines, last updated lines, etc."""
1368        changed = False
1369
1370        # Create a new temporary file name for the rewritten file.
1371        temp_file_name = file_name + self.user_settings.TEMP_FILE_EXT
1372
1373        # Apply changes to all lines of the temporary file.  Apply change functions in
1374        # the sequence listed.
1375        if self.process_lines_of_file(file_name, temp_file_name,
1376                                      [self.rewrite_copyright_line,
1377                                       self.rewrite_last_update_line,
1378                                       self.rewrite_email_address_line,
1379                                       self.rewrite_substring]):
1380            logging.debug(f"Changed (rewritten) source file {file_name:s}")
1381            changed = True
1382
1383        # Rename the temporary file to the original file name.  If no changes, just delete the temp file.
1384        self.clean_up_temp_file(temp_file_name, file_name, changed)
1385
1386        return changed
1387
1388    @staticmethod
1389    def create_syntax_highlighted_code_listing(source_file_name, **kwargs):
1390        """Create a syntax highlighted source listing for the file and return its name.  Return None if there is an error.
1391        Keep the same date/time as the original file."""
1392
        # kwargs is a dictionary of optional keyword arguments.  The only one we look for is the
        # boolean flag dry_run, which defaults to False.
        dry_run = kwargs.get('dry_run') is True
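        # For example, calling create_syntax_highlighted_code_listing(full_file_name, dry_run=True)
        # (as in the commented-out call in clean() above) only logs what would have been generated
        # and returns None without writing any file.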
1403
1404        # Take apart the file name.
1405        file_name_without_extension = Path(source_file_name).stem
1406        file_extension = Path(source_file_name).suffix
1407
1408        # Append *.html to the source code file name.  This will be the syntax highlighted code listing.
1409        syntax_highlighted_file_name = f"{source_file_name}.html"
1410
1411        # In the special case of Jupyter notebooks, use the Jupyter to HTML converter.
1412        if file_extension == ".ipynb":
1413            if dry_run:
1414                logging.debug(f"Dry run only:  don't generate the syntax highlighted file {syntax_highlighted_file_name:s}")
1415                return None
1416            # Python manual recommends using the run() command instead of Popen().  See https://docs.python.org/3/library/subprocess.html#subprocess.run
1417            try:
1418                shell_command = f"jupyter nbconvert {source_file_name} --to html --output {syntax_highlighted_file_name}"
                # Throw an exception if we can't run the process.
                # Capture the standard output and standard error and dump them to /dev/null so they don't print to the command line when running this script.
                # Since the shell command is a single string, pass it directly and use shell=True in the run() command.
                subprocess.run(shell_command, shell=True, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
1423            except subprocess.CalledProcessError as detail: 
1424                logging.error(f"Cannot convert the Jupyter file {source_file_name:s} to a syntax highlighted file: {str(detail):s}  Aborting...")
1425                return None
1426        # Otherwise, use the Pygments syntax highlighter.
1427        else:
            # First choose the language lexer from the file name itself if there's no extension.
            # Dot files such as .bashrc have no suffix, so the stem is the entire file name.
1430            match file_name_without_extension:
1431                case "makefile":
1432                    lexer = MakefileLexer()
                case ".bash_profile" | ".bashrc" | ".bash_logout":
1434                    lexer = BashLexer()
1435                case ".vimrc":
1436                    lexer = VimLexer()
1437                case ".gitignore_global" | ".gitignore" | ".gitconfig":
1438                    lexer = OutputLexer() # No formatting.
1439                case _:
1440                    # Choose the language lexer from the file extension.  Web stuff first, then programming languages.
1441                    match file_extension:
1442                        case ".html":
1443                            lexer = HtmlLexer()
1444                        case ".css":
1445                            lexer = CssLexer()
1446                        case ".js":
1447                            lexer = JavascriptLexer()
1448                        case ".sh":
1449                            lexer = BashLexer()
1450                        case ".py":
1451                            lexer = PythonLexer()
1452                        case ".c" | ".h":
1453                            lexer = CLexer()
1454                        case ".hpp" | ".cpp":
1455                            lexer = CppLexer()
1456                        case ".lsp":
1457                            lexer = CommonLispLexer()
1458                        case ".for" | ".FOR" | ".f":
1459                            lexer = FortranFixedLexer()  # Fixed format FORTRAN, not FORTRAN 90.
1460                        case ".txt" | ".dat":            # Generic data file;  no formatting.
1461                            lexer = OutputLexer()
1462                        case ".tex":
1463                            lexer = TexLexer()           # LaTeX, TeX, or related files.
1464                        case ".m":
1465                            lexer = MatlabLexer()
1466                        case ".yaml":
1467                            lexer = YamlLexer()
1468                        case _:
1469                            logging.error(f"Can't find a lexer for file {source_file_name}.  Cannot generate a syntax highlighted source listing.  Aborting...")
1470                            return None
1471
1472            # Read the source code file into a single string.
1473            try:
1474                with open(source_file_name, 'r') as fp:
1475                    source_file_string = fp.read()
            except OSError as detail:
                logging.error(f"Cannot read the source code file {source_file_name:s} for syntax highlighting: {str(detail):s}  Aborting...")
                return None
1478
1479            # Top level Pygments function generates the HTML for the highlighted code.
1480            highlighted_html_source_file_string = highlight(source_file_string, lexer, HtmlFormatter(linenos="inline"))
1481
1482            # The style sheet is always the same for all languages.
1483            style_sheet = HtmlFormatter().get_style_defs('.highlight')
1484
1485            # Write out the syntax colored file.
1486            if dry_run:
1487                logging.debug(f"Dry run only:  don't generate the syntax highlighted file {syntax_highlighted_file_name:s}")
1488                return None
1489            else:
1490                try:
1491                    # Write out the highlighted code listing in HTML with CSS style sheet attached.
1492                    with open(syntax_highlighted_file_name, 'w') as fp:
1493                        fp.write(UserSettings.BASIC_HTML_BEGIN)
1494                        fp.write(style_sheet)
1495                        fp.write(UserSettings.BASIC_HTML_MIDDLE)
1496                        fp.write(highlighted_html_source_file_string)
1497                        fp.write(UserSettings.BASIC_HTML_END)
1498                except OSError as detail:
1499                    logging.error(f"Cannot write the syntax highlighted file {syntax_highlighted_file_name:s}: {str(detail):s}  Aborting...")
1500        # ------- end Pygments syntax highlighter
1501
1502        # Set the syntax highlighted code file to the same modification and access time and date as the source file.
1503        file_stat = os.stat(source_file_name)
1504        os.utime(syntax_highlighted_file_name, (file_stat[stat.ST_ATIME], file_stat[stat.ST_MTIME]))
1505
        # Are the original source and the syntax highlighted code the same date and time?
        dates_and_times_source_file_name             = LocalWebSite.get_file_date_time(source_file_name)
        dates_and_times_syntax_highlighted_file_name = LocalWebSite.get_file_date_time(syntax_highlighted_file_name)
1509        if dates_and_times_source_file_name != dates_and_times_syntax_highlighted_file_name:
1510            logging.error(f"Source code and syntax highlighted source don't have the same times.  source time = {dates_and_times_source_file_name.ctime():s} syntax highlighted time = {dates_and_times_syntax_highlighted_file_name.ctime():s} Aborting...")
1511            return None
1512
1513        logging.debug(f"Generated a syntax highlighted listing {syntax_highlighted_file_name:s} for the source code file {source_file_name:s} with the same time and date = {dates_and_times_source_file_name.ctime():s}")
1514        return syntax_highlighted_file_name
1515
1516# ----------------------------------------------------------------------------
1517#   Subclass which knows about the remote web site.
1518# ----------------------------------------------------------------------------
1519
1520class RemoteWebSite(WebSite):
1521    """Walk the remote web directory on a web server down from the root.
1522       Use FTP commands:
1523           https://en.wikipedia.org/wiki/List_of_FTP_commands
1524       Use the Python ftp library:
1525           https://docs.python.org/3/library/ftplib.html
1526    """
1527
1528    def __init__(self, user_settings):
1529        """Connect to FTP server and list all files and directories."""
1530
1531        # Root directory of FTP server.
1532        self.root_dir = user_settings.FTP_ROOT_NAME
1533        logging.debug(f"Set the remote web site ftp root dir = {self.root_dir:s}")
1534
1535        # Connect to FTP server and log in.
1536        try:
1537            # self.ftp.set_debuglevel( 2 )
1538            # print( f"\nTrying ftp login to server name = {user_settings.SERVER_NAME} user name = {user_settings.USER_NAME} password =  {user_settings.PASSWORD_NAME}\n")
1539            self.ftp = ftplib.FTP(user_settings.SERVER_NAME)
1540            self.ftp.login(user_settings.USER_NAME, user_settings.PASSWORD_NAME)
1541        # Catch all exceptions with the parent class Exception:  all built-in,
1542        # non-system-exiting exceptions are derived from this class.
1543        except Exception as detail:
1544            # Extract the string message from the exception class with str().
1545            logging.error(f"Remote web site cannot login to ftp server: {str(detail):s}  Aborting...")
1546            raise UpdateWebException("Problem accessing remote web site.  See the log file for details.  Aborting... ") from detail
1547        else:
1548            logging.debug("Remote web site ftp login succeeded.")
1549
1550        logging.debug(f"Remote web site ftp welcome message {self.ftp.getwelcome():s}")
1551
1552        # Initialize the superclass.
1553        WebSite.__init__(self, user_settings)
1554
1555    def go_to_root_dir(self, root_dir):
1556        """Go to the root directory"""
1557
1558        try:
1559            # Go to the root directory.
1560            self.ftp.cwd(root_dir)
1561            logging.debug(f"ftp root directory (requested) = {self.root_dir:s}")
1562
1563            # Read it back.
1564            self.root_dir = self.ftp.pwd()
1565            logging.debug(f"ftp root directory (read back from server): {self.root_dir:s}")
1566
1567        except Exception as detail:
1568            logging.error(f"go_to_root_dir(): \tCannot ftp cwd or pwd root dir {root_dir:s} {str(detail):s} Aborting...")
1569            raise UpdateWebException("Problem accessing remote web site.  See the log file for details.  Aborting... ") from detail
1570
1571    def get_root_dir(self):
1572        """Get the root directory name"""
1573
1574        return self.root_dir
1575
1576    def finish(self):
1577        """Quit remote web site"""
1578        logging.debug(f"Finished with WebSite object of class {type(self)}")
1579        try:
1580            self.ftp.quit()
1581        except Exception as detail:
1582            logging.error(f"Cannot ftp quit: {str(detail):s}")
1583
1584    def one_level_down(self, d):
1585        """List files and directories in a subdirectory using ftp"""
1586
1587        directories = []
1588        files = []
1589
1590        try:
1591            # ftp listing from current dir.
1592            logging.debug(f"RemoteWebSite.one_level_down():  \tftp cwd: {d:s}")
1593            self.ftp.cwd(d)
1594            dir_list = []
1595
1596            # Use the nonstandard -a option in LIST to show all the hidden .* files.
1597            # But now we have the problem that . and .. (the UNIX current and parent directories) will be in the ftp list of directories.
1598            # Note the second argument requires a callback function.
1599            self.ftp.retrlines('LIST -a', dir_list.append)
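            # After this call dir_list holds one raw text line per directory entry, e.g. (using the
            # formats shown in parse_ftp_list() below):
            #     drwxr-xr-x    2 1000       free             4096 Jul 18 16:36 ReadingList
            #     -rw-r--r--    1 1000       free             4084 Jul 18 16:55 sparkCoil.png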
1600
1601        except Exception as detail:
1602            logging.error(f"one_level_down(): \tCannot ftp cwd or ftp LIST dir {d:s}:  {str(detail):s} Aborting...")
1603            raise UpdateWebException("Problem accessing remote web site.  See the log file for details.  Aborting... ") from detail
1604
1605        for line in dir_list:
1606            logging.debug(f"RemoteWebSite.one_level_down():  \tftp LIST: {line:s}")
1607
1608            # Line should at least have the minimum FTP information.
1609            if len(line) >= self.user_settings.MIN_FTP_LINE_LENGTH:
1610                # Parse the FTP LIST and put the pieces into file_info.
1611                file_info = self.parse_ftp_list(line)
1612                logging.debug(f"RemoteWebSite.one_level_down():  \tftp parsed file information: {file_info[self.user_settings.FILE_NAME]:s}")
1613
                # Skip over the UNIX current and parent directory entries . and ..  Also skip over any NULL (empty) file names.
1615                if file_info[self.user_settings.FILE_NAME] == "" or file_info[self.user_settings.FILE_NAME] == "." or file_info[self.user_settings.FILE_NAME] == "..":
1616                    logging.debug(f"RemoteWebSite.one_level_down():  \tftp skipping the file name: {file_info[self.user_settings.FILE_NAME]:s}")
1617                    pass
                # For a directory:  add the full path prefix from the root to the directory name and append it to the directory list.
1619                elif file_info[self.user_settings.FILE_TYPE] == FileType.DIRECTORY:
1620                    dirname = self.append_root_dir( d, file_info[self.user_settings.FILE_NAME])
1621                    logging.debug(f"RemoteWebSite.one_level_down():  \tftp dir (full path): {dirname:s}")
1622                    directories.append(dirname)
1623                # For a file:  Add the full path prefix from the root to the file name.
1624                else:
1625                    file_info[self.user_settings.FILE_NAME] = self.append_root_dir( d, file_info[self.user_settings.FILE_NAME])
                    logging.debug(f"RemoteWebSite.one_level_down():  \tftp file (full path): "
                                  f"{file_info[self.user_settings.FILE_NAME]:s}")
1628                    files.append(file_info)
1629            else:
1630                logging.error(f"RemoteWebSite.one_level_down():  \tFTP LIST line is too short:  {line:s}")
1631
1632        directories.sort()
1633        files.sort()
1634
1635        return directories, files
1636
1637    def modtime(self, f):
1638        """Get the modification time of a file via ftp.  Return 0 if ftp cannot get it."""
1639        modtime = 0
1640
1641        try:
1642            response = self.ftp.sendcmd('MDTM ' + f)
            # MDTM returns the last modified time of the file in the format
            #     "213 YYYYMMDDhhmmss" terminated by \r\n, or an error response.
            # MM is 01 to 12, DD is 01 to 31, hh is 00 to 23, mm is 00 to 59, and ss is 00 to 59.
            # The error response is 550 if the information is not available, and 500 or 501 if the
            # command cannot be parsed.
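            # Illustrative example (not an actual server response):  a reply of
            #     "213 20250118165500"
            # means the file was last modified on 2025-01-18 at 16:55:00, and response[4:] extracts "20250118165500".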
1648            if response[:3] == '213':
1649                modtime = response[4:]
1650        except ftplib.error_perm as detail:
1651            logging.error(f"Cannot get file modification time from the ftp server: {str(detail):s} Aborting...")
1652            modtime = 0
1653
1654        return modtime
1655
1656    def parse_ftp_list(self, line):
1657        """Parse the ftp file listing and return file name, datetime and file size.
1658
1659           An FTP LIST command will give output which looks like this for a file:
1660
1661               -rw-r--r--    1 1000       free             4084 Jul 18 16:55 sparkCoil.png
1662
1663           and for a directory:
1664
1665                drwxr-xr-x    2 1000       free             4096 Jul 18 16:36 ReadingList
1666
1667           FTP uses UTC for its listings; the conversion to local time is done by the OS.
1668           We can have problems on New Year's Eve.  For example, the local file date/time is
1669
1670              Mon Jan  1 06:23:12 2018
1671
1672           But the remote file date/time from FTP listing doesn't show a year even though we
1673           know it was written to the server in 2017.
1674
1675               Mon Dec 31 03:02:00
1676
1677           So we default the remote file year to current year 2018 and get
1678
1679               Mon Dec 31 03:02:00 2018
1680
1681           Now we think that the remote file is newer by 363.860278 days.
1682        """
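        # For the sparkCoil.png example in the docstring above, the parsed result is approximately
        #     ["sparkCoil.png", FileType.FILE, datetime.datetime(<current year>, 7, 18, 16, 55, 0), 4084]
        # where the year is defaulted as described above because the listing omits it.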
1683
1684        # Find out if we've a directory or a file.
1685        if line[0] == 'd':
1686            dir_or_file = FileType.DIRECTORY
1687        else:
1688            dir_or_file = FileType.FILE
1689
1690        pattern = self.user_settings.FTP_LISTING
1691
1692        # Sensible defaults.
1693        filesize = 0
1694        filename = ""
1695        # Default the time to midnight.
1696        hour = 0
1697        minute = 0
1698        seconds = 0
1699        # Default the date to Jan 1 of the current year.
1700        month = 1
1701        day = 1
1702        year = WebSite.get_current_year()
1703
1704        # Extract time and date from the ftp listing.
1705        match = pattern.search(line)
1706
1707        if match:
1708            filesize = int(match.group('bytes'))
1709            month = self.user_settings.monthToNumber[match.group('mon')]
1710            day = int(match.group('day'))
1711
1712            # Remote file listing contains the year.  The FTP listing will omit the hour and minute.
1713            if match.group('year'):
1714                year = int(match.group('year'))
1715                logging.debug(f"ftp has year = {year} but is probably missing hour and minute")
1716            else:
1717                # Remote file listing omits the year.  Default the year to the current UTC time year.
1718                # That may be incorrect (see comments above).
1719                year = WebSite.get_current_year()
1720                logging.debug(f"ftp is missing the year;  use the current year = {year}")
1721
1722            # If the FTP listing has the hour and minute, it will omit the year.
1723            if match.group('hour') and match.group('min'):
1724                hour = int(match.group('hour'))
1725                minute = int(match.group('min'))
1726                logging.debug(f"ftp has hour = {hour} and minute = {minute} so is probably missing the year")
1727
1728            filename = match.group('filename')
1729
1730        # Package up the time and date nicely.
1731        # Note if we didn't get any matches, we'll default the remote date and
1732        # time to Jan 1 midnight of the current year.
1733        d = datetime.datetime(year, month, day, hour, minute, seconds)
1734
1735        return [filename, dir_or_file, d, filesize]
1736
1737# ----------------------------------------------------------------------------
1738#  Class for synchronizing local and remote web sites.
1739# ----------------------------------------------------------------------------
1740
1741class UpdateWeb(object):
1742    """Given previously scanned local and remote directories, update the remote website."""
1743
1744    def __init__(
1745            self,
1746            user_settings,
1747            local_directory_list,
1748            local_file_info,
1749            remote_directory_list,
1750            remote_file_info):
1751        """Connect to remote site.  Accept previously scanned local and remote files and directories."""
1752
1753        # Initialize from args.
1754        self.user_settings = user_settings
1755        self.local_directory_list = local_directory_list
1756        self.remote_directory_list = remote_directory_list
1757        self.local_file_info = local_file_info
1758        self.remote_file_info = remote_file_info
1759
1760        # Initialize defaults.
1761        self.local_files_list = []
1762        self.remote_files_list = []
1763        self.local_file_to_size = {}
1764        self.local_file_to_date_time = {}
1765        self.remote_file_to_date_time = {}
1766        self.local_only_dirs = []
1767        self.local_only_files = []
1768        self.remote_only_dirs = []
1769        self.remote_only_files = []
1770        self.common_files = []
1771
1772        # Connect to FTP server and log in.
1773        try:
1774            self.ftp = ftplib.FTP(self.user_settings.SERVER_NAME)
1775            self.ftp.login(self.user_settings.USER_NAME, self.user_settings.PASSWORD_NAME)
1776        except Exception as detail:
1777            logging.error(f"Cannot login to ftp server: {str(detail):s} Aborting...")
1778            raise UpdateWebException("Problem accessing remote web site.  See the log file for details.  Aborting... ") from detail
1779        else:
1780            logging.debug("ftp login succeeded.")
1781
1782        logging.debug(f"ftp server welcome message:  {self.ftp.getwelcome():s}")
1783
1784        # Local root directory.
1785        self.local_root_dir = self.user_settings.local_root_dir
1786        logging.debug(f"Local root directory: {self.local_root_dir:s}")
1787
1788        # Root directory of FTP server.
1789        self.ftp_root_dir = self.user_settings.FTP_ROOT_NAME
1790        logging.debug(f"ftp root directory (requested) = {self.ftp_root_dir:s}")
1791
        # Transform KB string to integer bytes.  e.g. "200" => 204800
1793        self.file_size_limit = int(self.user_settings.FILE_SIZE_LIMIT_NAME) * 1024
1794
1795        try:
1796            # Go to the root directory.
1797            self.ftp.cwd(self.ftp_root_dir)
1798
1799            # Read it back.
1800            self.ftp_root_dir = self.ftp.pwd()
1801            logging.debug(f"ftp root directory (read back from server): {self.ftp_root_dir:s}")
1802        except Exception as detail:
1803            logging.error(f"UpdateWeb(): \tCannot ftp cwd or ftp LIST dir {self.ftp_root_dir:s} {str(detail):s} Aborting...")
1804
1805    def append_root_dir(self, root_dir, name):
1806        """Append the root directory to a path"""
1807
1808        # e.g. root = /, and name = Art/foo.txt yields /Art/foo.txt
1809        # but root = /Sean, and name = Art/foo.txt yields /Sean/Art/foo.txt
1810        if root_dir == self.user_settings.DEFAULT_ROOT_DIR:
1811            return root_dir + name
1812        else:
1813            return root_dir + "/" + name
1814
1815    def file_info(self):
1816        """Create lists of file names from the file information.  Also create dictionaries which map file names onto
1817        dates, times, and sizes."""
1818
1819        # Extract file names.
1820        self.local_files_list = [
1821            file_info[self.user_settings.FILE_NAME] for file_info in self.local_file_info]
1822        self.remote_files_list = [
1823            file_info[self.user_settings.FILE_NAME] for file_info in self.remote_file_info]
1824
1825        # Use a dictionary comprehension to create key/value pairs, 
1826        #     (file name, file date/time)
1827        # which map file names onto date/time.
1828        self.local_file_to_date_time = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_DATE_TIME] for file_info in self.local_file_info}
1829        self.remote_file_to_date_time = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_DATE_TIME] for file_info in self.remote_file_info}
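        # Each of these dictionaries maps a file name onto its date/time, e.g. a (hypothetical) entry
        # might look like  {"Art/foo.txt": datetime.datetime(2025, 1, 18, 16, 55)}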
1830
1831        # Dictionary comprehension creates a mapping of local file names onto file sizes.
1832        self.local_file_to_size = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_SIZE] for file_info in self.local_file_info}
1833
1834    def update(self):
1835        """Scan through the local website, cleaning it up.
1836        Go to remote website on my servers and synchronize all files."""
1837
1838        self.file_info()
1839
        # Find which files and directories are different.
1841        self.changes()
1842
        # Synchronize the remote web site with the local web site.
1844        self.synchronize()
1845
1846    def changes(self):
1847        """Find the set of different directories and files on local and remote."""
1848
1849        # Add all directories which are only on local to the dictionary.
1850        dir_to_type = {
1851            d: FileType.ON_LOCAL_ONLY for d in self.local_directory_list}
1852
1853        # Scan through all remote directories, adding those only on remote or
1854        # on both.
1855        for d in self.remote_directory_list:
1856            if d in dir_to_type:
1857                dir_to_type[d] = FileType.ON_BOTH_LOCAL_AND_REMOTE
1858            else:
1859                dir_to_type[d] = FileType.ON_REMOTE_ONLY
1860
1861        # Add all files which are only on local to the dictionary.
1862        file_to_type = {
1863            f: FileType.ON_LOCAL_ONLY for f in self.local_files_list}
1864
1865        # Scan through all remote files, adding those only on remote or on
1866        # both.
1867        for f in self.remote_files_list:
1868            if f in file_to_type:
1869                file_to_type[f] = FileType.ON_BOTH_LOCAL_AND_REMOTE
1870            else:
1871                file_to_type[f] = FileType.ON_REMOTE_ONLY
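        # At this point each dictionary maps a path onto where it lives, e.g. (hypothetical names)
        #     dir_to_type  = {"Art": FileType.ON_BOTH_LOCAL_AND_REMOTE, "Blog": FileType.ON_LOCAL_ONLY}
        #     file_to_type = {"Art/foo.txt": FileType.ON_REMOTE_ONLY, ...}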
1872
1873        logging.debug("Raw dictionary dump of directories")
1874        for k, v in dir_to_type.items():
1875            logging.debug(f"\t dir:  {str(k):s}  type: {str(v):s}")
1876
1877        logging.debug("Raw dictionary dump of files")
1878        for k, v in file_to_type.items():
1879            logging.debug(f"\t file: {str(k):s}  type: {str(v):s}")
1880
1881        # List of directories only on local.  Keep the ordering.
1882        self.local_only_dirs = [
1883            d for d in self.local_directory_list if dir_to_type[d] == FileType.ON_LOCAL_ONLY]
1884
1885        # List of directories only on remote.  Keep the ordering.
1886        self.remote_only_dirs = [
1887            d for d in self.remote_directory_list if dir_to_type[d] == FileType.ON_REMOTE_ONLY]
1888
1889        # We don't care about common directories, only their changed files, if
1890        # any.
1891
1892        # List of files only on local.  Keep the ordering.
1893        self.local_only_files = [
1894            f for f in self.local_files_list if file_to_type[f] == FileType.ON_LOCAL_ONLY]
1895
1896        # List of files only on remote.  Keep the ordering.
1897        self.remote_only_files = [
1898            f for f in self.remote_files_list if file_to_type[f] == FileType.ON_REMOTE_ONLY]
1899
1900        # List of common files on both local and remote.  Keep the ordering.
1901        self.common_files = [
1902            f for f in self.local_files_list if file_to_type[f] == FileType.ON_BOTH_LOCAL_AND_REMOTE]
1903
1904        logging.debug("*** Directories only on local ******************************")
1905        for d in self.local_only_dirs:
1906            logging.debug(f"\t {d:s}")
1907
1908        logging.debug("*** Directories only on remote ******************************")
1909        for d in self.remote_only_dirs:
1910            logging.debug(f"\t {d:s}")
1911
1912        logging.debug("*** Files only on local ******************************")
1913        for f in self.local_only_files:
1914            logging.debug(f"\t {f:s}")
1915
1916        logging.debug("*** Files only on remote ******************************")
1917        for f in self.remote_only_files:
1918            logging.debug(f"\t {f:s}")
1919
1920        logging.debug("*** Common files ******************************")
1921        for f in self.common_files:
1922            logging.debug(f"name {f:s}")
1923            logging.debug(f"\tlocal time {self.local_file_to_date_time[f].ctime():s}")
1924            logging.debug(f"\tremote time {self.remote_file_to_date_time[f].ctime():s}")
1925
1926    def synchronize(self):
1927        """Synchronize files and subdirectories in the remote directory with the local directory."""
1928
1929        # If we have the same files in local and remote, compare their times
1930        # and dates.
1931        for f in self.common_files:
1932            local_file_time = self.local_file_to_date_time[f]
1933            remote_file_time = self.remote_file_to_date_time[f]
1934
1935            # What's the time difference?
1936            time_delta = remote_file_time - local_file_time
1937            # How much difference, either earlier or later?
1938            seconds_different = abs(time_delta.total_seconds())
1939            minutes_different = seconds_different / 60.0
1940            hours_different = minutes_different / 60.0
1941            days_different = hours_different / 24.0
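            # e.g. a remote file 90 seconds newer than the local copy gives seconds_different = 90.0,
            # minutes_different = 1.5, hours_different = 0.025, and days_different of about 0.00104.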
1942
1943            # Assume no upload initially.
1944            upload_to_host = False
1945
1946            logging.debug(f"Common file:  {f:s}.")
1947
1948            # Remote file time is newer.
            # Allow for long file names (around 200 characters), e.g.
            #     Mathematics/AbstractAlgebra/PrimitivePolynomials/Project/Build/PrimpolyXCode/Primpoly/Primpoly.xcodeproj/project.xcworkspace/xcuserdata/seanoconnor.xcuserdatad/UserInterfaceState.xcuserstate
1951
1952            if remote_file_time > local_file_time:
                # Remote file time is MUCH newer:  suspect the time is out of joint on the server, so upload the local file to be safe.
1954                if minutes_different >= self.user_settings.MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD:
1955                    logging.error(f"Remote file {f:s} is MUCH newer by {minutes_different:8.1f} minutes [which exceeds the threshold = {self.user_settings.MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD} minutes]. Upload the file to be safe.")
1956                    logging.error(f"\tlocal time {local_file_time.ctime():s}")
1957                    logging.error(f"\tremote time {remote_file_time.ctime():s}")
1958
1959                    # Set the local file to the current time.
1960                    full_file_name = self.append_root_dir(
1961                        self.local_root_dir, f)
1962                    if os.path.exists(full_file_name):
1963                        # Change the access and modify times of the file to the current time.
1964                        os.utime(full_file_name, None)
1965                        logging.error(f"Touching local file {full_file_name:s} to make it the current time")
1966
1967                    upload_to_host = True
1968                # Remote file time is newer, but not by much.  Let's just assume a slight time mismatch on the server.  Don't upload.
1969                else:
1970                    logging.warning(f"Remote file {f:s} is only SLIGHTLY newer by {seconds_different:8.1f} seconds.  Probably just inaccurate time/date on the server.  Wait -- don't upload the file yet.")
1971                    logging.warning(f"\tlocal time {local_file_time.ctime():s}")
1972                    logging.warning(f"\tremote time {remote_file_time.ctime():s}")
1973                    upload_to_host = False
1974
1975            # Local file time is newer.
1976            elif local_file_time > remote_file_time:
1977                # Local file time slightly newer than the remote file.  So we are pretty sure the local file really got changed vs the server file.
1978                if minutes_different >= self.user_settings.MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD:
                    logging.warning(f"Local file {f:20s} is newer by {minutes_different:8.1f} minutes [which exceeds the threshold = {self.user_settings.MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD} minutes].  Uploading to remote server.")
1980                    logging.warning(f"\tlocal time {local_file_time.ctime():s}")
1981                    logging.warning(f"\tremote time {remote_file_time.ctime():s}")
1982                    upload_to_host = True
1983                else:
1984                    logging.warning(f"Local file {f:20s} is BARELY newer by {seconds_different:8.1f} seconds.  Probably just inaccurate time/date on the server.  Wait -- don't upload the file yet.")
1985                    logging.warning(f"\tlocal time {local_file_time.ctime():s}")
1986                    logging.warning(f"\tremote time {remote_file_time.ctime():s}")
1987                    upload_to_host = False
1988
1989            # Cancel the upload if the file is too big for the server.
1990            size = self.local_file_to_size[f]
1991            if size >= self.file_size_limit:
1992                logging.error(f"upload():  Skipping upload of file {f:s} of size {size:d}; too large for server, limit is {self.file_size_limit:d} bytes")
1993                upload_to_host = False
1994
1995            # Finally do the file upload.
1996            if upload_to_host:
1997                logging.debug(f"Uploading changed file {f:s}")
1998                # Suppress newline to keep the message to the console more compact.  Flush output buffer, so we can see the message right away.
1999                print(f"Uploading changed file {f:s}...  ", end='', flush=True)
2000                self.upload(f)
2001
2002        # Remote directory is not in local.  Delete it.
2003        for d in self.remote_only_dirs:
2004            logging.debug(f"Deleting remote only directory {d:s}")
2005            print(f"Deleting remote only directory {d:s}...  ", end='', flush=True)
2006            self.rmdir(d)
2007
2008        # Local directory missing on remote.  Create it.
2009        # Due to breadth first order scan, we'll create parent directories
2010        # before child directories.
2011        for d in self.local_only_dirs:
2012            logging.debug(f"Only on local.  Creating new remote dir {d:s}.")
2013            print(f"Creating new remote directory {d:s}...  ", end='', flush=True)
2014            self.mkdir(d)
2015
2016        # Local file missing on remote.  Upload it.
2017        for f in self.local_only_files:
2018            logging.debug(f"Local only file.  Uploading {f:s} to remote.")
2019
2020            #  But cancel the upload if the file is too big for the server.
2021            size = self.local_file_to_size[f]
2022            if size >= self.file_size_limit:
2023                logging.error(f"upload():  Skipping upload of file {f:s} of size {size:d};"
2024                              f" too large for server, limit is {self.file_size_limit:d} bytes")
2025            else:
2026                logging.debug(f"Uploading new file {f:s}")
2027                print(f"Uploading new file {f:s}...  ", end='', flush=True)
2028                self.upload(f)
2029
2030        # Remote contains a file not present on the local.  Delete the file.
2031        for f in self.remote_only_files:
2032            logging.debug(f"Remote only file.  Deleting remote file {f:s}.")
2033            print(f"Deleting remote file {f:s}...  ", end='', flush=True)
2034            self.del_remote(f)
2035
2036    def del_remote(self, relative_file_path):
2037        """Delete a file using ftp."""
2038
2039        logging.debug(f"del_remote():  \trelative file path name: {relative_file_path:s}")
2040
2041        # Parse the relative file path into file name and relative directory.
2042        relative_dir, file_name = os.path.split(relative_file_path)
2043        logging.debug(f"del_remote():  \tfile name: {file_name:s}")
2044        logging.debug(f"del_remote():  \trelative dir: {relative_dir:s}")
2045        logging.debug(f"del_remote():  \tremote root dir: {self.ftp_root_dir:s}")
2046
2047        try:
2048            # Add the remote root path and go to the remote directory.
2049            remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
2050            logging.debug(f"del_remote():  \tftp cd remote dir: {remote_dir:s}")
2051            self.ftp.cwd(remote_dir)
2052        except Exception as detail:
2053            logging.error(f"del_remote():  \tCannot ftp chdir: {str(detail):s}  Skipping...")
2054        else:
2055            try:
2056                logging.debug(f"del_remote():  \tftp rm: {file_name:s}")
2057
2058                # Don't remove zero length file names.
2059                if len(file_name) > 0:
2060                    self.ftp.delete(file_name)
2061                else:
                    logging.warning(
                        f"del_remote():  skipping ftp delete;  file name {file_name:s} had zero length")
2064            except Exception as detail:
2065                logging.error(f"del_remote():  \tCannot ftp rm: {str(detail):s}")
2066
2067    def mkdir(self, relative_dir):
2068        """Create new remote directory using ftp."""
2069
2070        logging.debug(f"mkdir():  \trelative dir path name: {relative_dir:s}")
2071        logging.debug(f"mkdir():  \tremote root dir: {self.ftp_root_dir:s}")
2072
2073        # Parse the relative dir path into prefix dir and suffix dir.
2074        path, d = os.path.split(relative_dir)
2075        logging.debug(f"mkdir():  \tremote prefix dir: {path:s}")
2076        logging.debug(f"mkdir():  \tremote dir:  {d:s}")
2077
2078        try:
2079            # Add the remote root path and go to the remote directory.
2080            remote_dir = self.append_root_dir(self.ftp_root_dir, path)
2081            logging.debug(f"mkdir():  \tftp cd remote dir: {remote_dir:s}")
2082            self.ftp.cwd(remote_dir)
2083        except Exception as detail:
            logging.error(f"mkdir():  \tCannot ftp chdir: {str(detail):s}  Skipping...")
2085        else:
2086            try:
2087                logging.debug(f"mkdir():  \tftp mkd: {d:s}")
2088                self.ftp.mkd(d)
2089            except Exception as detail:
2090                logging.error(f"mkdir():  \tCannot ftp mkdir: {str(detail):s}")
2091
2092    def rmdir(self, relative_dir):
2093        """Delete an empty directory using ftp."""
2094
2095        logging.debug(f"rmdir():  \tintermediate dir path name: {relative_dir:s}")
2096        logging.debug(f"rmdir():  \tremote root dir: {self.ftp_root_dir:s}")
2097
2098        # Parse the relative dir path into prefix dir and suffix dir.
2099        path, d = os.path.split(relative_dir)
2100        logging.debug(f"rmdir():  \tremote prefix dir: {path:s}")
2101        logging.debug(f"rmdir():  \tremote dir:  {d:s}")
2102
2103        try:
2104            # Add the remote root path and go to the remote directory.
2105            remote_dir = self.append_root_dir(self.ftp_root_dir, path)
2106            logging.debug(f"rmdir():  \tftp cd remote dir: {remote_dir:s}")
2107            self.ftp.cwd(remote_dir)
2108        except Exception as detail:
2109            logging.error(f"rmdir():  \tCannot ftp chdir: {str(detail):s}  Skipping...")
2110        else:
2111            try:
2112                logging.debug(f"rmdir():  \tftp rmd: {d:s}")
2113                self.ftp.rmd(d)
2114            except Exception as detail:
2115                logging.error(f"rmdir():  \tCannot ftp rmdir dir {d:s}: {str(detail):s}.  Directory is probably not empty.  Do a manual delete.")
2116
2117    def download(self, relative_file_path):
2118        """Download a binary file using ftp."""
2119
2120        logging.debug(f"download():  \tfile name: {relative_file_path:s}")
2121
2122        # Parse the relative file path into file name and relative directory.
2123        relative_dir, file_name = os.path.split(relative_file_path)
2124        logging.debug(f"download():  \tfile name: {file_name:s}")
2125        logging.debug(f"download():  \trelative dir: {relative_dir:s}")
2126        logging.debug(f"download():  \troot dir: {self.ftp_root_dir:s}")
2127
2128        # Add the remote root path and go to the remote directory.
2129        remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
2130        logging.debug(f"download():  \tftp cd remote dir: {remote_dir:s}")
2131
2132        try:
2133            self.ftp.cwd(remote_dir)
2134        except Exception as detail:
2135            logging.error(f"download():  \tCannot ftp chdir: {str(detail):s}  Skipping...")
2136        else:
2137            # Add the local root path to get the local file name.
2138            # Open local binary file to write into.
2139            local_file_name = self.append_root_dir(
2140                self.local_root_dir, relative_file_path)
2141            logging.debug(f"download():  \topen local file name: {local_file_name:s}")
2142            try:
2143                f = open(local_file_name, "wb")
2144                try:
                    # Calls f.write() on each block of the binary file.
                    # Downloading is currently disabled;  uncomment the next line to enable it.
                    # self.ftp.retrbinary("RETR " + file_name, f.write)
                    pass
2148                except Exception as detail:
                    logging.error(f"download():  \tCannot ftp retrbinary: {str(detail):s}")
2150                f.close()
2151            except IOError as detail:
                logging.error(f"download():  \tCannot open local file {local_file_name:s} for writing:  {str(detail):s}")
2153
    def upload(self, relative_file_path):
        """Upload a binary file using ftp."""
2156
2157        logging.debug(f"upload():  \trelative file path name: {relative_file_path:s}")
2158
2159        # Parse the relative file path into file name and relative directory.
2160        relative_dir, file_name = os.path.split(relative_file_path)
2161        logging.debug(f"upload():  \tfile name: {file_name:s}")
2162        logging.debug(f"upload():  \trelative dir: {relative_dir:s}")
2163        logging.debug(f"upload():  \tremote root dir: {self.ftp_root_dir:s}")
2164
2165        # Add the remote root path and go to the remote directory.
2166        remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
2167        logging.debug(f"upload():  \tftp cd remote dir: {remote_dir:s}")
2168
2169        try:
2170            self.ftp.cwd(remote_dir)
2171        except Exception as detail:
2172            logging.error(f"upload():  \tCannot ftp chdir: {str(detail):s}  Skipping...")
2173        else:
2174            # Add the local root path to get the local file name.
2175            # Open local binary file to read from.
2176            local_file_name = self.append_root_dir(
2177                self.local_root_dir, relative_file_path)
2178            logging.debug(f"upload():  \topen local file name: {local_file_name:s}")
2179
2180            try:
2181                f = open(local_file_name, "rb")
2182                try:
2183                    # f.read() is called on each block of the binary file until
2184                    # EOF.
2185                    logging.debug(f"upload():  \tftp STOR file {file_name:s}")
2186                    self.ftp.storbinary("STOR " + file_name, f)
2187                except Exception as detail:
2188                    logging.error(f"upload():  \tCannot ftp storbinary: {str(detail):s}")
2189                f.close()
2190            except IOError as detail:
2191                logging.error(f"upload():  \tCannot open local file {local_file_name:s} for reading:  {str(detail):s}")
2192
2193    def finish(self):
2194        """Log out of an ftp session"""
2195        logging.debug(f"Finished with UpdateWeb object of class {type(self)}")
2196        try:
2197            self.ftp.quit()
2198        except Exception as detail:
2199            logging.error(f"Cannot ftp quit because {str(detail):s}")
2200
2201# ----------------------------------------------------------------------------
2202#  Main function
2203# ----------------------------------------------------------------------------
2204
2205def main(raw_args=None):
2206    """Main program.  Clean up and update my website."""
2207
2208    # Print the obligatory legal notice.
2209    print("""
2210    updateweb Version 7.3 - A Python utility program which maintains my web site.
2211    Copyright (C) 2007-2025 by Sean Erik O'Connor.  All Rights Reserved.
2212
2213    It deletes temporary files, rewrites old copyright lines and email address
2214    lines in source files, then synchronizes all changes to my web sites.
2215
2216    updateweb comes with ABSOLUTELY NO WARRANTY; for details see the
2217    GNU General Public License.  This is free software, and you are welcome
2218    to redistribute it under certain conditions; see the GNU General Public
2219    License for details.
2220    """)
2221
2222    # Put ALL the main code into a try block!
2223    try:
2224        # ---------------------------------------------------------------------
2225        #  Load default settings and start logging.
2226        # ---------------------------------------------------------------------
2227
2228        # Default user settings.
2229        user_settings = UserSettings()
2230
        print(f"Running main( {raw_args} ) Python version "
              f"{sys.version_info[0]:d}.{sys.version_info[1]:d}.{sys.version_info[2]:d} "
              f"local web directory {user_settings.local_root_dir}\n")
2235        # Get command line options such as --verbose.  Pass them back as flags in
2236        # user_settings.
2237        CommandLineSettings(user_settings, raw_args)
2238
2239        # Load all unit test functions named test_* from UnitTest class, run the tests and exit.
2240        if user_settings.UNITTEST:
2241            suite = unittest.TestLoader().loadTestsFromTestCase(UnitTest)
2242            unittest.TextTestRunner(verbosity=2).run(suite)
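            # A rough equivalent (not used here, only a sketch) would be to let unittest
            # drive the run itself, e.g.
            #     unittest.main(argv=["updateweb"], exit=False, verbosity=2)
            # but loading the suite explicitly keeps argument parsing under our own control.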
2243            # We are done!
2244            print("  ...done!", flush=True)
2245            return
2246
2247        # Start logging to file.  Verbose turns on logging for
2248        # DEBUG, INFO, WARNING, ERROR, and CRITICAL levels,
2249        # otherwise we log only WARNING, ERROR, and CRITICAL levels.
2250        if user_settings.VERBOSE:
2251            loglevel = logging.DEBUG
2252        else:
2253            loglevel = logging.WARNING
2254
2255        # Pick the log file name on the host.
2256        if user_settings.CLEAN:
2257            user_settings.LOGFILENAME = "/private/logLocal.txt"
2258        else:
2259            user_settings.LOGFILENAME = "/private/logRemote.txt"
2260
        logging.basicConfig(
            level=loglevel,
            format='%(asctime)s %(levelname)-8s %(message)s',
            datefmt='%a, %d %b %Y %H:%M:%S',
            filename=user_settings.local_root_dir + user_settings.LOGFILENAME,
            filemode='w')

        logging.debug("********** Begin logging")

        # Default is to skip processing or uploading MathJax files in /mathjax to the server.
        # Note that logging.basicConfig() above must run before any logging call below;
        # otherwise the first call would install a default handler and the file
        # configuration would be silently ignored.
        if not user_settings.MATHJAX:
            user_settings.DIR_TO_SKIP += "|mathjax"
        else:
            print("Processing and uploading MathJax files.  git restore any changed files and git clean -f to remove extra files...  ", end='', flush=True)
            print("Since you are loading MathJax for the first time, also remember to upload .htaccess manually.  If using FileZilla, change your FreeServer settings:  Files->Site Manager->Transfer Settings->Limit number of simultaneous connections->Check the box.  This avoids ERROR 421 Too many connections...  ", end='', flush=True)
            logging.debug("Processing and uploading MathJax files.  git restore any changed files and git clean -f to remove extra files.")
            logging.debug("Since you are loading MathJax for the first time, also remember to upload .htaccess manually.  If using FileZilla, change your FreeServer settings:  Files->Site Manager->Transfer Settings->Limit number of simultaneous connections->Check the box.  This avoids ERROR 421 Too many connections.")
2278
2279        # ---------------------------------------------------------------------
2280        #  Scan the local website, finding out all files and directories.
2281        # ---------------------------------------------------------------------
2282
2283        # Suppress newline to keep the message to the console more compact.  Flush output buffer, so we can see the message right away.
2284        print(f"Scanning the local web site from the root dir = {user_settings.local_root_dir}...  ", end='', flush=True)
2285        logging.debug(f"========================== Scanning the local web site from the root dir = {user_settings.local_root_dir}")
2286
2287        local = LocalWebSite(user_settings)
2288        local.scan()
2289
2290        # ---------------------------------------------------------------------
2291        # Clean up local website.
2292        # ---------------------------------------------------------------------
2293
2294        # Clean up the directory by rewriting source code and hypertext and removing temporary files.
2295        print("Cleaning local web site...  ", end='', flush=True)
2296        logging.debug("========================== Cleaning the local web site")
2297        local.clean()
2298
2299        # We are done with the first scan of the local web site and will dispose of it.
2300        local.finish()
2301        del local
2302
2303        # ---------------------------------------------------------------------
2304        #  Rescan the local website since there will be changes to source
2305        #  files from the clean up stage.
2306        # ---------------------------------------------------------------------
2307
2308        print(f"Rescan the local web site from root dir = {user_settings.local_root_dir}", end='', flush=True)
2309        logging.debug(f"========================== Re-Scan the local web site from root dir = {user_settings.local_root_dir}")
2310
2311        local = LocalWebSite(user_settings)
2312
2313        local.scan()
2314
2315        # ---------------------------------------------------------------------
2316        #  List all the local directories and files and their sizes.
2317        # ---------------------------------------------------------------------
2318
2319        # Local website directories.
2320        local_directory_list = local.directories
2321        logging.debug("********** List of all the Local Directories")
2322        for d in local_directory_list:
2323            logging.debug(f"\t {d:s}")
2324
        # Generate lists of the local website filenames only, and their sizes in bytes.
2326        local_files_name_size_pairs = [[file_info[user_settings.FILE_NAME], file_info[user_settings.FILE_SIZE]] for file_info in local.files]
2327        total_number_of_files = len( local_files_name_size_pairs )
2328        logging.debug(f"********** List of all the Local Files from largest to smallest.  There are {total_number_of_files:15d} files.")
2329        local_files_name_size_pairs = sorted(local_files_name_size_pairs, key=lambda name_size: name_size[1], reverse=True)
2330
2331        # Local website filenames only, and their dates and times.
2332        local_file_datetime_pairs = [[file_info[user_settings.FILE_NAME],file_info[user_settings.FILE_DATE_TIME]] for file_info in local.files]
2333        logging.debug(f"********** List of all Local Files Showing Their Date and Time")
2334        for file_datetime_pair in local_file_datetime_pairs:
2335            logging.debug(f"\t {file_datetime_pair[1].ctime():s} UTC {file_datetime_pair[0]:s}")
2336
2337        # Total number of bytes in the local files.
2338        total_number_of_bytes = 0
2339        for file_size_pair in local_files_name_size_pairs:
2340            logging.debug(f"\t {file_size_pair[1]:10d} bytes {file_size_pair[0]:s}")
2341            total_number_of_bytes += file_size_pair[1]
2342        logging.debug(f"********** Total local file size = {total_number_of_bytes:10d} bytes = {total_number_of_bytes/(1024 ** 2):10.2f} MB (not counting skipped files and directories)")
2343
2344        local.finish()
2345
2346        if user_settings.CLEAN:
2347            logging.debug("========================== Done with local file and directory cleanup...")
2348            del local
2349            print("...done!", flush=True)
2350            return
2351
2352        # ---------------------------------------------------------------------
2353        #  Scan the remote hosted web site.
2354        # ---------------------------------------------------------------------
2355
2356        print("Scanning remote web site...", end='', flush=True)
2357        logging.debug("========================== Scanning the remote web site...")
2358
2359        # Pick which website to update.
2360        logging.debug("Connecting to primary remote site.")
2361        remote = RemoteWebSite(user_settings)
2362        remote.scan()
2363        remote.finish()
2364
2365        # ---------------------------------------------------------------------
2366        #  List all the remote server directories and files and their sizes.
2367        # ---------------------------------------------------------------------
2368
2369        remote_directory_list = remote.directories
2370        logging.debug("********** Remote Directories")
2371        for d in remote_directory_list:
2372            logging.debug(f"\t {d:s}")
2373
        # Remote website filenames only, and their sizes in bytes.
2375        remote_files_name_size_list = [[file_info[user_settings.FILE_NAME], file_info[user_settings.FILE_SIZE]] for file_info in remote.files]
2376        total_number_of_files = len( remote_files_name_size_list )
2377        logging.debug(f"********** Remote Files [num files = {total_number_of_files:15d}]")
2378        remote_files_name_size_list = sorted(remote_files_name_size_list, key=lambda name_size: name_size[1], reverse=True)
2379        total_number_of_bytes = 0
2380        for file_size in remote_files_name_size_list:
2381            logging.debug(f"\t {file_size[1]:10d} bytes {file_size[0]:s}")
2382            total_number_of_bytes += file_size[1]
2383        logging.debug(f"\tTotal file size on remote (not counting skipped files and directories) = {total_number_of_bytes:10d} bytes = {total_number_of_bytes/(1024 ** 2):10.2f} MB")
2384
2385        # ---------------------------------------------------------------------
2386        # Synchronize the local and remote web sites.
2387        # ---------------------------------------------------------------------
2388
2389        print("Synchronizing remote and local web sites...", end='', flush=True)
2390        logging.debug("========================= Synchronizing remote and local web sites...")
2391
2392        # Primary website.
2393        logging.debug("Connecting to primary remote site for synchronization.")
2394        sync = UpdateWeb(user_settings,
2395                         local.directories,
2396                         local.files,
2397                         remote.directories,
2398                         remote.files)
2399
2400        sync.update()
2401        sync.finish()
2402
2403        del sync
2404        del remote
2405        del local
2406        print("...done!", flush=True)
2407
2408    except UpdateWebException as detail:
2409        logging.error(f"Couldn't update the web directory:  {str(detail):s}.  Aborting...")
2410
2411    except RecursionError as detail:
2412        logging.error(f"Walking the directory tree became too deep for Python's recursion stack depth of {sys.getrecursionlimit():d} You can increase it with sys.setrecursionlimit(limit) {str(detail):s}.  Aborting...")
2413
2414if __name__ == '__main__':
2415    """Python executes all code in this file.  Finally, we come here.  
2416
2417    * If we are executing this file as a standalone Python script, 
2418      the name of the current module is set to __main__ and thus we'll call the main() function.
2419
2420    * But if we are importing this code as a module, and calling it from another script, we will do this instead:
2421
2422        import updateweb
2423        updateweb.main(["--test"])"""
2424
2425    main()