1#!/usr/bin/env python3
2# ============================================================================
3#
4# NAME
5#
6# updateweb.py
7#
8# DESCRIPTION
9#
10# Python script which updates my web sites.
11#
12# It does miscellaneous cleanup on my local copy of the web site on disk,
13# including updating copyright information, then synchronizes the local
14# copy to my remote server web sites using FTP.
15#
16# USAGE
17#
18# It's best to use the associated makefile.
19# But you can call this Python utility from the command line,
20#
21# $ python updateweb.py Clean up my local copy, then use it
22# to update my remote web server site.
23# Log warnings and errors.
24# $ python updateweb.py -v Same, but log debug messages also.
25# $ python updateweb.py -c Clean up my local copy only.
26# $ python updateweb.py -t Run unit tests only.
27# $ python updateweb.py -m Upload MathJax files (only need to do this once).
28#
29# We get username and password information from the file PARAMETERS_FILE.
30#
31# Logs are written to the files,
32#
33# logLocal.txt Local web site cleanup log.
34# logRemote.txt Remote web server update log.
35#
36# AUTHOR
37#
38# Sean E. O'Connor 23 Aug 2007 Version 1.0 released.
39#
40# LEGAL
41#
42# updateweb.py Version 7.2 - A Python utility program which maintains my web site.
43# Copyright (C) 2007-2025 by Sean Erik O'Connor. All Rights Reserved.
44#
45# This program is free software: you can redistribute it and/or modify
46# it under the terms of the GNU General Public License as published by
47# the Free Software Foundation, either version 3 of the License, or
48# (at your option) any later version.
49#
50# This program is distributed in the hope that it will be useful,
51# but WITHOUT ANY WARRANTY; without even the implied warranty of
52# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
53# GNU General Public License for more details.
54#
55# You should have received a copy of the GNU General Public License
56# along with this program. If not, see <http://www.gnu.org/licenses/>.
57#
58# The author's address is seanerikoconnor!AT!gmail!DOT!com
59# with !DOT! replaced by . and the !AT! replaced by @
60#
61# NOTES
62#
63# DOCUMENTATION
64#
65# Python interpreter: https://www.python.org/
66# Python tutorial and reference: https://docs.python.org/lib/lib.html
67# Python debugger: https://docs.python.org/3/library/pdb.html
68# Python regular expression howto: https://docs.python.org/3.7/howto/regex.html
69#
70# ============================================================================
71
72# ----------------------------------------------------------------------------
73# Load Python Packages
74# ----------------------------------------------------------------------------
75
76# OS stuff
77import sys
78import os
79import argparse
80import subprocess
81import shutil
82from pathlib import Path
83
84# Regular expressions
85import re
86
87# FTP stuff
88import ftplib
89
90# Date and time
91import time
92import stat
93import datetime
94
95# Logging
96import logging
97
98# Unit testing
99import unittest
100
101# Enumerated types (v3.4)
102from enum import Enum
103from typing import List, Any
104
105# YAML configuration files (a superset of JSON!)
106import yaml
107# Recommended by https://www.cloudbees.com/blog/yaml-tutorial-everything-you-need-get-started
108try:
109 from yaml import CLoader as Loader
110except ImportError:
111 from yaml import Loader
112
113# Python syntax highlighter. See https://pygments.org
114from pygments import highlight
115from pygments.lexers import HtmlLexer, CssLexer, JavascriptLexer, YamlLexer, MakefileLexer, BashLexer, VimLexer, TexLexer
116from pygments.lexers import PythonLexer, CppLexer, CLexer, CommonLispLexer, FortranFixedLexer, MatlabLexer, OutputLexer
117from pygments.formatters import HtmlFormatter
118
119
120# ----------------------------------------------------------------------------
121# Custom Top Level Exceptions.
122# ----------------------------------------------------------------------------
123
class UpdateWebException(Exception):
    """Raised when something goes wrong at a deep level while searching local
    files, searching remote files, or synchronizing local and remote copies,
    and we cannot recover.

    Derives from Exception, as recommended by the Python manual for
    application-defined exceptions.
    """
128
129# ----------------------------------------------------------------------------
130# User settings.
131# ----------------------------------------------------------------------------
132
class TreeWalkSettings(Enum):
    """Enum types for how to walk the directory tree."""
    # Visit all entries of a directory before descending into subdirectories.
    BREADTH_FIRST_SEARCH = 1
    # Descend into each subdirectory as soon as it is encountered.
    DEPTH_FIRST_SEARCH = 2
137
class FileType(Enum):
    """'Enum' types for properties of directories and files."""
    # The entry is a directory.
    DIRECTORY = 0
    # The entry is a regular file.
    FILE = 1
    # The entry exists only in the local website copy.
    ON_LOCAL_ONLY = 2
    # The entry exists only on the remote web server.
    ON_REMOTE_ONLY = 3
    # The entry exists in both the local copy and on the remote server.
    ON_BOTH_LOCAL_AND_REMOTE = 4
145
class UserSettings:
    """Megatons of user selectable settings.

    Holds logging flags, server credentials (loaded from a private YAML file),
    and all the precompiled regular expressions used to clean up and
    synchronize the website.
    """
    # Logging control.
    LOGFILENAME = ""
    VERBOSE = False    # Verbose mode.  Prints out everything.
    CLEAN = False      # Clean the local website only.
    UNITTEST = False   # Run a unit test of a function.
    MATHJAX = False    # Process and upload MathJax files to server.

    # When diving into the MathJax directory, web walking the deep directories
    # may exceed Python's default recursion limit of 1000.
    RECURSION_DEPTH = 5000
    sys.setrecursionlimit(RECURSION_DEPTH)

    # Fields in the file information (file_info) structure.
    # For example, file_info =
    #     [ '/WebDesign/EquationImages/equation001.png',  -- The file name.
    #       1,                                     -- Enum type: Is it a file? dir? on local? on remote? on both?
    #       datetime.datetime(2010, 2, 3, 17, 15), -- UTC encoded in a datetime class.
    #       4675]                                  -- File size in bytes.
    FILE_NAME = 0
    FILE_TYPE = 1
    FILE_DATE_TIME = 2
    FILE_SIZE = 3

    # Server settings.
    SERVER_SETTINGS_FILE_NAME = "/private/updateweb.yaml"
    SERVER_NAME = None
    USER_NAME = None
    PASSWORD_NAME = None
    FTP_ROOT_NAME = None
    FILE_SIZE_LIMIT_NAME = None

    # Map month names onto numbers.
    monthToNumber = {
        'Jan': 1,
        'Feb': 2,
        'Mar': 3,
        'Apr': 4,
        'May': 5,
        'Jun': 6,
        'Jul': 7,
        'Aug': 8,
        'Sep': 9,
        'Oct': 10,
        'Nov': 11,
        'Dec': 12}

    # List of directories to skip over when processing or uploading the web page.
    # Some are private but most are dir of temporary files.
    # They will be listed as WARNING in the log.
    # Examples:
    #     My private admin settings directory.
    #     Git or SVN local admin directories.
    #     Compile build directories from XCode.
    #     PyCharm build directories.
    #     Python cache directories.
    #     Jupyter checkpoint directories.
    #     XCode temporary file crap.
    DIR_TO_SKIP = "private|.git|.github|.svn|build|XCodeOutput|Debug|Release|PyCharm|.idea|__pycache__|.ipynb_checkpoints|ModuleCache.noindex|SymbolCache.noindex|Primpoly-[a-z]"

    # List of files to skip when processing or uploading to the web page.
    # They will be listed as WARNING in the log.
    # Examples:
    #     MathJax yml file.
    #     .htaccess (because it doesn't show up on the output of ftp LIST, so we must upload manually)
    FILE_TO_SKIP = ".travis.yml|.svnignore|.htaccess"

    # Suffixes for temporary files which will be deleted during the cleanup
    # phase.
    TEMP_FILE_SUFFIXES = r"""           # Use Python raw strings.
        \.                              # Match the dot in the file name.
                                        # Now begin matching the file name suffix.
                                        # (?: non-capturing match for the regex inside the parentheses,
                                        # i.e. matching string cannot be retrieved later.
                                        # Now match any of the following file extensions:
        (?: o | obj | lib |             # Object files generated by C, C++, etc compilers
            pyc |                       # Object file generated by the Python compiler
            ilk | pdb | sup |           # Temp files from VC++ compiler
            idb | ncb | opt | plg |     # Temp files from VC++ compiler
            sbr | bsc | map | bce |     # Temp files from VC++ compiler
            res | aps | dep | db |      # Temp files from VC++ compiler
            jbf |                       # Paintshop Pro
            class | jar |               # Java compiler
            fas |                       # CLISP compiler
            swp | swo |                 # Vim editor
            toc | aux |                 # TeX auxiliary files (not .synctex.gz or .log)
            DS_Store | _\.DS_Store |    # macOS finder folder settings.
            _\.Trashes |                # macOS recycle bin
            gdb_history)                # GDB history
        $                               # Now we should see only the end of line.
        """

    # Special case: Vim temporary files contain a twiddle anywhere in the
    # name.
    VIM_TEMP_FILE_EXT = "~"

    # Suffixes for temporary directories which should be deleted during the
    # cleanup phase.
    TEMP_DIR_SUFFIX = r"""              # Use Python raw strings.
        (?: Debug | Release |           # C++ compiler
            ipch | \.vs |               # Temp directories from VC++ compiler
            \.Trashes | \.Trash)        # macOS recycle bin
        $
        """

    # File extension for an internally created temporary file.
    TEMP_FILE_EXT = ".new"

    # Identify source file types.
    HYPERTEXT_FILE_PATTERN = r"""       # Use Python raw strings.
        (\.                             # Match the filename suffix after the .
            (?: html | htm |            # HTML hypertext
                css)                    # CSS style sheet
        $)                              # End of line.
        """

    # NOTE: Dots in the literal dot-file names below are escaped (\.) so that
    # e.g. "avimrc" is not accidentally matched; an unescaped . matches any
    # character in a regular expression.
    SOURCE_FILE_PATTERN = r"""          # Use Python raw strings.
        (?: makefile$ |                 # Any file called makefile is a source file.
                                        # Note the $ at the end so we don't reprocess .gitconfig.html -> .gitconfig.html.html
            \.vimrc$ |                  # Vim script
            (\.bashrc$ |                # Bash configuration files.
             \.bash_profile$ |
             \.bash_logout$)
            |
            (\.gitignore$ |             # Git configuration files.
             \.gitignore_global$ |
             \.gitconfig$)
            |
            (\.                         # Match the filename suffix after the .
                                        # Now match any of these suffixes:
                (?:
                    c | cpp | h | hpp | # C++ and C
                    js |                # Javascript
                    py |                # Python
                    lsp |               # LISP
                    ipynb |             # Jupyter notebook
                    m |                 # MATLAB
                    FOR | for | f |     # FORTRAN
                    yaml |              # YAML = JSON superset
                    tex |               # LaTeX
                    txt | dat |         # Data files
                    sh)                 # Bash
            $)                          # End of line.
        )
        """

    # Special case of certain HTML and CSS files for which we want to generate a syntax highlighted source code listing.
    SPECIAL_FILE_TO_HIGHLIGHT_PATTERN = r"""
        (?: ^life\.html$ |              # We want a listing of this particular HTML file.
            ^index\.html$ |             # I want to list my top level HTML file.  (There is only one file with this name at the top level web directory.)
            ^webPageDesign\.html$ |     # and also this HTML example file, but no others.
            ^StyleSheet\.css$ )         # I want to list my style sheet.
        """

    # Files for which we want to generate a syntax highlighted source code listing.
    # Uses an f-string combined with a raw-string.
    FILE_TO_HIGHLIGHT_PATTERN = fr"""
        (?: {SPECIAL_FILE_TO_HIGHLIGHT_PATTERN} |
            {SOURCE_FILE_PATTERN} )
        """

    # Update my email address.
    # This is tricky:  Prevent matching and updating the name within in this
    # Python source file by using the character class brackets.
    OLD_EMAIL_ADDRESS = r"""
        artificer\!AT\!sean[e]rikoconnor\!DOT\!freeservers\!DOT\!com
        """
    NEW_EMAIL_ADDRESS = "seanerikoconnor!AT!gmail!DOT!com"

    # List of patterns to match, match groups to pull out of the old string, new strings to generate from these two items.
    # Read patterns and strings from the updateweb.yaml file.
    STRING_REPLACEMENT_LIST = []
    # Pairs of test strings and their correct match/replacements.
    STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST = []

    # Change all old software version lines of the form
    #     Primpoly Version nnnn.nnnn
    # to the new software version.
    # Note that since we are using raw strings leading and trailing whitespace
    # is ignored in both pattern and replacement.
    CURRENT_SOFTWARE_VERSION = r"""
        Primpoly
        \s+
        Version
        \s+
        ([0-9]+)    # The two part version number NNN.nnn
        \.
        ([0-9]+)
        """
    NEW_SOFTWARE_VERSION = r"""
        Primpoly Version 16.3
        """

    # Match a copyright line.  Then extract the copyright symbol which can be
    # ascii (C) or HTML &copy; and extract the old year.
    TWO_DIGIT_YEAR_FORMAT = "%02d"
    COPYRIGHT_LINE = r"""
        Copyright                       # Copyright.
        \s+                             # One or more spaces.
        (?P<symbol> \(C\) | &copy;)     # Match and extract the copyright symbol.
        \D+                             # Any non-digits.
        (?P<old_year>[0-9]+)            # Match and extract the old copyright year,
                                        # then place it into variable 'old_year'
        -                               # to
        ([0-9]+)                        # New copyright year.
        """

    # Match a line containing the words,
    #     last updated YY
    # and extract the two digit year YY.
    LAST_UPDATED_LINE = r"""
        last\s+                 # Match the words "last updated"
        updated\s+
        \d+                     # Day number
        \s+                     # One or more blanks or tabs
        [A-Za-z]+               # Month
        \s+                     # One or more blanks or tabs
        (?P<year>\d+)           # Two digit year.  Place it into the variable 'year'
        """

    # Web server root directory.
    DEFAULT_ROOT_DIR = "/"

    # The ftp listing occasionally shows a date newer than the actual date.
    # On my server, it could be 6 months newer when we are near New Year's Day.  Typically the server file time is only a 1 or 2 minutes newer.
    # But if the remote file time is much newer, it might be an old file with a bad date/time.
    # Upload the file to be safe.
    # How to see the time differences from the log if they are large:
    #     egrep -o "Remote file.*is MUCH newer.*days" logRemote.txt
    #     Remote file Finance/MortgageLoanDerivation.tex.html is MUCH newer[8.0 minutes] by 885753.0 seconds = 14762.5 minutes = 246.0 hours = 10.3 days
    # How to see the time differences from the log if they are small and we wait and NOT upload:
    #     egrep -o "Remote file.*is newer.*days" logRemote.txt
    #     Remote file error404.html is newer by 102.0 seconds = 1.7 minutes = 0.0 hours = 0.0 days
    #     Remote file index.html is newer by 113.0 seconds = 1.9 minutes = 0.0 hours = 0.0 days
    MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD = 8.0

    # Upload only if we are newer by more than a few minutes.  Allows for a little slop in time stamps on server or host.
    MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD = 3.0

    # An ftp list command line should be at least this many chars, or we'll
    # suspect an error.
    MIN_FTP_LINE_LENGTH = 7

    # Parse an ftp listing, extracting <bytes> <mon> <day> <hour> <min> <year> <filename>
    # ftp listings are generally similar to UNIX ls -l listings.
    #
    # Some examples:
    #
    # (1) Freeservers ftp listing,
    #
    #          0        1    2           3         4        5   6   7      8
    #     drwxr-xr-x    3 1000           1000         4096 Nov 18  2006 Electronics
    #     -rw-r--r--    1 1000           1000        21984 Jun  4 03:46 StyleSheet.css
    #     -rw-r--r--    1 1000           1000         2901 Sep 26 17:12 allclasses-frame.html
    #
    # (2) atspace ftp listing,
    #
    #     drwxr-xr-x    3 seanerikoconnor vusers       49 Apr  7  2006 Electronics
    #     -rw-r--r--    1 seanerikoconnor vusers    21984 Jun  4 04:03 StyleSheet.css
    #
    FTP_LISTING = r"""
        [drwx-]+            # Unix type file mode.
        \s+                 # One or more blanks or tabs.
        \d+                 # Number of links.
        \s+
        \w+                 # Owner.
        \s+
        \w+                 # Group.
        \s+
        (?P<bytes> \d+)     # File size in bytes, placed into the variable 'bytes'.
        \s+
        (?P<mon> \w+)       # Month modified, placed into the variable 'mon'.
        \s+
        (?P<day> \d+)       # Day modified, placed into the variable 'day'.
        \s+
        (
            (?P<hour> \d+)  # Hour modified, placed into the variable 'hour'.
            :
            (?P<min> \d+)   # Minute modified, placed into the variable 'min'.
            |
            (?P<year> \d+)  # If hours and minutes are absent (happens when year is not the current year),
                            # extract the year instead.
        )
        \s+
        (?P<filename> [A-Za-z0-9"'.\-_,~()=+#]+)    # Path and file name containing letters, numbers,
                                                    # and funny characters.  We must escape some of
                                                    # these characters with a backslash, \.
        """

    # HTML header up to the style sheet.
    BASIC_HTML_BEGIN = \
        """
        <!DOCTYPE html>
        <html lang="en-US">  <!-- Set language of this page to USA English. -->

        <head>
            <!-- This page uses Unicode characters. -->
            <meta charset="utf-8">

            <!-- Set viewport to actual device width.  Any other settings makes the web page initially appear zoomed-in on mobile devices. -->
            <meta name="viewport" content="width=device-width, initial-scale=1">

            <!-- Title appears in the web browser tab for this page.  The browser also uses it to bookmark this page. -->
            <title>Sean Erik O'Connor - Home Page and Free Mathematical Software.</title>

            <!-- Search engines will search using words in this description.  They will also display title in their search results. -->
            <meta name="description" content="Syntax Colored Source Code Listing">

            <!-- Some content management software uses the author's name. -->
            <meta name="author" content="Sean Erik O'Connor">

            <meta name="copyright" content="Copyright (C) 1986-2025 by Sean Erik O'Connor.  All Rights Reserved.">

            <!-- Begin style sheet insertion -->
            <style>
                /* Default settings for all my main web pages. */
                body
                {
                    /* A wide sans-serif font is more readable on the web. */
                    font-family: Verdana, Geneva, "Trebuchet MS", sans-serif ;

                    /* Set the body font size a little smaller than the user's default browser setting. */
                    font-size: 0.8em ;

                    /* Black text is easier to read. */
                    color: black ;

                    /* More vertical space between lines for more pleasant reading.  Use a unitless font height multiplier.
                       Length and percentage percentage values can give scrunched text due to poor inheritance behavior. */
                    line-height: 1.7 ;
                }

                <!-- Now prepare to add the syntax coloring style sheet from Pygment -->
        """

    # After the style sheet and up to the start of the article in the body.
    BASIC_HTML_MIDDLE = \
        """
            </style>
        </head>

        <body>
            <article class="content">
        """

    # After the source code listing, finish the article, body and html document.
    BASIC_HTML_END = \
        """
            </article>
        </body>

        </html>
        """

    def __init__(self):
        """Set up the user settings.

        Finds the local web root directory, loads the private server settings
        from the YAML parameter file, then precompiles all the regular
        expression patterns for speed.

        Raises:
            UpdateWebException: If the YAML settings file cannot be loaded.
        """

        self.local_root_dir = ""

        # Import the user settings from the parameter file.
        self.get_local_root_dir()
        self.get_server_settings()

        self.precompile_regular_expressions()

    def get_server_settings(self):
        """
        Read web account private settings from a secret offline parameter file.
        These also hold patterns to match and replace in all of our source pages.

        Raises:
            UpdateWebException: If the YAML file cannot be opened, or does not
                contain exactly two YAML documents (server settings, then
                pattern/replacement settings).
        """

        # Private file which contains my account settings.
        settings_file_name = self.local_root_dir + self.SERVER_SETTINGS_FILE_NAME
        # Recommended by
        # https://www.cloudbees.com/blog/yaml-tutorial-everything-you-need-get-started
        try:
            # Use a context manager so the file handle is always closed.
            # yaml.load_all() is lazy, so consume all the YAML documents
            # while the stream is still open.
            with open(settings_file_name, "r") as stream:
                yaml_document_list: list[Any] = list(yaml.load_all(stream, Loader))
        except OSError as detail:
            logging.error(f"Cannot open the YAML file {settings_file_name:s}. Unable to read the settings because: {str(detail):s}")
            # Rethrow the exception higher.
            raise UpdateWebException("Cannot load the settings. See the log file for details. Aborting... ") from detail
        num_yaml_docs = len(yaml_document_list)
        if num_yaml_docs != 2:
            logging.error(f"Wrong number of YAML documents = {num_yaml_docs:3d} in the user settings file. Aborting...")
            raise UpdateWebException("Cannot load the settings. See the log file for details. Aborting... ")

        # Load all the server settings.
        self.SERVER_NAME = yaml_document_list[0]['ftp_server_name']
        self.USER_NAME = yaml_document_list[0]['ftp_user_name']
        self.PASSWORD_NAME = yaml_document_list[0]['ftp_password']
        self.FTP_ROOT_NAME = yaml_document_list[0]['remote_directory']
        self.FILE_SIZE_LIMIT_NAME = int(yaml_document_list[0]['file_size_limit_Kb'])

        # Load all the tuples which contain patterns to match and the strings to replace, from document #1 in the YAML file.
        self.STRING_REPLACEMENT_LIST = []
        pat_rep_yaml_list = yaml_document_list[1]['pattern_match_replacement_string_list']
        for pat_rep in pat_rep_yaml_list:
            # Fetch the regular expression and compile it for speed.
            verbose_regex = pat_rep['pattern']
            pat = re.compile(verbose_regex, re.VERBOSE | re.IGNORECASE)
            # Since we use raw strings, we need to strip off leading and trailing whitespace.
            replacement_string = pat_rep['replacement_string'].strip()
            self.STRING_REPLACEMENT_LIST.append([pat, replacement_string])

        # Load the test and verify strings.
        test_verify_strings_list = yaml_document_list[1]['test_verify_string_list']
        for test_verify_string in test_verify_strings_list:
            test_string = test_verify_string['test_string'].strip()
            verify_string = test_verify_string['verify_string'].strip()
            self.STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST.append([test_string, verify_string])

        print("   ...done!", flush=True)
        return

    def get_local_root_dir(self):
        """Get the local website root directory on this platform.

        Sets self.local_root_dir.  On an unrecognized platform it is left
        unchanged (empty when called from __init__).
        """

        # Each platform has a definite directory for the web page.
        local_web_dir_path = "/Desktop/Sean/WebSite"

        # macOS, Linux (my Cyberpower PC running Ubuntu), and Cygwin all put
        # the website under the user's home directory.
        if sys.platform.startswith(('darwin', 'linux', 'cygwin')):
            self.local_root_dir = str(Path.home()) + local_web_dir_path
        return

    def precompile_regular_expressions(self):
        """For speed precompile the regular expression search patterns.

        The compiled pattern objects are bound to the instance, shadowing the
        class-level raw-string versions of the same names.
        """
        self.COPYRIGHT_LINE = re.compile(self.COPYRIGHT_LINE, re.VERBOSE | re.IGNORECASE)
        self.CURRENT_SOFTWARE_VERSION = re.compile(self.CURRENT_SOFTWARE_VERSION, re.VERBOSE | re.IGNORECASE)
        self.FTP_LISTING = re.compile(self.FTP_LISTING, re.VERBOSE | re.IGNORECASE)
        self.TEMP_FILE_SUFFIXES = re.compile(self.TEMP_FILE_SUFFIXES, re.VERBOSE | re.IGNORECASE)
        self.TEMP_DIR_SUFFIX = re.compile(self.TEMP_DIR_SUFFIX, re.VERBOSE)
        self.SOURCE_FILE_PATTERN = re.compile(self.SOURCE_FILE_PATTERN, re.VERBOSE)
        self.HYPERTEXT_FILE_PATTERN = re.compile(self.HYPERTEXT_FILE_PATTERN, re.VERBOSE)
        self.OLD_EMAIL_ADDRESS = re.compile(self.OLD_EMAIL_ADDRESS, re.VERBOSE | re.IGNORECASE)
        self.FILE_TO_HIGHLIGHT_PATTERN = re.compile(self.FILE_TO_HIGHLIGHT_PATTERN, re.VERBOSE)
        self.LAST_UPDATED_LINE = re.compile(self.LAST_UPDATED_LINE, re.VERBOSE | re.IGNORECASE)
591
592# ----------------------------------------------------------------------------
593# Unit test individual functions.
594# ----------------------------------------------------------------------------
595
class UnitTest(unittest.TestCase):
    """Unit tests for the individual helper functions and user settings."""

    def setUp(self):
        """Initialize the UnitTest class.

        UserSettings.__init__ already finds the local root directory, so no
        extra call to get_local_root_dir() is needed here.
        """
        self.user_settings = UserSettings()

    def tearDown(self):
        """Clean up the UnitTest class."""
        self.user_settings = None

    def test_copyright_updating(self):
        """Test copyright line updating to the current year."""
        # Prevent web cleaning from rewriting strings by splitting them up and concatenating them.
        line_before_update = "Copyright (C) 19" + "99-20" + "20" + " by Sean Erik O'Connor.  All Rights Reserved.  Copyright &copy; 1999-2025 by Sean Erik O'Connor"
        line_after_update_actual = "Copyright (C) 1999-2025 by Sean Erik O'Connor.  All Rights Reserved.  Copyright &copy; 1999-2025 by Sean Erik O'Connor"
        pat = self.user_settings.COPYRIGHT_LINE
        match = pat.search(line_before_update)

        if match:
            old_year = int(match.group('old_year'))
            # Same as call to self.get_current_year():
            current_year = int(time.gmtime()[0])
            if old_year < current_year:
                # We matched and extracted the old copyright symbol into the variable
                # 'symbol' using the pattern syntax (?P<symbol> \(C\) | &copy;)
                # We now insert it back by placing the special syntax
                # \g<symbol> into the replacement string.
                new_copyright = r"Copyright \g<symbol> \g<old_year>-" + \
                    str(current_year)
                line_after_update_computed = pat.sub(new_copyright, line_before_update)
                self.assertEqual(
                    line_after_update_actual,
                    line_after_update_computed,
                    f"newline = |{line_after_update_actual:s}| line_after_update_computed = |{line_after_update_computed:s}|")
            else:
                self.fail()
        else:
            self.fail()

    def test_update_software_version(self):
        """Test updating to a new version of Primpoly."""
        # Prevent web cleaning from rewriting strings by splitting them up and concatenating them.
        old_version_line = "| Primpoly Version 00." + "0 - A Program for Computing Primitive Polynomials.|"
        new_version_line = "| Primpoly Version 16." + "3 - A Program for Computing Primitive Polynomials.|"
        pat = self.user_settings.CURRENT_SOFTWARE_VERSION
        match = pat.search(old_version_line)
        if match:
            # Note that since we are using raw strings leading and trailing
            # whitespace is ignored.
            new_version = self.user_settings.NEW_SOFTWARE_VERSION.lstrip().strip()
            updated_version_line = pat.sub(new_version, old_version_line)
            self.assertEqual(updated_version_line, new_version_line, f"updated version line = {updated_version_line:s} new line = {new_version_line:s}")
        else:
            self.fail()

    def test_extract_filename_from_ftp_listing(self):
        """Test parsing an FTP listing."""
        ftp_line = "-rw-r--r--    1 1000           1000         2901 Sep 26 17:12 allclasses-frame.html"
        extracted_file_name = "allclasses-frame.html"
        pat = self.user_settings.FTP_LISTING
        match = pat.search(ftp_line)
        if match:
            filename = match.group('filename')
            self.assertEqual(
                filename,
                extracted_file_name,
                f"ftp_line = {ftp_line:s} extracted file name = {extracted_file_name:s}")
        else:
            self.fail()

    def test_get_file_time_and_date(self):
        """Test getting a file time and date.

        NOTE(review): assumes a specific file with a specific modification
        time exists under the local web root; this test is machine-specific.
        """
        # Point to an old file.
        file_name = "./Images/home.png"
        full_file_name = self.user_settings.local_root_dir + '/' + file_name
        # Get the UTC time.
        file_epoch_time = os.path.getmtime(full_file_name)
        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
        # Create a datetime object for the file.
        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class;  year, month, day, hour, minute, seconds.
        # Check if the file time matches what we would see if we did ls -l <file_name>
        computed = f"file {file_name:s} datetime {d.ctime():s}"
        actual = "file ./Images/home.png datetime Wed Jan  1 03:42:41 2025"
        self.assertEqual(computed, actual)

    def test_set_file_time_and_date(self):
        """Test setting a file time and date.

        NOTE(review): also machine-specific, like test_get_file_time_and_date.
        """
        file_name = "./Images/home.png"
        full_file_name = self.user_settings.local_root_dir + '/' + file_name
        # Create a temporary file in the same directory.
        temp_file_name = "temporal.tmp"
        full_temp_file_name = self.user_settings.local_root_dir + temp_file_name
        try:
            with open(full_temp_file_name, 'w') as fp:
                fp.write("The End of Eternity")
        except OSError as detail:
            logging.error(f"Cannot open or write to the file {full_temp_file_name:s}: {str(detail):s} Aborting...")
            raise UpdateWebException("Failed the unit test for setting time and date of a file. See the log file for details. Aborting...") from detail
        # Get the old file time.  Set the temporary file to the same time.
        file_stat = os.stat(full_file_name)
        os.utime(full_temp_file_name, (file_stat[stat.ST_ATIME], file_stat[stat.ST_MTIME]))
        # What is the temporary file's time now?
        file_epoch_time = os.path.getmtime(full_temp_file_name)
        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class;  year, month, day, hour, minute, seconds.
        # Is the temporary file time set properly?
        computed = f"file {file_name:s} datetime {d.ctime():s}"
        actual = "file ./Images/home.png datetime Wed Jan  1 03:42:41 2025"
        self.assertEqual(computed, actual)
        os.remove(full_temp_file_name)

    def test_difference_of_time_and_date(self):
        """Test a date difference calculation."""
        file_name = "./Images/home.png"
        full_file_name = self.user_settings.local_root_dir + '/' + file_name
        # Get the UTC time.
        file_epoch_time = os.path.getmtime(full_file_name)
        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
        # Create a datetime object for the file.
        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class;  year, month, day, hour, minute, seconds.
        # Slightly change the date and time by adding 1 minute.
        d2 = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5]+1)  # year, month, day, hour, minute, second
        time_delta = d2 - d
        seconds_different = time_delta.total_seconds()
        minutes_different = seconds_different / 60.0
        hours_different = minutes_different / 60.0
        days_different = hours_different / 24.0
        computed = f"difference {days_different:8.5f} days, {hours_different:8.5f} hours {minutes_different:8.5f} minutes, {seconds_different:8.5f} seconds"
        actual = "difference  0.00001 days,  0.00028 hours  0.01667 minutes,  1.00000 seconds"
        self.assertEqual(computed, actual)

    def test_pattern_match_dir_to_skip(self):
        """Test if skipping certain named directories is recognizing the dir names."""
        dir_skip = "Primpoly-cswhfrwgwdikgzfdpiorbeaiennz"
        pat = re.compile(self.user_settings.DIR_TO_SKIP)
        # search() returns a match object or None, which maps directly onto pass/fail.
        self.assertIsNotNone(pat.search(dir_skip))

    def test_file_name_to_syntax_highlight(self):
        """Test if syntax highlighting recognizes file names to highlight."""
        file_name1 = "Computer/hello.lsp"
        file_name2 = "Computer/life.html"
        p = self.user_settings.FILE_TO_HIGHLIGHT_PATTERN
        # Both a source file and a specially-listed HTML file must match.
        self.assertIsNotNone(p.search(Path(file_name1).name))
        self.assertIsNotNone(p.search(Path(file_name2).name))

    def test_user_settings(self):
        """Test whether user settings are correctly initialized."""
        computed = f"File size limit = {int(self.user_settings.FILE_SIZE_LIMIT_NAME):d} K"
        actual = "File size limit = 50000 K"
        self.assertEqual(computed, actual, "File size limit settings are incorrect.")

    def test_check_replace_substring(self, debug=True):
        """Test the substring pattern match and replacement functions which use the list of match/replace pairs in the YAML file.
        For troubleshooting, turn on debug.
        """
        test_verify_pairs = self.user_settings.STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST
        # Iterate over all test strings.
        for pair in test_verify_pairs:
            [test_string, verify_string] = pair
            if debug:
                print(f">>>>>>> next test string   = {test_string}")
                print(f">>>>>>> next verify string = {verify_string}")
            # Iterate over all patterns and replacements.
            for match_replace_tuple in self.user_settings.STRING_REPLACEMENT_LIST:
                [pat, rep_string] = match_replace_tuple
                # Only print these trace lines when debugging, as the docstring promises.
                if debug:
                    print(f"\t-------> next pattern     = {pat}")
                    print(f"\t-------> next replacement = {rep_string}")
                match = pat.search(test_string)
                # The pattern match succeeds.
                if match:
                    try:
                        sub = pat.sub(rep_string, test_string)
                        # String replacement succeeds for this pattern/replace pair iteration.
                        if debug:
                            print(f"\t\t.......> match and replace: {test_string} ---> {sub}")
                        test_string = sub
                    except IndexError as detail:
                        print(f"\t\t.......> Caught an exception: {str(detail):s}. Replacement failed.")
                        if debug:
                            self.assertTrue(False)
                elif debug:
                    print(f"\t\t.......> no match for pattern = {pat} in test string = {test_string}")
            # Done with all pattern/replace on test string.
            # Check this test string in the list.
            self.assertEqual(test_string, verify_string, f"\ntest_string   = |{test_string:s}|\nverify_string = |{verify_string:s}|\n")
            if debug:
                print(f"\t******* DONE with all pattern matches and replacements on this test/verify string pair.\n")
789
790# ----------------------------------------------------------------------------
791# Command line options.
792# ----------------------------------------------------------------------------
793
class CommandLineSettings(object):
    """Parse the command line options and record them in the user settings.

    Sets user_settings.VERBOSE / CLEAN / UNITTEST / MATHJAX to True for each
    flag present on the command line; flags that are absent are left untouched.
    """

    def __init__(self, user_settings, raw_args=None):
        """Parse command line options from raw_args (defaults to sys.argv) and
        update user_settings accordingly."""
        command_line_parser = argparse.ArgumentParser(
            description="updateweb options")

        # Log all changes, not just warnings and errors.
        command_line_parser.add_argument(
            "-v",
            "--verbose",
            help="Turn on verbose mode to log everything",
            action="store_true")

        # Clean up the local website only.
        command_line_parser.add_argument(
            "-c",
            "--clean",
            help="Do a cleanup on the local web site only.",
            action="store_true")

        # Also upload the MathJax directory.
        # (Bug fix: this comment previously said "Clean up the local website only.",
        # copy-pasted from the --clean option above.)
        command_line_parser.add_argument(
            "-m",
            "--mathjax",
            help="""ALSO upload mathjax directory.\
 Do this if you have a new version of MathJax or if you've reset your server and deleted the /mathjax remote directory.\
 You'll need to do git restore on any altered files in the local branch and git clean -f to remove any newly created files.\
 NOTE: If you did reset your server and delete all files, run the command find . -name '*.*' -exec touch {} \\; from the web page root directory.\
 Also run find . -name '*' -exec touch {} \\; This will ensure accurate times on the server.""",
            action="store_true")

        # Run unit tests only.
        command_line_parser.add_argument("-t", "--test",
                                         help="Run unit tests.",
                                         action="store_true")

        args = command_line_parser.parse_args(raw_args)

        # Only set flags that were given, so preexisting settings are preserved.
        if args.verbose:
            user_settings.VERBOSE = True
        if args.clean:
            user_settings.CLEAN = True
        if args.test:
            user_settings.UNITTEST = True
        if args.mathjax:
            user_settings.MATHJAX = True
842
843# ----------------------------------------------------------------------------
844# Base class which describes my web site overall.
845# ----------------------------------------------------------------------------
846
class WebSite(object):
    """
    Abstract class used for analyzing both local and remote (ftp server) websites.
    Contains the web-walking functions which traverse the directory structures and files.
    These will be overloaded in the subclasses with differently specialized methods for either walking a disk drive directory with ls commands or else walking a remote directory with FTP commands.
    Child classes may define additional functions which only they need.
    """

    def __init__(self, settings):
        """Set up root directories and the bookkeeping lists used by walk()."""

        # Import the user settings.
        self.user_settings = settings

        # Queue keeps track of directories not yet processed.
        self.queue = []

        # List of all directories traversed.
        self.directories = []

        # List of files traversed, with file information.
        self.files = []

        # Find out the root directory and go there.
        self.root_dir = self.get_root_dir()
        self.go_to_root_dir(self.root_dir)

    # Static: call it from anywhere, inside or outside the class, as WebSite.get_current_year().
    @staticmethod
    def get_current_year():
        """Get the current year (UTC)."""
        return int(time.gmtime()[0])

    @staticmethod
    def get_current_two_digit_year():
        """Get the last two digits of the current year."""
        return WebSite.get_current_year() % 100

    @staticmethod
    def is_file_info_type(file_info):
        """Return True for a file information structure (a list), False for a simple
        file name (a str). Raise UpdateWebException for anything else."""
        try:
            if isinstance(file_info, list):
                return True
            elif isinstance(file_info, str):
                return False
            else:
                logging.error("is_file_info_type found a bad type. Aborting...")
                raise UpdateWebException("Internal error for file type. See the log file for details. Aborting... ")
        except TypeError as detail:
            logging.error(f"is_file_info_type found a bad type {str(detail):s}. Aborting...")
            raise UpdateWebException("Internal error for file type. See the log file for details. Aborting... ") from detail

    def get_root_dir(self):
        """Subclass: Put code here to get the root directory"""
        return ""

    def go_to_root_dir(self, root_dir):
        """Subclass: Put code here to go to the root directory"""
        pass  # Python's do-nothing statement.

    def one_level_down(self, d):
        """Subclass: Fill in with a method which returns a list of the
        directories and files immediately beneath dir"""
        return [], []

    def walk(self, d, type_of_tree_search=TreeWalkSettings.BREADTH_FIRST_SEARCH):
        """Walk a directory in either depth first or breadth first order. BFS is the default."""

        # Get all subfiles and subdirectories off this node.
        subdirectories, subfiles = self.one_level_down(d)

        # Compile the skip patterns once per call; previously they were recompiled
        # inside the loops for every single file and directory.
        file_skip_pat = re.compile(self.user_settings.FILE_TO_SKIP)
        dir_skip_pat = re.compile(self.user_settings.DIR_TO_SKIP)

        # Add all the subfiles in order.
        for f in subfiles:

            name = self.strip_root(f)
            logging.debug(f"Webwalking: Adding file {name[self.user_settings.FILE_NAME]:s} to list.")

            # Some files are private so skip them from consideration.
            if file_skip_pat.search(name[self.user_settings.FILE_NAME]):
                logging.warning(
                    f"Webwalking: Skipping private file {name[self.user_settings.FILE_NAME]:s}")
            # Don't upload the log file due to file locking problems.
            elif name[self.user_settings.FILE_NAME].find(self.user_settings.LOGFILENAME) >= 0:
                logging.debug(f"Webwalking: Skipping log file {name[self.user_settings.FILE_NAME]:s}")
            # File size limit on some servers.
            else:
                self.files.append(name)

        # Queue up the subdirectories.
        for subdir in subdirectories:
            # Some directories are private such as .git or just temporary file
            # caches so skip them from consideration.
            if dir_skip_pat.search(subdir):
                logging.warning(f"Webwalking: Skipping private dir {subdir:s}")
            else:
                logging.debug(f"Webwalking: Pushing dir {subdir:s} on the queue.")
                self.queue.append(subdir)

        # Search through the directories.
        while len(self.queue) > 0:
            # For breadth first search, remove from beginning of queue.
            if type_of_tree_search == TreeWalkSettings.BREADTH_FIRST_SEARCH:
                d = self.queue.pop(0)

            # For depth first search, remove from end of queue.
            elif type_of_tree_search == TreeWalkSettings.DEPTH_FIRST_SEARCH:
                d = self.queue.pop()
            else:
                d = self.queue.pop(0)

            name = self.strip_root(d)
            logging.debug(f"Webwalking: Adding relative directory {name:s} to list, full path = {d:s}.")
            self.directories.append(name)

            # Bug fix: propagate the requested search order. The recursive call
            # previously always used the BFS default, so a DFS request only
            # applied at the top level.
            self.walk(d, type_of_tree_search)

    def strip_root(self, file_info):
        """Return a path, but strip off the root directory"""

        root = self.root_dir

        # Extract the file name.
        if self.is_file_info_type(file_info):
            name = file_info[self.user_settings.FILE_NAME]
        else:
            name = file_info

        # e.g. root = / and name = /Art/foo.txt yields stripped_path = Art/foo.txt
        # but root = /Sean and name = /Sean/Art/foo.txt yields stripped_path =
        # Art/foo.txt
        lenroot = len(root)
        if root == self.user_settings.DEFAULT_ROOT_DIR:
            pass
        else:
            # Also skip the path separator following the root name.
            lenroot = lenroot + 1

        stripped_path = name[lenroot:]

        if self.is_file_info_type(file_info):
            # Update the file name only; keep the type, date/time and size fields.
            return [stripped_path,
                    file_info[self.user_settings.FILE_TYPE],
                    file_info[self.user_settings.FILE_DATE_TIME],
                    file_info[self.user_settings.FILE_SIZE]]
        else:
            return stripped_path

    def append_root_dir(self, root_dir, name):
        """Append the root directory to a path"""

        # e.g. root = /, and name = Art/foo.txt yields /Art/foo.txt
        # but root = /Sean, and name = Art/foo.txt yields /Sean/Art/foo.txt
        if root_dir == self.user_settings.DEFAULT_ROOT_DIR:
            return root_dir + name
        else:
            return root_dir + "/" + name

    def scan(self):
        """Scan the directory tree recursively from the root"""
        logging.debug(f"Webwalking: Beginning recursive directory scan from root directory {self.root_dir:s}")
        self.walk(self.root_dir)

    def modtime(self, f):
        """Subclass: Get file modification time"""
        pass

    def finish(self):
        """Quit web site"""
        logging.debug(f"Finished with WebSite object of class {type(self)}")
        pass
1022
1023# ----------------------------------------------------------------------------
1024# Subclass which knows about the local web site on disk.
1025# ----------------------------------------------------------------------------
1026
1027class LocalWebSite(WebSite):
1028 """Walk the local web directory on local disk down from the root.
1029 Clean up temporary files and do other cleanup work."""
1030
1031 def __init__(self, settings):
1032 """Go to web page root and list all files and directories."""
1033
1034 # Initialize the parent class.
1035 WebSite.__init__(self, settings)
1036
1037 self.root_dir = self.get_root_dir()
1038 logging.debug(f"LocalWebSite.__init__(): \tRoot directory: {self.root_dir:s}")
1039
1040 def get_root_dir(self):
1041 """Get the name of the root directory"""
1042 return self.user_settings.local_root_dir
1043
1044 def go_to_root_dir(self, root_dir):
1045 """Go to the root directory"""
1046
1047 # Go to the root directory.
1048 logging.debug(f"LocalWebSite.go_to_root_dir(): \tchdir to root directory: {root_dir:s}")
1049 os.chdir(root_dir)
1050
1051 # Read it back.
1052 self.root_dir = os.getcwd()
1053 logging.debug(f"LocalWebSite.go_to_root_dir(): \tgetcwd root directory: {self.root_dir:s}")
1054
1055 def one_level_down(self, d):
1056 """List all files and subdirectories in the current directory, dir. For files, collect file info
1057 such as time, date and size."""
1058
1059 directories = []
1060 files = []
1061
1062 # Change to current directory.
1063 os.chdir(d)
1064
1065 # List all subdirectories and files.
1066 dir_list = os.listdir(d)
1067
1068 if dir_list:
1069 for line in dir_list:
1070 # Add the full path prefix from the root.
1071 name = self.append_root_dir(d, line)
1072 logging.debug(f"LocalWebSite.one_level_down(): \tlocal dir or file {name:s}")
1073
1074 # Is it a directory or a file?
1075 if os.path.isdir(name):
1076 directories.append(name)
1077 elif os.path.isfile(name):
1078 # First assemble the file information of name, time/date and size into a list.
1079 # Can index it like an array. For example,
1080 # file_info =
1081 # [ '/WebDesign/EquationImages/equation001.png', -- The file name.
1082 # 1, -- Enum type FileType.FILE = 1.
1083 # datetime.datetime(2010, 2, 3, 17, 15), -- UTC encoded in a date/time class.
1084 # 4675] -- File size in bytes.
1085 file_info = [name,
1086 FileType.FILE,
1087 self.get_file_date_time(name),
1088 self.get_file_size(name)]
1089 files.append(file_info)
1090
1091 # Sort the names into order.
1092 if directories:
1093 directories.sort()
1094 if files:
1095 files.sort()
1096
1097 return directories, files
1098
1099 @staticmethod
1100 def get_file_date_time(file_name):
1101 """Get a local file time and date in UTC."""
1102
1103 file_epoch_time = os.path.getmtime(file_name)
1104 file_time_utc = time.gmtime(file_epoch_time)[0: 6]
1105 # Create a datetime class from the UTC year, month, day, hour, minute, seconds.
1106 d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])
1107 return d
1108
1109 @staticmethod
1110 def get_file_size(file_name):
1111 """Get file size in bytes."""
1112 return os.path.getsize(file_name)
1113
1114 @staticmethod
1115 def clean_up_temp_file(temp_file_name, file_name, changed):
1116 """Remove the original file, rename the temporary file name to the original name.
1117 If there are no changes, just remove the temporary file.
1118 """
1119
1120 if changed:
1121 # Remove the old file now that we have the rewritten file.
1122 try:
1123 os.remove(file_name)
1124 logging.debug(f"Changes were made. Removed original file {file_name:s}")
1125 except OSError as detail:
1126 logging.error(f"Cannot remove old file {file_name:s}: {str(detail):s}. Need to remove it manually.")
1127
1128 # Rename the new file to the old file name.
1129 try:
1130 os.rename(temp_file_name, file_name)
1131 logging.debug(f"Renamed temp file {temp_file_name:s} to original file {file_name:s}")
1132 except OSError as detail:
1133 logging.error(f"Cannot rename temporary file {temp_file_name:s} to old file name {file_name:s}: {str(detail):s}. Need to rename manually")
1134 else:
1135 # No changes? Remove the temporary file.
1136 try:
1137 os.remove(temp_file_name)
1138 logging.debug(f"No changes were made. Removed temporary file {temp_file_name:s}")
1139 except OSError as detail:
1140 logging.error(f"Cannot remove temporary file {temp_file_name:s}: {str(detail):s}. Need to remove it manually.")
1141 return
1142
1143 @staticmethod
1144 def process_lines_of_file(in_file_name, out_file_name, process_line_function_list=None):
1145 """
1146 Process each line of a file with a list of functions. Create a new temporary file.
1147
1148 The default list is None which means make an exact copy.
1149 """
1150
1151 # Assume no changes.
1152 changed = False
1153
1154 # Open both input and output files for processing. Check if we cannot do it.
1155 fin = None
1156 try:
1157 fin = open(in_file_name, "r")
1158 except IOError as detail:
1159 logging.error(f"process_lines_of_file(): \tCannot open file {in_file_name:s} for reading: {str(detail):s} Aborting...")
1160 if fin is not None:
1161 fin.close()
1162 raise UpdateWebException("Internal error for processing a file. See the log file for details. Aborting... ") from detail
1163 fout = None
1164 try:
1165 fout = open(out_file_name, "w")
1166 except IOError as detail:
1167 logging.error(f"process_lines_of_file(): \tCannot open file {out_file_name:s} for writing: {str(detail):s} Aborting...")
1168 if fout is not None:
1169 fout.close()
1170 raise UpdateWebException("Internal error for processing a file. See the log file for details. Aborting... ") from detail
1171
1172 # Read each line of the file, aborting if there is a read error.
1173 try:
1174 line = fin.readline()
1175
1176 # Rewrite the next line of the file using all the rewrite functions.
1177 while line:
1178 original_line = line
1179 # If we have one or more rewrite functions...
1180 if process_line_function_list is not None:
1181 # ...apply each rewrite functions to the line, one after the other in order.
1182 for processLineFunction in process_line_function_list:
1183 if processLineFunction is not None:
1184 line = processLineFunction(line)
1185
1186 if original_line != line:
1187 logging.debug("Rewrote the line" + ":" + f"|{original_line:s}|" + "into" + ":" + f"|{line:s}| for file" + ":" + f"{in_file_name:s}")
1188 changed = True
1189
1190 fout.write(line)
1191
1192 line = fin.readline()
1193
1194 fin.close()
1195 fout.close()
1196 except IOError as detail:
1197 logging.error(f"File I/O error during reading/writing file {in_file_name:s} in process_lines_of_file: {str(detail):s} Aborting...")
1198 raise UpdateWebException("Internal error for processing a file. See the log file for details. Aborting... ") from detail
1199
1200 if changed:
1201 logging.debug(f"process_lines_of_file(): \tRewrote original file {in_file_name:s}."
1202 f"Changes are in temporary copy {out_file_name:s}")
1203
1204 # Return True if any lines were changed.
1205 return changed
1206
1207 def clean(self):
1208 """Scan through all directories and files in the local on disk website and clean them up."""
1209
1210 num_source_files_changed = 0
1211 num_source_files_syntax_highlighted = 0
1212
1213 logging.debug("Cleaning up the local web page.")
1214
1215 if self.directories is None or self.files is None:
1216 logging.error("Web site has no directories or files. Aborting...")
1217 raise UpdateWebException("Internal error for cleaning up the local web site. See the log file for details. Aborting... ")
1218
1219 for d in self.directories:
1220
1221 if self.is_temp_dir(d):
1222 # Add the full path prefix from the root.
1223 name = self.append_root_dir(self.get_root_dir(), d)
1224 try:
1225 logging.debug(f"Removing temp dir {self.root_dir:s} recursively")
1226 shutil.rmtree(name)
1227 except OSError as detail:
1228 logging.error(f"Cannot remove temp dir {name:s}: {str(detail):s}")
1229
1230 for f in self.files:
1231 # Add the full path prefix from the root.
1232 full_file_name = self.append_root_dir(
1233 self.get_root_dir(), f[self.user_settings.FILE_NAME])
1234
1235 # Remove all temporary files.
1236 if self.is_temp_file(f):
1237 try:
1238 logging.debug(f"Removing temp file {full_file_name:s}")
1239 os.remove(full_file_name)
1240 except OSError as detail:
1241 logging.error(f"Cannot remove temp dir {full_file_name:s}: {str(detail):s}")
1242
1243 # Update source code files.
1244 if self.is_source_or_hypertext_file(f):
1245 changed = self.rewrite_source_file(full_file_name)
1246 if changed:
1247 num_source_files_changed += 1
1248 logging.debug(f"Rewrote source code file {self.root_dir:s}")
1249
1250 # Generate a syntax highlighted code listing.
1251 # Make it the same time and date as the original code. Then, only if there are recent changes, we will update the remote server.
1252 if self.is_file_to_syntax_highlight(f):
1253 # syntax_highlighted_file_name = self.create_syntax_highlighted_code_listing(full_file_name, dry_run=True)
1254 syntax_highlighted_file_name = self.create_syntax_highlighted_code_listing(full_file_name)
1255 if syntax_highlighted_file_name is not None:
1256 logging.debug(f"Generated a syntax highlighted source listing file {syntax_highlighted_file_name:s} for the file {full_file_name:s}")
1257 else:
1258 logging.debug(f"Failed to generate a syntax highlighted source listing file for {full_file_name:s}")
1259 num_source_files_syntax_highlighted += 1
1260
1261 logging.debug(f"Number of source files rewritten = {num_source_files_changed:10d}")
1262 logging.debug(f"Number of source files syntax highlighted = {num_source_files_syntax_highlighted:10d}")
1263
1264 def is_temp_file(self, file_info):
1265 """Identify a file name as a temporary file"""
1266
1267 file_name = file_info[self.user_settings.FILE_NAME]
1268
1269 # Suffixes and names for temporary files be deleted.
1270 pat = self.user_settings.TEMP_FILE_SUFFIXES
1271 match = pat.search(file_name)
1272 # Remove any files containing twiddles anywhere in the name.
1273 if match or file_name.find(self.user_settings.VIM_TEMP_FILE_EXT) >= 0:
1274 return True
1275
1276 return False
1277
1278 def is_temp_dir(self, dir_name):
1279 """Identify a name as a temporary directory."""
1280
1281 p = self.user_settings.TEMP_DIR_SUFFIX
1282 return p.search(dir_name)
1283
1284 def is_source_or_hypertext_file(self, file_info):
1285 """ Check if the file name is a source file or a hypertext file."""
1286
1287 file_name = file_info[self.user_settings.FILE_NAME]
1288 p1 = self.user_settings.SOURCE_FILE_PATTERN
1289 p2 = self.user_settings.HYPERTEXT_FILE_PATTERN
1290 if p1.search(file_name) or p2.search(file_name):
1291 return True
1292 else:
1293 return False
1294
1295 def is_file_to_syntax_highlight(self, file_info):
1296 """Check if this file type should have a syntax highlighted source listing."""
1297
1298 # Take apart the file name.
1299 full_file_name = file_info[self.user_settings.FILE_NAME]
1300 file_name = Path(full_file_name).name
1301
1302 p = self.user_settings.FILE_TO_HIGHLIGHT_PATTERN
1303 if p.search(file_name):
1304 return True
1305 else:
1306 return False
1307
1308 def rewrite_substring(self, line):
1309 """Rewrite a line containing a pattern of your choice"""
1310
1311 # Start with the original unchanged line.
1312 rewritten_line = line
1313
1314 # Do the replacements in order from first to last.
1315 for match_replace_tuple in self.user_settings.STRING_REPLACEMENT_LIST:
1316 # Get the next pattern match replacement string tuple.
1317 [pat, rep_string] = match_replace_tuple
1318 # Does it match? Then do string substitution, else leave the line unchanged.
1319 match = pat.search(rewritten_line)
1320 if match:
1321 # Now we have these cases:
1322 # -No capture variables at all, but just a straightforward pattern match followed by a string substitution.
1323 # -One or more capture variable names in the pattern (?P<varname> ... ) along with the same corresponding match group names in replacement string \\g<varname> ...
1324 # If pat.sub() finds any inconsistency here such as the capture variable names not matching the group names, it will throw an exception.
1325 try:
1326 sub = pat.sub(rep_string, rewritten_line)
1327 rewritten_line = sub
1328 except IndexError as detail:
1329 logging.error(f"ERROR: {str(detail):s}. Did not find a capture variable name in the pattern (?P<varname> ... ) along with its corresponding match group name in replacement string \\g<varname> in updateweb.yaml. Did not rewrite the line |{rewritten_line:s}|")
1330
1331 return rewritten_line
1332
1333 def rewrite_email_address_line(self, line):
1334 """Rewrite lines containing old email addresses."""
1335
1336 # Search for the old email address.
1337 pat = self.user_settings.OLD_EMAIL_ADDRESS
1338 match = pat.search(line)
1339
1340 # Replace the old address with my new email address.
1341 if match:
1342 new_address = self.user_settings.NEW_EMAIL_ADDRESS
1343 sub = pat.sub(new_address, line)
1344 line = sub
1345
1346 return line
1347
1348 def rewrite_version_line(self, line):
1349 """Rewrite lines containing the current version of software."""
1350
1351 # Search for the current version.
1352 pat = self.user_settings.CURRENT_SOFTWARE_VERSION
1353 match = pat.search(line)
1354
1355 # Replace with the new version.
1356 if match:
1357 # Note that since we are using raw strings leading and trailing
1358 # whitespace is ignored.
1359 new_version = self.user_settings.NEW_SOFTWARE_VERSION.lstrip().strip()
1360 sub = pat.sub(new_version, line)
1361 line = sub
1362
1363 return line
1364
1365 def rewrite_copyright_line(self, line):
1366 """Rewrite copyright lines if they are out of date."""
1367
1368 # Match the lines,
1369 # Copyright (C) nnnn-mmmm by Sean Erik O'Connor.
1370 # Copyright © nnnn-mmmm by Sean Erik O'Connor.
1371 # and pull out the old year and save it.
1372 pat = self.user_settings.COPYRIGHT_LINE
1373 match = pat.search(line)
1374
1375 # Found a match.
1376 if match:
1377 old_year = int(match.group('old_year'))
1378
1379 # Replace the old year with the current year.
1380 # We matched and extracted the old copyright symbol into the variable
1381 # 'symbol' using the pattern syntax (?P<symbol> \(C\) | ©)
1382 # We now insert it back by placing the special syntax \g<symbol>
1383 # into the replacement string.
1384 if old_year < WebSite.get_current_year():
1385 new_copyright = r"Copyright \g<symbol> \g<old_year>-" + \
1386 str(WebSite.get_current_year())
1387 sub = pat.sub(new_copyright, line)
1388 line = sub
1389 return line
1390
1391 def rewrite_last_update_line(self, line):
1392 """Rewrite the Last Updated line if the year is out of date."""
1393
1394 # Match the last updated line and pull out the year.
1395 # last updated 01 Jan 25.
1396 p = self.user_settings.LAST_UPDATED_LINE
1397 m = p.search(line)
1398
1399 if m:
1400 last_update_year = int(m.group('year'))
1401
1402 # Convert to four digit years.
1403 if last_update_year > 90:
1404 last_update_year += 1900
1405 else:
1406 last_update_year += 2000
1407
1408 # If the year is old, rewrite to "01 Jan <current year>".
1409 if last_update_year < WebSite.get_current_year():
1410 two_digit_year = self.user_settings.TWO_DIGIT_YEAR_FORMAT % self.get_current_two_digit_year()
1411 sub = p.sub('last updated 01 Jan ' + two_digit_year, line)
1412 line = sub
1413
1414 return line
1415
1416 def rewrite_source_file(self, file_name):
1417 """Rewrite copyright lines, last updated lines, etc."""
1418 changed = False
1419
1420 # Create a new temporary file name for the rewritten file.
1421 temp_file_name = file_name + self.user_settings.TEMP_FILE_EXT
1422
1423 # Apply changes to all lines of the temporary file. Apply change functions in
1424 # the sequence listed.
1425 if self.process_lines_of_file(file_name, temp_file_name,
1426 [self.rewrite_copyright_line,
1427 self.rewrite_last_update_line,
1428 self.rewrite_email_address_line,
1429 self.rewrite_substring,
1430 self.rewrite_version_line]):
1431 logging.debug(f"Changed (rewritten) source file {file_name:s}")
1432 changed = True
1433
1434 # Rename the temporary file to the original file name. If no changes, just delete the temp file.
1435 self.clean_up_temp_file(temp_file_name, file_name, changed)
1436
1437 return changed
1438
1439 @staticmethod
1440 def create_syntax_highlighted_code_listing(source_file_name, **kwargs):
1441 """Create a syntax highlighted source listing for the file and return its name. Return None if there is an error.
1442 Keep the same date/time as the original file."""
1443
1444 # kwargs is a dictionary for key, value in kwargs.items():
1445 # for key, value in kwargs.items():
1446 # if key in kwargs:
1447 # print( f"kwargs:" )
1448 # print( f" key = |{key}|")
1449 # print( f" value = |{value}|" )
1450 dry_run_value = kwargs.get('dry_run')
1451 dry_run = False
1452 if dry_run_value is not None and dry_run_value is True:
1453 dry_run = True
1454
1455 # Take apart the file name.
1456 file_name_without_extension = Path(source_file_name).stem
1457 file_extension = Path(source_file_name).suffix
1458
1459 # Append *.html to the source code file name. This will be the syntax highlighted code listing.
1460 syntax_highlighted_file_name = f"{source_file_name}.html"
1461
1462 # In the special case of Jupyter notebooks, use the Jupyter to HTML converter.
1463 if file_extension == ".ipynb":
1464 if dry_run:
1465 logging.debug(f"Dry run only: don't generate the syntax highlighted file {syntax_highlighted_file_name:s}")
1466 return None
1467 # Python manual recommends using the run() command instead of Popen(). See https://docs.python.org/3/library/subprocess.html#subprocess.run
1468 try:
1469 shell_command = f"jupyter nbconvert {source_file_name} --to html --output {syntax_highlighted_file_name}"
1470 # Throw an exception if we can't run the process.
1471 # Capture the standard output and standar error and dump to /dev/null so it doesn't print to the command line when running this script.
1472 # Since the shell command is a single string, use shell=True in the run() command.
1473 subprocess.run([shell_command],shell=True,check=True,stdout=subprocess.DEVNULL,stderr=subprocess.DEVNULL)
1474 except subprocess.CalledProcessError as detail:
1475 logging.error(f"Cannot convert the Jupyter file {source_file_name:s} to a syntax highlighted file: {str(detail):s} Aborting...")
1476 return None
1477 # Otherwise, use the Pygments syntax highlighter.
1478 else:
1479 # First choose the language lexer from the file name itself if there's no extension.
1480 # Dotted file names are treated as the entire file name.
1481 match file_name_without_extension:
1482 case "makefile":
1483 lexer = MakefileLexer()
1484 case ".bash_profile"|".bashrc"|".bash_logout":
1485 lexer = BashLexer()
1486 case ".vimrc":
1487 lexer = VimLexer()
1488 case ".gitignore_global" | ".gitignore" | ".gitconfig":
1489 lexer = OutputLexer() # No formatting.
1490 case _:
1491 # Choose the language lexer from the file extension. Web stuff first, then programming languages.
1492 match file_extension:
1493 case ".html":
1494 lexer = HtmlLexer()
1495 case ".css":
1496 lexer = CssLexer()
1497 case ".js":
1498 lexer = JavascriptLexer()
1499 case ".sh":
1500 lexer = BashLexer()
1501 case ".py":
1502 lexer = PythonLexer()
1503 case ".c" | ".h":
1504 lexer = CLexer()
1505 case ".hpp" | ".cpp":
1506 lexer = CppLexer()
1507 case ".lsp":
1508 lexer = CommonLispLexer()
1509 case ".for" | ".FOR" | ".f":
1510 lexer = FortranFixedLexer() # Fixed format FORTRAN, not FORTRAN 90.
1511 case ".txt" | ".dat": # Generic data file; no formatting.
1512 lexer = OutputLexer()
1513 case ".tex":
1514 lexer = TexLexer() # LaTeX, TeX, or related files.
1515 case ".m":
1516 lexer = MatlabLexer()
1517 case ".yaml":
1518 lexer = YamlLexer()
1519 case _:
1520 logging.error(f"Can't find a lexer for file {source_file_name}. Cannot generate a syntax highlighted source listing. Aborting...")
1521 return None
1522
1523 # Read the source code file into a single string.
1524 try:
1525 with open(source_file_name, 'r') as fp:
1526 source_file_string = fp.read()
1527 except OSError as detail:
1528 logging.error(f"Cannot read the source code file {source_file_name:s} for syntax highlighting: {str(detail):s} Aborting...")
1529
1530 # Top level Pygments function generates the HTML for the highlighted code.
1531 highlighted_html_source_file_string = highlight(source_file_string, lexer, HtmlFormatter(linenos="inline"))
1532
1533 # The style sheet is always the same for all languages.
1534 style_sheet = HtmlFormatter().get_style_defs('.highlight')
1535
1536 # Write out the syntax colored file.
1537 if dry_run:
1538 logging.debug(f"Dry run only: don't generate the syntax highlighted file {syntax_highlighted_file_name:s}")
1539 return None
1540 else:
1541 try:
1542 # Write out the highlighted code listing in HTML with CSS style sheet attached.
1543 with open(syntax_highlighted_file_name, 'w') as fp:
1544 fp.write(UserSettings.BASIC_HTML_BEGIN)
1545 fp.write(style_sheet)
1546 fp.write(UserSettings.BASIC_HTML_MIDDLE)
1547 fp.write(highlighted_html_source_file_string)
1548 fp.write(UserSettings.BASIC_HTML_END)
1549 except OSError as detail:
1550 logging.error(f"Cannot write the syntax highlighted file {syntax_highlighted_file_name:s}: {str(detail):s} Aborting...")
1551 # ------- end Pygments syntax highlighter
1552
1553 # Set the syntax highlighted code file to the same modification and access time and date as the source file.
1554 file_stat = os.stat(source_file_name)
1555 os.utime(syntax_highlighted_file_name, (file_stat[stat.ST_ATIME], file_stat[stat.ST_MTIME]))
1556
1557 # Are the original source and the syntax highlighted code the same data and time?
1558 dates_and_times_source_file_name = LocalWebSite.get_file_date_time(syntax_highlighted_file_name)
1559 dates_and_times_syntax_highlighted_file_name = LocalWebSite.get_file_date_time(syntax_highlighted_file_name)
1560 if dates_and_times_source_file_name != dates_and_times_syntax_highlighted_file_name:
1561 logging.error(f"Source code and syntax highlighted source don't have the same times. source time = {dates_and_times_source_file_name.ctime():s} syntax highlighted time = {dates_and_times_syntax_highlighted_file_name.ctime():s} Aborting...")
1562 return None
1563
1564 logging.debug(f"Generated a syntax highlighted listing {syntax_highlighted_file_name:s} for the source code file {source_file_name:s} with the same time and date = {dates_and_times_source_file_name.ctime():s}")
1565 return syntax_highlighted_file_name
1566
1567# ----------------------------------------------------------------------------
1568# Subclass which knows about the remote web site.
1569# ----------------------------------------------------------------------------
1570
class RemoteWebSite(WebSite):
    """Walk the remote web directory on a web server down from the root.
    Use FTP commands:
        https://en.wikipedia.org/wiki/List_of_FTP_commands
    Use the Python ftp library:
        https://docs.python.org/3/library/ftplib.html
    """

    def __init__(self, user_settings):
        """Connect to the FTP server and log in.

        Reads SERVER_NAME, USER_NAME, PASSWORD_NAME and FTP_ROOT_NAME from
        user_settings.  Raises UpdateWebException (chained to the underlying
        error) if the connection or login fails.
        """

        # Root directory of FTP server.
        self.root_dir = user_settings.FTP_ROOT_NAME
        logging.debug(f"Set the remote web site ftp root dir = {self.root_dir:s}")

        # Connect to FTP server and log in.
        try:
            # self.ftp.set_debuglevel( 2 )
            # SECURITY FIX: never echo the password to the console; show only the server and user names.
            print(f"Trying ftp login to server name = {user_settings.SERVER_NAME} user name = {user_settings.USER_NAME}")
            self.ftp = ftplib.FTP(user_settings.SERVER_NAME)
            self.ftp.login(user_settings.USER_NAME, user_settings.PASSWORD_NAME)
        # Catch all exceptions with the parent class Exception: all built-in,
        # non-system-exiting exceptions are derived from this class.
        except Exception as detail:
            # Extract the string message from the exception class with str().
            logging.error(f"Remote web site cannot login to ftp server: {str(detail):s} Aborting...")
            raise UpdateWebException("Problem accessing remote web site. See the log file for details. Aborting... ") from detail
        else:
            logging.debug("Remote web site ftp login succeeded.")

        logging.debug(f"Remote web site ftp welcome message {self.ftp.getwelcome():s}")

        # Initialize the superclass (scans the remote directory tree).
        super().__init__(user_settings)

    def go_to_root_dir(self, root_dir):
        """Change to the FTP root directory and read back the server's idea of it.

        Raises UpdateWebException if either the cwd or pwd FTP command fails.
        """

        try:
            # Go to the root directory.
            self.ftp.cwd(root_dir)
            logging.debug(f"ftp root directory (requested) = {self.root_dir:s}")

            # Read it back.  The server may normalize the path, so trust its answer.
            self.root_dir = self.ftp.pwd()
            logging.debug(f"ftp root directory (read back from server): {self.root_dir:s}")

        except Exception as detail:
            logging.error(f"go_to_root_dir(): \tCannot ftp cwd or pwd root dir {root_dir:s} {str(detail):s} Aborting...")
            raise UpdateWebException("Problem accessing remote web site. See the log file for details. Aborting... ") from detail

    def get_root_dir(self):
        """Get the root directory name"""

        return self.root_dir

    def finish(self):
        """Quit the remote web site's ftp session.  Failure to quit is logged but not fatal."""
        logging.debug(f"Finished with WebSite object of class {type(self)}")
        try:
            self.ftp.quit()
        except Exception as detail:
            logging.error(f"Cannot ftp quit: {str(detail):s}")

    def one_level_down(self, d):
        """List files and directories in a subdirectory using ftp.

        Returns (directories, files): sorted full-path directory names and
        sorted parsed file-info lists (see parse_ftp_list).
        Raises UpdateWebException if the FTP cwd or LIST command fails.
        """

        directories = []
        files = []

        try:
            # ftp listing from current dir.
            logging.debug(f"RemoteWebSite.one_level_down(): \tftp cwd: {d:s}")
            self.ftp.cwd(d)
            dir_list = []

            # Use the nonstandard -a option in LIST to show all the hidden .* files.
            # But now we have the problem that . and .. (the UNIX current and parent directories) will be in the ftp list of directories.
            # Note the second argument requires a callback function.
            self.ftp.retrlines('LIST -a', dir_list.append)

        except Exception as detail:
            logging.error(f"one_level_down(): \tCannot ftp cwd or ftp LIST dir {d:s}: {str(detail):s} Aborting...")
            raise UpdateWebException("Problem accessing remote web site. See the log file for details. Aborting... ") from detail

        for line in dir_list:
            logging.debug(f"RemoteWebSite.one_level_down(): \tftp LIST: {line:s}")

            # Line should at least have the minimum FTP information.
            if len(line) >= self.user_settings.MIN_FTP_LINE_LENGTH:
                # Parse the FTP LIST and put the pieces into file_info.
                file_info = self.parse_ftp_list(line)
                logging.debug(f"RemoteWebSite.one_level_down(): \tftp parsed file information: {file_info[self.user_settings.FILE_NAME]:s}")

                # Skip over the UNIX hidden files for current and parent directories . and .. Also skip over any NULL file names.
                if file_info[self.user_settings.FILE_NAME] == "" or file_info[self.user_settings.FILE_NAME] == "." or file_info[self.user_settings.FILE_NAME] == "..":
                    logging.debug(f"RemoteWebSite.one_level_down(): \tftp skipping the file name: {file_info[self.user_settings.FILE_NAME]:s}")
                # For a directory, prefix the full path prefix from the root to the directory name and add to the directory list.
                elif file_info[self.user_settings.FILE_TYPE] == FileType.DIRECTORY:
                    dirname = self.append_root_dir(d, file_info[self.user_settings.FILE_NAME])
                    logging.debug(f"RemoteWebSite.one_level_down(): \tftp dir (full path): {dirname:s}")
                    directories.append(dirname)
                # For a file: Add the full path prefix from the root to the file name.
                else:
                    file_info[self.user_settings.FILE_NAME] = self.append_root_dir(d, file_info[self.user_settings.FILE_NAME])
                    # FIX: keep the log message on one line; the old backslash continuation
                    # embedded a run of indentation spaces inside the logged text.
                    logging.debug(f"RemoteWebSite.one_level_down(): \tftp file (full path): {file_info[self.user_settings.FILE_NAME]:s}")
                    files.append(file_info)
            else:
                logging.error(f"RemoteWebSite.one_level_down(): \tFTP LIST line is too short: {line:s}")

        directories.sort()
        files.sort()

        return directories, files

    def modtime(self, f):
        """Get the modification time of a file via ftp.

        Returns the raw MDTM timestamp string "YYYYMMDDhhmmss" on success,
        or the integer 0 if ftp cannot get it.  NOTE(review): the success and
        failure return types differ (str vs int); callers must tolerate both.
        """
        modtime = 0

        try:
            response = self.ftp.sendcmd('MDTM ' + f)
            # MDTM returns the last modified time of the file in the format
            # "213 YYYYMMDDhhmmss \r\n <error-response>
            # MM is 01 to 12, DD is 01 to 31, hh is 00 to 23, mm is 00 to 59, ss is 0 to 59.
            # error-response is 550 for info not available, and 500 or 501 if command cannot
            # be parsed.
            if response[:3] == '213':
                modtime = response[4:]
        except ftplib.error_perm as detail:
            logging.error(f"Cannot get file modification time from the ftp server: {str(detail):s} Aborting...")
            modtime = 0

        return modtime

    def parse_ftp_list(self, line):
        """Parse the ftp file listing and return file name, datetime and file size.

        An FTP LIST command will give output which looks like this for a file:

            -rw-r--r--    1 1000     free         4084 Jul 18 16:55 sparkCoil.png

        and for a directory:

            drwxr-xr-x    2 1000     free         4096 Jul 18 16:36 ReadingList

        FTP uses UTC for its listings; the conversion to local time is done by the OS.
        We can have problems on New Year's Eve. For example, the local file date/time is

            Mon Jan  1 06:23:12 2018

        But the remote file date/time from FTP listing doesn't show a year even though we
        know it was written to the server in 2017.

            Mon Dec 31 03:02:00

        So we default the remote file year to current year 2018 and get

            Mon Dec 31 03:02:00 2018

        Now we think that the remote file is newer by 363.860278 days.
        """

        # Find out if we've a directory or a file.
        if line[0] == 'd':
            dir_or_file = FileType.DIRECTORY
        else:
            dir_or_file = FileType.FILE

        pattern = self.user_settings.FTP_LISTING

        # Sensible defaults.
        filesize = 0
        filename = ""
        # Default the time to midnight.
        hour = 0
        minute = 0
        seconds = 0
        # Default the date to Jan 1 of the current year.
        month = 1
        day = 1
        year = WebSite.get_current_year()

        # Extract time and date from the ftp listing.
        match = pattern.search(line)

        if match:
            filesize = int(match.group('bytes'))
            month = self.user_settings.monthToNumber[match.group('mon')]
            day = int(match.group('day'))

            # Remote file listing contains the year.  The FTP listing will omit the hour and minute.
            if match.group('year'):
                year = int(match.group('year'))
                logging.debug(f"ftp has year = {year} but is probably missing hour and minute")
            else:
                # Remote file listing omits the year.  Default the year to the current UTC time year.
                # That may be incorrect (see comments above).
                year = WebSite.get_current_year()
                logging.debug(f"ftp is missing the year; use the current year = {year}")

            # If the FTP listing has the hour and minute, it will omit the year.
            if match.group('hour') and match.group('min'):
                hour = int(match.group('hour'))
                minute = int(match.group('min'))
                logging.debug(f"ftp has hour = {hour} and minute = {minute} so is probably missing the year")

            filename = match.group('filename')

        # Package up the time and date nicely.
        # Note if we didn't get any matches, we'll default the remote date and
        # time to Jan 1 midnight of the current year.
        d = datetime.datetime(year, month, day, hour, minute, seconds)

        return [filename, dir_or_file, d, filesize]
1787
1788# ----------------------------------------------------------------------------
1789# Class for synchronizing local and remote web sites.
1790# ----------------------------------------------------------------------------
1791
class UpdateWeb(object):
    """Given previously scanned local and remote directories, update the remote website."""

    def __init__(
            self,
            user_settings,
            local_directory_list,
            local_file_info,
            remote_directory_list,
            remote_file_info):
        """Connect to remote site. Accept previously scanned local and remote files and directories.

        Raises UpdateWebException (chained to the underlying error) if the FTP login fails.
        """

        # Initialize from args.
        self.user_settings = user_settings
        self.local_directory_list = local_directory_list
        self.remote_directory_list = remote_directory_list
        self.local_file_info = local_file_info
        self.remote_file_info = remote_file_info

        # Initialize defaults; filled in later by file_info() and changes().
        self.local_files_list = []
        self.remote_files_list = []
        self.local_file_to_size = {}
        self.local_file_to_date_time = {}
        self.remote_file_to_date_time = {}
        self.local_only_dirs = []
        self.local_only_files = []
        self.remote_only_dirs = []
        self.remote_only_files = []
        self.common_files = []

        # Connect to FTP server and log in.
        try:
            self.ftp = ftplib.FTP(self.user_settings.SERVER_NAME)
            self.ftp.login(self.user_settings.USER_NAME, self.user_settings.PASSWORD_NAME)
        except Exception as detail:
            logging.error(f"Cannot login to ftp server: {str(detail):s} Aborting...")
            raise UpdateWebException("Problem accessing remote web site. See the log file for details. Aborting... ") from detail
        else:
            logging.debug("ftp login succeeded.")

        logging.debug(f"ftp server welcome message: {self.ftp.getwelcome():s}")

        # Local root directory.
        self.local_root_dir = self.user_settings.local_root_dir
        logging.debug(f"Local root directory: {self.local_root_dir:s}")

        # Root directory of FTP server.
        self.ftp_root_dir = self.user_settings.FTP_ROOT_NAME
        logging.debug(f"ftp root directory (requested) = {self.ftp_root_dir:s}")

        # Transform KB string to integer bytes. e.g. "200" => 204800
        self.file_size_limit = int(self.user_settings.FILE_SIZE_LIMIT_NAME) * 1024

        try:
            # Go to the root directory.
            self.ftp.cwd(self.ftp_root_dir)

            # Read it back.  The server may normalize the path, so trust its answer.
            self.ftp_root_dir = self.ftp.pwd()
            logging.debug(f"ftp root directory (read back from server): {self.ftp_root_dir:s}")
        except Exception as detail:
            # NOTE(review): deliberately non-fatal -- we log and continue with the requested root.
            logging.error(f"UpdateWeb(): \tCannot ftp cwd or ftp LIST dir {self.ftp_root_dir:s} {str(detail):s} Aborting...")

    def append_root_dir(self, root_dir, name):
        """Append the root directory to a relative path."""

        # e.g. root = /, and name = Art/foo.txt yields /Art/foo.txt
        # but root = /Sean, and name = Art/foo.txt yields /Sean/Art/foo.txt
        if root_dir == self.user_settings.DEFAULT_ROOT_DIR:
            return root_dir + name
        else:
            return root_dir + "/" + name

    def file_info(self):
        """Create lists of file names from the file information.  Also create dictionaries which map file names onto
        dates, times, and sizes."""

        # Extract file names.
        self.local_files_list = [
            file_info[self.user_settings.FILE_NAME] for file_info in self.local_file_info]
        self.remote_files_list = [
            file_info[self.user_settings.FILE_NAME] for file_info in self.remote_file_info]

        # Use a dictionary comprehension to create key/value pairs,
        #     (file name, file date/time)
        # which map file names onto date/time.
        self.local_file_to_date_time = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_DATE_TIME] for file_info in self.local_file_info}
        self.remote_file_to_date_time = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_DATE_TIME] for file_info in self.remote_file_info}

        # Dictionary comprehension creates a mapping of local file names onto file sizes.
        self.local_file_to_size = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_SIZE] for file_info in self.local_file_info}

    def update(self):
        """Scan through the local website, cleaning it up.
        Go to remote website on my servers and synchronize all files."""

        self.file_info()

        # Which files and directories are different.
        self.changes()

        # Synchronize with the local web site.
        self.synchronize()

    def changes(self):
        """Find the set of different directories and files on local and remote."""

        # Add all directories which are only on local to the dictionary.
        dir_to_type = {
            d: FileType.ON_LOCAL_ONLY for d in self.local_directory_list}

        # Scan through all remote directories, adding those only on remote or
        # on both.
        for d in self.remote_directory_list:
            if d in dir_to_type:
                dir_to_type[d] = FileType.ON_BOTH_LOCAL_AND_REMOTE
            else:
                dir_to_type[d] = FileType.ON_REMOTE_ONLY

        # Add all files which are only on local to the dictionary.
        file_to_type = {
            f: FileType.ON_LOCAL_ONLY for f in self.local_files_list}

        # Scan through all remote files, adding those only on remote or on
        # both.
        for f in self.remote_files_list:
            if f in file_to_type:
                file_to_type[f] = FileType.ON_BOTH_LOCAL_AND_REMOTE
            else:
                file_to_type[f] = FileType.ON_REMOTE_ONLY

        logging.debug("Raw dictionary dump of directories")
        for k, v in dir_to_type.items():
            logging.debug(f"\t dir: {str(k):s} type: {str(v):s}")

        logging.debug("Raw dictionary dump of files")
        for k, v in file_to_type.items():
            logging.debug(f"\t file: {str(k):s} type: {str(v):s}")

        # List of directories only on local.  Keep the ordering.
        self.local_only_dirs = [
            d for d in self.local_directory_list if dir_to_type[d] == FileType.ON_LOCAL_ONLY]

        # List of directories only on remote.  Keep the ordering.
        self.remote_only_dirs = [
            d for d in self.remote_directory_list if dir_to_type[d] == FileType.ON_REMOTE_ONLY]

        # We don't care about common directories, only their changed files, if
        # any.

        # List of files only on local.  Keep the ordering.
        self.local_only_files = [
            f for f in self.local_files_list if file_to_type[f] == FileType.ON_LOCAL_ONLY]

        # List of files only on remote.  Keep the ordering.
        self.remote_only_files = [
            f for f in self.remote_files_list if file_to_type[f] == FileType.ON_REMOTE_ONLY]

        # List of common files on both local and remote.  Keep the ordering.
        self.common_files = [
            f for f in self.local_files_list if file_to_type[f] == FileType.ON_BOTH_LOCAL_AND_REMOTE]

        logging.debug("*** Directories only on local ******************************")
        for d in self.local_only_dirs:
            logging.debug(f"\t {d:s}")

        logging.debug("*** Directories only on remote ******************************")
        for d in self.remote_only_dirs:
            logging.debug(f"\t {d:s}")

        logging.debug("*** Files only on local ******************************")
        for f in self.local_only_files:
            logging.debug(f"\t {f:s}")

        logging.debug("*** Files only on remote ******************************")
        for f in self.remote_only_files:
            logging.debug(f"\t {f:s}")

        logging.debug("*** Common files ******************************")
        for f in self.common_files:
            logging.debug(f"name {f:s}")
            logging.debug(f"\tlocal time {self.local_file_to_date_time[f].ctime():s}")
            logging.debug(f"\tremote time {self.remote_file_to_date_time[f].ctime():s}")

    def synchronize(self):
        """Synchronize files and subdirectories in the remote directory with the local directory."""

        # If we have the same files in local and remote, compare their times
        # and dates.
        for f in self.common_files:
            local_file_time = self.local_file_to_date_time[f]
            remote_file_time = self.remote_file_to_date_time[f]

            # What's the time difference?
            time_delta = remote_file_time - local_file_time
            # How much difference, either earlier or later?
            # (FIX: removed the unused hours/days conversions.)
            seconds_different = abs(time_delta.total_seconds())
            minutes_different = seconds_different / 60.0

            # Assume no upload initially.
            upload_to_host = False

            logging.debug(f"Common file: {f:s}.")

            # Remote file time is newer.
            if remote_file_time > local_file_time:
                # Remote file time is MUCH newer: suspect time is out of joint on the server, so upload local local file to be safe.
                if minutes_different >= self.user_settings.MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD:
                    logging.error(f"Remote file {f:s} is MUCH newer by {minutes_different:8.1f} minutes [which exceeds the threshold = {self.user_settings.MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD} minutes]. Upload the file to be safe.")
                    logging.error(f"\tlocal time {local_file_time.ctime():s}")
                    logging.error(f"\tremote time {remote_file_time.ctime():s}")

                    # Set the local file to the current time.
                    full_file_name = self.append_root_dir(
                        self.local_root_dir, f)
                    if os.path.exists(full_file_name):
                        # Change the access and modify times of the file to the current time.
                        os.utime(full_file_name, None)
                        logging.error(f"Touching local file {full_file_name:s} to make it the current time")

                    upload_to_host = True
                # Remote file time is newer, but not by much.  Let's just assume a slight time mismatch on the server.  Don't upload.
                else:
                    logging.warning(f"Remote file {f:s} is only SLIGHTLY newer by {seconds_different:8.1f} seconds. Probably just inaccurate time/date on the server. Wait -- don't upload the file yet.")
                    logging.warning(f"\tlocal time {local_file_time.ctime():s}")
                    logging.warning(f"\tremote time {remote_file_time.ctime():s}")
                    upload_to_host = False

            # Local file time is newer.
            elif local_file_time > remote_file_time:
                # Local file time slightly newer than the remote file.  So we are pretty sure the local file really got changed vs the server file.
                if minutes_different >= self.user_settings.MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD:
                    logging.warning(f"Local file {f:20s} is SLIGHTLY newer by {minutes_different:8.1f} minutes [which exceeds the threshold = {self.user_settings.MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD} minutes]. Uploading to remote server.")
                    logging.warning(f"\tlocal time {local_file_time.ctime():s}")
                    logging.warning(f"\tremote time {remote_file_time.ctime():s}")
                    upload_to_host = True
                else:
                    logging.warning(f"Local file {f:20s} is BARELY newer by {seconds_different:8.1f} seconds. Probably just inaccurate time/date on the server. Wait -- don't upload the file yet.")
                    logging.warning(f"\tlocal time {local_file_time.ctime():s}")
                    logging.warning(f"\tremote time {remote_file_time.ctime():s}")
                    upload_to_host = False

            # Cancel the upload if the file is too big for the server.
            size = self.local_file_to_size[f]
            if size >= self.file_size_limit:
                logging.error(f"upload(): Skipping upload of file {f:s} of size {size:d}; too large for server, limit is {self.file_size_limit:d} bytes")
                upload_to_host = False

            # Finally do the file upload.
            if upload_to_host:
                logging.debug(f"Uploading changed file {f:s}")
                # Suppress newline to keep the message to the console more compact. Flush output buffer, so we can see the message right away.
                print(f"Uploading changed file {f:s}... ", end='', flush=True)
                self.upload(f)

        # Remote directory is not in local.  Delete it.
        for d in self.remote_only_dirs:
            logging.debug(f"Deleting remote only directory {d:s}")
            print(f"Deleting remote only directory {d:s}... ", end='', flush=True)
            self.rmdir(d)

        # Local directory missing on remote.  Create it.
        # Due to breadth first order scan, we'll create parent directories
        # before child directories.
        for d in self.local_only_dirs:
            logging.debug(f"Only on local. Creating new remote dir {d:s}.")
            print(f"Creating new remote directory {d:s}... ", end='', flush=True)
            self.mkdir(d)

        # Local file missing on remote.  Upload it.
        for f in self.local_only_files:
            logging.debug(f"Local only file. Uploading {f:s} to remote.")

            # But cancel the upload if the file is too big for the server.
            size = self.local_file_to_size[f]
            if size >= self.file_size_limit:
                logging.error(f"upload(): Skipping upload of file {f:s} of size {size:d};"
                              f" too large for server, limit is {self.file_size_limit:d} bytes")
            else:
                logging.debug(f"Uploading new file {f:s}")
                print(f"Uploading new file {f:s}... ", end='', flush=True)
                self.upload(f)

        # Remote contains a file not present on the local.  Delete the file.
        for f in self.remote_only_files:
            logging.debug(f"Remote only file. Deleting remote file {f:s}.")
            print(f"Deleting remote file {f:s}... ", end='', flush=True)
            self.del_remote(f)

    def del_remote(self, relative_file_path):
        """Delete a file using ftp.  Failures are logged and skipped, not fatal."""

        logging.debug(f"del_remote(): \trelative file path name: {relative_file_path:s}")

        # Parse the relative file path into file name and relative directory.
        relative_dir, file_name = os.path.split(relative_file_path)
        logging.debug(f"del_remote(): \tfile name: {file_name:s}")
        logging.debug(f"del_remote(): \trelative dir: {relative_dir:s}")
        logging.debug(f"del_remote(): \tremote root dir: {self.ftp_root_dir:s}")

        try:
            # Add the remote root path and go to the remote directory.
            remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
            logging.debug(f"del_remote(): \tftp cd remote dir: {remote_dir:s}")
            self.ftp.cwd(remote_dir)
        except Exception as detail:
            logging.error(f"del_remote(): \tCannot ftp chdir: {str(detail):s} Skipping...")
        else:
            try:
                logging.debug(f"del_remote(): \tftp rm: {file_name:s}")

                # Don't remove zero length file names.
                if len(file_name) > 0:
                    self.ftp.delete(file_name)
                else:
                    # FIX: was a plain string missing the f prefix (so the name was never
                    # interpolated) and misspelled the function name as "fdel_remote".
                    logging.warning(
                        f"del_remote(): skipping ftp delete; file name {file_name:s} had zero length")
            except Exception as detail:
                logging.error(f"del_remote(): \tCannot ftp rm: {str(detail):s}")

    def mkdir(self, relative_dir):
        """Create new remote directory using ftp.  Failures are logged and skipped, not fatal."""

        logging.debug(f"mkdir(): \trelative dir path name: {relative_dir:s}")
        logging.debug(f"mkdir(): \tremote root dir: {self.ftp_root_dir:s}")

        # Parse the relative dir path into prefix dir and suffix dir.
        path, d = os.path.split(relative_dir)
        logging.debug(f"mkdir(): \tremote prefix dir: {path:s}")
        logging.debug(f"mkdir(): \tremote dir: {d:s}")

        try:
            # Add the remote root path and go to the remote directory.
            remote_dir = self.append_root_dir(self.ftp_root_dir, path)
            logging.debug(f"mkdir(): \tftp cd remote dir: {remote_dir:s}")
            self.ftp.cwd(remote_dir)
        except Exception as detail:
            # FIX: log message typo "chrdir" -> "chdir".
            logging.error(f"mkdir(): \tCannot ftp chdir: {str(detail):s} Skipping...")
        else:
            try:
                logging.debug(f"mkdir(): \tftp mkd: {d:s}")
                self.ftp.mkd(d)
            except Exception as detail:
                logging.error(f"mkdir(): \tCannot ftp mkdir: {str(detail):s}")

    def rmdir(self, relative_dir):
        """Delete an empty directory using ftp.  Failures are logged and skipped, not fatal."""

        logging.debug(f"rmdir(): \tintermediate dir path name: {relative_dir:s}")
        logging.debug(f"rmdir(): \tremote root dir: {self.ftp_root_dir:s}")

        # Parse the relative dir path into prefix dir and suffix dir.
        path, d = os.path.split(relative_dir)
        logging.debug(f"rmdir(): \tremote prefix dir: {path:s}")
        logging.debug(f"rmdir(): \tremote dir: {d:s}")

        try:
            # Add the remote root path and go to the remote directory.
            remote_dir = self.append_root_dir(self.ftp_root_dir, path)
            logging.debug(f"rmdir(): \tftp cd remote dir: {remote_dir:s}")
            self.ftp.cwd(remote_dir)
        except Exception as detail:
            logging.error(f"rmdir(): \tCannot ftp chdir: {str(detail):s} Skipping...")
        else:
            try:
                logging.debug(f"rmdir(): \tftp rmd: {d:s}")
                self.ftp.rmd(d)
            except Exception as detail:
                logging.error(f"rmdir(): \tCannot ftp rmdir dir {d:s}: {str(detail):s}. Directory is probably not empty. Do a manual delete.")

    def download(self, relative_file_path):
        """Download a binary file using ftp.

        NOTE(review): the actual retrbinary transfer is deliberately commented
        out in the original; this currently only creates/truncates the local file.
        """

        logging.debug(f"download(): \tfile name: {relative_file_path:s}")

        # Parse the relative file path into file name and relative directory.
        relative_dir, file_name = os.path.split(relative_file_path)
        logging.debug(f"download(): \tfile name: {file_name:s}")
        logging.debug(f"download(): \trelative dir: {relative_dir:s}")
        logging.debug(f"download(): \troot dir: {self.ftp_root_dir:s}")

        # Add the remote root path and go to the remote directory.
        remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
        logging.debug(f"download(): \tftp cd remote dir: {remote_dir:s}")

        try:
            self.ftp.cwd(remote_dir)
        except Exception as detail:
            logging.error(f"download(): \tCannot ftp chdir: {str(detail):s} Skipping...")
        else:
            # Add the local root path to get the local file name.
            local_file_name = self.append_root_dir(
                self.local_root_dir, relative_file_path)
            logging.debug(f"download(): \topen local file name: {local_file_name:s}")
            try:
                # FIX: use a context manager so the file is closed even if the
                # transfer raises; the old code could leak the handle.
                with open(local_file_name, "wb") as f:
                    try:
                        # Calls f.write() on each block of the binary file.
                        # ftp.retrbinary( "RETR " + file_name, f.write )
                        pass
                    except Exception as detail:
                        # FIX: log message said "Cannot cannot".
                        logging.error(f"download(): \tCannot ftp retrbinary: {str(detail):s}")
            except IOError as detail:
                # FIX: we open the local file for writing, not reading.
                logging.error(f"download(): \tCannot open local file {local_file_name:s} for writing: {str(detail):s}")

    def upload(self, relative_file_path):
        """Upload a binary file using ftp.  Failures are logged and skipped, not fatal."""

        logging.debug(f"upload(): \trelative file path name: {relative_file_path:s}")

        # Parse the relative file path into file name and relative directory.
        relative_dir, file_name = os.path.split(relative_file_path)
        logging.debug(f"upload(): \tfile name: {file_name:s}")
        logging.debug(f"upload(): \trelative dir: {relative_dir:s}")
        logging.debug(f"upload(): \tremote root dir: {self.ftp_root_dir:s}")

        # Add the remote root path and go to the remote directory.
        remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
        logging.debug(f"upload(): \tftp cd remote dir: {remote_dir:s}")

        try:
            self.ftp.cwd(remote_dir)
        except Exception as detail:
            logging.error(f"upload(): \tCannot ftp chdir: {str(detail):s} Skipping...")
        else:
            # Add the local root path to get the local file name.
            local_file_name = self.append_root_dir(
                self.local_root_dir, relative_file_path)
            logging.debug(f"upload(): \topen local file name: {local_file_name:s}")

            try:
                # FIX: use a context manager so the file is closed even if the
                # transfer raises; the old code could leak the handle.
                with open(local_file_name, "rb") as f:
                    try:
                        # f.read() is called on each block of the binary file until EOF.
                        logging.debug(f"upload(): \tftp STOR file {file_name:s}")
                        self.ftp.storbinary("STOR " + file_name, f)
                    except Exception as detail:
                        logging.error(f"upload(): \tCannot ftp storbinary: {str(detail):s}")
            except IOError as detail:
                logging.error(f"upload(): \tCannot open local file {local_file_name:s} for reading: {str(detail):s}")

    def finish(self):
        """Log out of the ftp session.  Failure to quit is logged but not fatal."""
        logging.debug(f"Finished with UpdateWeb object of class {type(self)}")
        try:
            self.ftp.quit()
        except Exception as detail:
            logging.error(f"Cannot ftp quit because {str(detail):s}")
2251
2252# ----------------------------------------------------------------------------
2253# Main function
2254# ----------------------------------------------------------------------------
2255
2256def main(raw_args=None):
2257 """Main program. Clean up and update my website."""
2258
2259 # Print the obligatory legal notice.
2260 print("""
2261 updateweb Version 7.2 - A Python utility program which maintains my web site.
2262 Copyright (C) 2007-2025 by Sean Erik O'Connor. All Rights Reserved.
2263
2264 It deletes temporary files, rewrites old copyright lines and email address
2265 lines in source files, then synchronizes all changes to my web sites.
2266
2267 updateweb comes with ABSOLUTELY NO WARRANTY; for details see the
2268 GNU General Public License. This is free software, and you are welcome
2269 to redistribute it under certain conditions; see the GNU General Public
2270 License for details.
2271 """)
2272
2273 # Put ALL the main code into a try block!
2274 try:
2275 # ---------------------------------------------------------------------
2276 # Load default settings and start logging.
2277 # ---------------------------------------------------------------------
2278
2279 # Default user settings.
2280 user_settings = UserSettings()
2281
2282 print( f"Running main( {raw_args} ) Python version\
2283 {sys.version_info[0]:d}.{sys.version_info[1]:d}.{sys.version_info[2]:d}\
2284 local web directory\
2285 {user_settings.local_root_dir}\n")
2286 # Get command line options such as --verbose. Pass them back as flags in
2287 # user_settings.
2288 CommandLineSettings(user_settings, raw_args)
2289
2290 # Load all unit test functions named test_* from UnitTest class, run the tests and exit.
2291 if user_settings.UNITTEST:
2292 suite = unittest.TestLoader().loadTestsFromTestCase(UnitTest)
2293 unittest.TextTestRunner(verbosity=2).run(suite)
2294 # We are done!
2295 print(" ...done!", flush=True)
2296 return
2297
2298 # Start logging to file. Verbose turns on logging for
2299 # DEBUG, INFO, WARNING, ERROR, and CRITICAL levels,
2300 # otherwise we log only WARNING, ERROR, and CRITICAL levels.
2301 if user_settings.VERBOSE:
2302 loglevel = logging.DEBUG
2303 else:
2304 loglevel = logging.WARNING
2305
2306 # Pick the log file name on the host.
2307 if user_settings.CLEAN:
2308 user_settings.LOGFILENAME = "/private/logLocal.txt"
2309 else:
2310 user_settings.LOGFILENAME = "/private/logRemote.txt"
2311
2312 # Default is to skip processing or uploading MathJax files in /mathjax to the server.
2313 if not user_settings.MATHJAX:
2314 user_settings.DIR_TO_SKIP += "|mathjax"
2315 else:
2316 print(f"Processing and uploading mathjax files. git restore any changed files and git clean -f to remove extra files... ", end='', flush=True)
2317 print(f"Since you are loading MathJax for the first time --- Also don't forget to upload .htaccess manually. If using FileZilla, change your FreeServer settings: Files->Site Manager->Transfer Settings->Limit number of simultaneous connections->Check the box. This avoids ERROR 421 Too many connections... ", end='', flush=True)
2318 logging.debug(f"Processing and uploading mathjax files. git restore any changed files and git clean -f to remove extra files.")
2319 logging.debug(f"Since you are loading MathJax for the first time --- Also don't forget to upload .htaccess manually. If using FileZilla, change your FreeServer settings: Files->Site Manager->Transfer Settings->Limit number of simultaneous connections->Check the box. This avoids ERROR 421 Too many connections...")
2320
2321 logging.basicConfig(
2322 level=loglevel,
2323 format='%(asctime)s %(levelname)-8s %(message)s',
2324 datefmt='%a, %d %b %Y %H:%M:%S',
2325 filename=user_settings.local_root_dir + user_settings.LOGFILENAME,
2326 filemode='w')
2327
2328 logging.debug("********** Begin logging")
2329
2330 # ---------------------------------------------------------------------
2331 # Scan the local website, finding out all files and directories.
2332 # ---------------------------------------------------------------------
2333
2334 # Suppress newline to keep the message to the console more compact. Flush output buffer, so we can see the message right away.
2335 print(f"Scanning the local web site from the root dir = {user_settings.local_root_dir}... ", end='', flush=True)
2336 logging.debug(f"========================== Scanning the local web site from the root dir = {user_settings.local_root_dir}")
2337
2338 local = LocalWebSite(user_settings)
2339 local.scan()
2340
2341 # ---------------------------------------------------------------------
2342 # Clean up local website.
2343 # ---------------------------------------------------------------------
2344
2345 # Clean up the directory by rewriting source code and hypertext and removing temporary files.
2346 print("Cleaning local web site... ", end='', flush=True)
2347 logging.debug("========================== Cleaning the local web site")
2348 local.clean()
2349
2350 # We are done with the first scan of the local web site and will dispose of it.
2351 local.finish()
2352 del local
2353
2354 # ---------------------------------------------------------------------
2355 # Rescan the local website since there will be changes to source
2356 # files from the clean up stage.
2357 # ---------------------------------------------------------------------
2358
2359 print(f"Rescan the local web site from root dir = {user_settings.local_root_dir}", end='', flush=True)
2360 logging.debug(f"========================== Re-Scan the local web site from root dir = {user_settings.local_root_dir}")
2361
2362 local = LocalWebSite(user_settings)
2363
2364 local.scan()
2365
2366 # ---------------------------------------------------------------------
2367 # List all the local directories and files and their sizes.
2368 # ---------------------------------------------------------------------
2369
2370 # Local website directories.
2371 local_directory_list = local.directories
2372 logging.debug("********** List of all the Local Directories")
2373 for d in local_directory_list:
2374 logging.debug(f"\t {d:s}")
2375
2376 # Generate lists of the local website filenames only, and their sizes in byteskjjjj
2377 local_files_name_size_pairs = [[file_info[user_settings.FILE_NAME], file_info[user_settings.FILE_SIZE]] for file_info in local.files]
2378 total_number_of_files = len( local_files_name_size_pairs )
2379 logging.debug(f"********** List of all the Local Files from largest to smallest. There are {total_number_of_files:15d} files.")
2380 local_files_name_size_pairs = sorted(local_files_name_size_pairs, key=lambda name_size: name_size[1], reverse=True)
2381
2382 # Local website filenames only, and their dates and times.
2383 local_file_datetime_pairs = [[file_info[user_settings.FILE_NAME],file_info[user_settings.FILE_DATE_TIME]] for file_info in local.files]
2384 logging.debug(f"********** List of all Local Files Showing Their Date and Time")
2385 for file_datetime_pair in local_file_datetime_pairs:
2386 logging.debug(f"\t {file_datetime_pair[1].ctime():s} UTC {file_datetime_pair[0]:s}")
2387
2388 # Total number of bytes in the local files.
2389 total_number_of_bytes = 0
2390 for file_size_pair in local_files_name_size_pairs:
2391 logging.debug(f"\t {file_size_pair[1]:10d} bytes {file_size_pair[0]:s}")
2392 total_number_of_bytes += file_size_pair[1]
2393 logging.debug(f"********** Total local file size = {total_number_of_bytes:10d} bytes = {total_number_of_bytes/(1024 ** 2):10.2f} MB (not counting skipped files and directories)")
2394
2395 local.finish()
2396
2397 if user_settings.CLEAN:
2398 logging.debug("========================== Done with local file and directory cleanup...")
2399 del local
2400 print("...done!", flush=True)
2401 return
2402
2403 # ---------------------------------------------------------------------
2404 # Scan the remote hosted web site.
2405 # ---------------------------------------------------------------------
2406
2407 print("Scanning remote web site...", end='', flush=True)
2408 logging.debug("========================== Scanning the remote web site...")
2409
2410 # Pick which website to update.
2411 logging.debug("Connecting to primary remote site.")
2412 remote = RemoteWebSite(user_settings)
2413 remote.scan()
2414 remote.finish()
2415
2416 # ---------------------------------------------------------------------
2417 # List all the remote server directories and files and their sizes.
2418 # ---------------------------------------------------------------------
2419
2420 remote_directory_list = remote.directories
2421 logging.debug("********** Remote Directories")
2422 for d in remote_directory_list:
2423 logging.debug(f"\t {d:s}")
2424
2425 # Local website filenames only, and their sizes in bytes.
2426 remote_files_name_size_list = [[file_info[user_settings.FILE_NAME], file_info[user_settings.FILE_SIZE]] for file_info in remote.files]
2427 total_number_of_files = len( remote_files_name_size_list )
2428 logging.debug(f"********** Remote Files [num files = {total_number_of_files:15d}]")
2429 remote_files_name_size_list = sorted(remote_files_name_size_list, key=lambda name_size: name_size[1], reverse=True)
2430 total_number_of_bytes = 0
2431 for file_size in remote_files_name_size_list:
2432 logging.debug(f"\t {file_size[1]:10d} bytes {file_size[0]:s}")
2433 total_number_of_bytes += file_size[1]
2434 logging.debug(f"\tTotal file size on remote (not counting skipped files and directories) = {total_number_of_bytes:10d} bytes = {total_number_of_bytes/(1024 ** 2):10.2f} MB")
2435
2436 # ---------------------------------------------------------------------
2437 # Synchronize the local and remote web sites.
2438 # ---------------------------------------------------------------------
2439
2440 print("Synchronizing remote and local web sites...", end='', flush=True)
2441 logging.debug("========================= Synchronizing remote and local web sites...")
2442
2443 # Primary website.
2444 logging.debug("Connecting to primary remote site for synchronization.")
2445 sync = UpdateWeb(user_settings,
2446 local.directories,
2447 local.files,
2448 remote.directories,
2449 remote.files)
2450
2451 sync.update()
2452 sync.finish()
2453
2454 del sync
2455 del remote
2456 del local
2457 print("...done!", flush=True)
2458
2459 except UpdateWebException as detail:
2460 logging.error(f"Couldn't update the web directory: {str(detail):s}. Aborting...")
2461
2462 except RecursionError as detail:
2463 logging.error(f"Walking the directory tree became too deep for Python's recursion stack depth of {sys.getrecursionlimit():d} You can increase it with sys.setrecursionlimit(limit) {str(detail):s}. Aborting...")
2464
if __name__ == '__main__':
    # Python executes all code in this file and finally arrives here.
    #
    # * When this file runs as a standalone script, Python sets the module
    #   name to __main__, so we call the main() function below.
    #
    # * When this code is imported as a module and called from another
    #   script, that script does this instead:
    #
    #       import updateweb
    #       updateweb.main(["--test"])
    main()