1#!/usr/bin/env python3
2# ============================================================================
3#
4# NAME
5#
6# updateweb.py
7#
8# DESCRIPTION
9#
10# Python script which updates my web sites.
11#
12# It does miscellaneous cleanup on my local copy of the web site on disk,
13# including updating copyright information, then synchronizes the local
14# copy to my remote server web sites using FTP.
15#
16# USAGE
17#
18# It's best to use the associated makefile.
19# But you can call this Python utility from the command line,
20#
21# $ python updateweb.py Clean up my local copy, then use it
22# to update my remote web server site.
23# Log warnings and errors.
24# $ python updateweb.py -v Same, but log debug messages also.
25# $ python updateweb.py -c Clean up my local copy only.
26# $ python updateweb.py -t Run unit tests only.
27# $ python updateweb.py -m Upload MathJax files (only need to do this once).
28#
29# We get username and password information from the file PARAMETERS_FILE.
30#
31# Logs are written to the files,
32#
33# logLocal.txt Local web site cleanup log.
34# logRemote.txt Remote web server update log.
35#
36# AUTHOR
37#
38# Sean E. O'Connor 23 Aug 2007 Version 1.0 released.
39#
40# LEGAL
41#
42# updateweb.py Version 7.3 - A Python utility program which maintains my web site.
43# Copyright (C) 2007-2025 by Sean Erik O'Connor. All Rights Reserved.
44#
45# This program is free software: you can redistribute it and/or modify
46# it under the terms of the GNU General Public License as published by
47# the Free Software Foundation, either version 3 of the License, or
48# (at your option) any later version.
49#
50# This program is distributed in the hope that it will be useful,
51# but WITHOUT ANY WARRANTY; without even the implied warranty of
52# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
53# GNU General Public License for more details.
54#
55# You should have received a copy of the GNU General Public License
56# along with this program. If not, see <http://www.gnu.org/licenses/>.
57#
58# The author's address is seanerikoconnor!AT!gmail!DOT!com
59# with !DOT! replaced by . and the !AT! replaced by @
60#
61# NOTES
62#
63# DOCUMENTATION
64#
65# Python interpreter: https://www.python.org/
66# Python tutorial and reference: https://docs.python.org/lib/lib.html
67# Python debugger: https://docs.python.org/3/library/pdb.html
68# Python regular expression howto: https://docs.python.org/3.7/howto/regex.html
69#
70# ============================================================================
71
72# ----------------------------------------------------------------------------
73# Load Python Packages
74# ----------------------------------------------------------------------------
75
76# OS stuff
77import sys
78import os
79import argparse
80import subprocess
81import shutil
82from pathlib import Path
83
84# Regular expressions
85import re
86
87# FTP stuff
88import ftplib
89
90# Date and time
91import time
92import stat
93import datetime
94
95# Logging
96import logging
97
98# Unit testing
99import unittest
100
101# Enumerated types (v3.4)
102from enum import Enum
103from typing import List, Any
104
105# YAML configuration files (a superset of JSON!)
106import yaml
107# Recommended by https://www.cloudbees.com/blog/yaml-tutorial-everything-you-need-get-started
108try:
109 from yaml import CLoader as Loader
110except ImportError:
111 from yaml import Loader
112
113# Python syntax highlighter. See https://pygments.org
114from pygments import highlight
115from pygments.lexers import HtmlLexer, CssLexer, JavascriptLexer, YamlLexer, MakefileLexer, BashLexer, VimLexer, TexLexer
116from pygments.lexers import PythonLexer, CppLexer, CLexer, CommonLispLexer, FortranFixedLexer, MatlabLexer, OutputLexer
117from pygments.formatters import HtmlFormatter
118
119
120# ----------------------------------------------------------------------------
121# Custom Top Level Exceptions.
122# ----------------------------------------------------------------------------
123
class UpdateWebException(Exception):
    """Raised when something goes wrong at a deep level — searching local files,
    searching remote files, or synchronizing local and remote copies — and no
    recovery is possible.  Derived from Exception, as the Python manual
    recommends for application-defined exceptions."""
128
129# ----------------------------------------------------------------------------
130# User settings.
131# ----------------------------------------------------------------------------
132
class TreeWalkSettings(Enum):
    """Strategy for traversing the directory tree."""
    BREADTH_FIRST_SEARCH = 1  # Visit all siblings before descending into subdirectories.
    DEPTH_FIRST_SEARCH = 2    # Descend fully into each subdirectory before moving on.
137
class FileType(Enum):
    """Properties of a directory entry:  what it is and where it lives."""
    DIRECTORY = 0                 # Entry is a directory.
    FILE = 1                      # Entry is a regular file.
    ON_LOCAL_ONLY = 2             # Exists only in the local web site copy.
    ON_REMOTE_ONLY = 3            # Exists only on the remote ftp server.
    ON_BOTH_LOCAL_AND_REMOTE = 4  # Exists in both places.
145
class UserSettings:
    """Megatons of user selectable settings.

    Holds the logging/command-line flags, the private server settings loaded
    from a secret YAML parameter file, the regular expressions used to clean
    and update the web site source files, and the HTML boilerplate used to
    wrap syntax highlighted source code listings.
    """
    # Logging control.
    LOGFILENAME = ""
    VERBOSE = False   # Verbose mode.  Prints out everything.
    CLEAN = False     # Clean the local website only.
    UNITTEST = False  # Run a unit test of a function.
    MATHJAX = False   # Process and upload MathJax files to server.

    # When diving into the MathJax directory, web walking the deep directories
    # may exceed Python's default recursion limit of 1000.
    # NOTE: this executes at class-definition (import) time so the limit is
    # already raised before any tree walk begins.
    RECURSION_DEPTH = 5000
    sys.setrecursionlimit(RECURSION_DEPTH)

    # Fields in the file information (file_info) structure.
    # For example, file_info =
    # [ '/WebDesign/EquationImages/equation001.png',  -- The file name.
    #   1,                                     -- Enum type: Is it a file? dir? on local? on remote? on both?
    #   datetime.datetime(2010, 2, 3, 17, 15), -- UTC encoded in a datetime class.
    #   4675]                                  -- File size in bytes.
    FILE_NAME = 0
    FILE_TYPE = 1
    FILE_DATE_TIME = 2
    FILE_SIZE = 3

    # Server settings.  The real values are loaded from the private YAML file.
    SERVER_SETTINGS_FILE_NAME = "/private/updateweb.yaml"
    SERVER_NAME = None
    USER_NAME = None
    PASSWORD_NAME = None
    FTP_ROOT_NAME = None
    FILE_SIZE_LIMIT_NAME = None

    # Map month names onto numbers.
    monthToNumber = {
        'Jan': 1,
        'Feb': 2,
        'Mar': 3,
        'Apr': 4,
        'May': 5,
        'Jun': 6,
        'Jul': 7,
        'Aug': 8,
        'Sep': 9,
        'Oct': 10,
        'Nov': 11,
        'Dec': 12}

    # List of directories to skip over when processing or uploading the web page.
    # Some are private but most are dirs of temporary files.
    # They will be listed as WARNING in the log.
    # Examples:
    #     My private admin settings directory.
    #     Git or SVN local admin directories.
    #     Compile build directories from XCode.
    #     PyCharm build directories.
    #     Python cache directories.
    #     Jupyter checkpoint directories.
    #     XCode temporary file crap.
    DIR_TO_SKIP = "private|.git|.github|.svn|build|XCodeOutput|Debug|Release|PyCharm|.idea|__pycache__|.ipynb_checkpoints|ModuleCache.noindex|SymbolCache.noindex|Primpoly-[a-z]"

    # List of files to skip when processing or uploading to the web page.
    # They will be listed as WARNING in the log.
    # Examples:
    #     MathJax yml file.
    #     .htaccess (because it doesn't show up on the output of ftp LIST, so we must upload manually)
    FILE_TO_SKIP = ".travis.yml|.svnignore|.htaccess"

    # Suffixes for temporary files which will be deleted during the cleanup
    # phase.
    TEMP_FILE_SUFFIXES = r"""         # Use Python raw strings.
        \.                            # Match the dot in the file name.
                                      # Now begin matching the file name suffix.
                                      # (?: non-capturing match for the regex inside the parentheses,
                                      # i.e. matching string cannot be retrieved later.
                                      # Now match any of the following file extensions:
        (?: o | obj | lib |           # Object files generated by C, C++, etc compilers
            pyc |                     # Object file generated by the Python compiler
            ilk | pdb | sup |         # Temp files from VC++ compiler
            idb | ncb | opt | plg |   # Temp files from VC++ compiler
            sbr | bsc | map | bce |   # Temp files from VC++ compiler
            res | aps | dep | db |    # Temp files from VC++ compiler
            jbf |                     # Paintshop Pro
            class | jar |             # Java compiler
            fas |                     # CLISP compiler
            swp | swo |               # Vim editor
            toc | aux |               # TeX auxilliary files (not .synctex.gz or .log)
            DS_Store | _\.DS_Store |  # macOS finder folder settings.
            _\.Trashes |              # macOS recycle bin
            gdb_history)              # GDB history
        $                             # Now we should see only the end of line.
        """

    # Special case: Vim temporary files contain a twiddle anywhere in the
    # name.
    VIM_TEMP_FILE_EXT = "~"

    # Suffixes for temporary directories which should be deleted during the
    # cleanup phase.
    TEMP_DIR_SUFFIX = r"""            # Use Python raw strings.
        (?: Debug | Release |         # C++ compiler
            ipch | \.vs |             # Temp directories from VC++ compiler
            \.Trashes | \.Trash)      # macOS recycle bin
        $
        """

    # File extension for an internally created temporary file.
    TEMP_FILE_EXT = ".new"

    # Identify source file types.
    HYPERTEXT_FILE_PATTERN = r"""     # Use Python raw strings.
        (\.                           # Match the filename suffix after the .
            (?: html | htm |          # HTML hypertext
                css)                  # CSS style sheet
        $)                            # End of line.
        """

    SOURCE_FILE_PATTERN = r"""          # Use Python raw strings.
        (?: makefile$ |                 # Any file called makefile is a source file.
                                        # Note the $ at the end so we don't reprocess .gitconfig.html -> .gitconfig.html.html
            .vimrc$ |                   # Vim script
            (.bashrc$ |                 # Bash configuration files.
             .bash_profile$ |
             .bash_logout$)
            |
            (.gitignore$ |              # Git configuration files.
             .gitignore_global$ |
             .gitconfig$)
            |
            (\.                         # Match the filename suffix after the .
                                        # Now match any of these suffixes:
                (?:
                    c | cpp | h | hpp | # C++ and C
                    js |                # Javascript
                    py |                # Python
                    lsp |               # LISP
                    ipynb |             # Jupyter notebook
                    m |                 # MATLAB
                    FOR | for | f |     # FORTRAN
                    yaml |              # YAML = JSON superset
                    tex |               # LaTeX
                    txt | dat |         # Data files
                    sh)                 # Bash
            $)                          # End of line.
        )
        """

    # Special case of certain HTML and CSS files for which we want to generate a syntax highlighted source code listing.
    SPECIAL_FILE_TO_HIGHLIGHT_PATTERN = r"""
        (?: ^life\.html$ |          # We want a listing of this particular HTML file.
            ^index\.html$ |         # I want to list my top level HTML file.  (There is only one file with this name at the top level web directory.)
            ^webPageDesign\.html$ | # and also this HTML example file, but no others.
            ^StyleSheet\.css$ )     # I want to list my style sheet.
        """

    # Files for which we want to generate a syntax highlighted source code listing.
    # Uses an f-string combined with a raw-string.
    FILE_TO_HIGHLIGHT_PATTERN = fr"""
        (?: {SPECIAL_FILE_TO_HIGHLIGHT_PATTERN} |
            {SOURCE_FILE_PATTERN} )
        """

    # Update my email address.
    # This is tricky:  Prevent matching and updating the name within in this
    # Python source file by using the character class brackets.
    OLD_EMAIL_ADDRESS = r"""
        artificer\!AT\!sean[e]rikoconnor\!DOT\!freeservers\!DOT\!com
        """
    NEW_EMAIL_ADDRESS = "seanerikoconnor!AT!gmail!DOT!com"

    # List of patterns to match, match groups to pull out of the old string, new strings to generate from these two items.
    # Read patterns and strings from the updateweb.yaml file.
    STRING_REPLACEMENT_LIST = []
    # Pairs of test strings and their correct match/replacements.
    STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST = []

    # Match a copyright line like this:
    #     Copyright (C) 1999-2025 by Sean Erik O'Connor.  All Rights Reserved.
    # Extract the copyright symbol which can be ascii (C) or HTML &copy; and extract the old year.
    TWO_DIGIT_YEAR_FORMAT = "%02d"
    COPYRIGHT_LINE = r"""
        Copyright                       # Copyright.
        \s+                             # One or more spaces.
        (?P<symbol> \(C\) | ©)          # Match and extract the copyright symbol.
        \D+                             # Any non-digits.
        (?P<old_year>[0-9]+)            # Match and extract the old copyright year, place it into variable 'old_year'
        -                               # hyphen
        ([0-9]+)                        # New copyright year.
        \s+                             # One or more spaces.
        by\s+Sean\sErik                 # Start of my name.  This way we don't rewrite somebody else's copyright notice.
        """

    # Match a line containing the words,
    #     last updated YY
    # and extract the two digit year YY.
    LAST_UPDATED_LINE = r"""
        last\s+                     # Match the words "last updated"
        updated\s+
        \d+                         # Day number
        \s+                         # One or more blanks or tab(
        [A-Za-z]+                   # Month
        \s+                         # One or more blanks or tabs
        (?P<year>\d+)               # Two digit year.  Place it into the variable 'year'
        """

    # Web server root directory.
    DEFAULT_ROOT_DIR = "/"

    # The ftp listing occasionally shows a date newer than the actual date.
    # On my server, it could be 6 months newer when we are near New Year's Day.  Typically the server file time is only a 1 or 2 minutes newer.
    # But if the remote file time is much newer, it might be an old file with a bad date/time.
    # Upload the file to be safe.
    # How to see the time differences from the log if they are large:
    #     egrep -o "Remote file.*is MUCH newer.*days" logRemote.txt
    #     Remote file Finance/MortgageLoanDerivation.tex.html is MUCH newer[8.0 minutes] by 885753.0 seconds = 14762.5 minutes = 246.0 hours = 10.3 days
    # How to see the time differences from the log if they are small and we wait and NOT upload:
    #     egrep -o "Remote file.*is newer.*days" logRemote.txt
    #     Remote file error404.html is newer by 102.0 seconds = 1.7 minutes = 0.0 hours = 0.0 days
    #     Remote file index.html is newer by 113.0 seconds = 1.9 minutes = 0.0 hours = 0.0 days
    MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD = 8.0

    # Upload only if we are newer by more than a few minutes.  Allows for a little slop in time stamps on server or host.
    MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD = 3.0

    # An ftp list command line should be at least this many chars, or we'll
    # suspect and error.
    MIN_FTP_LINE_LENGTH = 7

    # Parse an ftp listing, extracting <bytes> <mon> <day> <hour> <min> <year> <filename>
    # ftp listings are generally similar to UNIX ls -l listings.
    #
    # Some examples:
    #
    # (1) Freeservers ftp listing,
    #
    #          0          1        2     3           4    5   6   7      8
    #     drwxr-xr-x    3 1000     1000         4096 Nov 18  2006 Electronics
    #     -rw-r--r--    1 1000     1000        21984 Jun  4 03:46 StyleSheet.css
    #     -rw-r--r--    1 1000     1000         2901 Sep 26 17:12 allclasses-frame.html
    #
    # (2) atspace ftp listing,
    #
    #     drwxr-xr-x    3 seanerikoconnor vusers       49 Apr  7  2006 Electronics
    #     -rw-r--r--    1 seanerikoconnor vusers    21984 Jun  4 04:03 StyleSheet.css
    #
    FTP_LISTING = r"""
        [drwx-]+                    # Unix type file mode.
        \s+                         # One or more blanks or tabs.
        \d+                         # Number of links.
        \s+
        \w+                         # Owner.
        \s+
        \w+                         # Group.
        \s+
        (?P<bytes> \d+)             # File size in bytes, placed into the variable 'bytes'.
        \s+
        (?P<mon> \w+)               # Month modified, placed into the variable 'mon'.
        \s+
        (?P<day> \d+)               # Day modified, placed into the variable 'day'.
        \s+
        (
            (?P<hour> \d+)          # Hour modified, placed into the variable 'hour'.
            :
            (?P<min> \d+)           # Minute modified, placed into the variable 'min'.
            |
            (?P<year> \d+)          # If hours and minutes are absent (happens when year is not the current year),
                                    # extract the year instead.
        )
        \s+
        (?P<filename> [A-Za-z0-9"'.\-_,~()=+#]+)  # Path and file name containing letters, numbers,
                                                  # and funny characters.  We must escape some of
                                                  # these characters with a backslash, \.
        """

    # HTML header up to the style sheet.
    BASIC_HTML_BEGIN = \
        """
        <!DOCTYPE html>
        <html lang="en-US">  <!-- Set language of this page to USA English. -->

        <head>
            <!-- This page uses Unicode characters. -->
            <meta charset="utf-8">

            <!-- Set viewport to actual device width.  Any other settings makes the web page initially appear zoomed-in on mobile devices. -->
            <meta name="viewport" content="width=device-width, initial-scale=1">

            <!-- Title appears in the web browser tab for this page.  The browser also uses it to bookmark this page. -->
            <title>Sean Erik O'Connor - Home Page and Free Mathematical Software.</title>

            <!-- Search engines will search using words in this description.  They will also display title in their search results. -->
            <meta name="description" content="Syntax Colored Source Code Listing">

            <!-- Some content management software uses the author's name. -->
            <meta name="author" content="Sean Erik O'Connor">

            <meta name="copyright" content="Copyright (C) 1986-2025 by Sean Erik O'Connor.  All Rights Reserved.">

            <!-- Begin style sheet insertion -->
            <style>
                /* Default settings for all my main web pages. */
                body
                {
                    /* A wide sans-serif font is more readable on the web. */
                    font-family: Verdana, Geneva, "Trebuchet MS", sans-serif ;

                    /* Set the body font size a little smaller than the user's default browser setting. */
                    font-size: 0.8em ;

                    /* Black text is easier to read. */
                    color: black ;

                    /* More vertical space between lines for more pleasant reading.  Use a unitless font height multiplier.
                       Length and percentage percentage values can give scrunched text due to poor inheritance behavior. */
                    line-height: 1.7 ;
                }

                <!-- Now prepare to add the syntax coloring style sheet from Pygment -->
        """

    # After the style sheet and up to the start of the article in the body.
    BASIC_HTML_MIDDLE = \
        """
            </style>
        </head>

        <body>
            <article class="content">
        """

    # After the source code listing, finish the article, body and html document.
    BASIC_HTML_END = \
        """
            </article>
        </body>

        </html>
        """

    def __init__(self):
        """Set up the user settings:  find the local web root directory, load
        the private server settings from the YAML parameter file, and
        precompile the regular expressions.

        Raises:
            UpdateWebException:  If the YAML settings file cannot be loaded.
        """
        # Root of the local copy of the web site;  filled in per-platform below.
        self.local_root_dir: str = ""

        # Import the user settings from the parameter file.
        self.get_local_root_dir()
        self.get_server_settings()

        self.precompile_regular_expressions()

    def get_server_settings(self):
        """
        Read web account private settings from a secret offline parameter file.
        These also hold patterns to match and replace in all of our source pages.

        Raises:
            UpdateWebException:  If the YAML file cannot be read or does not
            contain exactly two YAML documents.
        """
        # Private file which contains my account settings.
        settings_file_name = self.local_root_dir + self.SERVER_SETTINGS_FILE_NAME
        # Recommended by
        # https://www.cloudbees.com/blog/yaml-tutorial-everything-you-need-get-started
        try:
            # Use a context manager so the file is closed even on error.
            # yaml.load_all is lazy, so consume all documents while the
            # stream is still open.
            with open(settings_file_name, "r") as stream:
                yaml_document_list: list[Any] = list(yaml.load_all(stream, Loader))
        except OSError as detail:
            logging.error(f"Cannot open the YAML file {settings_file_name:s}. Unable to read the settings because: {str(detail):s}")
            # Rethrow the exception higher.
            raise UpdateWebException("Cannot load the settings. See the log file for details. Aborting... ") from detail
        num_yaml_docs = len(yaml_document_list)
        if num_yaml_docs != 2:
            logging.error(f"Wrong number of YAML documents = {num_yaml_docs:3d} in the user settings file. Aborting...")
            raise UpdateWebException("Cannot load the settings. See the log file for details. Aborting... ")

        # Load all the server settings from document #0 in the YAML file.
        self.SERVER_NAME = yaml_document_list[0]['ftp_server_name']
        self.USER_NAME = yaml_document_list[0]['ftp_user_name']
        self.PASSWORD_NAME = yaml_document_list[0]['ftp_password']
        self.FTP_ROOT_NAME = yaml_document_list[0]['remote_directory']
        self.FILE_SIZE_LIMIT_NAME = int(yaml_document_list[0]['file_size_limit_Kb'])

        # Reset both lists on the instance so that creating several
        # UserSettings objects (e.g. repeatedly in unit test setUp) does not
        # accumulate duplicate entries in the shared class-level lists.
        self.STRING_REPLACEMENT_LIST = []
        self.STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST = []

        # Load all the tuples which contain patterns to match and the strings to replace, from document #1 in the YAML file.
        pat_rep_yaml_list = yaml_document_list[1]['pattern_match_replacement_string_list']
        for pat_rep in pat_rep_yaml_list:
            # Fetch the regular expression and compile it for speed.
            verbose_regex = pat_rep['pattern']
            pat = re.compile(verbose_regex, re.VERBOSE | re.IGNORECASE)
            # Since we use raw strings, we need to strip off leading and trailing whitespace.
            replacement_string = pat_rep['replacement_string'].strip()
            self.STRING_REPLACEMENT_LIST.append([pat, replacement_string])

        # Load the test and verify strings.
        test_verify_strings_list = yaml_document_list[1]['test_verify_string_list']
        for test_verify_string in test_verify_strings_list:
            test_string = test_verify_string['test_string'].strip()
            verify_string = test_verify_string['verify_string'].strip()
            self.STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST.append([test_string, verify_string])

        print("   ...done!", flush=True)
        return

    def get_local_root_dir(self):
        """Get the local website root directory on this platform.

        NOTE(review):  on platforms other than macOS, Linux and Cygwin the
        root directory is silently left as the empty string — confirm this
        is intended.
        """
        # Each platform has a definite directory for the web page.
        local_web_dir_path = "/Desktop/Sean/WebSite"

        # macOS.
        if sys.platform.startswith('darwin'):
            self.local_root_dir = str(Path.home()) + local_web_dir_path
        # My Cyperpower PC running Ubuntu Linux.
        elif sys.platform.startswith('linux') or sys.platform.startswith('cygwin'):
            self.local_root_dir = str(Path.home()) + local_web_dir_path
        return

    def precompile_regular_expressions(self):
        """For speed precompile the regular expression search patterns.

        The verbose string class attributes are replaced on the instance by
        their compiled forms;  the class-level strings stay untouched.
        """
        self.COPYRIGHT_LINE = re.compile(self.COPYRIGHT_LINE, re.VERBOSE | re.IGNORECASE)
        self.FTP_LISTING = re.compile(self.FTP_LISTING, re.VERBOSE | re.IGNORECASE)
        self.TEMP_FILE_SUFFIXES = re.compile(self.TEMP_FILE_SUFFIXES, re.VERBOSE | re.IGNORECASE)
        self.TEMP_DIR_SUFFIX = re.compile(self.TEMP_DIR_SUFFIX, re.VERBOSE)
        self.SOURCE_FILE_PATTERN = re.compile(self.SOURCE_FILE_PATTERN, re.VERBOSE)
        self.HYPERTEXT_FILE_PATTERN = re.compile(self.HYPERTEXT_FILE_PATTERN, re.VERBOSE)
        self.OLD_EMAIL_ADDRESS = re.compile(self.OLD_EMAIL_ADDRESS, re.VERBOSE | re.IGNORECASE)
        self.FILE_TO_HIGHLIGHT_PATTERN = re.compile(self.FILE_TO_HIGHLIGHT_PATTERN, re.VERBOSE)
        self.LAST_UPDATED_LINE = re.compile(self.LAST_UPDATED_LINE, re.VERBOSE | re.IGNORECASE)
574
575# ----------------------------------------------------------------------------
576# Unit test individual functions.
577# ----------------------------------------------------------------------------
578
class UnitTest(unittest.TestCase):
    """Unit tests for the regular expressions and file time/date helpers.

    NOTE(review):  several tests depend on the author's local web site files
    and on the current year (e.g. hard-coded 2025 dates), so they are
    expected to pass only on the author's machine."""
    def setUp(self):
        # Creating UserSettings also reads the private YAML settings file.
        self.user_settings = UserSettings()
        self.user_settings.get_local_root_dir()

    def tearDown(self):
        """Clean up the UnitTest class."""
        self.user_settings = None

    def test_copyright_updating(self):
        """Test copyright line updating to the current year."""
        # Prevent web cleaning from rewriting strings by splitting them up and concatenating them.
        line_before_update = "Copyright (C) 19" + "99-20" + "20" + " by Sean Erik O'Connor. All Rights Reserved. Copyright © 1999-2025 by Sean Erik O'Connor"
        line_after_update_actual = "Copyright (C) 1999-2025 by Sean Erik O'Connor. All Rights Reserved. Copyright © 1999-2025 by Sean Erik O'Connor"
        pat = self.user_settings.COPYRIGHT_LINE
        match = pat.search(line_before_update)

        if match:
            old_year = int(match.group('old_year'))
            # Same as call to self.get_current_year():
            current_year = int(time.gmtime()[0])
            if old_year < current_year:
                # We matched and extracted the old copyright symbol into the variable
                # 'symbol' using the pattern syntax (?P<symbol> \(C\) | ©)
                # We now insert it back by placing the special syntax
                # \g<symbol> into the replacement string.
                new_copyright = r"Copyright \g<symbol> \g<old_year>-" + str(current_year) + " by Sean Erik"
                line_after_update_computed = pat.sub(new_copyright, line_before_update)
                self.assertEqual(
                    line_after_update_actual,
                    line_after_update_computed,
                    f"newline = |{line_after_update_actual:s}| line_after_update_computed = |{line_after_update_computed:s}|")
            else:
                print( "old_year >= current_year" )
                self.fail()
        else:
            print( "no match for copyright pattern" )
            self.fail()

    def test_extract_filename_from_ftp_listing(self):
        """Test parsing an FTP listing."""
        ftp_line = "-rw-r--r--    1 1000     1000         2901 Sep 26 17:12 allclasses-frame.html"
        extracted_file_name = "allclasses-frame.html"
        pat = self.user_settings.FTP_LISTING
        match = pat.search(ftp_line)
        if match:
            filename = match.group('filename')
            self.assertEqual(
                filename,
                extracted_file_name,
                f"ftp_line = {ftp_line:s} extracted file name = {extracted_file_name:s}")
        else:
            self.fail()

    def test_get_file_time_and_date(self):
        """Test getting a file time and date."""
        # Point to an old file.
        file_name = "./Images/home.png"
        full_file_name = self.user_settings.local_root_dir + '/' + file_name
        # Get the UTC time.
        file_epoch_time = os.path.getmtime(full_file_name)
        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
        # Create a datetime object for the file.
        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class; year, month, day, hour, minute, seconds.
        # Check if the file time matches what we would see if we did ls -l <file_name>
        computed = f"file {file_name:s} datetime {d.ctime():s}"
        actual = "file ./Images/home.png datetime Wed Jan  1 03:42:41 2025"
        self.assertEqual(computed, actual)

    def test_set_file_time_and_date(self):
        """Test setting a file time and date."""
        file_name = "./Images/home.png"
        full_file_name = self.user_settings.local_root_dir + '/' + file_name
        # Create a temporary file in the same directory.
        # NOTE(review):  there is no '/' separator here, so the temp file is
        # actually created beside the root dir as "<root>temporal.tmp" — confirm intended.
        temp_file_name = "temporal.tmp"
        full_temp_file_name = self.user_settings.local_root_dir + temp_file_name
        try:
            with open(full_temp_file_name, 'w') as fp:
                fp.write("The End of Eternity")
        except OSError as detail:
            logging.error(f"Cannot open or write to the file {full_temp_file_name:s}: {str(detail):s} Aborting...")
            raise UpdateWebException("Failed the unit test for setting time and date of a file. See the log file for details. Aborting...") from detail
        # Get the old file time.  Set the temporary file to the same time.
        file_stat = os.stat(full_file_name)
        os.utime(full_temp_file_name, (file_stat[stat.ST_ATIME], file_stat[stat.ST_MTIME]))
        # What is the temporary file's time now?
        file_epoch_time = os.path.getmtime(full_temp_file_name)
        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class; year, month, day, hour, minute, seconds.
        # Is the temporary file time set properly?
        computed = f"file {file_name:s} datetime {d.ctime():s}"
        actual = "file ./Images/home.png datetime Wed Jan  1 03:42:41 2025"
        self.assertEqual(computed, actual)
        os.remove(full_temp_file_name)

    def test_difference_of_time_and_date(self):
        """Test a date difference calculation."""
        file_name = "./Images/home.png"
        full_file_name = self.user_settings.local_root_dir + '/' + file_name
        # Get the UTC time.
        file_epoch_time = os.path.getmtime(full_file_name)
        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
        # Create a datetime object for the file.
        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class; year, month, day, hour, minute, seconds.
        # Slightly change the date and time by adding 1 minute.
        d2 = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5]+1)  # year, month, day, hour, minute, second
        time_delta = d2 - d
        seconds_different = time_delta.total_seconds()
        minutes_different = seconds_different / 60.0
        hours_different = minutes_different / 60.0
        days_different = hours_different / 24.0
        computed = f"difference {days_different:8.5f} days, {hours_different:8.5f} hours {minutes_different:8.5f} minutes, {seconds_different:8.5f} seconds"
        actual = "difference  0.00001 days,  0.00028 hours  0.01667 minutes,  1.00000 seconds"
        self.assertEqual(computed, actual)

    def test_pattern_match_dir_to_skip(self):
        """Test if skipping certain named directories is recognizing the dir names."""
        dir_skip = "Primpoly-cswhfrwgwdikgzfdpiorbeaiennz"
        pat = re.compile(self.user_settings.DIR_TO_SKIP)
        if pat.search(dir_skip):
            self.assertTrue(True)
        else:
            self.assertTrue(False)

    def test_file_name_to_syntax_highlight(self):
        """Test if syntax highlighting recognizes file names to highlight."""
        file_name1 = "Computer/hello.lsp"
        file_name2 = "Computer/life.html"
        p = self.user_settings.FILE_TO_HIGHLIGHT_PATTERN
        if p.search(Path(file_name1).name) and p.search(Path(file_name2).name):
            self.assertTrue(True)
        else:
            self.assertTrue(False)

    def test_user_settings(self):
        """Test whether user settings are correctly initialized."""
        computed = f"File size limit = {int(self.user_settings.FILE_SIZE_LIMIT_NAME):d} K"
        actual = "File size limit = 50000 K"
        self.assertEqual(computed, actual, "File size limit settings are incorrect.")

    def test_check_replace_substring(self,debug=True):
        """Test the substring pattern match and replacement functions which use the list of match/replace pairs in the YAML file.
        For troubleshooting, turn on debug.
        """
        test_verify_pairs = self.user_settings.STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST
        # Iterate over all test strings.
        for pair in test_verify_pairs:
            [test_string, verify_string] = pair
            if debug:
                print( f">>>>>>> next test string = {test_string}")
                print( f">>>>>>> next verify string = {verify_string}")
            # Iterate over all patterns and replacements.
            for match_replace_tuple in self.user_settings.STRING_REPLACEMENT_LIST:
                [pat, rep_string] = match_replace_tuple
                # NOTE(review):  these two prints run even when debug is False — intended?
                print( f"\t-------> next pattern = {pat}")
                print( f"\t-------> next replacement = {rep_string}")
                match = pat.search(test_string)
                # The pattern match succeeds.
                if match:
                    try:
                        sub = pat.sub(rep_string, test_string)
                        # String replacement succeeds for this pattern/replace pair iteration.
                        if debug:
                            print( f"\t\t.......> match and replace: {test_string} ---> {sub}")
                        test_string = sub
                    except IndexError as detail:
                        print(f"\t\t.......> Caught an exception: {str(detail):s}. Replacement failed.")
                        if debug:
                            self.assertTrue(False)
                elif debug:
                    print( f"\t\t.......> no match for pattern = {pat} in test string = {test_string}")
                # No match, so go on to the next pattern and don't change test_string.
            # Done with all pattern/replace on test string.
            # Check this test string in the list.
            self.assertEqual(test_string, verify_string, f"\ntest_string = |{test_string:s}|\nverify_string = |{verify_string:s}|\n")
            if debug:
                print( f"\t******* DONE with all pattern matches and replacements on this test/verify string pair.\n" )
757
758# ----------------------------------------------------------------------------
759# Command line options.
760# ----------------------------------------------------------------------------
761
class CommandLineSettings(object):
    """Parse the command line and copy the chosen options into the user settings."""

    def __init__(self, user_settings, raw_args=None):
        """Define all options, parse raw_args (or sys.argv when None), and
        switch on the corresponding flags in user_settings."""
        parser = argparse.ArgumentParser(description="updateweb options")

        # -v:  log all changes, not just warnings and errors.
        parser.add_argument("-v", "--verbose",
                            help="Turn on verbose mode to log everything",
                            action="store_true")

        # -c:  clean up the local website only;  skip the remote update.
        parser.add_argument("-c", "--clean",
                            help="Do a cleanup on the local web site only.",
                            action="store_true")

        # -m:  ALSO upload the MathJax directory tree to the server.
        parser.add_argument("-m", "--mathjax",
                            help="""ALSO upload mathjax directory.\
            Do this if you have a new version of MathJax or if you've reset your server and deleted the /mathjax remote directory.\
            You'll need to do git restore on any altered files in the local branch and git clean -f to remove any newly created files.\
            NOTE: If you did reset your server and delete all files, run the command find . -name '*.*' -exec touch {} \\; from the web page root directory.\
            Also run find . -name '*' -exec touch {} \\; This will ensure accurate times on the server.""",
                            action="store_true")

        # -t:  run the unit tests only.
        parser.add_argument("-t", "--test",
                            help="Run unit tests.",
                            action="store_true")

        args = parser.parse_args(raw_args)

        # Only switch flags on;  never clear a flag a caller may have preset.
        if args.verbose:
            user_settings.VERBOSE = True
        if args.clean:
            user_settings.CLEAN = True
        if args.test:
            user_settings.UNITTEST = True
        if args.mathjax:
            user_settings.MATHJAX = True
810
811# ----------------------------------------------------------------------------
812# Base class which describes my web site overall.
813# ----------------------------------------------------------------------------
814
class WebSite(object):
    """
    Abstract class used for analyzing both local and remote (ftp server) websites.
    Contains the web-walking functions which traverse the directory structures and files.
    These will be overloaded in the subclasses with differently specialized methods for either walking a disk drive directory with ls commands or else walking a remote directory with FTP commands.
    Child classes may define additional functions which only they need.

    Attributes:
        user_settings -- settings object passed in at construction; field indices
                         (FILE_NAME, FILE_TYPE, ...) and skip patterns are read from it.
        queue         -- directories discovered but not yet expanded by walk().
        directories   -- all directories traversed, as paths relative to the root.
        files         -- all files traversed; each entry is a file-info list.
        root_dir      -- root directory obtained from the subclass get_root_dir().
    """

    def __init__(self, settings):
        """Set up root directories.

        settings -- user-settings object; stored as self.user_settings and read by
                    walk()/strip_root() for skip patterns and file-info indices.
        """

        # Import the user settings.
        self.user_settings = settings

        # Queue keeps track of directories not yet processed.
        self.queue = []

        # List of all directories traversed.
        self.directories = []

        # List of files traversed, with file information.
        self.files = []

        # Find out the root directory and go there.  Both methods are
        # supplied by the subclass (local disk vs. remote ftp server).
        self.root_dir = self.get_root_dir()
        self.go_to_root_dir(self.root_dir)

    # @staticmethod marks a class function:  there is no self argument, and it can be
    # called without an instance, from anywhere, as WebSite.get_current_year().
    @staticmethod
    def get_current_year():
        """Get the current year (UTC)."""
        # time.gmtime()[0] is the UTC year field.
        return int(time.gmtime()[0])

    @staticmethod
    def get_current_two_digit_year():
        """Get the last two digits of the current year."""
        return WebSite.get_current_year() % 100

    @staticmethod
    def is_file_info_type(file_info):
        """Check if we have a file information structure or merely a simple file name.

        Returns True for a file-info list, False for a plain string path.
        Raises UpdateWebException for any other type.
        """
        try:
            if isinstance(file_info, list):
                return True
            elif isinstance(file_info, str):
                return False
            else:
                logging.error("is_file_info_type found a bad type. Aborting...")
                raise UpdateWebException("Internal error for file type. See the log file for details. Aborting... ")
        except TypeError as detail:
            logging.error(f"is_file_info_type found a bad type {str(detail):s}. Aborting...")
            raise UpdateWebException("Internal error for file type. See the log file for details. Aborting... ") from detail

    def get_root_dir(self):
        """Subclass: Put code here to get the root directory"""
        return ""

    def go_to_root_dir(self, root_dir):
        """Subclass: Put code here to go to the root directory"""
        pass  # Python's do-nothing statement.

    def one_level_down(self, d):
        """Subclass: Fill in with a method which returns a list of the
        directories and files immediately beneath dir"""
        return [], []

    def walk(self, d, type_of_tree_search=TreeWalkSettings.BREADTH_FIRST_SEARCH):
        """Walk a directory in either depth first or breadth first order. BFS is the default.

        Appends accepted files to self.files and accepted directories (relative to the
        root) to self.directories.
        NOTE(review): self.queue is shared between this call and the recursive calls
        below, so the traversal is a recursion/queue hybrid -- preserve statement
        order if modifying.
        """

        # Get all subfiles and subdirectories off this node.
        subdirectories, subfiles = self.one_level_down(d)

        # Add all the subfiles in order.
        for f in subfiles:

            name = self.strip_root(f)
            logging.debug(f"Webwalking: Adding file {name[self.user_settings.FILE_NAME]:s} to list.")

            # Some files are private so skip them from consideration.
            pat = re.compile(self.user_settings.FILE_TO_SKIP)

            if pat.search(name[self.user_settings.FILE_NAME]):
                logging.warning(
                    f"Webwalking: Skipping private file {name[self.user_settings.FILE_NAME]:s}")
            # Don't upload the log file due to file locking problems.
            elif name[self.user_settings.FILE_NAME].find(self.user_settings.LOGFILENAME) >= 0:
                logging.debug(f"Webwalking: Skipping log file {name[self.user_settings.FILE_NAME]:s}")
            # File size limit on some servers.
            # NOTE(review): the comment above looks stale -- no size check is performed here.
            else:
                self.files.append(name)

        # Queue up the subdirectories.
        for d in subdirectories:
            # Some directories are private such as .git or just temporary file
            # caches so skip them from consideration.
            pat = re.compile(self.user_settings.DIR_TO_SKIP)
            if pat.search(d):
                logging.warning(f"Webwalking: Skipping private dir {d:s}")
            else:
                logging.debug(f"Webwalking: Pushing dir {d:s} on the queue.")
                self.queue.append(d)

        # Search through the directories.
        while len(self.queue) > 0:
            # For breadth first search, remove from beginning of queue.
            if type_of_tree_search == TreeWalkSettings.BREADTH_FIRST_SEARCH:
                d = self.queue.pop(0)

            # For depth first search, remove from end of queue.
            elif type_of_tree_search == TreeWalkSettings.DEPTH_FIRST_SEARCH:
                d = self.queue.pop()
            # Unknown search type: fall back to breadth first behavior.
            else:
                d = self.queue.pop(0)

            name = self.strip_root(d)
            logging.debug(f"Webwalking: Adding relative directory {name:s} to list, full path = {d:s}.")
            self.directories.append(name)

            # Recurse into the directory just popped off the queue.
            self.walk(d)

    def strip_root(self, file_info):
        """Return a path, but strip off the root directory.

        file_info -- either a plain path string or a file-info list; the same kind
                     of object is returned with only the path portion rewritten.
        """

        root = self.root_dir

        # Extract the file name.
        if self.is_file_info_type(file_info):
            name = file_info[self.user_settings.FILE_NAME]
        else:
            name = file_info

        # e.g. root = / and name = /Art/foo.txt yields stripped_path = Art/foo.txt
        # but root = /Sean and name = /Sean/Art/foo.txt yields stripped_path =
        # Art/foo.txt
        lenroot = len(root)
        if root == self.user_settings.DEFAULT_ROOT_DIR:
            pass
        else:
            # Also skip the path separator which follows a non-default root.
            lenroot = lenroot + 1

        stripped_path = name[lenroot:]

        if self.is_file_info_type(file_info):
            # Update the file name only; keep type, date/time and size unchanged.
            return [stripped_path,
                    file_info[self.user_settings.FILE_TYPE],
                    file_info[self.user_settings.FILE_DATE_TIME],
                    file_info[self.user_settings.FILE_SIZE]]
        else:
            return stripped_path

    def append_root_dir(self, root_dir, name):
        """Append the root directory to a path.

        e.g. root = /, and name = Art/foo.txt yields /Art/foo.txt
        but root = /Sean, and name = Art/foo.txt yields /Sean/Art/foo.txt
        """

        if root_dir == self.user_settings.DEFAULT_ROOT_DIR:
            return root_dir + name
        else:
            return root_dir + "/" + name

    def scan(self):
        """Scan the directory tree recursively from the root"""
        logging.debug(f"Webwalking: Beginning recursive directory scan from root directory {self.root_dir:s}")
        self.walk(self.root_dir)

    def modtime(self, f):
        """Subclass: Get file modification time"""
        pass

    def finish(self):
        """Quit web site"""
        logging.debug(f"Finished with WebSite object of class {type(self)}")
        pass
990
991# ----------------------------------------------------------------------------
992# Subclass which knows about the local web site on disk.
993# ----------------------------------------------------------------------------
994
995class LocalWebSite(WebSite):
996 """Walk the local web directory on local disk down from the root.
997 Clean up temporary files and do other cleanup work."""
998
999 def __init__(self, settings):
1000 """Go to web page root and list all files and directories."""
1001
1002 # Initialize the parent class.
1003 WebSite.__init__(self, settings)
1004
1005 self.root_dir = self.get_root_dir()
1006 logging.debug(f"LocalWebSite.__init__(): \tRoot directory: {self.root_dir:s}")
1007
1008 def get_root_dir(self):
1009 """Get the name of the root directory"""
1010 return self.user_settings.local_root_dir
1011
1012 def go_to_root_dir(self, root_dir):
1013 """Go to the root directory"""
1014
1015 # Go to the root directory.
1016 logging.debug(f"LocalWebSite.go_to_root_dir(): \tchdir to root directory: {root_dir:s}")
1017 os.chdir(root_dir)
1018
1019 # Read it back.
1020 self.root_dir = os.getcwd()
1021 logging.debug(f"LocalWebSite.go_to_root_dir(): \tgetcwd root directory: {self.root_dir:s}")
1022
1023 def one_level_down(self, d):
1024 """List all files and subdirectories in the current directory, dir. For files, collect file info
1025 such as time, date and size."""
1026
1027 directories = []
1028 files = []
1029
1030 # Change to current directory.
1031 os.chdir(d)
1032
1033 # List all subdirectories and files.
1034 dir_list = os.listdir(d)
1035
1036 if dir_list:
1037 for line in dir_list:
1038 # Add the full path prefix from the root.
1039 name = self.append_root_dir(d, line)
1040 logging.debug(f"LocalWebSite.one_level_down(): \tlocal dir or file {name:s}")
1041
1042 # Is it a directory or a file?
1043 if os.path.isdir(name):
1044 directories.append(name)
1045 elif os.path.isfile(name):
1046 # First assemble the file information of name, time/date and size into a list.
1047 # Can index it like an array. For example,
1048 # file_info =
1049 # [ '/WebDesign/EquationImages/equation001.png', -- The file name.
1050 # 1, -- Enum type FileType.FILE = 1.
1051 # datetime.datetime(2010, 2, 3, 17, 15), -- UTC encoded in a date/time class.
1052 # 4675] -- File size in bytes.
1053 file_info = [name,
1054 FileType.FILE,
1055 self.get_file_date_time(name),
1056 self.get_file_size(name)]
1057 files.append(file_info)
1058
1059 # Sort the names into order.
1060 if directories:
1061 directories.sort()
1062 if files:
1063 files.sort()
1064
1065 return directories, files
1066
1067 @staticmethod
1068 def get_file_date_time(file_name):
1069 """Get a local file time and date in UTC."""
1070
1071 file_epoch_time = os.path.getmtime(file_name)
1072 file_time_utc = time.gmtime(file_epoch_time)[0: 6]
1073 # Create a datetime class from the UTC year, month, day, hour, minute, seconds.
1074 d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])
1075 return d
1076
1077 @staticmethod
1078 def get_file_size(file_name):
1079 """Get file size in bytes."""
1080 return os.path.getsize(file_name)
1081
1082 @staticmethod
1083 def clean_up_temp_file(temp_file_name, file_name, changed):
1084 """Remove the original file, rename the temporary file name to the original name.
1085 If there are no changes, just remove the temporary file.
1086 """
1087
1088 if changed:
1089 # Remove the old file now that we have the rewritten file.
1090 try:
1091 os.remove(file_name)
1092 logging.debug(f"Changes were made. Removed original file {file_name:s}")
1093 except OSError as detail:
1094 logging.error(f"Cannot remove old file {file_name:s}: {str(detail):s}. Need to remove it manually.")
1095
1096 # Rename the new file to the old file name.
1097 try:
1098 os.rename(temp_file_name, file_name)
1099 logging.debug(f"Renamed temp file {temp_file_name:s} to original file {file_name:s}")
1100 except OSError as detail:
1101 logging.error(f"Cannot rename temporary file {temp_file_name:s} to old file name {file_name:s}: {str(detail):s}. Need to rename manually")
1102 else:
1103 # No changes? Remove the temporary file.
1104 try:
1105 os.remove(temp_file_name)
1106 logging.debug(f"No changes were made. Removed temporary file {temp_file_name:s}")
1107 except OSError as detail:
1108 logging.error(f"Cannot remove temporary file {temp_file_name:s}: {str(detail):s}. Need to remove it manually.")
1109 return
1110
1111 @staticmethod
1112 def process_lines_of_file(in_file_name, out_file_name, process_line_function_list=None):
1113 """
1114 Process each line of a file with a list of functions. Create a new temporary file.
1115
1116 The default list is None which means make an exact copy.
1117 """
1118
1119 # Assume no changes.
1120 changed = False
1121
1122 # Open both input and output files for processing. Check if we cannot do it.
1123 fin = None
1124 try:
1125 fin = open(in_file_name, "r")
1126 except IOError as detail:
1127 logging.error(f"process_lines_of_file(): \tCannot open file {in_file_name:s} for reading: {str(detail):s} Aborting...")
1128 if fin is not None:
1129 fin.close()
1130 raise UpdateWebException("Internal error for processing a file. See the log file for details. Aborting... ") from detail
1131 fout = None
1132 try:
1133 fout = open(out_file_name, "w")
1134 except IOError as detail:
1135 logging.error(f"process_lines_of_file(): \tCannot open file {out_file_name:s} for writing: {str(detail):s} Aborting...")
1136 if fout is not None:
1137 fout.close()
1138 raise UpdateWebException("Internal error for processing a file. See the log file for details. Aborting... ") from detail
1139
1140 # Read each line of the file, aborting if there is a read error.
1141 try:
1142 line = fin.readline()
1143
1144 # Rewrite the next line of the file using all the rewrite functions.
1145 while line:
1146 original_line = line
1147 # If we have one or more rewrite functions...
1148 if process_line_function_list is not None:
1149 # ...apply each rewrite functions to the line, one after the other in order.
1150 for processLineFunction in process_line_function_list:
1151 if processLineFunction is not None:
1152 line = processLineFunction(line)
1153
1154 if original_line != line:
1155 logging.debug(f"Rewrote the line: >>>{original_line:s}<<< into >>>{line:s}<<< for file {in_file_name:s}")
1156 changed = True
1157
1158 fout.write(line)
1159
1160 line = fin.readline()
1161
1162 fin.close()
1163 fout.close()
1164 except IOError as detail:
1165 logging.error(f"File I/O error during reading/writing file {in_file_name:s} in process_lines_of_file: {str(detail):s} Aborting...")
1166 raise UpdateWebException("Internal error for processing a file. See the log file for details. Aborting... ") from detail
1167
1168 if changed:
1169 logging.debug(f"process_lines_of_file(): \tRewrote original file {in_file_name:s}."
1170 f"Changes are in temporary copy {out_file_name:s}")
1171
1172 # Return True if any lines were changed.
1173 return changed
1174
1175 def clean(self):
1176 """Scan through all directories and files in the local on disk website and clean them up."""
1177
1178 num_source_files_changed = 0
1179 num_source_files_syntax_highlighted = 0
1180
1181 logging.debug("Cleaning up the local web page.")
1182
1183 if self.directories is None or self.files is None:
1184 logging.error("Web site has no directories or files. Aborting...")
1185 raise UpdateWebException("Internal error for cleaning up the local web site. See the log file for details. Aborting... ")
1186
1187 for d in self.directories:
1188
1189 if self.is_temp_dir(d):
1190 # Add the full path prefix from the root.
1191 name = self.append_root_dir(self.get_root_dir(), d)
1192 try:
1193 logging.debug(f"Removing temp dir {self.root_dir:s} recursively")
1194 shutil.rmtree(name)
1195 except OSError as detail:
1196 logging.error(f"Cannot remove temp dir {name:s}: {str(detail):s}")
1197
1198 for f in self.files:
1199 # Add the full path prefix from the root.
1200 full_file_name = self.append_root_dir(
1201 self.get_root_dir(), f[self.user_settings.FILE_NAME])
1202
1203 # Remove all temporary files.
1204 if self.is_temp_file(f):
1205 try:
1206 logging.debug(f"Removing temp file {full_file_name:s}")
1207 os.remove(full_file_name)
1208 except OSError as detail:
1209 logging.error(f"Cannot remove temp dir {full_file_name:s}: {str(detail):s}")
1210
1211 # Update source code files.
1212 if self.is_source_or_hypertext_file(f):
1213 changed = self.rewrite_source_file(full_file_name)
1214 if changed:
1215 num_source_files_changed += 1
1216 logging.debug(f"Rewrote source code file {self.root_dir:s}")
1217
1218 # Generate a syntax highlighted code listing.
1219 # Make it the same time and date as the original code. Then, only if there are recent changes, we will update the remote server.
1220 if self.is_file_to_syntax_highlight(f):
1221 # syntax_highlighted_file_name = self.create_syntax_highlighted_code_listing(full_file_name, dry_run=True)
1222 syntax_highlighted_file_name = self.create_syntax_highlighted_code_listing(full_file_name)
1223 if syntax_highlighted_file_name is not None:
1224 logging.debug(f"Generated a syntax highlighted source listing file {syntax_highlighted_file_name:s} for the file {full_file_name:s}")
1225 else:
1226 logging.debug(f"Failed to generate a syntax highlighted source listing file for {full_file_name:s}")
1227 num_source_files_syntax_highlighted += 1
1228
1229 logging.debug(f"Number of source files rewritten = {num_source_files_changed:10d}")
1230 logging.debug(f"Number of source files syntax highlighted = {num_source_files_syntax_highlighted:10d}")
1231
1232 def is_temp_file(self, file_info):
1233 """Identify a file name as a temporary file"""
1234
1235 file_name = file_info[self.user_settings.FILE_NAME]
1236
1237 # Suffixes and names for temporary files be deleted.
1238 pat = self.user_settings.TEMP_FILE_SUFFIXES
1239 match = pat.search(file_name)
1240 # Remove any files containing twiddles anywhere in the name.
1241 if match or file_name.find(self.user_settings.VIM_TEMP_FILE_EXT) >= 0:
1242 return True
1243
1244 return False
1245
1246 def is_temp_dir(self, dir_name):
1247 """Identify a name as a temporary directory."""
1248
1249 p = self.user_settings.TEMP_DIR_SUFFIX
1250 return p.search(dir_name)
1251
1252 def is_source_or_hypertext_file(self, file_info):
1253 """ Check if the file name is a source file or a hypertext file."""
1254
1255 file_name = file_info[self.user_settings.FILE_NAME]
1256 p1 = self.user_settings.SOURCE_FILE_PATTERN
1257 p2 = self.user_settings.HYPERTEXT_FILE_PATTERN
1258 if p1.search(file_name) or p2.search(file_name):
1259 return True
1260 else:
1261 return False
1262
1263 def is_file_to_syntax_highlight(self, file_info):
1264 """Check if this file type should have a syntax highlighted source listing."""
1265
1266 # Take apart the file name.
1267 full_file_name = file_info[self.user_settings.FILE_NAME]
1268 file_name = Path(full_file_name).name
1269
1270 p = self.user_settings.FILE_TO_HIGHLIGHT_PATTERN
1271 if p.search(file_name):
1272 return True
1273 else:
1274 return False
1275
1276 def rewrite_substring(self, line):
1277 """Rewrite a line containing a pattern of your choice"""
1278
1279 # Start with the original unchanged line.
1280 rewritten_line = line
1281
1282 # Do the replacements in order from first to last.
1283 for match_replace_tuple in self.user_settings.STRING_REPLACEMENT_LIST:
1284 # Get the next pattern match replacement string tuple.
1285 [pat, rep_string] = match_replace_tuple
1286 # Does it match? Then do string substitution, else leave the line unchanged.
1287 match = pat.search(rewritten_line)
1288 if match:
1289 # Now we have these cases:
1290 # -No capture variables at all, but just a straightforward pattern match followed by a string substitution.
1291 # -One or more capture variable names in the pattern (?P<varname> ... ) along with the same corresponding match group names in replacement string \\g<varname> ...
1292 # If pat.sub() finds any inconsistency here such as the capture variable names not matching the group names, it will throw an exception.
1293 try:
1294 sub = pat.sub(rep_string, rewritten_line)
1295 rewritten_line = sub
1296 except IndexError as detail:
1297 logging.error(f"ERROR: {str(detail):s}. Did not find a capture variable name in the pattern (?P<varname> ... ) along with its corresponding match group name in replacement string \\g<varname> in updateweb.yaml. Did not rewrite the line |{rewritten_line:s}|")
1298
1299 return rewritten_line
1300
1301 def rewrite_email_address_line(self, line):
1302 """Rewrite lines containing old email addresses."""
1303
1304 # Search for the old email address.
1305 pat = self.user_settings.OLD_EMAIL_ADDRESS
1306 match = pat.search(line)
1307
1308 # Replace the old address with my new email address.
1309 if match:
1310 new_address = self.user_settings.NEW_EMAIL_ADDRESS
1311 sub = pat.sub(new_address, line)
1312 line = sub
1313
1314 return line
1315
1316 def rewrite_copyright_line(self, line):
1317 """Rewrite copyright lines if they are out of date."""
1318
1319 # Match the lines,
1320 # Copyright (C) nnnn-mmmm by Sean Erik O'Connor.
1321 # Copyright © nnnn-mmmm by Sean Erik O'Connor.
1322 # and pull out the old year and save it.
1323 pat = self.user_settings.COPYRIGHT_LINE
1324 match = pat.search(line)
1325
1326 # Found a match.
1327 if match:
1328 old_year = int(match.group('old_year'))
1329
1330 # Replace the old year with the current year.
1331 # We matched and extracted the old copyright symbol into the variable
1332 # 'symbol' using the pattern syntax (?P<symbol> \(C\) | ©)
1333 # We now insert it back by placing the special syntax \g<symbol>
1334 # into the replacement string.
1335 if old_year < WebSite.get_current_year():
1336 new_copyright = r"Copyright \g<symbol> \g<old_year>-" + str(WebSite.get_current_year()) + " by Sean Erik"
1337 sub = pat.sub(new_copyright, line)
1338 line = sub
1339 return line
1340
1341 def rewrite_last_update_line(self, line):
1342 """Rewrite the Last Updated line if the year is out of date."""
1343
1344 # Match the last updated line and pull out the year.
1345 # last updated 01 Jan 25.
1346 p = self.user_settings.LAST_UPDATED_LINE
1347 m = p.search(line)
1348
1349 if m:
1350 last_update_year = int(m.group('year'))
1351
1352 # Convert to four digit years.
1353 if last_update_year > 90:
1354 last_update_year += 1900
1355 else:
1356 last_update_year += 2000
1357
1358 # If the year is old, rewrite to "01 Jan <current year>".
1359 if last_update_year < WebSite.get_current_year():
1360 two_digit_year = self.user_settings.TWO_DIGIT_YEAR_FORMAT % self.get_current_two_digit_year()
1361 sub = p.sub('last updated 01 Jan ' + two_digit_year, line)
1362 line = sub
1363
1364 return line
1365
1366 def rewrite_source_file(self, file_name):
1367 """Rewrite copyright lines, last updated lines, etc."""
1368 changed = False
1369
1370 # Create a new temporary file name for the rewritten file.
1371 temp_file_name = file_name + self.user_settings.TEMP_FILE_EXT
1372
1373 # Apply changes to all lines of the temporary file. Apply change functions in
1374 # the sequence listed.
1375 if self.process_lines_of_file(file_name, temp_file_name,
1376 [self.rewrite_copyright_line,
1377 self.rewrite_last_update_line,
1378 self.rewrite_email_address_line,
1379 self.rewrite_substring]):
1380 logging.debug(f"Changed (rewritten) source file {file_name:s}")
1381 changed = True
1382
1383 # Rename the temporary file to the original file name. If no changes, just delete the temp file.
1384 self.clean_up_temp_file(temp_file_name, file_name, changed)
1385
1386 return changed
1387
1388 @staticmethod
1389 def create_syntax_highlighted_code_listing(source_file_name, **kwargs):
1390 """Create a syntax highlighted source listing for the file and return its name. Return None if there is an error.
1391 Keep the same date/time as the original file."""
1392
1393 # kwargs is a dictionary for key, value in kwargs.items():
1394 # for key, value in kwargs.items():
1395 # if key in kwargs:
1396 # print( f"kwargs:" )
1397 # print( f" key = |{key}|")
1398 # print( f" value = |{value}|" )
1399 dry_run_value = kwargs.get('dry_run')
1400 dry_run = False
1401 if dry_run_value is not None and dry_run_value is True:
1402 dry_run = True
1403
1404 # Take apart the file name.
1405 file_name_without_extension = Path(source_file_name).stem
1406 file_extension = Path(source_file_name).suffix
1407
1408 # Append *.html to the source code file name. This will be the syntax highlighted code listing.
1409 syntax_highlighted_file_name = f"{source_file_name}.html"
1410
1411 # In the special case of Jupyter notebooks, use the Jupyter to HTML converter.
1412 if file_extension == ".ipynb":
1413 if dry_run:
1414 logging.debug(f"Dry run only: don't generate the syntax highlighted file {syntax_highlighted_file_name:s}")
1415 return None
1416 # Python manual recommends using the run() command instead of Popen(). See https://docs.python.org/3/library/subprocess.html#subprocess.run
1417 try:
1418 shell_command = f"jupyter nbconvert {source_file_name} --to html --output {syntax_highlighted_file_name}"
1419 # Throw an exception if we can't run the process.
1420 # Capture the standard output and standar error and dump to /dev/null so it doesn't print to the command line when running this script.
1421 # Since the shell command is a single string, use shell=True in the run() command.
1422 subprocess.run([shell_command],shell=True,check=True,stdout=subprocess.DEVNULL,stderr=subprocess.DEVNULL)
1423 except subprocess.CalledProcessError as detail:
1424 logging.error(f"Cannot convert the Jupyter file {source_file_name:s} to a syntax highlighted file: {str(detail):s} Aborting...")
1425 return None
1426 # Otherwise, use the Pygments syntax highlighter.
1427 else:
1428 # First choose the language lexer from the file name itself if there's no extension.
1429 # Dotted file names are treated as the entire file name.
1430 match file_name_without_extension:
1431 case "makefile":
1432 lexer = MakefileLexer()
1433 case ".bash_profile"|".bashrc"|".bash_logout":
1434 lexer = BashLexer()
1435 case ".vimrc":
1436 lexer = VimLexer()
1437 case ".gitignore_global" | ".gitignore" | ".gitconfig":
1438 lexer = OutputLexer() # No formatting.
1439 case _:
1440 # Choose the language lexer from the file extension. Web stuff first, then programming languages.
1441 match file_extension:
1442 case ".html":
1443 lexer = HtmlLexer()
1444 case ".css":
1445 lexer = CssLexer()
1446 case ".js":
1447 lexer = JavascriptLexer()
1448 case ".sh":
1449 lexer = BashLexer()
1450 case ".py":
1451 lexer = PythonLexer()
1452 case ".c" | ".h":
1453 lexer = CLexer()
1454 case ".hpp" | ".cpp":
1455 lexer = CppLexer()
1456 case ".lsp":
1457 lexer = CommonLispLexer()
1458 case ".for" | ".FOR" | ".f":
1459 lexer = FortranFixedLexer() # Fixed format FORTRAN, not FORTRAN 90.
1460 case ".txt" | ".dat": # Generic data file; no formatting.
1461 lexer = OutputLexer()
1462 case ".tex":
1463 lexer = TexLexer() # LaTeX, TeX, or related files.
1464 case ".m":
1465 lexer = MatlabLexer()
1466 case ".yaml":
1467 lexer = YamlLexer()
1468 case _:
1469 logging.error(f"Can't find a lexer for file {source_file_name}. Cannot generate a syntax highlighted source listing. Aborting...")
1470 return None
1471
1472 # Read the source code file into a single string.
1473 try:
1474 with open(source_file_name, 'r') as fp:
1475 source_file_string = fp.read()
1476 except OSError as detail:
1477 logging.error(f"Cannot read the source code file {source_file_name:s} for syntax highlighting: {str(detail):s} Aborting...")
1478
1479 # Top level Pygments function generates the HTML for the highlighted code.
1480 highlighted_html_source_file_string = highlight(source_file_string, lexer, HtmlFormatter(linenos="inline"))
1481
1482 # The style sheet is always the same for all languages.
1483 style_sheet = HtmlFormatter().get_style_defs('.highlight')
1484
1485 # Write out the syntax colored file.
1486 if dry_run:
1487 logging.debug(f"Dry run only: don't generate the syntax highlighted file {syntax_highlighted_file_name:s}")
1488 return None
1489 else:
1490 try:
1491 # Write out the highlighted code listing in HTML with CSS style sheet attached.
1492 with open(syntax_highlighted_file_name, 'w') as fp:
1493 fp.write(UserSettings.BASIC_HTML_BEGIN)
1494 fp.write(style_sheet)
1495 fp.write(UserSettings.BASIC_HTML_MIDDLE)
1496 fp.write(highlighted_html_source_file_string)
1497 fp.write(UserSettings.BASIC_HTML_END)
1498 except OSError as detail:
1499 logging.error(f"Cannot write the syntax highlighted file {syntax_highlighted_file_name:s}: {str(detail):s} Aborting...")
1500 # ------- end Pygments syntax highlighter
1501
1502 # Set the syntax highlighted code file to the same modification and access time and date as the source file.
1503 file_stat = os.stat(source_file_name)
1504 os.utime(syntax_highlighted_file_name, (file_stat[stat.ST_ATIME], file_stat[stat.ST_MTIME]))
1505
1506 # Are the original source and the syntax highlighted code the same data and time?
1507 dates_and_times_source_file_name = LocalWebSite.get_file_date_time(syntax_highlighted_file_name)
1508 dates_and_times_syntax_highlighted_file_name = LocalWebSite.get_file_date_time(syntax_highlighted_file_name)
1509 if dates_and_times_source_file_name != dates_and_times_syntax_highlighted_file_name:
1510 logging.error(f"Source code and syntax highlighted source don't have the same times. source time = {dates_and_times_source_file_name.ctime():s} syntax highlighted time = {dates_and_times_syntax_highlighted_file_name.ctime():s} Aborting...")
1511 return None
1512
1513 logging.debug(f"Generated a syntax highlighted listing {syntax_highlighted_file_name:s} for the source code file {source_file_name:s} with the same time and date = {dates_and_times_source_file_name.ctime():s}")
1514 return syntax_highlighted_file_name
1515
1516# ----------------------------------------------------------------------------
1517# Subclass which knows about the remote web site.
1518# ----------------------------------------------------------------------------
1519
class RemoteWebSite(WebSite):
    """Walk the remote web directory on a web server down from the root.

    Uses FTP commands:
        https://en.wikipedia.org/wiki/List_of_FTP_commands
    via the Python ftp library:
        https://docs.python.org/3/library/ftplib.html
    """

    def __init__(self, user_settings):
        """Connect to the FTP server and log in.

        user_settings supplies the server name, user name, password and the
        name of the FTP root directory.
        Raises UpdateWebException if the connection or login fails.
        """

        # Root directory of FTP server.
        self.root_dir = user_settings.FTP_ROOT_NAME
        logging.debug(f"Set the remote web site ftp root dir = {self.root_dir:s}")

        # Connect to FTP server and log in.
        try:
            # Turn on for troubleshooting ftp on the remote server.
            # self.ftp.set_debuglevel( 2 )
            # print( f"\nTrying ftp login to server name = {user_settings.SERVER_NAME} user name = {user_settings.USER_NAME} password = {user_settings.PASSWORD_NAME}\n")
            self.ftp = ftplib.FTP(user_settings.SERVER_NAME)
            self.ftp.login(user_settings.USER_NAME, user_settings.PASSWORD_NAME)
        # Catch all exceptions with the parent class Exception: all built-in,
        # non-system-exiting exceptions are derived from this class.
        except Exception as detail:
            # Extract the string message from the exception class with str().
            logging.error(f"Remote web site cannot login to ftp server: {str(detail):s} Aborting...")
            raise UpdateWebException("Problem accessing remote web site. See the log file for details. Aborting... ") from detail
        else:
            logging.debug("Remote web site ftp login succeeded.")

        logging.debug(f"Remote web site ftp welcome message {self.ftp.getwelcome():s}")

        # Initialize the superclass.
        WebSite.__init__(self, user_settings)

    def go_to_root_dir(self, root_dir):
        """Go to the root directory on the server, then read back the server's
        canonical name for it into self.root_dir.
        Raises UpdateWebException on any FTP error.
        """

        try:
            # Go to the root directory.
            self.ftp.cwd(root_dir)
            logging.debug(f"ftp root directory (requested) = {self.root_dir:s}")

            # Read it back.
            self.root_dir = self.ftp.pwd()
            logging.debug(f"ftp root directory (read back from server): {self.root_dir:s}")

        except Exception as detail:
            logging.error(f"go_to_root_dir(): \tCannot ftp cwd or pwd root dir {root_dir:s} {str(detail):s} Aborting...")
            raise UpdateWebException("Problem accessing remote web site. See the log file for details. Aborting... ") from detail

    def get_root_dir(self):
        """Get the root directory name"""

        return self.root_dir

    def finish(self):
        """Quit the remote web site's ftp session.  Errors are logged, not raised."""
        logging.debug(f"Finished with WebSite object of class {type(self)}")
        try:
            self.ftp.quit()
        except Exception as detail:
            logging.error(f"Cannot ftp quit: {str(detail):s}")

    def one_level_down(self, d):
        """List files and directories in remote subdirectory d using ftp.

        Returns the pair (directories, files), both sorted:
            directories -- full path names from the root.
            files       -- parsed file-info lists (see parse_ftp_list), with
                           the file name expanded to a full path from the root.
        Raises UpdateWebException if the directory cannot be listed.
        """

        directories = []
        files = []

        try:
            # ftp listing from current dir.
            logging.debug(f"RemoteWebSite.one_level_down(): \tftp cwd: {d:s}")
            self.ftp.cwd(d)
            dir_list = []

            # Use the nonstandard -a option in LIST to show all the hidden .* files.
            # But now we have the problem that . and .. (the UNIX current and parent directories) will be in the ftp list of directories.
            # Note the second argument requires a callback function.
            self.ftp.retrlines('LIST -a', dir_list.append)

        except Exception as detail:
            logging.error(f"one_level_down(): \tCannot ftp cwd or ftp LIST dir {d:s}: {str(detail):s} Aborting...")
            raise UpdateWebException("Problem accessing remote web site. See the log file for details. Aborting... ") from detail

        for line in dir_list:
            logging.debug(f"RemoteWebSite.one_level_down(): \tftp LIST: {line:s}")

            # Line should at least have the minimum FTP information.
            if len(line) >= self.user_settings.MIN_FTP_LINE_LENGTH:
                # Parse the FTP LIST and put the pieces into file_info.
                file_info = self.parse_ftp_list(line)
                logging.debug(f"RemoteWebSite.one_level_down(): \tftp parsed file information: {file_info[self.user_settings.FILE_NAME]:s}")

                # Skip over the UNIX hidden files for current and parent directories . and .. Also skip over any NULL file names.
                if file_info[self.user_settings.FILE_NAME] in ("", ".", ".."):
                    logging.debug(f"RemoteWebSite.one_level_down(): \tftp skipping the file name: {file_info[self.user_settings.FILE_NAME]:s}")
                # For a directory, prefix the full path prefix from the root to the directory name and add to the directory list.
                elif file_info[self.user_settings.FILE_TYPE] == FileType.DIRECTORY:
                    dirname = self.append_root_dir( d, file_info[self.user_settings.FILE_NAME])
                    logging.debug(f"RemoteWebSite.one_level_down(): \tftp dir (full path): {dirname:s}")
                    directories.append(dirname)
                # For a file: Add the full path prefix from the root to the file name.
                else:
                    file_info[self.user_settings.FILE_NAME] = self.append_root_dir( d, file_info[self.user_settings.FILE_NAME])
                    logging.debug(f"RemoteWebSite.one_level_down(): \tftp file (full path): {file_info[self.user_settings.FILE_NAME]:s}")
                    files.append(file_info)
            else:
                logging.error(f"RemoteWebSite.one_level_down(): \tFTP LIST line is too short: {line:s}")

        directories.sort()
        files.sort()

        return directories, files

    def modtime(self, f):
        """Get the modification time of a file via ftp.

        Returns the raw MDTM timestamp string "YYYYMMDDhhmmss" on success,
        or the integer 0 if ftp cannot get it.  NOTE(review): callers get a
        str on success and an int on failure; preserved for compatibility.
        """
        modtime = 0

        try:
            response = self.ftp.sendcmd('MDTM ' + f)
            # MDTM returns the last modified time of the file in the format
            # "213 YYYYMMDDhhmmss \r\n <error-response>
            # MM is 01 to 12, DD is 01 to 31, hh is 00 to 23, mm is 00 to 59, ss is 0 to 59.
            # error-response is 550 for info not available, and 500 or 501 if command cannot
            # be parsed.
            if response[:3] == '213':
                modtime = response[4:]
        except ftplib.error_perm as detail:
            # Bug fix (message only): the old text said "Aborting..." but we
            # deliberately continue and return 0 so the caller can cope.
            logging.error(f"Cannot get file modification time from the ftp server: {str(detail):s} Skipping...")
            modtime = 0

        return modtime

    def parse_ftp_list(self, line):
        """Parse the ftp file listing and return [file name, file type, datetime, file size].

        An FTP LIST command will give output which looks like this for a file:

            -rw-r--r-- 1 1000 free 4084 Jul 18 16:55 sparkCoil.png

        and for a directory:

            drwxr-xr-x 2 1000 free 4096 Jul 18 16:36 ReadingList

        FTP uses UTC for its listings; the conversion to local time is done by the OS.
        We can have problems on New Year's Eve. For example, the local file date/time is

            Mon Jan 1 06:23:12 2018

        But the remote file date/time from FTP listing doesn't show a year even though we
        know it was written to the server in 2017.

            Mon Dec 31 03:02:00

        So we default the remote file year to current year 2018 and get

            Mon Dec 31 03:02:00 2018

        Now we think that the remote file is newer by 363.860278 days.
        """

        # Find out if we've a directory or a file.
        if line[0] == 'd':
            dir_or_file = FileType.DIRECTORY
        else:
            dir_or_file = FileType.FILE

        pattern = self.user_settings.FTP_LISTING

        # Sensible defaults.
        filesize = 0
        filename = ""
        # Default the time to midnight.
        hour = 0
        minute = 0
        seconds = 0
        # Default the date to Jan 1 of the current year.
        month = 1
        day = 1
        year = WebSite.get_current_year()

        # Extract time and date from the ftp listing.
        match = pattern.search(line)

        if match:
            filesize = int(match.group('bytes'))
            month = self.user_settings.monthToNumber[match.group('mon')]
            day = int(match.group('day'))

            # Remote file listing contains the year. The FTP listing will omit the hour and minute.
            if match.group('year'):
                year = int(match.group('year'))
                logging.debug(f"ftp has year = {year} but is probably missing hour and minute")
            else:
                # Remote file listing omits the year; keep the current-year
                # default already set above.  That may be incorrect near
                # New Year's Eve (see the docstring).
                logging.debug(f"ftp is missing the year; use the current year = {year}")

            # If the FTP listing has the hour and minute, it will omit the year.
            if match.group('hour') and match.group('min'):
                hour = int(match.group('hour'))
                minute = int(match.group('min'))
                logging.debug(f"ftp has hour = {hour} and minute = {minute} so is probably missing the year")

            filename = match.group('filename')

        # Package up the time and date nicely.
        # Note if we didn't get any matches, we'll default the remote date and
        # time to Jan 1 midnight of the current year.
        d = datetime.datetime(year, month, day, hour, minute, seconds)

        return [filename, dir_or_file, d, filesize]
1737
1738# ----------------------------------------------------------------------------
1739# Class for synchronizing local and remote web sites.
1740# ----------------------------------------------------------------------------
1741
class UpdateWeb(object):
    """Given previously scanned local and remote directories, update the remote website."""

    def __init__(
            self,
            user_settings,
            local_directory_list,
            local_file_info,
            remote_directory_list,
            remote_file_info):
        """Connect to remote site. Accept previously scanned local and remote files and directories.

        Raises UpdateWebException if we cannot log into the FTP server or
        cannot change to the FTP root directory.
        """

        # Initialize from args.
        self.user_settings = user_settings
        self.local_directory_list = local_directory_list
        self.remote_directory_list = remote_directory_list
        self.local_file_info = local_file_info
        self.remote_file_info = remote_file_info

        # Initialize defaults; filled in by file_info() and changes().
        self.local_files_list = []
        self.remote_files_list = []
        self.local_file_to_size = {}
        self.local_file_to_date_time = {}
        self.remote_file_to_date_time = {}
        self.local_only_dirs = []
        self.local_only_files = []
        self.remote_only_dirs = []
        self.remote_only_files = []
        self.common_files = []

        # Connect to FTP server and log in.
        try:
            self.ftp = ftplib.FTP(self.user_settings.SERVER_NAME)
            self.ftp.login(self.user_settings.USER_NAME, self.user_settings.PASSWORD_NAME)
        except Exception as detail:
            logging.error(f"Cannot login to ftp server: {str(detail):s} Aborting...")
            raise UpdateWebException("Problem accessing remote web site. See the log file for details. Aborting... ") from detail
        else:
            logging.debug("ftp login succeeded.")

        logging.debug(f"ftp server welcome message: {self.ftp.getwelcome():s}")

        # Local root directory.
        self.local_root_dir = self.user_settings.local_root_dir
        logging.debug(f"Local root directory: {self.local_root_dir:s}")

        # Root directory of FTP server.
        self.ftp_root_dir = self.user_settings.FTP_ROOT_NAME
        logging.debug(f"ftp root directory (requested) = {self.ftp_root_dir:s}")

        # Transform KB string to integer bytes. e.g. "200" => 204800
        self.file_size_limit = int(self.user_settings.FILE_SIZE_LIMIT_NAME) * 1024

        try:
            # Go to the root directory.
            self.ftp.cwd(self.ftp_root_dir)

            # Read it back.
            self.ftp_root_dir = self.ftp.pwd()
            logging.debug(f"ftp root directory (read back from server): {self.ftp_root_dir:s}")
        except Exception as detail:
            logging.error(f"UpdateWeb(): \tCannot ftp cwd or ftp LIST dir {self.ftp_root_dir:s} {str(detail):s} Aborting...")
            # Bug fix: previously we logged "Aborting..." but silently continued
            # with a half-initialized object. Fail fast like the login path above
            # and like RemoteWebSite.go_to_root_dir().
            raise UpdateWebException("Problem accessing remote web site. See the log file for details. Aborting... ") from detail

    def append_root_dir(self, root_dir, name):
        """Append the root directory to a relative path.

        e.g. root = /, and name = Art/foo.txt yields /Art/foo.txt
        but root = /Sean, and name = Art/foo.txt yields /Sean/Art/foo.txt
        """

        if root_dir == self.user_settings.DEFAULT_ROOT_DIR:
            return root_dir + name
        else:
            return root_dir + "/" + name

    def _upload_size_ok(self, f):
        """Return True if local file f is within the server's upload size limit;
        log an error and return False otherwise."""

        size = self.local_file_to_size[f]
        if size >= self.file_size_limit:
            logging.error(f"upload(): Skipping upload of file {f:s} of size {size:d}; too large for server, limit is {self.file_size_limit:d} bytes")
            return False
        return True

    def file_info(self):
        """Create lists of file names from the file information. Also create dictionaries which map file names onto
        dates, times, and sizes."""

        # Extract file names.
        self.local_files_list = [
            file_info[self.user_settings.FILE_NAME] for file_info in self.local_file_info]
        self.remote_files_list = [
            file_info[self.user_settings.FILE_NAME] for file_info in self.remote_file_info]

        # Use a dictionary comprehension to create key/value pairs,
        #     (file name, file date/time)
        # which map file names onto date/time.
        self.local_file_to_date_time = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_DATE_TIME] for file_info in self.local_file_info}
        self.remote_file_to_date_time = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_DATE_TIME] for file_info in self.remote_file_info}

        # Dictionary comprehension creates a mapping of local file names onto file sizes.
        self.local_file_to_size = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_SIZE] for file_info in self.local_file_info}

    def update(self):
        """Scan through the local website, cleaning it up.
        Go to remote website on my servers and synchronize all files."""

        self.file_info()

        # Which files and directories are different.
        self.changes()

        # Synchronize with the local web site.
        self.synchronize()

    def changes(self):
        """Find the set of different directories and files on local and remote."""

        # Add all directories which are only on local to the dictionary.
        dir_to_type = {
            d: FileType.ON_LOCAL_ONLY for d in self.local_directory_list}

        # Scan through all remote directories, adding those only on remote or
        # on both.
        for d in self.remote_directory_list:
            if d in dir_to_type:
                dir_to_type[d] = FileType.ON_BOTH_LOCAL_AND_REMOTE
            else:
                dir_to_type[d] = FileType.ON_REMOTE_ONLY

        # Add all files which are only on local to the dictionary.
        file_to_type = {
            f: FileType.ON_LOCAL_ONLY for f in self.local_files_list}

        # Scan through all remote files, adding those only on remote or on
        # both.
        for f in self.remote_files_list:
            if f in file_to_type:
                file_to_type[f] = FileType.ON_BOTH_LOCAL_AND_REMOTE
            else:
                file_to_type[f] = FileType.ON_REMOTE_ONLY

        logging.debug("Raw dictionary dump of directories")
        for k, v in dir_to_type.items():
            logging.debug(f"\t dir: {str(k):s} type: {str(v):s}")

        logging.debug("Raw dictionary dump of files")
        for k, v in file_to_type.items():
            logging.debug(f"\t file: {str(k):s} type: {str(v):s}")

        # List of directories only on local. Keep the ordering.
        self.local_only_dirs = [
            d for d in self.local_directory_list if dir_to_type[d] == FileType.ON_LOCAL_ONLY]

        # List of directories only on remote. Keep the ordering.
        self.remote_only_dirs = [
            d for d in self.remote_directory_list if dir_to_type[d] == FileType.ON_REMOTE_ONLY]

        # We don't care about common directories, only their changed files, if
        # any.

        # List of files only on local. Keep the ordering.
        self.local_only_files = [
            f for f in self.local_files_list if file_to_type[f] == FileType.ON_LOCAL_ONLY]

        # List of files only on remote. Keep the ordering.
        self.remote_only_files = [
            f for f in self.remote_files_list if file_to_type[f] == FileType.ON_REMOTE_ONLY]

        # List of common files on both local and remote. Keep the ordering.
        self.common_files = [
            f for f in self.local_files_list if file_to_type[f] == FileType.ON_BOTH_LOCAL_AND_REMOTE]

        logging.debug("*** Directories only on local ******************************")
        for d in self.local_only_dirs:
            logging.debug(f"\t {d:s}")

        logging.debug("*** Directories only on remote ******************************")
        for d in self.remote_only_dirs:
            logging.debug(f"\t {d:s}")

        logging.debug("*** Files only on local ******************************")
        for f in self.local_only_files:
            logging.debug(f"\t {f:s}")

        logging.debug("*** Files only on remote ******************************")
        for f in self.remote_only_files:
            logging.debug(f"\t {f:s}")

        logging.debug("*** Common files ******************************")
        for f in self.common_files:
            logging.debug(f"name {f:s}")
            logging.debug(f"\tlocal time {self.local_file_to_date_time[f].ctime():s}")
            logging.debug(f"\tremote time {self.remote_file_to_date_time[f].ctime():s}")

    def synchronize(self):
        """Synchronize files and subdirectories in the remote directory with the local directory."""

        # If we have the same files in local and remote, compare their times
        # and dates.
        for f in self.common_files:
            local_file_time = self.local_file_to_date_time[f]
            remote_file_time = self.remote_file_to_date_time[f]

            # What's the time difference?
            time_delta = remote_file_time - local_file_time
            # How much difference, either earlier or later?
            seconds_different = abs(time_delta.total_seconds())
            minutes_different = seconds_different / 60.0
            hours_different = minutes_different / 60.0
            days_different = hours_different / 24.0

            # Assume no upload initially.
            upload_to_host = False

            logging.debug(f"Common file: {f:s}.")

            # Remote file time is newer.
            if remote_file_time > local_file_time:
                # Remote file time is MUCH newer: suspect time is out of joint on the server, so upload local local file to be safe.
                if minutes_different >= self.user_settings.MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD:
                    logging.error(f"Remote file {f:s} is MUCH newer by {minutes_different:8.1f} minutes [which exceeds the threshold = {self.user_settings.MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD} minutes]. Upload the file to be safe.")
                    logging.error(f"\tlocal time {local_file_time.ctime():s}")
                    logging.error(f"\tremote time {remote_file_time.ctime():s}")

                    # Set the local file to the current time.
                    full_file_name = self.append_root_dir(
                        self.local_root_dir, f)
                    if os.path.exists(full_file_name):
                        # Change the access and modify times of the file to the current time.
                        os.utime(full_file_name, None)
                        logging.error(f"Touching local file {full_file_name:s} to make it the current time")

                    upload_to_host = True
                # Remote file time is newer, but not by much. Let's just assume a slight time mismatch on the server. Don't upload.
                else:
                    logging.warning(f"Remote file {f:s} is only SLIGHTLY newer by {seconds_different:8.1f} seconds. Probably just inaccurate time/date on the server. Wait -- don't upload the file yet.")
                    logging.warning(f"\tlocal time {local_file_time.ctime():s}")
                    logging.warning(f"\tremote time {remote_file_time.ctime():s}")
                    upload_to_host = False

            # Local file time is newer.
            elif local_file_time > remote_file_time:
                # Local file time slightly newer than the remote file. So we are pretty sure the local file really got changed vs the server file.
                if minutes_different >= self.user_settings.MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD:
                    logging.warning(f"Local file {f:20s} is SLIGHTLY newer by {minutes_different:8.1f} minutes [which exceeds the threshold = {self.user_settings.MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD} minutes]. Uploading to remote server.")
                    logging.warning(f"\tlocal time {local_file_time.ctime():s}")
                    logging.warning(f"\tremote time {remote_file_time.ctime():s}")
                    upload_to_host = True
                else:
                    logging.warning(f"Local file {f:20s} is BARELY newer by {seconds_different:8.1f} seconds. Probably just inaccurate time/date on the server. Wait -- don't upload the file yet.")
                    logging.warning(f"\tlocal time {local_file_time.ctime():s}")
                    logging.warning(f"\tremote time {remote_file_time.ctime():s}")
                    upload_to_host = False

            # Cancel the upload if the file is too big for the server.
            if not self._upload_size_ok(f):
                upload_to_host = False

            # Finally do the file upload.
            if upload_to_host:
                logging.debug(f"Uploading changed file {f:s}")
                # Suppress newline to keep the message to the console more compact. Flush output buffer, so we can see the message right away.
                print(f"Uploading changed file {f:s}... ", end='', flush=True)
                self.upload(f)

        # Remote directory is not in local. Delete it.
        for d in self.remote_only_dirs:
            logging.debug(f"Deleting remote only directory {d:s}")
            print(f"Deleting remote only directory {d:s}... ", end='', flush=True)
            self.rmdir(d)

        # Local directory missing on remote. Create it.
        # Due to breadth first order scan, we'll create parent directories
        # before child directories.
        for d in self.local_only_dirs:
            logging.debug(f"Only on local. Creating new remote dir {d:s}.")
            print(f"Creating new remote directory {d:s}... ", end='', flush=True)
            self.mkdir(d)

        # Local file missing on remote. Upload it.
        for f in self.local_only_files:
            logging.debug(f"Local only file. Uploading {f:s} to remote.")

            # But cancel the upload if the file is too big for the server.
            if self._upload_size_ok(f):
                logging.debug(f"Uploading new file {f:s}")
                print(f"Uploading new file {f:s}... ", end='', flush=True)
                self.upload(f)

        # Remote contains a file not present on the local. Delete the file.
        for f in self.remote_only_files:
            logging.debug(f"Remote only file. Deleting remote file {f:s}.")
            print(f"Deleting remote file {f:s}... ", end='', flush=True)
            self.del_remote(f)

    def del_remote(self, relative_file_path):
        """Delete a file using ftp.  Errors are logged, not raised."""

        logging.debug(f"del_remote(): \trelative file path name: {relative_file_path:s}")

        # Parse the relative file path into file name and relative directory.
        relative_dir, file_name = os.path.split(relative_file_path)
        logging.debug(f"del_remote(): \tfile name: {file_name:s}")
        logging.debug(f"del_remote(): \trelative dir: {relative_dir:s}")
        logging.debug(f"del_remote(): \tremote root dir: {self.ftp_root_dir:s}")

        try:
            # Add the remote root path and go to the remote directory.
            remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
            logging.debug(f"del_remote(): \tftp cd remote dir: {remote_dir:s}")
            self.ftp.cwd(remote_dir)
        except Exception as detail:
            logging.error(f"del_remote(): \tCannot ftp chdir: {str(detail):s} Skipping...")
        else:
            try:
                logging.debug(f"del_remote(): \tftp rm: {file_name:s}")

                # Don't remove zero length file names.
                if len(file_name) > 0:
                    self.ftp.delete(file_name)
                else:
                    # Bug fix: this message was not an f-string, so the file name
                    # placeholder was logged literally; it also misspelled the
                    # function name as "fdel_remote".
                    logging.warning(
                        f"del_remote(): skipping ftp delete; file name {file_name:s} had zero length")
            except Exception as detail:
                logging.error(f"del_remote(): \tCannot ftp rm: {str(detail):s}")

    def mkdir(self, relative_dir):
        """Create new remote directory using ftp.  Errors are logged, not raised."""

        logging.debug(f"mkdir(): \trelative dir path name: {relative_dir:s}")
        logging.debug(f"mkdir(): \tremote root dir: {self.ftp_root_dir:s}")

        # Parse the relative dir path into prefix dir and suffix dir.
        path, d = os.path.split(relative_dir)
        logging.debug(f"mkdir(): \tremote prefix dir: {path:s}")
        logging.debug(f"mkdir(): \tremote dir: {d:s}")

        try:
            # Add the remote root path and go to the remote directory.
            remote_dir = self.append_root_dir(self.ftp_root_dir, path)
            logging.debug(f"mkdir(): \tftp cd remote dir: {remote_dir:s}")
            self.ftp.cwd(remote_dir)
        except Exception as detail:
            logging.error(f"mkdir(): \tCannot ftp chrdir: {str(detail):s} Skipping...")
        else:
            try:
                logging.debug(f"mkdir(): \tftp mkd: {d:s}")
                self.ftp.mkd(d)
            except Exception as detail:
                logging.error(f"mkdir(): \tCannot ftp mkdir: {str(detail):s}")

    def rmdir(self, relative_dir):
        """Delete an empty directory using ftp.  Errors are logged, not raised."""

        logging.debug(f"rmdir(): \tintermediate dir path name: {relative_dir:s}")
        logging.debug(f"rmdir(): \tremote root dir: {self.ftp_root_dir:s}")

        # Parse the relative dir path into prefix dir and suffix dir.
        path, d = os.path.split(relative_dir)
        logging.debug(f"rmdir(): \tremote prefix dir: {path:s}")
        logging.debug(f"rmdir(): \tremote dir: {d:s}")

        try:
            # Add the remote root path and go to the remote directory.
            remote_dir = self.append_root_dir(self.ftp_root_dir, path)
            logging.debug(f"rmdir(): \tftp cd remote dir: {remote_dir:s}")
            self.ftp.cwd(remote_dir)
        except Exception as detail:
            logging.error(f"rmdir(): \tCannot ftp chdir: {str(detail):s} Skipping...")
        else:
            try:
                logging.debug(f"rmdir(): \tftp rmd: {d:s}")
                self.ftp.rmd(d)
            except Exception as detail:
                logging.error(f"rmdir(): \tCannot ftp rmdir dir {d:s}: {str(detail):s}. Directory is probably not empty. Do a manual delete.")

    def download(self, relative_file_path):
        """Download a binary file using ftp.  Errors are logged, not raised.

        NOTE(review): the actual retrbinary transfer is commented out, so this
        currently only creates an empty local file; preserved as-is.
        """

        logging.debug(f"download(): \tfile name: {relative_file_path:s}")

        # Parse the relative file path into file name and relative directory.
        relative_dir, file_name = os.path.split(relative_file_path)
        logging.debug(f"download(): \tfile name: {file_name:s}")
        logging.debug(f"download(): \trelative dir: {relative_dir:s}")
        logging.debug(f"download(): \troot dir: {self.ftp_root_dir:s}")

        # Add the remote root path and go to the remote directory.
        remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
        logging.debug(f"download(): \tftp cd remote dir: {remote_dir:s}")

        try:
            self.ftp.cwd(remote_dir)
        except Exception as detail:
            logging.error(f"download(): \tCannot ftp chdir: {str(detail):s} Skipping...")
        else:
            # Add the local root path to get the local file name.
            # Open local binary file to write into.
            local_file_name = self.append_root_dir(
                self.local_root_dir, relative_file_path)
            logging.debug(f"download(): \topen local file name: {local_file_name:s}")
            try:
                # Use a context manager so the file closes even if the transfer fails.
                with open(local_file_name, "wb") as f:
                    try:
                        # Calls f.write() on each block of the binary file.
                        # ftp.retrbinary( "RETR " + file_name, f.write )
                        pass
                    except Exception as detail:
                        logging.error(f"download(): \tCannot cannot ftp retrbinary: {str(detail):s}")
            except IOError as detail:
                # Bug fix (message only): we open this file for writing, not reading.
                logging.error(f"download(): \tCannot open local file {local_file_name:s} for writing: {str(detail):s}")

    def upload(self, relative_file_path):
        """Upload a binary file using ftp.  Errors are logged, not raised."""

        logging.debug(f"upload(): \trelative file path name: {relative_file_path:s}")

        # Parse the relative file path into file name and relative directory.
        relative_dir, file_name = os.path.split(relative_file_path)
        logging.debug(f"upload(): \tfile name: {file_name:s}")
        logging.debug(f"upload(): \trelative dir: {relative_dir:s}")
        logging.debug(f"upload(): \tremote root dir: {self.ftp_root_dir:s}")

        # Add the remote root path and go to the remote directory.
        remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
        logging.debug(f"upload(): \tftp cd remote dir: {remote_dir:s}")

        try:
            self.ftp.cwd(remote_dir)
        except Exception as detail:
            logging.error(f"upload(): \tCannot ftp chdir: {str(detail):s} Skipping...")
        else:
            # Add the local root path to get the local file name.
            # Open local binary file to read from.
            local_file_name = self.append_root_dir(
                self.local_root_dir, relative_file_path)
            logging.debug(f"upload(): \topen local file name: {local_file_name:s}")

            try:
                # Use a context manager so the file closes even if the transfer fails.
                with open(local_file_name, "rb") as f:
                    try:
                        # f.read() is called on each block of the binary file until
                        # EOF.
                        logging.debug(f"upload(): \tftp STOR file {file_name:s}")
                        self.ftp.storbinary("STOR " + file_name, f)
                    except Exception as detail:
                        logging.error(f"upload(): \tCannot ftp storbinary: {str(detail):s}")
            except IOError as detail:
                logging.error(f"upload(): \tCannot open local file {local_file_name:s} for reading: {str(detail):s}")

    def finish(self):
        """Log out of the ftp session.  Errors are logged, not raised."""
        logging.debug(f"Finished with UpdateWeb object of class {type(self)}")
        try:
            self.ftp.quit()
        except Exception as detail:
            logging.error(f"Cannot ftp quit because {str(detail):s}")
2201
2202# ----------------------------------------------------------------------------
2203# Main function
2204# ----------------------------------------------------------------------------
2205
2206def main(raw_args=None):
2207 """Main program. Clean up and update my website."""
2208
2209 # Print the obligatory legal notice.
2210 print("""
2211 updateweb Version 7.3 - A Python utility program which maintains my web site.
2212 Copyright (C) 2007-2025 by Sean Erik O'Connor. All Rights Reserved.
2213
2214 It deletes temporary files, rewrites old copyright lines and email address
2215 lines in source files, then synchronizes all changes to my web sites.
2216
2217 updateweb comes with ABSOLUTELY NO WARRANTY; for details see the
2218 GNU General Public License. This is free software, and you are welcome
2219 to redistribute it under certain conditions; see the GNU General Public
2220 License for details.
2221 """)
2222
2223 # Put ALL the main code into a try block!
2224 try:
2225 # ---------------------------------------------------------------------
2226 # Load default settings and start logging.
2227 # ---------------------------------------------------------------------
2228
2229 # Default user settings.
2230 user_settings = UserSettings()
2231
2232 print( f"Running main( {raw_args} ) Python version\
2233 {sys.version_info[0]:d}.{sys.version_info[1]:d}.{sys.version_info[2]:d}\
2234 local web directory\
2235 {user_settings.local_root_dir}\n")
2236 # Get command line options such as --verbose. Pass them back as flags in
2237 # user_settings.
2238 CommandLineSettings(user_settings, raw_args)
2239
2240 # Load all unit test functions named test_* from UnitTest class, run the tests and exit.
2241 if user_settings.UNITTEST:
2242 suite = unittest.TestLoader().loadTestsFromTestCase(UnitTest)
2243 unittest.TextTestRunner(verbosity=2).run(suite)
2244 # We are done!
2245 print(" ...done!", flush=True)
2246 return
2247
2248 # Start logging to file. Verbose turns on logging for
2249 # DEBUG, INFO, WARNING, ERROR, and CRITICAL levels,
2250 # otherwise we log only WARNING, ERROR, and CRITICAL levels.
2251 if user_settings.VERBOSE:
2252 loglevel = logging.DEBUG
2253 else:
2254 loglevel = logging.WARNING
2255
2256 # Pick the log file name on the host.
2257 if user_settings.CLEAN:
2258 user_settings.LOGFILENAME = "/private/logLocal.txt"
2259 else:
2260 user_settings.LOGFILENAME = "/private/logRemote.txt"
2261
2262 # Default is to skip processing or uploading MathJax files in /mathjax to the server.
2263 if not user_settings.MATHJAX:
2264 user_settings.DIR_TO_SKIP += "|mathjax"
2265 else:
2266 print(f"Processing and uploading new or changed mathjax files. Did you first git restore any changed files and git clean -f to remove extra files? ... ", end='', flush=True)
2267 print(f"If you are loading MathJax for the first time --- don't forget to upload the file .htaccess manually using FileZilla or another ftp client... ", end='', flush=True)
2268 print( "If using FileZilla, change your FreeServer settings: Files->Site Manager->Transfer Settings->Limit number of simultaneous connections->Check the box. This avoids ERROR 421 Too many connections... ", end='', flush=True)
2269 logging.debug(f"Processing and uploading new or changed mathjax files. Did you first git restore any changed files and git clean -f to remove extra files?", end='', flush=True)
2270 logging.debug(f"If you are loading MathJax for the first time --- don't forget to upload the file .htaccess manually using FileZilla or another ftp client.", end='', flush=True)
2271 logging.debug( "If using FileZilla, change your FreeServer settings: Files->Site Manager->Transfer Settings->Limit number of simultaneous connections->Check the box. This avoids ERROR 421 Too many connections.", end='', flush=True)
2272
        # Configure the logging and start it.  filemode='w' truncates any log
        # left over from a previous run; the log file lives under the local
        # web site root chosen above.
        logging.basicConfig( level=loglevel, format='%(asctime)s %(levelname)-8s %(message)s', datefmt='%a, %d %b %Y %H:%M:%S', filename=user_settings.local_root_dir + user_settings.LOGFILENAME, filemode='w')
        logging.debug("********** Begin logging")

        # ---------------------------------------------------------------------
        # Scan the local website, finding out all files and directories.
        # ---------------------------------------------------------------------

        # Suppress newline to keep the message to the console more compact. Flush output buffer, so we can see the message right away.
        print(f"Scanning the local web site from the root dir = {user_settings.local_root_dir}... ", end='', flush=True)
        logging.debug(f"========================== Scanning the local web site from the root dir = {user_settings.local_root_dir}")

        local = LocalWebSite(user_settings)
        local.scan()

        # ---------------------------------------------------------------------
        # Clean up local website.
        # ---------------------------------------------------------------------

        # Clean up the directory by rewriting source code and hypertext and removing temporary files.
        print("Cleaning local web site... ", end='', flush=True)
        logging.debug("========================== Cleaning the local web site")
        local.clean()

        # We are done with the first scan of the local web site and will dispose of it.
        local.finish()
        del local
2300
        # ---------------------------------------------------------------------
        # Rescan the local website since there will be changes to source
        # files from the clean up stage.
        # ---------------------------------------------------------------------

        print(f"Rescan the local web site from root dir = {user_settings.local_root_dir}... ", end='', flush=True)
        logging.debug(f"========================== Re-Scan the local web site from root dir = {user_settings.local_root_dir}")

        local = LocalWebSite(user_settings)

        local.scan()

        # ---------------------------------------------------------------------
        # List all the local directories and files and their sizes.
        # ---------------------------------------------------------------------

        # Local website directories.
        local_directory_list = local.directories
        logging.debug("********** List of all the Local Directories")
        for d in local_directory_list:
            logging.debug(f"\t {d:s}")

        # Generate lists of the local website filenames only, and their sizes in bytes.
        local_files_name_size_pairs = [[file_info[user_settings.FILE_NAME], file_info[user_settings.FILE_SIZE]] for file_info in local.files]
        total_number_of_files = len( local_files_name_size_pairs )
        logging.debug(f"********** List of all the Local Files from largest to smallest. There are {total_number_of_files:15d} files.")
        # Sort by descending file size (element [1] of each [name, size] pair).
        local_files_name_size_pairs = sorted(local_files_name_size_pairs, key=lambda name_size: name_size[1], reverse=True)

        # Local website filenames only, and their dates and times.
        local_file_datetime_pairs = [[file_info[user_settings.FILE_NAME],file_info[user_settings.FILE_DATE_TIME]] for file_info in local.files]
        logging.debug(f"********** List of all Local Files Showing Their Date and Time")
        for file_datetime_pair in local_file_datetime_pairs:
            logging.debug(f"\t {file_datetime_pair[1].ctime():s} UTC {file_datetime_pair[0]:s}")

        # Total number of bytes in the local files.
        total_number_of_bytes = 0
        for file_size_pair in local_files_name_size_pairs:
            logging.debug(f"\t {file_size_pair[1]:10d} bytes {file_size_pair[0]:s}")
            total_number_of_bytes += file_size_pair[1]
        logging.debug(f"********** Total local file size = {total_number_of_bytes:10d} bytes = {total_number_of_bytes/(1024 ** 2):10.2f} MB (not counting skipped files and directories)")

        local.finish()

        # If we were asked to clean the local copy only (-c), stop here:
        # no remote connection is made.
        if user_settings.CLEAN:
            logging.debug("========================== Done with local file and directory cleanup...")
            del local
            print("...done!", flush=True)
            return
2349
        # ---------------------------------------------------------------------
        # Scan the remote hosted web site.
        # ---------------------------------------------------------------------

        print("Scanning remote web site... ", end='', flush=True)
        logging.debug("========================== Scanning the remote web site...")

        # Pick which website to update.
        logging.debug("Connecting to primary remote site.")
        remote = RemoteWebSite(user_settings)
        remote.scan()
        remote.finish()

        # ---------------------------------------------------------------------
        # List all the remote server directories and files and their sizes.
        # ---------------------------------------------------------------------

        remote_directory_list = remote.directories
        logging.debug("********** Remote Directories")
        for d in remote_directory_list:
            logging.debug(f"\t {d:s}")

        # Remote website filenames only, and their sizes in bytes.
        remote_files_name_size_list = [[file_info[user_settings.FILE_NAME], file_info[user_settings.FILE_SIZE]] for file_info in remote.files]
        total_number_of_files = len( remote_files_name_size_list )
        logging.debug(f"********** Remote Files [num files = {total_number_of_files:15d}]")
        # Sort by descending file size, then total up the bytes while logging.
        remote_files_name_size_list = sorted(remote_files_name_size_list, key=lambda name_size: name_size[1], reverse=True)
        total_number_of_bytes = 0
        for file_size in remote_files_name_size_list:
            logging.debug(f"\t {file_size[1]:10d} bytes {file_size[0]:s}")
            total_number_of_bytes += file_size[1]
        logging.debug(f"\tTotal file size on remote (not counting skipped files and directories) = {total_number_of_bytes:10d} bytes = {total_number_of_bytes/(1024 ** 2):10.2f} MB")

        # ---------------------------------------------------------------------
        # Synchronize the local and remote web sites.
        # ---------------------------------------------------------------------

        print("Synchronizing remote and local web sites... ", end='', flush=True)
        logging.debug("========================= Synchronizing remote and local web sites...")

        # Primary website.  UpdateWeb compares both scans and uploads the
        # differences over FTP.
        logging.debug("Connecting to primary remote site for synchronization.")
        sync = UpdateWeb(user_settings,
                         local.directories,
                         local.files,
                         remote.directories,
                         remote.files)

        sync.update()
        sync.finish()

        # Release the site objects now that synchronization is complete.
        del sync
        del remote
        del local
        print("...done!", flush=True)

    except UpdateWebException as detail:
        # Application-level failure: record it and abort gracefully.
        logging.error(f"Couldn't update the web directory: {str(detail):s}. Aborting...")

    except RecursionError as detail:
        # Directory tree deeper than Python's recursion limit.
        logging.error(f"Walking the directory tree became too deep for Python's recursion stack depth of {sys.getrecursionlimit():d} You can increase it with sys.setrecursionlimit(limit) {str(detail):s}. Aborting...")
2411
# Script entry point.
#
# When this file is executed directly, Python sets the module name to
# "__main__" and we call main().  When the file is imported as a module
# instead, the guard is false and nothing runs automatically; a caller
# would do:
#
#     import updateweb
#     updateweb.main(["--test"])
if __name__ == '__main__':
    main()