1#!/usr/bin/env python3
2# ============================================================================
3#
4# NAME
5#
6# updateweb.py
7#
8# DESCRIPTION
9#
10# Python script which updates my web sites.
11#
12# It does miscellaneous cleanup on my local copy of the web site on disk,
13# including updating copyright information, then synchronizes the local
14# copy to my remote server web sites using FTP.
15#
16# USAGE
17#
18# It's best to use the associated makefile.
19# But you can call this Python utility from the command line,
20#
21# $ python updateweb.py Clean up my local copy, then use it
22# to update my remote web server site.
23# Log warnings and errors.
24# $ python updateweb.py -v Same, but log debug messages also.
25# $ python updateweb.py -c Clean up my local copy only.
26# $ python updateweb.py -t Run unit tests only.
27# $ python updateweb.py -m Upload MathJax files (only need to do this once).
28#
29# We get username and password information from the file PARAMETERS_FILE.
30#
31# Logs are written to the files,
32#
33# logLocal.txt Local web site cleanup log.
34# logRemote.txt Remote web server update log.
35#
36# AUTHOR
37#
38# Sean E. O'Connor 23 Aug 2007 Version 1.0 released.
39#
40# LEGAL
41#
42# updateweb.py Version 7.3 - A Python utility program which maintains my web site.
43# Copyright (C) 2007-2025 by Sean Erik O'Connor. All Rights Reserved.
44#
45# This program is free software: you can redistribute it and/or modify
46# it under the terms of the GNU General Public License as published by
47# the Free Software Foundation, either version 3 of the License, or
48# (at your option) any later version.
49#
50# This program is distributed in the hope that it will be useful,
51# but WITHOUT ANY WARRANTY; without even the implied warranty of
52# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
53# GNU General Public License for more details.
54#
55# You should have received a copy of the GNU General Public License
56# along with this program. If not, see <http://www.gnu.org/licenses/>.
57#
58# The author's address is seanerikoconnor!AT!gmail!DOT!com
59# with !DOT! replaced by . and the !AT! replaced by @
60#
61# NOTES
62#
63# DOCUMENTATION
64#
65# Python interpreter: https://www.python.org/
66# Python tutorial and reference: https://docs.python.org/lib/lib.html
67# Python debugger: https://docs.python.org/3/library/pdb.html
68# Python regular expression howto: https://docs.python.org/3.7/howto/regex.html
69#
70# ============================================================================
71
72# ----------------------------------------------------------------------------
73# Load Python Packages
74# ----------------------------------------------------------------------------
75
76# OS stuff
77import sys
78import os
79import argparse
80import subprocess
81import shutil
82from pathlib import Path
83
84# Regular expressions
85import re
86
87# FTP stuff
88import ftplib
89
90# Date and time
91import time
92import stat
93import datetime
94
95# Logging
96import logging
97
98# Unit testing
99import unittest
100
101# Enumerated types (v3.4)
102from enum import Enum
103from typing import List, Any
104
105# YAML configuration files (a superset of JSON!)
106import yaml
107# Recommended by https://www.cloudbees.com/blog/yaml-tutorial-everything-you-need-get-started
108try:
109 from yaml import CLoader as Loader
110except ImportError:
111 from yaml import Loader
112
113# Python syntax highlighter. See https://pygments.org
114from pygments import highlight
115from pygments.lexers import HtmlLexer, CssLexer, JavascriptLexer, YamlLexer, MakefileLexer, BashLexer, VimLexer, TexLexer
116from pygments.lexers import PythonLexer, CppLexer, CLexer, CommonLispLexer, FortranFixedLexer, MatlabLexer, OutputLexer
117from pygments.formatters import HtmlFormatter
118
119
120# ----------------------------------------------------------------------------
121# Custom Top Level Exceptions.
122# ----------------------------------------------------------------------------
123
class UpdateWebException(Exception):
    """Top-level application error for this script.

    Raised when something goes wrong at a deep level -- scanning local files,
    scanning remote files, or synchronizing the two -- and recovery is not
    possible.  Derived directly from Exception, as the Python manual
    recommends for application-defined exceptions.
    """
128
129# ----------------------------------------------------------------------------
130# User settings.
131# ----------------------------------------------------------------------------
132
class TreeWalkSettings(Enum):
    """Strategies for traversing the directory tree."""
    BREADTH_FIRST_SEARCH = 1  # Visit all entries at one level before descending.
    DEPTH_FIRST_SEARCH = 2    # Descend into each subdirectory as soon as it is found.
137
class FileType(Enum):
    """Classify an item found during a web walk: directory vs. plain file,
    and on which side(s) -- local disk, remote server, or both -- it exists."""
    DIRECTORY = 0
    FILE = 1
    ON_LOCAL_ONLY = 2
    ON_REMOTE_ONLY = 3
    ON_BOTH_LOCAL_AND_REMOTE = 4
145
class UserSettings:
    """Megatons of user selectable settings.

    Holds logging/command-line flags, the server account settings loaded from
    a private YAML parameter file, and all of the regular-expression patterns
    used to clean up the local web site and to parse remote FTP listings.
    Note that the regex string constants below are replaced in place by their
    compiled pattern objects in precompile_regular_expressions().
    """
    # Logging control.
    LOGFILENAME = ""
    VERBOSE = False  # Verbose mode. Prints out everything.
    CLEAN = False  # Clean the local website only.
    UNITTEST = False  # Run a unit test of a function.
    MATHJAX = False  # Process and upload MathJax files to server.

    # When diving into the MathJax directory, web walking the deep directories
    # may exceed Python's default recursion limit of 1000.
    # NOTE(review): this call executes at class-definition (i.e. import) time,
    # so the process-wide recursion limit changes as soon as this module loads.
    RECURSION_DEPTH = 5000
    sys.setrecursionlimit(RECURSION_DEPTH)

    # Fields in the file information (file_info) structure.
    # For example, file_info =
    # [ '/WebDesign/EquationImages/equation001.png', -- The file name.
    # 1, -- Enum type: Is it a file? dir? on local? on remote? on both?
    # datetime.datetime(2010, 2, 3, 17, 15), -- UTC encoded in a datetime class.
    # 4675] -- File size in bytes.
    FILE_NAME = 0
    FILE_TYPE = 1
    FILE_DATE_TIME = 2
    FILE_SIZE = 3

    # Server settings.
    # The YAML file path is relative to the local web-site root directory.
    SERVER_SETTINGS_FILE_NAME = "/private/updateweb.yaml"
    # The following are filled in from the YAML file by get_server_settings().
    SERVER_NAME = None
    USER_NAME = None
    PASSWORD_NAME = None
    FTP_ROOT_NAME = None
    FILE_SIZE_LIMIT_NAME = None

    # Map month names onto numbers.
    monthToNumber = {
        'Jan': 1,
        'Feb': 2,
        'Mar': 3,
        'Apr': 4,
        'May': 5,
        'Jun': 6,
        'Jul': 7,
        'Aug': 8,
        'Sep': 9,
        'Oct': 10,
        'Nov': 11,
        'Dec': 12}

    # List of directories to skip over when processing or uploading the web page.
    # Some are private but most are dir of temporary files.
    # They will be listed as WARNING in the log.
    # Examples:
    # My private admin settings directory.
    # Git or SVN local admin directories.
    # Compile build directories from XCode.
    # PyCharm build directories.
    # Python cache directories.
    # Jupyter checkpoint directories.
    # XCode temporary file crap.
    DIR_TO_SKIP = "private|.git|.github|.svn|build|XCodeOutput|Debug|Release|PyCharm|.idea|__pycache__|.ipynb_checkpoints|ModuleCache.noindex|SymbolCache.noindex|Primpoly-[a-z]"

    # List of files to skip when processing or uploading to the web page.
    # They will be listed as WARNING in the log.
    # Examples:
    # MathJax yml file.
    # .htaccess (because it doesn't show up on the output of ftp LIST, so we must upload manually)
    FILE_TO_SKIP = ".travis.yml|.svnignore|.htaccess"

    # Suffixes for temporary files which will be deleted during the cleanup
    # phase.  (Verbose regex; compiled in precompile_regular_expressions().)
    TEMP_FILE_SUFFIXES = r""" # Use Python raw strings.
        \. # Match the dot in the file name.
        # Now begin matching the file name suffix.
        # (?: non-capturing match for the regex inside the parentheses,
        # i.e. matching string cannot be retrieved later.
        # Now match any of the following file extensions:
        (?: o | obj | lib | # Object files generated by C, C++, etc compilers
        pyc | # Object file generated by the Python compiler
        ilk | pdb | sup | # Temp files from VC++ compiler
        idb | ncb | opt | plg | # Temp files from VC++ compiler
        sbr | bsc | map | bce | # Temp files from VC++ compiler
        res | aps | dep | db | # Temp files from VC++ compiler
        jbf | # Paintshop Pro
        class | jar | # Java compiler
        fas | # CLISP compiler
        swp | swo | # Vim editor
        toc | aux | # TeX auxilliary files (not .synctex.gz or .log)
        DS_Store | _\.DS_Store | # macOS finder folder settings.
        _\.Trashes | # macOS recycle bin
        gdb_history) # GDB history
        $ # Now we should see only the end of line.
        """

    # Special case: Vim temporary files contain a twiddle anywhere in the
    # name.
    VIM_TEMP_FILE_EXT = "~"

    # Suffixes for temporary directories which should be deleted during the
    # cleanup phase.
    TEMP_DIR_SUFFIX = r""" # Use Python raw strings.
        (?: Debug | Release | # C++ compiler
        ipch | \.vs | # Temp directories from VC++ compiler
        \.Trashes | \.Trash) # macOS recycle bin
        $
        """

    # File extension for an internally created temporary file.
    TEMP_FILE_EXT = ".new"

    # Identify source file types.
    HYPERTEXT_FILE_PATTERN = r""" # Use Python raw strings.
        (\. # Match the filename suffix after the .
        (?: html | htm | # HTML hypertext
        css) # CSS style sheet
        $) # End of line.
        """

    SOURCE_FILE_PATTERN = r""" # Use Python raw strings.
        (?: makefile$ | # Any file called makefile is a source file.
        # Note the $ at the end so we don't reprocess .gitconfig.html -> .gitconfig.html.html
        .vimrc$ | # Vim script
        (.bashrc$ | # Bash configuration files.
        .bash_profile$ |
        .bash_logout$)
        |
        (.gitignore$ | # Git configuration files.
        .gitignore_global$ |
        .gitconfig$)
        |
        (\. # Match the filename suffix after the .
        # Now match any of these suffixes:
        (?:
        c | cpp | h | hpp | # C++ and C
        js | # Javascript
        py | # Python
        lsp | # LISP
        ipynb | # Jupyter notebook
        m | # MATLAB
        FOR | for | f | # FORTRAN
        yaml | # YAML = JSON superset
        tex | # LaTeX
        txt | dat | # Data files
        sh) # Bash
        $) # End of line.
        )
        """

    # Special case of certain HTML and CSS files for which we want to generate a syntax highlighted source code listing.
    SPECIAL_FILE_TO_HIGHLIGHT_PATTERN = r"""
        (?: ^life\.html$ | # We want a listing of this particular HTML file.
        ^index\.html$ | # I want to list my top level HTML file. (There is only one file with this name at the top level web directory.)
        ^webPageDesign\.html$ | # and also this HTML example file, but no others.
        ^StyleSheet\.css$ ) # I want to list my style sheet.
        """

    # Files for which we want to generate a syntax highlighted source code listing.
    # Uses an f-string combined with a raw-string.
    FILE_TO_HIGHLIGHT_PATTERN = fr"""
        (?: {SPECIAL_FILE_TO_HIGHLIGHT_PATTERN} |
        {SOURCE_FILE_PATTERN} )
        """

    # Update my email address.
    # This is tricky: Prevent matching and updating the name within in this
    # Python source file by using the character class brackets.
    OLD_EMAIL_ADDRESS = r"""
        artificer\!AT\!sean[e]rikoconnor\!DOT\!freeservers\!DOT\!com
        """
    NEW_EMAIL_ADDRESS = "seanerikoconnor!AT!gmail!DOT!com"

    # List of patterns to match, match groups to pull out of the old string, new strings to generate from these two items.
    # Read patterns and strings from the updateweb.yaml file.
    STRING_REPLACEMENT_LIST = []
    # Pairs of test strings and their correct match/replacements.
    STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST = []

    # Match a copyright line like this:
    # Copyright (C) 1999-2025 by Sean Erik O'Connor. All Rights Reserved.
    # Extract the copyright symbol which can be ascii (C) or HTML &copy; and extract the old year.
    TWO_DIGIT_YEAR_FORMAT = "%02d"
    COPYRIGHT_LINE = r"""
        Copyright # Copyright.
        \s+ # One or more spaces.
        (?P<symbol> \(C\) | ©) # Match and extract the copyright symbol.
        \D+ # Any non-digits.
        (?P<old_year>[0-9]+) # Match and extract the old copyright year, place it into variable 'old_year'
        - # hyphen
        ([0-9]+) # New copyright year.
        \s+ # One or more spaces.
        by\s+Sean\sErik # Start of my name. This way we don't rewrite somebody else's copyright notice.
        """

    # Match a line containing the words,
    # last updated YY
    # and extract the two digit year YY.
    LAST_UPDATED_LINE = r"""
        last\s+ # Match the words "last updated"
        updated\s+
        \d+ # Day number
        \s+ # One or more blanks or tab(
        [A-Za-z]+ # Month
        \s+ # One or more blanks or tabs
        (?P<year>\d+) # Two digit year. Place it into the variable 'year'
        """

    # Web server root directory.
    DEFAULT_ROOT_DIR = "/"

    # The ftp listing occasionally shows a date newer than the actual date.
    # On my server, it could be 6 months newer when we are near New Year's Day. Typically the server file time is only a 1 or 2 minutes newer.
    # But if the remote file time is much newer, it might be an old file with a bad date/time.
    # Upload the file to be safe.
    # How to see the time differences from the log if they are large:
    # egrep -o "Remote file.*is MUCH newer.*days" logRemote.txt
    # Remote file Finance/MortgageLoanDerivation.tex.html is MUCH newer[8.0 minutes] by 885753.0 seconds = 14762.5 minutes = 246.0 hours = 10.3 days
    # How to see the time differences from the log if they are small and we wait and NOT upload:
    # egrep -o "Remote file.*is newer.*days" logRemote.txt
    # Remote file error404.html is newer by 102.0 seconds = 1.7 minutes = 0.0 hours = 0.0 days
    # Remote file index.html is newer by 113.0 seconds = 1.9 minutes = 0.0 hours = 0.0 days
    MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD = 8.0

    # Upload only if we are newer by more than a few minutes. Allows for a little slop in time stamps on server or host.
    MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD = 3.0

    # An ftp list command line should be at least this many chars, or we'll
    # suspect and error.
    MIN_FTP_LINE_LENGTH = 7

    # Parse an ftp listing, extracting <bytes> <mon> <day> <hour> <min> <year> <filename>
    # ftp listings are generally similar to UNIX ls -l listings.
    #
    # Some examples:
    #
    # (1) Freeservers ftp listing,
    #
    # 0 1 2 3 4 5 6 7 8
    # drwxr-xr-x 3 1000 1000 4096 Nov 18 2006 Electronics
    # -rw-r--r-- 1 1000 1000 21984 Jun 4 03:46 StyleSheet.css
    # -rw-r--r-- 1 1000 1000 2901 Sep 26 17:12 allclasses-frame.html
    #
    # (2) atspace ftp listing,
    #
    # drwxr-xr-x 3 seanerikoconnor vusers 49 Apr 7 2006 Electronics
    # -rw-r--r-- 1 seanerikoconnor vusers 21984 Jun 4 04:03 StyleSheet.css
    #
    FTP_LISTING = r"""
        [drwx-]+ # Unix type file mode.
        \s+ # One or more blanks or tabs.
        \d+ # Number of links.
        \s+
        \w+ # Owner.
        \s+
        \w+ # Group.
        \s+
        (?P<bytes> \d+) # File size in bytes, placed into the variable 'bytes'.
        \s+
        (?P<mon> \w+) # Month modified, placed into the variable 'mon'.
        \s+
        (?P<day> \d+) # Day modified, placed into the variable 'day'.
        \s+
        (
        (?P<hour> \d+) # Hour modified, placed into the variable 'hour'.
        :
        (?P<min> \d+) # Minute modified, placed into the variable 'min'.
        |
        (?P<year> \d+) # If hours and minutes are absent (happens when year is not the current year),
        # extract the year instead.
        )
        \s+
        (?P<filename> [A-Za-z0-9"'.\-_,~()=+#]+) # Path and file name containing letters, numbers,
        # and funny characters. We must escape some of
        # these characters with a backslash, \.
        """

    # HTML header up to the style sheet.
    BASIC_HTML_BEGIN = \
        """
        <!DOCTYPE html>
        <html lang="en-US"> <!-- Set language of this page to USA English. -->

        <head>
        <!-- This page uses Unicode characters. -->
        <meta charset="utf-8">

        <!-- Set viewport to actual device width. Any other settings makes the web page initially appear zoomed-in on mobile devices. -->
        <meta name="viewport" content="width=device-width, initial-scale=1">

        <!-- Title appears in the web browser tab for this page. The browser also uses it to bookmark this page. -->
        <title>Sean Erik O'Connor - Home Page and Free Mathematical Software.</title>

        <!-- Search engines will search using words in this description. They will also display title in their search results. -->
        <meta name="description" content="Syntax Colored Source Code Listing">

        <!-- Some content management software uses the author's name. -->
        <meta name="author" content="Sean Erik O'Connor">

        <meta name="copyright" content="Copyright (C) 1986-2025 by Sean Erik O'Connor. All Rights Reserved.">

        <!-- Begin style sheet insertion -->
        <style>
        /* Default settings for all my main web pages. */
        body
        {
        /* A wide sans-serif font is more readable on the web. */
        font-family: Verdana, Geneva, "Trebuchet MS", sans-serif ;

        /* Set the body font size a little smaller than the user's default browser setting. */
        font-size: 0.8em ;

        /* Black text is easier to read. */
        color: black ;

        /* More vertical space between lines for more pleasant reading. Use a unitless font height multiplier.
        Length and percentage percentage values can give scrunched text due to poor inheritance behavior. */
        line-height: 1.7 ;
        }

        <!-- Now prepare to add the syntax coloring style sheet from Pygment -->
        """

    # After the style sheet and up to the start of the article in the body.
    BASIC_HTML_MIDDLE = \
        """
        </style>
        </head>

        <body>
        <article class="content">
        """

    # After the source code listing, finish the article, body and html document.
    BASIC_HTML_END = \
        """
        </article>
        </body>

        </html>
        """

    def __init__(self) -> None:
        """Set up the user settings.

        Determines the local web-site root for this platform, loads the
        private server settings from the YAML parameter file, then compiles
        all the regular-expression constants in place.
        """

        self.local_root_dir = ""

        # Import the user settings from the parameter file.
        self.get_local_root_dir()
        self.get_server_settings()

        self.precompile_regular_expressions()

    def get_server_settings(self) -> None:
        """
        Read web account private settings from a secret offline parameter file.
        These also hold patterns to match and replace in all of our source pages.

        The YAML file must contain exactly two documents: document 0 holds the
        FTP account settings, document 1 holds the pattern-match/replacement
        string lists and their test/verify string pairs.

        Raises:
            UpdateWebException: if the file cannot be opened or does not
                contain exactly two YAML documents.
        """

        # Private file which contains my account settings.
        settings_file_name = self.local_root_dir + self.SERVER_SETTINGS_FILE_NAME
        # Recommended by
        # https://www.cloudbees.com/blog/yaml-tutorial-everything-you-need-get-started
        # NOTE(review): the stream is never explicitly closed; consider a
        # with-block so the file handle is released promptly.
        try:
            stream = open(settings_file_name, "r")
        except OSError as detail:
            logging.error(f"Cannot open the YAML file {settings_file_name:s}. Unable to read the settings because: {str(detail):s}")
            # Rethrow the exception higher.
            raise UpdateWebException("Cannot load the settings. See the log file for details. Aborting... ") from detail
        # Read all the YAML documents in the file.
        yaml_contents = yaml.load_all(stream, Loader)
        yaml_document_list: list[Any] = []
        for yaml_doc in yaml_contents:
            yaml_document_list.append(yaml_doc)
        num_yaml_docs = len(yaml_document_list)
        if num_yaml_docs != 2:
            logging.error(f"Wrong number of YAML documents = {num_yaml_docs:3d} in the user settings file. Aborting...")
            raise UpdateWebException("Cannot load the settings. See the log file for details. Aborting... ")

        # Load all the server settings.
        self.SERVER_NAME = yaml_document_list[0]['ftp_server_name']
        self.USER_NAME = yaml_document_list[0]['ftp_user_name']
        self.PASSWORD_NAME = yaml_document_list[0]['ftp_password']
        self.FTP_ROOT_NAME = yaml_document_list[0]['remote_directory']
        self.FILE_SIZE_LIMIT_NAME = int(yaml_document_list[0]['file_size_limit_Kb'])

        # Load all the tuples which contain patterns to match and the strings to replace, from document #1 in the YAML file.
        self.STRING_REPLACEMENT_LIST = []
        pat_rep_yaml_list = yaml_document_list[1]['pattern_match_replacement_string_list']
        for pat_rep in pat_rep_yaml_list:
            # Fetch the regular expression and compile it for speed.
            verbose_regex = pat_rep['pattern']
            pat = re.compile(verbose_regex, re.VERBOSE | re.IGNORECASE)
            # Since we use raw strings, we need to strip off leading and trailing whitespace.
            # NOTE(review): .strip() already removes leading whitespace, so the
            # extra .lstrip() is redundant (harmless).
            replacement_string = pat_rep['replacement_string'].strip().lstrip()
            self.STRING_REPLACEMENT_LIST.append([pat, replacement_string])

        # Load the test and verify strings.
        test_verify_strings_list = yaml_document_list[1]['test_verify_string_list']
        for test_verify_string in test_verify_strings_list:
            test_string = test_verify_string['test_string'].strip().lstrip()
            verify_string = test_verify_string['verify_string'].strip().lstrip()
            self.STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST.append([test_string,verify_string])

        print(" ...done!", flush=True)
        return

    def get_local_root_dir(self) -> None:
        """Get the local website root directory on this platform."""

        # Each platform has a definite directory for the web page.
        local_web_dir_path = "/Desktop/Sean/WebSite"

        if sys.platform.startswith('darwin'):
            self.local_root_dir = str(Path.home()) + local_web_dir_path
        # My Cyperpower PC running Ubuntu Linux.
        elif sys.platform.startswith('linux') or sys.platform.startswith('cygwin'):
            self.local_root_dir = str(Path.home()) + local_web_dir_path
        # NOTE(review): on any other platform (e.g. Windows) local_root_dir is
        # left unchanged -- presumably only macOS/Linux/Cygwin are ever used.
        return

    def precompile_regular_expressions(self) -> None:
        """For speed precompile the regular expression search patterns.

        Each verbose-regex string constant above is replaced in place by its
        compiled re.Pattern object on this instance.
        """
        self.COPYRIGHT_LINE = re.compile(self.COPYRIGHT_LINE, re.VERBOSE | re.IGNORECASE)
        self.FTP_LISTING = re.compile(self.FTP_LISTING, re.VERBOSE | re.IGNORECASE)
        self.TEMP_FILE_SUFFIXES = re.compile(self.TEMP_FILE_SUFFIXES, re.VERBOSE | re.IGNORECASE)
        self.TEMP_DIR_SUFFIX = re.compile(self.TEMP_DIR_SUFFIX, re.VERBOSE)
        self.SOURCE_FILE_PATTERN = re.compile(self.SOURCE_FILE_PATTERN, re.VERBOSE)
        self.HYPERTEXT_FILE_PATTERN = re.compile(self.HYPERTEXT_FILE_PATTERN, re.VERBOSE)
        self.OLD_EMAIL_ADDRESS = re.compile(self.OLD_EMAIL_ADDRESS, re.VERBOSE | re.IGNORECASE)
        self.FILE_TO_HIGHLIGHT_PATTERN = re.compile(self.FILE_TO_HIGHLIGHT_PATTERN, re.VERBOSE)
        self.LAST_UPDATED_LINE = re.compile(self.LAST_UPDATED_LINE, re.VERBOSE | re.IGNORECASE)
574
575# ----------------------------------------------------------------------------
576# Unit test individual functions.
577# ----------------------------------------------------------------------------
578
class UnitTest(unittest.TestCase):
    """Unit tests for the helper patterns and file time/date utilities.

    NOTE(review): these tests read the private YAML settings file and several
    files in the local web-site directory, so they only pass in the author's
    environment.
    """
    def setUp(self):
        """Initialize the UnitTest class: load the user settings."""
        self.user_settings = UserSettings()
        self.user_settings.get_local_root_dir()

    def tearDown(self):
        """Clean up the UnitTest class."""
        self.user_settings = None

    def test_copyright_updating(self):
        """Test copyright line updating to the current year."""
        # Prevent web cleaning from rewriting strings by splitting them up and concatenating them.
        line_before_update = "Copyright (C) 19" + "99-20" + "20" + " by Sean Erik O'Connor. All Rights Reserved. Copyright © 1999-2025 by Sean Erik O'Connor"
        line_after_update_actual = "Copyright (C) 1999-2025 by Sean Erik O'Connor. All Rights Reserved. Copyright © 1999-2025 by Sean Erik O'Connor"
        pat = self.user_settings.COPYRIGHT_LINE
        match = pat.search(line_before_update)

        if match:
            old_year = int(match.group('old_year'))
            # Same as call to self.get_current_year():
            current_year = int(time.gmtime()[0])
            if old_year < current_year:
                # We matched and extracted the old copyright symbol into the variable
                # 'symbol' using the pattern syntax (?P<symbol> \(C\) | ©)
                # We now insert it back by placing the special syntax
                # \g<symbol> into the replacement string.
                new_copyright = r"Copyright \g<symbol> \g<old_year>-" + str(current_year) + " by Sean Erik"
                line_after_update_computed = pat.sub(new_copyright, line_before_update)
                self.assertEqual(
                    line_after_update_actual,
                    line_after_update_computed,
                    f"newline = |{line_after_update_actual:s}| line_after_update_computed = |{line_after_update_computed:s}|")
            else:
                print( "old_year >= current_year" )
                self.fail()
        else:
            print( "no match for copyright pattern" )
            self.fail()

    def test_extract_filename_from_ftp_listing(self):
        """Test parsing an FTP listing."""
        ftp_line = "-rw-r--r-- 1 1000 1000 2901 Sep 26 17:12 allclasses-frame.html"
        extracted_file_name = "allclasses-frame.html"
        pat = self.user_settings.FTP_LISTING
        match = pat.search(ftp_line)
        if match:
            filename = match.group('filename')
            self.assertEqual(
                filename,
                extracted_file_name,
                f"ftp_line = {ftp_line:s} extracted file name = {extracted_file_name:s}")
        else:
            self.fail()

    def test_get_file_time_and_date(self):
        """Test getting a file time and date."""
        # Point to an old file.
        file_name = "./Images/home.png"
        full_file_name = self.user_settings.local_root_dir + '/' + file_name
        # Get the UTC time.
        file_epoch_time = os.path.getmtime(full_file_name)
        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
        # Create a datetime object for the file.
        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class; year, month, day, hour, minute, seconds.
        # Check if the file time matches what we would see if we did ls -l <file_name>
        computed = f"file {file_name:s} datetime {d.ctime():s}"
        actual = "file ./Images/home.png datetime Wed Jan  1 03:42:41 2025"
        self.assertEqual(computed, actual)

    def test_set_file_time_and_date(self):
        """Test setting a file time and date."""
        file_name = "./Images/home.png"
        full_file_name = self.user_settings.local_root_dir + '/' + file_name
        # Create a temporary file in the same directory.
        temp_file_name = "temporal.tmp"
        full_temp_file_name = self.user_settings.local_root_dir + temp_file_name
        try:
            with open(full_temp_file_name, 'w') as fp:
                fp.write("The End of Eternity")
        except OSError as detail:
            logging.error(f"Cannot open or write to the file {full_temp_file_name:s}: {str(detail):s} Aborting...")
            raise UpdateWebException("Failed the unit test for setting time and date of a file. See the log file for details. Aborting...") from detail
        # Get the old file time. Set the temporary file to the same time.
        file_stat = os.stat(full_file_name)
        os.utime(full_temp_file_name, (file_stat[stat.ST_ATIME], file_stat[stat.ST_MTIME]))
        # What is the temporary file's time now?
        file_epoch_time = os.path.getmtime(full_temp_file_name)
        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class; year, month, day, hour, minute, seconds.
        # Is the temporary file time set properly?
        computed = f"file {file_name:s} datetime {d.ctime():s}"
        actual = "file ./Images/home.png datetime Wed Jan  1 03:42:41 2025"
        self.assertEqual(computed, actual)
        os.remove(full_temp_file_name)

    def test_difference_of_time_and_date(self):
        """Test a date difference calculation."""
        file_name = "./Images/home.png"
        full_file_name = self.user_settings.local_root_dir + '/' + file_name
        # Get the UTC time.
        file_epoch_time = os.path.getmtime(full_file_name)
        file_time_utc = time.gmtime(file_epoch_time)[0: 6]
        # Create a datetime object for the file.
        d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])  # datetime class; year, month, day, hour, minute, seconds.
        # Slightly change the date and time by adding 1 minute.
        d2 = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5]+1)  # year, month, day, hour, minute, second
        time_delta = d2 - d
        seconds_different = time_delta.total_seconds()
        minutes_different = seconds_different / 60.0
        hours_different = minutes_different / 60.0
        days_different = hours_different / 24.0
        computed = f"difference {days_different:8.5f} days, {hours_different:8.5f} hours {minutes_different:8.5f} minutes, {seconds_different:8.5f} seconds"
        actual = "difference  0.00001 days,  0.00028 hours  0.01667 minutes,  1.00000 seconds"
        self.assertEqual(computed, actual)

    def test_pattern_match_dir_to_skip(self):
        """Test if skipping certain named directories is recognizing the dir names."""
        dir_skip = "Primpoly-cswhfrwgwdikgzfdpiorbeaiennz"
        pat = re.compile(self.user_settings.DIR_TO_SKIP)
        # Assert directly on the match object instead of the
        # assertTrue(True)/assertTrue(False) anti-pattern.
        self.assertIsNotNone(pat.search(dir_skip))

    def test_file_name_to_syntax_highlight(self):
        """Test if syntax highlighting recognizes file names to highlight."""
        file_name1 = "Computer/hello.lsp"
        file_name2 = "Computer/life.html"
        p = self.user_settings.FILE_TO_HIGHLIGHT_PATTERN
        # Both a source file and a special-cased HTML file must match.
        self.assertIsNotNone(p.search(Path(file_name1).name))
        self.assertIsNotNone(p.search(Path(file_name2).name))

    def test_user_settings(self):
        """Test whether user settings are correctly initialized."""
        computed = f"File size limit = {int(self.user_settings.FILE_SIZE_LIMIT_NAME):d} K"
        actual = "File size limit = 50000 K"
        self.assertEqual(computed, actual, "File size limit settings are incorrect.")

    def test_check_replace_substring(self,debug=True):
        """Test the substring pattern match and replacement functions which use the list of match/replace pairs in the YAML file.
        For troubleshooting, turn on debug.
        """
        test_verify_pairs = self.user_settings.STRING_REPLACEMENT_TEST_VERIFY_STRING_LIST
        # Iterate over all test strings.
        for pair in test_verify_pairs:
            [test_string, verify_string] = pair
            if debug:
                print( f">>>>>>> next test string = {test_string}")
                print( f">>>>>>> next verify string = {verify_string}")
            # Iterate over all patterns and replacements.
            for match_replace_tuple in self.user_settings.STRING_REPLACEMENT_LIST:
                [pat, rep_string] = match_replace_tuple
                # Bug fix: these two prints were unguarded, ignoring the debug flag.
                if debug:
                    print( f"\t-------> next pattern = {pat}")
                    print( f"\t-------> next replacement = {rep_string}")
                match = pat.search(test_string)
                # The pattern match succeeds.
                if match:
                    try:
                        sub = pat.sub(rep_string, test_string)
                        # String replacement succeeds for this pattern/replace pair iteration.
                        if debug:
                            print( f"\t\t.......> match and replace: {test_string} ---> {sub}")
                        test_string = sub
                    except IndexError as detail:
                        print(f"\t\t.......> Caught an exception: {str(detail):s}. Replacement failed.")
                        if debug:
                            self.assertTrue(False)
                elif debug:
                    print( f"\t\t.......> no match for pattern = {pat} in test string = {test_string}")
                # No match, so go on to the next pattern and don't change test_string.
            # Done with all pattern/replace on test string.
            # Check this test string in the list.
            self.assertEqual(test_string, verify_string, f"\ntest_string = |{test_string:s}|\nverify_string = |{verify_string:s}|\n")
            if debug:
                print( "\t******* DONE with all pattern matches and replacements on this test/verify string pair.\n" )
757
758# ----------------------------------------------------------------------------
759# Command line options.
760# ----------------------------------------------------------------------------
761
class CommandLineSettings(object):
    """Parse the command line and transfer the selected options into the user settings."""

    def __init__(self, user_settings, raw_args=None):
        """Build the argument parser, parse raw_args (or sys.argv when None),
        and set the corresponding flags on user_settings."""
        parser = argparse.ArgumentParser(
            description="updateweb options")

        # -v: log everything, not only warnings and errors.
        parser.add_argument(
            "-v",
            "--verbose",
            help="Turn on verbose mode to log everything",
            action="store_true")

        # -c: restrict the run to a local-site cleanup.
        parser.add_argument(
            "-c",
            "--clean",
            help="Do a cleanup on the local web site only.",
            action="store_true")

        # -m: additionally upload the MathJax directory.
        parser.add_argument(
            "-m",
            "--mathjax",
            help="""ALSO upload mathjax directory.\
            Do this if you have a new version of MathJax or if you've reset your server and deleted the /mathjax remote directory.\
            You'll need to do git restore on any altered files in the local branch and git clean -f to remove any newly created files.\
            NOTE: If you did reset your server and delete all files, run the command find . -name '*.*' -exec touch {} \\; from the web page root directory.\
            Also run find . -name '*' -exec touch {} \\; This will ensure accurate times on the server.""",
            action="store_true")

        # -t: run the unit tests and nothing else.
        parser.add_argument("-t", "--test",
                            help="Run unit tests.",
                            action="store_true")

        options = parser.parse_args(raw_args)

        # Copy each flag the user selected into the settings object.
        if options.verbose:
            user_settings.VERBOSE = True
        if options.clean:
            user_settings.CLEAN = True
        if options.test:
            user_settings.UNITTEST = True
        if options.mathjax:
            user_settings.MATHJAX = True
810
811# ----------------------------------------------------------------------------
812# Base class which describes my web site overall.
813# ----------------------------------------------------------------------------
814
class WebSite(object):
    """
    Abstract class used for analyzing both local and remote (ftp server) websites.
    Contains the web-walking functions which traverse the directory structures and files.
    These will be overloaded in the subclasses with differently specialized methods for either walking a disk drive directory with ls commands or else walking a remote directory with FTP commands.
    Child classes may define additional functions which only they need.
    """

    def __init__(self, settings):
        """Set up the root directory and the traversal bookkeeping.

        settings supplies the skip patterns (FILE_TO_SKIP, DIR_TO_SKIP), the
        log file name, and the indices into the file information lists.
        """

        # Import the user settings.
        self.user_settings = settings

        # Queue keeps track of directories not yet processed.
        self.queue = []

        # List of all directories traversed (relative to the root).
        self.directories = []

        # List of files traversed, with file information lists.
        self.files = []

        # Find out the root directory and go there.  Both calls are overridden
        # in the subclasses (local disk vs. FTP server).
        self.root_dir = self.get_root_dir()
        self.go_to_root_dir(self.root_dir)

    # Static methods:  no self argument, callable without an instance, e.g.
    # WebSite.get_current_year() from anywhere inside or outside the class.
    @staticmethod
    def get_current_year():
        """Get the current year (UTC) as an int."""
        return int(time.gmtime()[0])

    @staticmethod
    def get_current_two_digit_year():
        """Get the last two digits of the current year."""
        return WebSite.get_current_year() % 100

    @staticmethod
    def is_file_info_type(file_info):
        """Check if we have a file information structure (a list) or merely a simple file name (a string).

        Raises UpdateWebException for any other type.
        """
        try:
            if isinstance(file_info, list):
                return True
            elif isinstance(file_info, str):
                return False
            else:
                logging.error("is_file_info_type found a bad type. Aborting...")
                raise UpdateWebException("Internal error for file type. See the log file for details. Aborting... ")
        except TypeError as detail:
            logging.error(f"is_file_info_type found a bad type {str(detail):s}. Aborting...")
            raise UpdateWebException("Internal error for file type. See the log file for details. Aborting... ") from detail

    def get_root_dir(self):
        """Subclass: Put code here to get the root directory"""
        return ""

    def go_to_root_dir(self, root_dir):
        """Subclass: Put code here to go to the root directory"""
        pass  # Python's do-nothing statement.

    def one_level_down(self, d):
        """Subclass: Fill in with a method which returns a list of the
        directories and files immediately beneath dir"""
        return [], []

    def walk(self, d, type_of_tree_search=TreeWalkSettings.BREADTH_FIRST_SEARCH):
        """Walk a directory in either depth first or breadth first order. BFS is the default.

        Appends root-relative file info lists to self.files and root-relative
        directory names to self.directories, skipping private files/dirs and
        the log file.
        """

        # Get all subfiles and subdirectories off this node.
        subdirectories, subfiles = self.one_level_down(d)

        # Compile the loop-invariant skip patterns once, instead of once per
        # file and once per directory inside the loops below.
        file_skip_pat = re.compile(self.user_settings.FILE_TO_SKIP)
        dir_skip_pat = re.compile(self.user_settings.DIR_TO_SKIP)

        # Add all the subfiles in order.
        for f in subfiles:

            name = self.strip_root(f)
            logging.debug(f"Webwalking: Adding file {name[self.user_settings.FILE_NAME]:s} to list.")

            # Some files are private so skip them from consideration.
            if file_skip_pat.search(name[self.user_settings.FILE_NAME]):
                logging.warning(
                    f"Webwalking: Skipping private file {name[self.user_settings.FILE_NAME]:s}")
            # Don't upload the log file due to file locking problems.
            elif name[self.user_settings.FILE_NAME].find(self.user_settings.LOGFILENAME) >= 0:
                logging.debug(f"Webwalking: Skipping log file {name[self.user_settings.FILE_NAME]:s}")
            else:
                self.files.append(name)

        # Queue up the subdirectories.  (Use a name distinct from the walk()
        # parameter d so we don't shadow it.)
        for subdir in subdirectories:
            # Some directories are private such as .git or just temporary file
            # caches so skip them from consideration.
            if dir_skip_pat.search(subdir):
                logging.warning(f"Webwalking: Skipping private dir {subdir:s}")
            else:
                logging.debug(f"Webwalking: Pushing dir {subdir:s} on the queue.")
                self.queue.append(subdir)

        # Search through the directories.  self.queue is shared across the
        # recursive calls below, so directories queued here may be popped by a
        # deeper invocation and vice versa.
        while len(self.queue) > 0:
            # For breadth first search, remove from beginning of queue.
            if type_of_tree_search == TreeWalkSettings.BREADTH_FIRST_SEARCH:
                d = self.queue.pop(0)
            # For depth first search, remove from end of queue.
            elif type_of_tree_search == TreeWalkSettings.DEPTH_FIRST_SEARCH:
                d = self.queue.pop()
            # Unknown search type:  default to breadth first.
            else:
                d = self.queue.pop(0)

            name = self.strip_root(d)
            logging.debug(f"Webwalking: Adding relative directory {name:s} to list, full path = {d:s}.")
            self.directories.append(name)

            self.walk(d)

    def strip_root(self, file_info):
        """Return a path, but strip off the root directory.

        file_info may be a bare path string or a file information list; the
        same kind of value is returned.
        """

        root = self.root_dir

        # Extract the file name.
        if self.is_file_info_type(file_info):
            name = file_info[self.user_settings.FILE_NAME]
        else:
            name = file_info

        # e.g. root = / and name = /Art/foo.txt yields stripped_path = Art/foo.txt
        # but root = /Sean and name = /Sean/Art/foo.txt yields stripped_path =
        # Art/foo.txt
        lenroot = len(root)
        if root != self.user_settings.DEFAULT_ROOT_DIR:
            # Also skip the path separator following a non-default root.
            lenroot = lenroot + 1

        stripped_path = name[lenroot:]

        if self.is_file_info_type(file_info):
            # Update the file name only;  keep type, date/time and size.
            return [stripped_path,
                    file_info[self.user_settings.FILE_TYPE],
                    file_info[self.user_settings.FILE_DATE_TIME],
                    file_info[self.user_settings.FILE_SIZE]]
        else:
            return stripped_path

    def append_root_dir(self, root_dir, name):
        """Append the root directory to a path.

        e.g. root = /, and name = Art/foo.txt yields /Art/foo.txt
        but root = /Sean, and name = Art/foo.txt yields /Sean/Art/foo.txt
        """
        if root_dir == self.user_settings.DEFAULT_ROOT_DIR:
            return root_dir + name
        else:
            return root_dir + "/" + name

    def scan(self):
        """Scan the directory tree recursively from the root"""
        logging.debug(f"Webwalking: Beginning recursive directory scan from root directory {self.root_dir:s}")
        self.walk(self.root_dir)

    def modtime(self, f):
        """Subclass: Get file modification time"""
        pass

    def finish(self):
        """Quit web site"""
        logging.debug(f"Finished with WebSite object of class {type(self)}")
990
991# ----------------------------------------------------------------------------
992# Subclass which knows about the local web site on disk.
993# ----------------------------------------------------------------------------
994
995class LocalWebSite(WebSite):
996 """Walk the local web directory on local disk down from the root.
997 Clean up temporary files and do other cleanup work."""
998
999 def __init__(self, settings):
1000 """Go to web page root and list all files and directories."""
1001
1002 # Initialize the parent class.
1003 WebSite.__init__(self, settings)
1004
1005 self.root_dir = self.get_root_dir()
1006 logging.debug(f"LocalWebSite.__init__(): \tRoot directory: {self.root_dir:s}")
1007
1008 def get_root_dir(self):
1009 """Get the name of the root directory"""
1010 return self.user_settings.local_root_dir
1011
1012 def go_to_root_dir(self, root_dir):
1013 """Go to the root directory"""
1014
1015 # Go to the root directory.
1016 logging.debug(f"LocalWebSite.go_to_root_dir(): \tchdir to root directory: {root_dir:s}")
1017 os.chdir(root_dir)
1018
1019 # Read it back.
1020 self.root_dir = os.getcwd()
1021 logging.debug(f"LocalWebSite.go_to_root_dir(): \tgetcwd root directory: {self.root_dir:s}")
1022
1023 def one_level_down(self, d):
1024 """List all files and subdirectories in the current directory, dir. For files, collect file info
1025 such as time, date and size."""
1026
1027 directories = []
1028 files = []
1029
1030 # Change to current directory.
1031 os.chdir(d)
1032
1033 # List all subdirectories and files.
1034 dir_list = os.listdir(d)
1035
1036 if dir_list:
1037 for line in dir_list:
1038 # Add the full path prefix from the root.
1039 name = self.append_root_dir(d, line)
1040 logging.debug(f"LocalWebSite.one_level_down(): \tlocal dir or file {name:s}")
1041
1042 # Is it a directory or a file?
1043 if os.path.isdir(name):
1044 directories.append(name)
1045 elif os.path.isfile(name):
1046 # First assemble the file information of name, time/date and size into a list.
1047 # Can index it like an array. For example,
1048 # file_info =
1049 # [ '/WebDesign/EquationImages/equation001.png', -- The file name.
1050 # 1, -- Enum type FileType.FILE = 1.
1051 # datetime.datetime(2010, 2, 3, 17, 15), -- UTC encoded in a date/time class.
1052 # 4675] -- File size in bytes.
1053 file_info = [name,
1054 FileType.FILE,
1055 self.get_file_date_time(name),
1056 self.get_file_size(name)]
1057 files.append(file_info)
1058
1059 # Sort the names into order.
1060 if directories:
1061 directories.sort()
1062 if files:
1063 files.sort()
1064
1065 return directories, files
1066
1067 @staticmethod
1068 def get_file_date_time(file_name):
1069 """Get a local file time and date in UTC."""
1070
1071 file_epoch_time = os.path.getmtime(file_name)
1072 file_time_utc = time.gmtime(file_epoch_time)[0: 6]
1073 # Create a datetime class from the UTC year, month, day, hour, minute, seconds.
1074 d = datetime.datetime(file_time_utc[0], file_time_utc[1], file_time_utc[2], file_time_utc[3], file_time_utc[4], file_time_utc[5])
1075 return d
1076
1077 @staticmethod
1078 def get_file_size(file_name):
1079 """Get file size in bytes."""
1080 return os.path.getsize(file_name)
1081
1082 @staticmethod
1083 def clean_up_temp_file(temp_file_name, file_name, changed):
1084 """Remove the original file, rename the temporary file name to the original name.
1085 If there are no changes, just remove the temporary file.
1086 """
1087
1088 if changed:
1089 # Remove the old file now that we have the rewritten file.
1090 try:
1091 os.remove(file_name)
1092 logging.debug(f"Changes were made. Removed original file {file_name:s}")
1093 except OSError as detail:
1094 logging.error(f"Cannot remove old file {file_name:s}: {str(detail):s}. Need to remove it manually.")
1095
1096 # Rename the new file to the old file name.
1097 try:
1098 os.rename(temp_file_name, file_name)
1099 logging.debug(f"Renamed temp file {temp_file_name:s} to original file {file_name:s}")
1100 except OSError as detail:
1101 logging.error(f"Cannot rename temporary file {temp_file_name:s} to old file name {file_name:s}: {str(detail):s}. Need to rename manually")
1102 else:
1103 # No changes? Remove the temporary file.
1104 try:
1105 os.remove(temp_file_name)
1106 logging.debug(f"No changes were made. Removed temporary file {temp_file_name:s}")
1107 except OSError as detail:
1108 logging.error(f"Cannot remove temporary file {temp_file_name:s}: {str(detail):s}. Need to remove it manually.")
1109 return
1110
1111 @staticmethod
1112 def process_lines_of_file(in_file_name, out_file_name, process_line_function_list=None):
1113 """
1114 Process each line of a file with a list of functions. Create a new temporary file.
1115
1116 The default list is None which means make an exact copy.
1117 """
1118
1119 # Assume no changes.
1120 changed = False
1121
1122 # Open both input and output files for processing. Check if we cannot do it.
1123 fin = None
1124 try:
1125 fin = open(in_file_name, "r")
1126 except IOError as detail:
1127 logging.error(f"process_lines_of_file(): \tCannot open file {in_file_name:s} for reading: {str(detail):s} Aborting...")
1128 if fin is not None:
1129 fin.close()
1130 raise UpdateWebException("Internal error for processing a file. See the log file for details. Aborting... ") from detail
1131 fout = None
1132 try:
1133 fout = open(out_file_name, "w")
1134 except IOError as detail:
1135 logging.error(f"process_lines_of_file(): \tCannot open file {out_file_name:s} for writing: {str(detail):s} Aborting...")
1136 if fout is not None:
1137 fout.close()
1138 raise UpdateWebException("Internal error for processing a file. See the log file for details. Aborting... ") from detail
1139
1140 # Read each line of the file, aborting if there is a read error.
1141 try:
1142 line = fin.readline()
1143
1144 # Rewrite the next line of the file using all the rewrite functions.
1145 while line:
1146 original_line = line
1147 # If we have one or more rewrite functions...
1148 if process_line_function_list is not None:
1149 # ...apply each rewrite functions to the line, one after the other in order.
1150 for processLineFunction in process_line_function_list:
1151 if processLineFunction is not None:
1152 line = processLineFunction(line)
1153
1154 if original_line != line:
1155 logging.debug(f"Rewrote the line: >>>{original_line:s}<<< into >>>{line:s}<<< for file {in_file_name:s}")
1156 changed = True
1157
1158 fout.write(line)
1159
1160 line = fin.readline()
1161
1162 fin.close()
1163 fout.close()
1164 except IOError as detail:
1165 logging.error(f"File I/O error during reading/writing file {in_file_name:s} in process_lines_of_file: {str(detail):s} Aborting...")
1166 raise UpdateWebException("Internal error for processing a file. See the log file for details. Aborting... ") from detail
1167
1168 if changed:
1169 logging.debug(f"process_lines_of_file(): \tRewrote original file {in_file_name:s}."
1170 f"Changes are in temporary copy {out_file_name:s}")
1171
1172 # Return True if any lines were changed.
1173 return changed
1174
1175 def clean(self):
1176 """Scan through all directories and files in the local on disk website and clean them up."""
1177
1178 num_source_files_changed = 0
1179 num_source_files_syntax_highlighted = 0
1180
1181 logging.debug("Cleaning up the local web page.")
1182
1183 if self.directories is None or self.files is None:
1184 logging.error("Web site has no directories or files. Aborting...")
1185 raise UpdateWebException("Internal error for cleaning up the local web site. See the log file for details. Aborting... ")
1186
1187 for d in self.directories:
1188
1189 if self.is_temp_dir(d):
1190 # Add the full path prefix from the root.
1191 name = self.append_root_dir(self.get_root_dir(), d)
1192 try:
1193 logging.debug(f"Removing temp dir {self.root_dir:s} recursively")
1194 shutil.rmtree(name)
1195 except OSError as detail:
1196 logging.error(f"Cannot remove temp dir {name:s}: {str(detail):s}")
1197
1198 for f in self.files:
1199 # Add the full path prefix from the root.
1200 full_file_name = self.append_root_dir(
1201 self.get_root_dir(), f[self.user_settings.FILE_NAME])
1202
1203 # Remove all temporary files.
1204 if self.is_temp_file(f):
1205 try:
1206 logging.debug(f"Removing temp file {full_file_name:s}")
1207 os.remove(full_file_name)
1208 except OSError as detail:
1209 logging.error(f"Cannot remove temp dir {full_file_name:s}: {str(detail):s}")
1210
1211 # Update source code files.
1212 if self.is_source_or_hypertext_file(f):
1213 changed = self.rewrite_source_file(full_file_name)
1214 if changed:
1215 num_source_files_changed += 1
1216 logging.debug(f"Rewrote source code file {self.root_dir:s}")
1217
1218 # Generate a syntax highlighted code listing.
1219 # Make it the same time and date as the original code. Then, only if there are recent changes, we will update the remote server.
1220 if self.is_file_to_syntax_highlight(f):
1221 # syntax_highlighted_file_name = self.create_syntax_highlighted_code_listing(full_file_name, dry_run=True)
1222 syntax_highlighted_file_name = self.create_syntax_highlighted_code_listing(full_file_name)
1223 if syntax_highlighted_file_name is not None:
1224 logging.debug(f"Generated a syntax highlighted source listing file {syntax_highlighted_file_name:s} for the file {full_file_name:s}")
1225 else:
1226 logging.debug(f"Failed to generate a syntax highlighted source listing file for {full_file_name:s}")
1227 num_source_files_syntax_highlighted += 1
1228
1229 logging.debug(f"Number of source files rewritten = {num_source_files_changed:10d}")
1230 logging.debug(f"Number of source files syntax highlighted = {num_source_files_syntax_highlighted:10d}")
1231
1232 def is_temp_file(self, file_info):
1233 """Identify a file name as a temporary file"""
1234
1235 file_name = file_info[self.user_settings.FILE_NAME]
1236
1237 # Suffixes and names for temporary files be deleted.
1238 pat = self.user_settings.TEMP_FILE_SUFFIXES
1239 match = pat.search(file_name)
1240 # Remove any files containing twiddles anywhere in the name.
1241 if match or file_name.find(self.user_settings.VIM_TEMP_FILE_EXT) >= 0:
1242 return True
1243
1244 return False
1245
1246 def is_temp_dir(self, dir_name):
1247 """Identify a name as a temporary directory."""
1248
1249 p = self.user_settings.TEMP_DIR_SUFFIX
1250 return p.search(dir_name)
1251
1252 def is_source_or_hypertext_file(self, file_info):
1253 """ Check if the file name is a source file or a hypertext file."""
1254
1255 file_name = file_info[self.user_settings.FILE_NAME]
1256 p1 = self.user_settings.SOURCE_FILE_PATTERN
1257 p2 = self.user_settings.HYPERTEXT_FILE_PATTERN
1258 if p1.search(file_name) or p2.search(file_name):
1259 return True
1260 else:
1261 return False
1262
1263 def is_file_to_syntax_highlight(self, file_info):
1264 """Check if this file type should have a syntax highlighted source listing."""
1265
1266 # Take apart the file name.
1267 full_file_name = file_info[self.user_settings.FILE_NAME]
1268 file_name = Path(full_file_name).name
1269
1270 p = self.user_settings.FILE_TO_HIGHLIGHT_PATTERN
1271 if p.search(file_name):
1272 return True
1273 else:
1274 return False
1275
1276 def rewrite_substring(self, line):
1277 """Rewrite a line containing a pattern of your choice"""
1278
1279 # Start with the original unchanged line.
1280 rewritten_line = line
1281
1282 # Do the replacements in order from first to last.
1283 for match_replace_tuple in self.user_settings.STRING_REPLACEMENT_LIST:
1284 # Get the next pattern match replacement string tuple.
1285 [pat, rep_string] = match_replace_tuple
1286 # Does it match? Then do string substitution, else leave the line unchanged.
1287 match = pat.search(rewritten_line)
1288 if match:
1289 # Now we have these cases:
1290 # -No capture variables at all, but just a straightforward pattern match followed by a string substitution.
1291 # -One or more capture variable names in the pattern (?P<varname> ... ) along with the same corresponding match group names in replacement string \\g<varname> ...
1292 # If pat.sub() finds any inconsistency here such as the capture variable names not matching the group names, it will throw an exception.
1293 try:
1294 sub = pat.sub(rep_string, rewritten_line)
1295 rewritten_line = sub
1296 except IndexError as detail:
1297 logging.error(f"ERROR: {str(detail):s}. Did not find a capture variable name in the pattern (?P<varname> ... ) along with its corresponding match group name in replacement string \\g<varname> in updateweb.yaml. Did not rewrite the line |{rewritten_line:s}|")
1298
1299 return rewritten_line
1300
1301 def rewrite_email_address_line(self, line):
1302 """Rewrite lines containing old email addresses."""
1303
1304 # Search for the old email address.
1305 pat = self.user_settings.OLD_EMAIL_ADDRESS
1306 match = pat.search(line)
1307
1308 # Replace the old address with my new email address.
1309 if match:
1310 new_address = self.user_settings.NEW_EMAIL_ADDRESS
1311 sub = pat.sub(new_address, line)
1312 line = sub
1313
1314 return line
1315
1316 def rewrite_copyright_line(self, line):
1317 """Rewrite copyright lines if they are out of date."""
1318
1319 # Match the lines,
1320 # Copyright (C) nnnn-mmmm by Sean Erik O'Connor.
1321 # Copyright © nnnn-mmmm by Sean Erik O'Connor.
1322 # and pull out the old year and save it.
1323 pat = self.user_settings.COPYRIGHT_LINE
1324 match = pat.search(line)
1325
1326 # Found a match.
1327 if match:
1328 old_year = int(match.group('old_year'))
1329
1330 # Replace the old year with the current year.
1331 # We matched and extracted the old copyright symbol into the variable
1332 # 'symbol' using the pattern syntax (?P<symbol> \(C\) | ©)
1333 # We now insert it back by placing the special syntax \g<symbol>
1334 # into the replacement string.
1335 if old_year < WebSite.get_current_year():
1336 new_copyright = r"Copyright \g<symbol> \g<old_year>-" + str(WebSite.get_current_year()) + " by Sean Erik"
1337 sub = pat.sub(new_copyright, line)
1338 line = sub
1339 return line
1340
1341 def rewrite_last_update_line(self, line):
1342 """Rewrite the Last Updated line if the year is out of date."""
1343
1344 # Match the last updated line and pull out the year.
1345 # last updated 01 Jan 25.
1346 p = self.user_settings.LAST_UPDATED_LINE
1347 m = p.search(line)
1348
1349 if m:
1350 last_update_year = int(m.group('year'))
1351
1352 # Convert to four digit years.
1353 if last_update_year > 90:
1354 last_update_year += 1900
1355 else:
1356 last_update_year += 2000
1357
1358 # If the year is old, rewrite to "01 Jan <current year>".
1359 if last_update_year < WebSite.get_current_year():
1360 two_digit_year = self.user_settings.TWO_DIGIT_YEAR_FORMAT % self.get_current_two_digit_year()
1361 sub = p.sub('last updated 01 Jan ' + two_digit_year, line)
1362 line = sub
1363
1364 return line
1365
1366 def rewrite_source_file(self, file_name):
1367 """Rewrite copyright lines, last updated lines, etc."""
1368 changed = False
1369
1370 # Create a new temporary file name for the rewritten file.
1371 temp_file_name = file_name + self.user_settings.TEMP_FILE_EXT
1372
1373 # Apply changes to all lines of the temporary file. Apply change functions in
1374 # the sequence listed.
1375 if self.process_lines_of_file(file_name, temp_file_name,
1376 [self.rewrite_copyright_line,
1377 self.rewrite_last_update_line,
1378 self.rewrite_email_address_line,
1379 self.rewrite_substring]):
1380 logging.debug(f"Changed (rewritten) source file {file_name:s}")
1381 changed = True
1382
1383 # Rename the temporary file to the original file name. If no changes, just delete the temp file.
1384 self.clean_up_temp_file(temp_file_name, file_name, changed)
1385
1386 return changed
1387
1388 @staticmethod
1389 def create_syntax_highlighted_code_listing(source_file_name, **kwargs):
1390 """Create a syntax highlighted source listing for the file and return its name. Return None if there is an error.
1391 Keep the same date/time as the original file."""
1392
1393 # kwargs is a dictionary for key, value in kwargs.items():
1394 # for key, value in kwargs.items():
1395 # if key in kwargs:
1396 # print( f"kwargs:" )
1397 # print( f" key = |{key}|")
1398 # print( f" value = |{value}|" )
1399 dry_run_value = kwargs.get('dry_run')
1400 dry_run = False
1401 if dry_run_value is not None and dry_run_value is True:
1402 dry_run = True
1403
1404 # Take apart the file name.
1405 file_name_without_extension = Path(source_file_name).stem
1406 file_extension = Path(source_file_name).suffix
1407
1408 # Append *.html to the source code file name. This will be the syntax highlighted code listing.
1409 syntax_highlighted_file_name = f"{source_file_name}.html"
1410
1411 # In the special case of Jupyter notebooks, use the Jupyter to HTML converter.
1412 if file_extension == ".ipynb":
1413 if dry_run:
1414 logging.debug(f"Dry run only: don't generate the syntax highlighted file {syntax_highlighted_file_name:s}")
1415 return None
1416 # Python manual recommends using the run() command instead of Popen(). See https://docs.python.org/3/library/subprocess.html#subprocess.run
1417 try:
1418 shell_command = f"jupyter nbconvert {source_file_name} --to html --output {syntax_highlighted_file_name}"
1419 # Throw an exception if we can't run the process.
1420 # Capture the standard output and standar error and dump to /dev/null so it doesn't print to the command line when running this script.
1421 # Since the shell command is a single string, use shell=True in the run() command.
1422 subprocess.run([shell_command],shell=True,check=True,stdout=subprocess.DEVNULL,stderr=subprocess.DEVNULL)
1423 except subprocess.CalledProcessError as detail:
1424 logging.error(f"Cannot convert the Jupyter file {source_file_name:s} to a syntax highlighted file: {str(detail):s} Aborting...")
1425 return None
1426 # Otherwise, use the Pygments syntax highlighter.
1427 else:
1428 # First choose the language lexer from the file name itself if there's no extension.
1429 # Dotted file names are treated as the entire file name.
1430 match file_name_without_extension:
1431 case "makefile":
1432 lexer = MakefileLexer()
1433 case ".bash_profile"|".bashrc"|".bash_logout":
1434 lexer = BashLexer()
1435 case ".vimrc":
1436 lexer = VimLexer()
1437 case ".gitignore_global" | ".gitignore" | ".gitconfig":
1438 lexer = OutputLexer() # No formatting.
1439 case _:
1440 # Choose the language lexer from the file extension. Web stuff first, then programming languages.
1441 match file_extension:
1442 case ".html":
1443 lexer = HtmlLexer()
1444 case ".css":
1445 lexer = CssLexer()
1446 case ".js":
1447 lexer = JavascriptLexer()
1448 case ".sh":
1449 lexer = BashLexer()
1450 case ".py":
1451 lexer = PythonLexer()
1452 case ".c" | ".h":
1453 lexer = CLexer()
1454 case ".hpp" | ".cpp":
1455 lexer = CppLexer()
1456 case ".lsp":
1457 lexer = CommonLispLexer()
1458 case ".for" | ".FOR" | ".f":
1459 lexer = FortranFixedLexer() # Fixed format FORTRAN, not FORTRAN 90.
1460 case ".txt" | ".dat": # Generic data file; no formatting.
1461 lexer = OutputLexer()
1462 case ".tex":
1463 lexer = TexLexer() # LaTeX, TeX, or related files.
1464 case ".m":
1465 lexer = MatlabLexer()
1466 case ".yaml":
1467 lexer = YamlLexer()
1468 case _:
1469 logging.error(f"Can't find a lexer for file {source_file_name}. Cannot generate a syntax highlighted source listing. Aborting...")
1470 return None
1471
1472 # Read the source code file into a single string.
1473 try:
1474 with open(source_file_name, 'r') as fp:
1475 source_file_string = fp.read()
1476 except OSError as detail:
1477 logging.error(f"Cannot read the source code file {source_file_name:s} for syntax highlighting: {str(detail):s} Aborting...")
1478
1479 # Top level Pygments function generates the HTML for the highlighted code.
1480 highlighted_html_source_file_string = highlight(source_file_string, lexer, HtmlFormatter(linenos="inline"))
1481
1482 # The style sheet is always the same for all languages.
1483 style_sheet = HtmlFormatter().get_style_defs('.highlight')
1484
1485 # Write out the syntax colored file.
1486 if dry_run:
1487 logging.debug(f"Dry run only: don't generate the syntax highlighted file {syntax_highlighted_file_name:s}")
1488 return None
1489 else:
1490 try:
1491 # Write out the highlighted code listing in HTML with CSS style sheet attached.
1492 with open(syntax_highlighted_file_name, 'w') as fp:
1493 fp.write(UserSettings.BASIC_HTML_BEGIN)
1494 fp.write(style_sheet)
1495 fp.write(UserSettings.BASIC_HTML_MIDDLE)
1496 fp.write(highlighted_html_source_file_string)
1497 fp.write(UserSettings.BASIC_HTML_END)
1498 except OSError as detail:
1499 logging.error(f"Cannot write the syntax highlighted file {syntax_highlighted_file_name:s}: {str(detail):s} Aborting...")
1500 # ------- end Pygments syntax highlighter
1501
1502 # Set the syntax highlighted code file to the same modification and access time and date as the source file.
1503 file_stat = os.stat(source_file_name)
1504 os.utime(syntax_highlighted_file_name, (file_stat[stat.ST_ATIME], file_stat[stat.ST_MTIME]))
1505
1506 # Are the original source and the syntax highlighted code the same data and time?
1507 dates_and_times_source_file_name = LocalWebSite.get_file_date_time(syntax_highlighted_file_name)
1508 dates_and_times_syntax_highlighted_file_name = LocalWebSite.get_file_date_time(syntax_highlighted_file_name)
1509 if dates_and_times_source_file_name != dates_and_times_syntax_highlighted_file_name:
1510 logging.error(f"Source code and syntax highlighted source don't have the same times. source time = {dates_and_times_source_file_name.ctime():s} syntax highlighted time = {dates_and_times_syntax_highlighted_file_name.ctime():s} Aborting...")
1511 return None
1512
1513 logging.debug(f"Generated a syntax highlighted listing {syntax_highlighted_file_name:s} for the source code file {source_file_name:s} with the same time and date = {dates_and_times_source_file_name.ctime():s}")
1514 return syntax_highlighted_file_name
1515
1516# ----------------------------------------------------------------------------
1517# Subclass which knows about the remote web site.
1518# ----------------------------------------------------------------------------
1519
1520class RemoteWebSite(WebSite):
1521 """Walk the remote web directory on a web server down from the root.
1522 Use FTP commands:
1523 https://en.wikipedia.org/wiki/List_of_FTP_commands
1524 Use the Python ftp library:
1525 https://docs.python.org/3/library/ftplib.html
1526 """
1527
    def __init__(self, user_settings):
        """Connect to the FTP server and list all files and directories.

        user_settings supplies the server name (SERVER_NAME), login credentials
        (USER_NAME, PASSWORD_NAME) and the FTP root directory (FTP_ROOT_NAME).
        Raises UpdateWebException if the connection or login fails.
        """

        # Root directory of FTP server.
        self.root_dir = user_settings.FTP_ROOT_NAME
        logging.debug(f"Set the remote web site ftp root dir = {self.root_dir:s}")

        # Connect to FTP server and log in.
        try:
            # self.ftp.set_debuglevel( 2 )
            # print( f"\nTrying ftp login to server name = {user_settings.SERVER_NAME} user name = {user_settings.USER_NAME} password = {user_settings.PASSWORD_NAME}\n")
            self.ftp = ftplib.FTP(user_settings.SERVER_NAME)
            self.ftp.login(user_settings.USER_NAME, user_settings.PASSWORD_NAME)
        # Catch all exceptions with the parent class Exception: all built-in,
        # non-system-exiting exceptions are derived from this class.
        except Exception as detail:
            # Extract the string message from the exception class with str().
            logging.error(f"Remote web site cannot login to ftp server: {str(detail):s} Aborting...")
            raise UpdateWebException("Problem accessing remote web site. See the log file for details. Aborting... ") from detail
        else:
            logging.debug("Remote web site ftp login succeeded.")

        logging.debug(f"Remote web site ftp welcome message {self.ftp.getwelcome():s}")

        # Initialize the superclass last:  its constructor calls go_to_root_dir(),
        # which needs the ftp connection established above.
        WebSite.__init__(self, user_settings)
1554
    def go_to_root_dir(self, root_dir):
        """Go to the root directory on the FTP server.

        Changes the server's current working directory to root_dir, then stores
        the directory path the server reports back into self.root_dir.
        Raises UpdateWebException on any FTP failure.
        """

        try:
            # Go to the root directory.
            self.ftp.cwd(root_dir)
            logging.debug(f"ftp root directory (requested) = {self.root_dir:s}")

            # Read it back from the server.
            self.root_dir = self.ftp.pwd()
            logging.debug(f"ftp root directory (read back from server): {self.root_dir:s}")

        except Exception as detail:
            logging.error(f"go_to_root_dir(): \tCannot ftp cwd or pwd root dir {root_dir:s} {str(detail):s} Aborting...")
            raise UpdateWebException("Problem accessing remote web site. See the log file for details. Aborting... ") from detail
1570
1571 def get_root_dir(self):
1572 """Get the root directory name"""
1573
1574 return self.root_dir
1575
1576 def finish(self):
1577 """Quit remote web site"""
1578 logging.debug(f"Finished with WebSite object of class {type(self)}")
1579 try:
1580 self.ftp.quit()
1581 except Exception as detail:
1582 logging.error(f"Cannot ftp quit: {str(detail):s}")
1583
1584 def one_level_down(self, d):
1585 """List files and directories in a subdirectory using ftp"""
1586
1587 directories = []
1588 files = []
1589
1590 try:
1591 # ftp listing from current dir.
1592 logging.debug(f"RemoteWebSite.one_level_down(): \tftp cwd: {d:s}")
1593 self.ftp.cwd(d)
1594 dir_list = []
1595
1596 # Use the nonstandard -a option in LIST to show all the hidden .* files.
1597 # But now we have the problem that . and .. (the UNIX current and parent directories) will be in the ftp list of directories.
1598 # Note the second argument requires a callback function.
1599 self.ftp.retrlines('LIST -a', dir_list.append)
1600
1601 except Exception as detail:
1602 logging.error(f"one_level_down(): \tCannot ftp cwd or ftp LIST dir {d:s}: {str(detail):s} Aborting...")
1603 raise UpdateWebException("Problem accessing remote web site. See the log file for details. Aborting... ") from detail
1604
1605 for line in dir_list:
1606 logging.debug(f"RemoteWebSite.one_level_down(): \tftp LIST: {line:s}")
1607
1608 # Line should at least have the minimum FTP information.
1609 if len(line) >= self.user_settings.MIN_FTP_LINE_LENGTH:
1610 # Parse the FTP LIST and put the pieces into file_info.
1611 file_info = self.parse_ftp_list(line)
1612 logging.debug(f"RemoteWebSite.one_level_down(): \tftp parsed file information: {file_info[self.user_settings.FILE_NAME]:s}")
1613
1614 # Skip over the UNIX hidden files for current and parent directories . and .. Also skip over any NULL file names.
1615 if file_info[self.user_settings.FILE_NAME] == "" or file_info[self.user_settings.FILE_NAME] == "." or file_info[self.user_settings.FILE_NAME] == "..":
1616 logging.debug(f"RemoteWebSite.one_level_down(): \tftp skipping the file name: {file_info[self.user_settings.FILE_NAME]:s}")
1617 pass
1618 # For a directory, prefix the full path prefix from the root to the directory name and add to the directory list.
1619 elif file_info[self.user_settings.FILE_TYPE] == FileType.DIRECTORY:
1620 dirname = self.append_root_dir( d, file_info[self.user_settings.FILE_NAME])
1621 logging.debug(f"RemoteWebSite.one_level_down(): \tftp dir (full path): {dirname:s}")
1622 directories.append(dirname)
1623 # For a file: Add the full path prefix from the root to the file name.
1624 else:
1625 file_info[self.user_settings.FILE_NAME] = self.append_root_dir( d, file_info[self.user_settings.FILE_NAME])
1626 logging.debug(f"RemoteWebSite.one_level_down(): \tftp file (full path):\
1627 {file_info[self.user_settings.FILE_NAME]:s}")
1628 files.append(file_info)
1629 else:
1630 logging.error(f"RemoteWebSite.one_level_down(): \tFTP LIST line is too short: {line:s}")
1631
1632 directories.sort()
1633 files.sort()
1634
1635 return directories, files
1636
1637 def modtime(self, f):
1638 """Get the modification time of a file via ftp. Return 0 if ftp cannot get it."""
1639 modtime = 0
1640
1641 try:
1642 response = self.ftp.sendcmd('MDTM ' + f)
1643 # MDTM returns the last modified time of the file in the format
1644 # "213 YYYYMMDDhhmmss \r\n <error-response>
1645 # MM is 01 to 12, DD is 01 to 31, hh is 00 to 23, mm is 00 to 59, ss is 0 to 59.
1646 # error-response is 550 for info not available, and 500 or 501 if command cannot
1647 # be parsed.
1648 if response[:3] == '213':
1649 modtime = response[4:]
1650 except ftplib.error_perm as detail:
1651 logging.error(f"Cannot get file modification time from the ftp server: {str(detail):s} Aborting...")
1652 modtime = 0
1653
1654 return modtime
1655
1656 def parse_ftp_list(self, line):
1657 """Parse the ftp file listing and return file name, datetime and file size.
1658
1659 An FTP LIST command will give output which looks like this for a file:
1660
1661 -rw-r--r-- 1 1000 free 4084 Jul 18 16:55 sparkCoil.png
1662
1663 and for a directory:
1664
1665 drwxr-xr-x 2 1000 free 4096 Jul 18 16:36 ReadingList
1666
1667 FTP uses UTC for its listings; the conversion to local time is done by the OS.
1668 We can have problems on New Year's Eve. For example, the local file date/time is
1669
1670 Mon Jan 1 06:23:12 2018
1671
1672 But the remote file date/time from FTP listing doesn't show a year even though we
1673 know it was written to the server in 2017.
1674
1675 Mon Dec 31 03:02:00
1676
1677 So we default the remote file year to current year 2018 and get
1678
1679 Mon Dec 31 03:02:00 2018
1680
1681 Now we think that the remote file is newer by 363.860278 days.
1682 """
1683
1684 # Find out if we've a directory or a file.
1685 if line[0] == 'd':
1686 dir_or_file = FileType.DIRECTORY
1687 else:
1688 dir_or_file = FileType.FILE
1689
1690 pattern = self.user_settings.FTP_LISTING
1691
1692 # Sensible defaults.
1693 filesize = 0
1694 filename = ""
1695 # Default the time to midnight.
1696 hour = 0
1697 minute = 0
1698 seconds = 0
1699 # Default the date to Jan 1 of the current year.
1700 month = 1
1701 day = 1
1702 year = WebSite.get_current_year()
1703
1704 # Extract time and date from the ftp listing.
1705 match = pattern.search(line)
1706
1707 if match:
1708 filesize = int(match.group('bytes'))
1709 month = self.user_settings.monthToNumber[match.group('mon')]
1710 day = int(match.group('day'))
1711
1712 # Remote file listing contains the year. The FTP listing will omit the hour and minute.
1713 if match.group('year'):
1714 year = int(match.group('year'))
1715 logging.debug(f"ftp has year = {year} but is probably missing hour and minute")
1716 else:
1717 # Remote file listing omits the year. Default the year to the current UTC time year.
1718 # That may be incorrect (see comments above).
1719 year = WebSite.get_current_year()
1720 logging.debug(f"ftp is missing the year; use the current year = {year}")
1721
1722 # If the FTP listing has the hour and minute, it will omit the year.
1723 if match.group('hour') and match.group('min'):
1724 hour = int(match.group('hour'))
1725 minute = int(match.group('min'))
1726 logging.debug(f"ftp has hour = {hour} and minute = {minute} so is probably missing the year")
1727
1728 filename = match.group('filename')
1729
1730 # Package up the time and date nicely.
1731 # Note if we didn't get any matches, we'll default the remote date and
1732 # time to Jan 1 midnight of the current year.
1733 d = datetime.datetime(year, month, day, hour, minute, seconds)
1734
1735 return [filename, dir_or_file, d, filesize]
1736
1737# ----------------------------------------------------------------------------
1738# Class for synchronizing local and remote web sites.
1739# ----------------------------------------------------------------------------
1740
class UpdateWeb(object):
    """Given previously scanned local and remote directories, update the remote website.

    Computes the differences between the local and remote copies of the
    web site, then mirrors the local copy to the remote FTP server:
    uploads new and changed files, creates missing remote directories,
    and deletes remote-only files and directories.
    """

    def __init__(
            self,
            user_settings,
            local_directory_list,
            local_file_info,
            remote_directory_list,
            remote_file_info):
        """Connect to remote site. Accept previously scanned local and remote files and directories.

        Args:
            user_settings:          Settings with server name, credentials, root dirs and limits.
            local_directory_list:   Relative paths of all local directories.
            local_file_info:        Local file records [name, type, datetime, size].
            remote_directory_list:  Relative paths of all remote directories.
            remote_file_info:       Remote file records [name, type, datetime, size].

        Raises:
            UpdateWebException:  If we cannot log in to the FTP server or
                                 position to its root directory.
        """

        # Initialize from args.
        self.user_settings = user_settings
        self.local_directory_list = local_directory_list
        self.remote_directory_list = remote_directory_list
        self.local_file_info = local_file_info
        self.remote_file_info = remote_file_info

        # Initialize defaults.  These are populated later by file_info() and changes().
        self.local_files_list = []
        self.remote_files_list = []
        self.local_file_to_size = {}
        self.local_file_to_date_time = {}
        self.remote_file_to_date_time = {}
        self.local_only_dirs = []
        self.local_only_files = []
        self.remote_only_dirs = []
        self.remote_only_files = []
        self.common_files = []

        # Connect to FTP server and log in.
        try:
            self.ftp = ftplib.FTP(self.user_settings.SERVER_NAME)
            self.ftp.login(self.user_settings.USER_NAME, self.user_settings.PASSWORD_NAME)
        except Exception as detail:
            logging.error(f"Cannot login to ftp server: {str(detail):s} Aborting...")
            raise UpdateWebException("Problem accessing remote web site. See the log file for details. Aborting... ") from detail
        else:
            logging.debug("ftp login succeeded.")

        logging.debug(f"ftp server welcome message: {self.ftp.getwelcome():s}")

        # Local root directory.
        self.local_root_dir = self.user_settings.local_root_dir
        logging.debug(f"Local root directory: {self.local_root_dir:s}")

        # Root directory of FTP server.
        self.ftp_root_dir = self.user_settings.FTP_ROOT_NAME
        logging.debug(f"ftp root directory (requested) = {self.ftp_root_dir:s}")

        # Transform KB string to integer bytes. e.g. "200" => 204800
        self.file_size_limit = int(self.user_settings.FILE_SIZE_LIMIT_NAME) * 1024

        try:
            # Go to the root directory.
            self.ftp.cwd(self.ftp_root_dir)

            # Read it back.
            self.ftp_root_dir = self.ftp.pwd()
            logging.debug(f"ftp root directory (read back from server): {self.ftp_root_dir:s}")
        except Exception as detail:
            logging.error(f"UpdateWeb(): \tCannot ftp cwd or ftp LIST dir {self.ftp_root_dir:s} {str(detail):s} Aborting...")
            # Bug fix:  the log message said "Aborting..." but execution previously
            # continued with a bad root directory.  Raise the same exception as the
            # login-failure path above so the caller can clean up.
            raise UpdateWebException("Problem accessing remote web site. See the log file for details. Aborting... ") from detail

    def append_root_dir(self, root_dir, name):
        """Append the root directory to a path.

        e.g. root = /, and name = Art/foo.txt yields /Art/foo.txt
        but root = /Sean, and name = Art/foo.txt yields /Sean/Art/foo.txt
        """

        if root_dir == self.user_settings.DEFAULT_ROOT_DIR:
            return root_dir + name
        else:
            return root_dir + "/" + name

    def file_info(self):
        """Create lists of file names from the file information. Also create dictionaries which map file names onto
        dates, times, and sizes."""

        # Extract file names.
        self.local_files_list = [
            file_info[self.user_settings.FILE_NAME] for file_info in self.local_file_info]
        self.remote_files_list = [
            file_info[self.user_settings.FILE_NAME] for file_info in self.remote_file_info]

        # Use a dictionary comprehension to create key/value pairs,
        #     (file name, file date/time)
        # which map file names onto date/time.
        self.local_file_to_date_time = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_DATE_TIME] for file_info in self.local_file_info}
        self.remote_file_to_date_time = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_DATE_TIME] for file_info in self.remote_file_info}

        # Dictionary comprehension creates a mapping of local file names onto file sizes.
        self.local_file_to_size = {file_info[self.user_settings.FILE_NAME]: file_info[self.user_settings.FILE_SIZE] for file_info in self.local_file_info}

    def update(self):
        """Scan through the local website, cleaning it up.
        Go to remote website on my servers and synchronize all files."""

        self.file_info()

        # Which files and directories are different.
        self.changes()

        # Synchronize with the local web site.
        self.synchronize()

    def changes(self):
        """Find the set of different directories and files on local and remote.

        Fills in local_only_dirs, remote_only_dirs, local_only_files,
        remote_only_files and common_files, preserving the scan ordering
        of the input lists.
        """

        # Add all directories which are only on local to the dictionary.
        dir_to_type = {
            d: FileType.ON_LOCAL_ONLY for d in self.local_directory_list}

        # Scan through all remote directories, adding those only on remote or
        # on both.
        for d in self.remote_directory_list:
            if d in dir_to_type:
                dir_to_type[d] = FileType.ON_BOTH_LOCAL_AND_REMOTE
            else:
                dir_to_type[d] = FileType.ON_REMOTE_ONLY

        # Add all files which are only on local to the dictionary.
        file_to_type = {
            f: FileType.ON_LOCAL_ONLY for f in self.local_files_list}

        # Scan through all remote files, adding those only on remote or on
        # both.
        for f in self.remote_files_list:
            if f in file_to_type:
                file_to_type[f] = FileType.ON_BOTH_LOCAL_AND_REMOTE
            else:
                file_to_type[f] = FileType.ON_REMOTE_ONLY

        logging.debug("Raw dictionary dump of directories")
        for k, v in dir_to_type.items():
            logging.debug(f"\t dir: {str(k):s} type: {str(v):s}")

        logging.debug("Raw dictionary dump of files")
        for k, v in file_to_type.items():
            logging.debug(f"\t file: {str(k):s} type: {str(v):s}")

        # List of directories only on local. Keep the ordering.
        self.local_only_dirs = [
            d for d in self.local_directory_list if dir_to_type[d] == FileType.ON_LOCAL_ONLY]

        # List of directories only on remote. Keep the ordering.
        self.remote_only_dirs = [
            d for d in self.remote_directory_list if dir_to_type[d] == FileType.ON_REMOTE_ONLY]

        # We don't care about common directories, only their changed files, if
        # any.

        # List of files only on local. Keep the ordering.
        self.local_only_files = [
            f for f in self.local_files_list if file_to_type[f] == FileType.ON_LOCAL_ONLY]

        # List of files only on remote. Keep the ordering.
        self.remote_only_files = [
            f for f in self.remote_files_list if file_to_type[f] == FileType.ON_REMOTE_ONLY]

        # List of common files on both local and remote. Keep the ordering.
        self.common_files = [
            f for f in self.local_files_list if file_to_type[f] == FileType.ON_BOTH_LOCAL_AND_REMOTE]

        logging.debug("*** Directories only on local ******************************")
        for d in self.local_only_dirs:
            logging.debug(f"\t {d:s}")

        logging.debug("*** Directories only on remote ******************************")
        for d in self.remote_only_dirs:
            logging.debug(f"\t {d:s}")

        logging.debug("*** Files only on local ******************************")
        for f in self.local_only_files:
            logging.debug(f"\t {f:s}")

        logging.debug("*** Files only on remote ******************************")
        for f in self.remote_only_files:
            logging.debug(f"\t {f:s}")

        logging.debug("*** Common files ******************************")
        for f in self.common_files:
            logging.debug(f"name {f:s}")
            logging.debug(f"\tlocal time {self.local_file_to_date_time[f].ctime():s}")
            logging.debug(f"\tremote time {self.remote_file_to_date_time[f].ctime():s}")

    def synchronize(self):
        """Synchronize files and subdirectories in the remote directory with the local directory."""

        # If we have the same files in local and remote, compare their times
        # and dates.
        for f in self.common_files:
            local_file_time = self.local_file_to_date_time[f]
            remote_file_time = self.remote_file_to_date_time[f]

            # What's the time difference?
            time_delta = remote_file_time - local_file_time
            # How much difference, either earlier or later?
            seconds_different = abs(time_delta.total_seconds())
            minutes_different = seconds_different / 60.0
            hours_different = minutes_different / 60.0
            days_different = hours_different / 24.0

            # Assume no upload initially.
            upload_to_host = False

            logging.debug(f"Common file: {f:s}.")

            # Remote file time is newer.
            if remote_file_time > local_file_time:
                # Remote file time is MUCH newer: suspect time is out of joint on the server, so upload the local file to be safe.
                if minutes_different >= self.user_settings.MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD:
                    logging.error(f"Remote file {f:s} is MUCH newer by {minutes_different:8.1f} minutes [which exceeds the threshold = {self.user_settings.MINUTES_NEWER_FOR_REMOTE_BEFORE_UPLOAD} minutes]. Upload the file to be safe.")
                    logging.error(f"\tlocal time {local_file_time.ctime():s}")
                    logging.error(f"\tremote time {remote_file_time.ctime():s}")

                    # Set the local file to the current time so the next run
                    # sees the local copy as the most recent.
                    full_file_name = self.append_root_dir(
                        self.local_root_dir, f)
                    if os.path.exists(full_file_name):
                        # Change the access and modify times of the file to the current time.
                        os.utime(full_file_name, None)
                        logging.error(f"Touching local file {full_file_name:s} to make it the current time")

                    upload_to_host = True
                # Remote file time is newer, but not by much. Let's just assume a slight time mismatch on the server. Don't upload.
                else:
                    logging.warning(f"Remote file {f:s} is only SLIGHTLY newer by {seconds_different:8.1f} seconds. Probably just inaccurate time/date on the server. Wait -- don't upload the file yet.")
                    logging.warning(f"\tlocal time {local_file_time.ctime():s}")
                    logging.warning(f"\tremote time {remote_file_time.ctime():s}")
                    upload_to_host = False

            # Local file time is newer.
            elif local_file_time > remote_file_time:
                # Local file time slightly newer than the remote file. So we are pretty sure the local file really got changed vs the server file.
                if minutes_different >= self.user_settings.MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD:
                    logging.warning(f"Local file {f:20s} is SLIGHTLY newer by {minutes_different:8.1f} minutes [which exceeds the threshold = {self.user_settings.MINUTES_NEWER_FOR_LOCAL_BEFORE_UPLOAD} minutes]. Uploading to remote server.")
                    logging.warning(f"\tlocal time {local_file_time.ctime():s}")
                    logging.warning(f"\tremote time {remote_file_time.ctime():s}")
                    upload_to_host = True
                else:
                    logging.warning(f"Local file {f:20s} is BARELY newer by {seconds_different:8.1f} seconds. Probably just inaccurate time/date on the server. Wait -- don't upload the file yet.")
                    logging.warning(f"\tlocal time {local_file_time.ctime():s}")
                    logging.warning(f"\tremote time {remote_file_time.ctime():s}")
                    upload_to_host = False

            # Cancel the upload if the file is too big for the server.
            size = self.local_file_to_size[f]
            if size >= self.file_size_limit:
                logging.error(f"upload(): Skipping upload of file {f:s} of size {size:d}; too large for server, limit is {self.file_size_limit:d} bytes")
                upload_to_host = False

            # Finally do the file upload.
            if upload_to_host:
                logging.debug(f"Uploading changed file {f:s}")
                # Suppress newline to keep the message to the console more compact. Flush output buffer, so we can see the message right away.
                print(f"Uploading changed file {f:s}... ", end='', flush=True)
                self.upload(f)

        # Remote directory is not in local. Delete it.
        for d in self.remote_only_dirs:
            logging.debug(f"Deleting remote only directory {d:s}")
            print(f"Deleting remote only directory {d:s}... ", end='', flush=True)
            self.rmdir(d)

        # Local directory missing on remote. Create it.
        # Due to breadth first order scan, we'll create parent directories
        # before child directories.
        for d in self.local_only_dirs:
            logging.debug(f"Only on local. Creating new remote dir {d:s}.")
            print(f"Creating new remote directory {d:s}... ", end='', flush=True)
            self.mkdir(d)

        # Local file missing on remote. Upload it.
        for f in self.local_only_files:
            logging.debug(f"Local only file. Uploading {f:s} to remote.")

            # But cancel the upload if the file is too big for the server.
            size = self.local_file_to_size[f]
            if size >= self.file_size_limit:
                logging.error(f"upload(): Skipping upload of file {f:s} of size {size:d};"
                              f" too large for server, limit is {self.file_size_limit:d} bytes")
            else:
                logging.debug(f"Uploading new file {f:s}")
                print(f"Uploading new file {f:s}... ", end='', flush=True)
                self.upload(f)

        # Remote contains a file not present on the local. Delete the file.
        for f in self.remote_only_files:
            logging.debug(f"Remote only file. Deleting remote file {f:s}.")
            print(f"Deleting remote file {f:s}... ", end='', flush=True)
            self.del_remote(f)

    def del_remote(self, relative_file_path):
        """Delete a file using ftp.  Errors are logged but not raised."""

        logging.debug(f"del_remote(): \trelative file path name: {relative_file_path:s}")

        # Parse the relative file path into file name and relative directory.
        relative_dir, file_name = os.path.split(relative_file_path)
        logging.debug(f"del_remote(): \tfile name: {file_name:s}")
        logging.debug(f"del_remote(): \trelative dir: {relative_dir:s}")
        logging.debug(f"del_remote(): \tremote root dir: {self.ftp_root_dir:s}")

        try:
            # Add the remote root path and go to the remote directory.
            remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
            logging.debug(f"del_remote(): \tftp cd remote dir: {remote_dir:s}")
            self.ftp.cwd(remote_dir)
        except Exception as detail:
            logging.error(f"del_remote(): \tCannot ftp chdir: {str(detail):s} Skipping...")
        else:
            try:
                logging.debug(f"del_remote(): \tftp rm: {file_name:s}")

                # Don't remove zero length file names.
                if len(file_name) > 0:
                    self.ftp.delete(file_name)
                else:
                    # Bug fix:  this warning was missing its f-prefix, so the file
                    # name was never interpolated, and it misnamed the function.
                    logging.warning(
                        f"del_remote(): skipping ftp delete; file NAME {file_name:s} had zero length")
            except Exception as detail:
                logging.error(f"del_remote(): \tCannot ftp rm: {str(detail):s}")

    def mkdir(self, relative_dir):
        """Create new remote directory using ftp.  Errors are logged but not raised."""

        logging.debug(f"mkdir(): \trelative dir path name: {relative_dir:s}")
        logging.debug(f"mkdir(): \tremote root dir: {self.ftp_root_dir:s}")

        # Parse the relative dir path into prefix dir and suffix dir.
        path, d = os.path.split(relative_dir)
        logging.debug(f"mkdir(): \tremote prefix dir: {path:s}")
        logging.debug(f"mkdir(): \tremote dir: {d:s}")

        try:
            # Add the remote root path and go to the remote directory.
            remote_dir = self.append_root_dir(self.ftp_root_dir, path)
            logging.debug(f"mkdir(): \tftp cd remote dir: {remote_dir:s}")
            self.ftp.cwd(remote_dir)
        except Exception as detail:
            logging.error(f"mkdir(): \tCannot ftp chrdir: {str(detail):s} Skipping...")
        else:
            try:
                logging.debug(f"mkdir(): \tftp mkd: {d:s}")
                self.ftp.mkd(d)
            except Exception as detail:
                logging.error(f"mkdir(): \tCannot ftp mkdir: {str(detail):s}")

    def rmdir(self, relative_dir):
        """Delete an empty directory using ftp.  Errors are logged but not raised."""

        logging.debug(f"rmdir(): \tintermediate dir path name: {relative_dir:s}")
        logging.debug(f"rmdir(): \tremote root dir: {self.ftp_root_dir:s}")

        # Parse the relative dir path into prefix dir and suffix dir.
        path, d = os.path.split(relative_dir)
        logging.debug(f"rmdir(): \tremote prefix dir: {path:s}")
        logging.debug(f"rmdir(): \tremote dir: {d:s}")

        try:
            # Add the remote root path and go to the remote directory.
            remote_dir = self.append_root_dir(self.ftp_root_dir, path)
            logging.debug(f"rmdir(): \tftp cd remote dir: {remote_dir:s}")
            self.ftp.cwd(remote_dir)
        except Exception as detail:
            logging.error(f"rmdir(): \tCannot ftp chdir: {str(detail):s} Skipping...")
        else:
            try:
                logging.debug(f"rmdir(): \tftp rmd: {d:s}")
                self.ftp.rmd(d)
            except Exception as detail:
                logging.error(f"rmdir(): \tCannot ftp rmdir dir {d:s}: {str(detail):s}. Directory is probably not empty. Do a manual delete.")

    def download(self, relative_file_path):
        """Download a binary file using ftp.

        NOTE(review):  The actual retrbinary transfer is commented out in the
        original, so this currently only creates/truncates the local file.
        """

        logging.debug(f"download(): \tfile name: {relative_file_path:s}")

        # Parse the relative file path into file name and relative directory.
        relative_dir, file_name = os.path.split(relative_file_path)
        logging.debug(f"download(): \tfile name: {file_name:s}")
        logging.debug(f"download(): \trelative dir: {relative_dir:s}")
        logging.debug(f"download(): \troot dir: {self.ftp_root_dir:s}")

        # Add the remote root path and go to the remote directory.
        remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
        logging.debug(f"download(): \tftp cd remote dir: {remote_dir:s}")

        try:
            self.ftp.cwd(remote_dir)
        except Exception as detail:
            logging.error(f"download(): \tCannot ftp chdir: {str(detail):s} Skipping...")
        else:
            # Add the local root path to get the local file name.
            # Open local binary file to write into;  the with-statement
            # guarantees the file is closed even if an error occurs.
            local_file_name = self.append_root_dir(
                self.local_root_dir, relative_file_path)
            logging.debug(f"download(): \topen local file name: {local_file_name:s}")
            try:
                with open(local_file_name, "wb") as f:
                    try:
                        # Calls f.write() on each block of the binary file.
                        # ftp.retrbinary( "RETR " + file_name, f.write )
                        pass
                    except Exception as detail:
                        logging.error(f"download(): \tCannot cannot ftp retrbinary: {str(detail):s}")
            except IOError as detail:
                # Bug fix:  the file is opened for writing, but the old message said "for reading".
                logging.error(f"download(): \tCannot open local file {local_file_name:s} for writing: {str(detail):s}")

    def upload(self, relative_file_path):
        """Upload a binary file using ftp.  Errors are logged but not raised."""

        logging.debug(f"upload(): \trelative file path name: {relative_file_path:s}")

        # Parse the relative file path into file name and relative directory.
        relative_dir, file_name = os.path.split(relative_file_path)
        logging.debug(f"upload(): \tfile name: {file_name:s}")
        logging.debug(f"upload(): \trelative dir: {relative_dir:s}")
        logging.debug(f"upload(): \tremote root dir: {self.ftp_root_dir:s}")

        # Add the remote root path and go to the remote directory.
        remote_dir = self.append_root_dir(self.ftp_root_dir, relative_dir)
        logging.debug(f"upload(): \tftp cd remote dir: {remote_dir:s}")

        try:
            self.ftp.cwd(remote_dir)
        except Exception as detail:
            logging.error(f"upload(): \tCannot ftp chdir: {str(detail):s} Skipping...")
        else:
            # Add the local root path to get the local file name.
            # Open local binary file to read from;  the with-statement
            # guarantees the file is closed even if an error occurs.
            local_file_name = self.append_root_dir(
                self.local_root_dir, relative_file_path)
            logging.debug(f"upload(): \topen local file name: {local_file_name:s}")

            try:
                with open(local_file_name, "rb") as f:
                    try:
                        # f.read() is called on each block of the binary file until
                        # EOF.
                        logging.debug(f"upload(): \tftp STOR file {file_name:s}")
                        self.ftp.storbinary("STOR " + file_name, f)
                    except Exception as detail:
                        logging.error(f"upload(): \tCannot ftp storbinary: {str(detail):s}")
            except IOError as detail:
                logging.error(f"upload(): \tCannot open local file {local_file_name:s} for reading: {str(detail):s}")

    def finish(self):
        """Log out of an ftp session.  Errors are logged but not raised."""
        logging.debug(f"Finished with UpdateWeb object of class {type(self)}")
        try:
            self.ftp.quit()
        except Exception as detail:
            logging.error(f"Cannot ftp quit because {str(detail):s}")
2200
2201# ----------------------------------------------------------------------------
2202# Main function
2203# ----------------------------------------------------------------------------
2204
2205def main(raw_args=None):
2206 """Main program. Clean up and update my website."""
2207
2208 # Print the obligatory legal notice.
2209 print("""
2210 updateweb Version 7.3 - A Python utility program which maintains my web site.
2211 Copyright (C) 2007-2025 by Sean Erik O'Connor. All Rights Reserved.
2212
2213 It deletes temporary files, rewrites old copyright lines and email address
2214 lines in source files, then synchronizes all changes to my web sites.
2215
2216 updateweb comes with ABSOLUTELY NO WARRANTY; for details see the
2217 GNU General Public License. This is free software, and you are welcome
2218 to redistribute it under certain conditions; see the GNU General Public
2219 License for details.
2220 """)
2221
2222 # Put ALL the main code into a try block!
2223 try:
2224 # ---------------------------------------------------------------------
2225 # Load default settings and start logging.
2226 # ---------------------------------------------------------------------
2227
2228 # Default user settings.
2229 user_settings = UserSettings()
2230
2231 print( f"Running main( {raw_args} ) Python version\
2232 {sys.version_info[0]:d}.{sys.version_info[1]:d}.{sys.version_info[2]:d}\
2233 local web directory\
2234 {user_settings.local_root_dir}\n")
2235 # Get command line options such as --verbose. Pass them back as flags in
2236 # user_settings.
2237 CommandLineSettings(user_settings, raw_args)
2238
2239 # Load all unit test functions named test_* from UnitTest class, run the tests and exit.
2240 if user_settings.UNITTEST:
2241 suite = unittest.TestLoader().loadTestsFromTestCase(UnitTest)
2242 unittest.TextTestRunner(verbosity=2).run(suite)
2243 # We are done!
2244 print(" ...done!", flush=True)
2245 return
2246
2247 # Start logging to file. Verbose turns on logging for
2248 # DEBUG, INFO, WARNING, ERROR, and CRITICAL levels,
2249 # otherwise we log only WARNING, ERROR, and CRITICAL levels.
2250 if user_settings.VERBOSE:
2251 loglevel = logging.DEBUG
2252 else:
2253 loglevel = logging.WARNING
2254
2255 # Pick the log file name on the host.
2256 if user_settings.CLEAN:
2257 user_settings.LOGFILENAME = "/private/logLocal.txt"
2258 else:
2259 user_settings.LOGFILENAME = "/private/logRemote.txt"
2260
2261 # Default is to skip processing or uploading MathJax files in /mathjax to the server.
2262 if not user_settings.MATHJAX:
2263 user_settings.DIR_TO_SKIP += "|mathjax"
2264 else:
2265 print(f"Processing and uploading mathjax files. git restore any changed files and git clean -f to remove extra files... ", end='', flush=True)
2266 print(f"Since you are loading MathJax for the first time --- Also don't forget to upload .htaccess manually. If using FileZilla, change your FreeServer settings: Files->Site Manager->Transfer Settings->Limit number of simultaneous connections->Check the box. This avoids ERROR 421 Too many connections... ", end='', flush=True)
2267 logging.debug(f"Processing and uploading mathjax files. git restore any changed files and git clean -f to remove extra files.")
2268 logging.debug(f"Since you are loading MathJax for the first time --- Also don't forget to upload .htaccess manually. If using FileZilla, change your FreeServer settings: Files->Site Manager->Transfer Settings->Limit number of simultaneous connections->Check the box. This avoids ERROR 421 Too many connections...")
2269
2270 logging.basicConfig(
2271 level=loglevel,
2272 format='%(asctime)s %(levelname)-8s %(message)s',
2273 datefmt='%a, %d %b %Y %H:%M:%S',
2274 filename=user_settings.local_root_dir + user_settings.LOGFILENAME,
2275 filemode='w')
2276
2277 logging.debug("********** Begin logging")
2278
2279 # ---------------------------------------------------------------------
2280 # Scan the local website, finding out all files and directories.
2281 # ---------------------------------------------------------------------
2282
2283 # Suppress newline to keep the message to the console more compact. Flush output buffer, so we can see the message right away.
2284 print(f"Scanning the local web site from the root dir = {user_settings.local_root_dir}... ", end='', flush=True)
2285 logging.debug(f"========================== Scanning the local web site from the root dir = {user_settings.local_root_dir}")
2286
2287 local = LocalWebSite(user_settings)
2288 local.scan()
2289
2290 # ---------------------------------------------------------------------
2291 # Clean up local website.
2292 # ---------------------------------------------------------------------
2293
2294 # Clean up the directory by rewriting source code and hypertext and removing temporary files.
2295 print("Cleaning local web site... ", end='', flush=True)
2296 logging.debug("========================== Cleaning the local web site")
2297 local.clean()
2298
2299 # We are done with the first scan of the local web site and will dispose of it.
2300 local.finish()
2301 del local
2302
2303 # ---------------------------------------------------------------------
2304 # Rescan the local website since there will be changes to source
2305 # files from the clean up stage.
2306 # ---------------------------------------------------------------------
2307
2308 print(f"Rescan the local web site from root dir = {user_settings.local_root_dir}", end='', flush=True)
2309 logging.debug(f"========================== Re-Scan the local web site from root dir = {user_settings.local_root_dir}")
2310
2311 local = LocalWebSite(user_settings)
2312
2313 local.scan()
2314
2315 # ---------------------------------------------------------------------
2316 # List all the local directories and files and their sizes.
2317 # ---------------------------------------------------------------------
2318
2319 # Local website directories.
2320 local_directory_list = local.directories
2321 logging.debug("********** List of all the Local Directories")
2322 for d in local_directory_list:
2323 logging.debug(f"\t {d:s}")
2324
    # Generate lists of the local website filenames only, and their sizes in bytes.
2326 local_files_name_size_pairs = [[file_info[user_settings.FILE_NAME], file_info[user_settings.FILE_SIZE]] for file_info in local.files]
2327 total_number_of_files = len( local_files_name_size_pairs )
2328 logging.debug(f"********** List of all the Local Files from largest to smallest. There are {total_number_of_files:15d} files.")
2329 local_files_name_size_pairs = sorted(local_files_name_size_pairs, key=lambda name_size: name_size[1], reverse=True)
2330
2331 # Local website filenames only, and their dates and times.
2332 local_file_datetime_pairs = [[file_info[user_settings.FILE_NAME],file_info[user_settings.FILE_DATE_TIME]] for file_info in local.files]
2333 logging.debug(f"********** List of all Local Files Showing Their Date and Time")
2334 for file_datetime_pair in local_file_datetime_pairs:
2335 logging.debug(f"\t {file_datetime_pair[1].ctime():s} UTC {file_datetime_pair[0]:s}")
2336
2337 # Total number of bytes in the local files.
2338 total_number_of_bytes = 0
2339 for file_size_pair in local_files_name_size_pairs:
2340 logging.debug(f"\t {file_size_pair[1]:10d} bytes {file_size_pair[0]:s}")
2341 total_number_of_bytes += file_size_pair[1]
2342 logging.debug(f"********** Total local file size = {total_number_of_bytes:10d} bytes = {total_number_of_bytes/(1024 ** 2):10.2f} MB (not counting skipped files and directories)")
2343
2344 local.finish()
2345
2346 if user_settings.CLEAN:
2347 logging.debug("========================== Done with local file and directory cleanup...")
2348 del local
2349 print("...done!", flush=True)
2350 return
2351
2352 # ---------------------------------------------------------------------
2353 # Scan the remote hosted web site.
2354 # ---------------------------------------------------------------------
2355
2356 print("Scanning remote web site...", end='', flush=True)
2357 logging.debug("========================== Scanning the remote web site...")
2358
2359 # Pick which website to update.
2360 logging.debug("Connecting to primary remote site.")
2361 remote = RemoteWebSite(user_settings)
2362 remote.scan()
2363 remote.finish()
2364
2365 # ---------------------------------------------------------------------
2366 # List all the remote server directories and files and their sizes.
2367 # ---------------------------------------------------------------------
2368
2369 remote_directory_list = remote.directories
2370 logging.debug("********** Remote Directories")
2371 for d in remote_directory_list:
2372 logging.debug(f"\t {d:s}")
2373
    # Remote website filenames only, and their sizes in bytes.
2375 remote_files_name_size_list = [[file_info[user_settings.FILE_NAME], file_info[user_settings.FILE_SIZE]] for file_info in remote.files]
2376 total_number_of_files = len( remote_files_name_size_list )
2377 logging.debug(f"********** Remote Files [num files = {total_number_of_files:15d}]")
2378 remote_files_name_size_list = sorted(remote_files_name_size_list, key=lambda name_size: name_size[1], reverse=True)
2379 total_number_of_bytes = 0
2380 for file_size in remote_files_name_size_list:
2381 logging.debug(f"\t {file_size[1]:10d} bytes {file_size[0]:s}")
2382 total_number_of_bytes += file_size[1]
2383 logging.debug(f"\tTotal file size on remote (not counting skipped files and directories) = {total_number_of_bytes:10d} bytes = {total_number_of_bytes/(1024 ** 2):10.2f} MB")
2384
2385 # ---------------------------------------------------------------------
2386 # Synchronize the local and remote web sites.
2387 # ---------------------------------------------------------------------
2388
2389 print("Synchronizing remote and local web sites...", end='', flush=True)
2390 logging.debug("========================= Synchronizing remote and local web sites...")
2391
2392 # Primary website.
2393 logging.debug("Connecting to primary remote site for synchronization.")
2394 sync = UpdateWeb(user_settings,
2395 local.directories,
2396 local.files,
2397 remote.directories,
2398 remote.files)
2399
2400 sync.update()
2401 sync.finish()
2402
2403 del sync
2404 del remote
2405 del local
2406 print("...done!", flush=True)
2407
2408 except UpdateWebException as detail:
2409 logging.error(f"Couldn't update the web directory: {str(detail):s}. Aborting...")
2410
2411 except RecursionError as detail:
2412 logging.error(f"Walking the directory tree became too deep for Python's recursion stack depth of {sys.getrecursionlimit():d} You can increase it with sys.setrecursionlimit(limit) {str(detail):s}. Aborting...")
2413
if __name__ == '__main__':
    # Script entry point.
    #
    # When this file is executed directly (e.g. `python updateweb.py -v`),
    # Python sets __name__ to '__main__' and we call main() here.
    #
    # When this file is imported as a module instead, nothing runs
    # automatically; the importer invokes the function explicitly:
    #
    #     import updateweb
    #     updateweb.main(["--test"])
    main()