diff --git a/.gitignore b/.gitignore index 08262bce2a6..129448228d6 100644 --- a/.gitignore +++ b/.gitignore @@ -6,7 +6,12 @@ hugo.exe /htmltest.exe /.hugo_build.lock - +/_scripts/venv/ +/_scripts/aliasLinkWarnings.log +/_scripts/compareDocsToExcel.log + +/_scripts/~$aliases-list.xlsx + # For Mac users .DS_Store diff --git a/_scripts/aliases-list.xlsx b/_scripts/aliases-list.xlsx new file mode 100644 index 00000000000..b30e1eb4782 Binary files /dev/null and b/_scripts/aliases-list.xlsx differ diff --git a/_scripts/checkAliases-requirements.txt b/_scripts/checkAliases-requirements.txt new file mode 100644 index 00000000000..3a0e96fe45f Binary files /dev/null and b/_scripts/checkAliases-requirements.txt differ diff --git a/_scripts/checkAliases.py b/_scripts/checkAliases.py new file mode 100644 index 00000000000..ad3222baf23 --- /dev/null +++ b/_scripts/checkAliases.py @@ -0,0 +1,249 @@ +""" +checkAliases.py goes through all .md files to grab 'aliases' parameter in front matter +It goes through every .md file within given startDir. +It outputs a JSON list of all files containing aliases. +It outputs a CSV list of all files containing aliases. +It goes through content again to check all cross-reference links against aliases list. 
# Add front matter to list of dict entries
def addItem(post):
    """Record one document's front matter in the module-level alias lists.

    Reads ``aliases``, ``title``, ``url`` and ``mapped`` from ``post``.
    Documents without a ``url`` are ignored. A document is appended to
    ``aliasList`` when it has an ``aliases`` entry or is flagged
    ``mapped: true``; each of its aliases is also appended to
    ``aliasCompare`` for the later cross-reference check.

    Args:
        post: Object exposing ``get(key)`` for front matter values
            (``parseMdFile`` passes the result of ``frontmatter.load``).
    """
    url = post.get("url")
    if url is None:
        return

    aliases = post.get("aliases")
    # A single alias written as a plain string would otherwise be iterated
    # one character at a time, both here and when the table is written
    if isinstance(aliases, str):
        aliases = [aliases]
    isMapped = post.get("mapped") is True

    # Dictionary entry describing this file
    itemDict = {
        "Title": post.get("title"),
        "URL": "docs.mendix.com" + url,
        "Front matter": "mapped" if isMapped else "",
        "aliases": aliases,
    }

    # Each alias of the file is remembered for the cross-reference check
    if aliases is not None:
        aliasCompare.extend(aliases)
    # Only docs that have aliases or are mapped are tracked
    if aliases is not None or isMapped:
        aliasList.append(itemDict)


# Checks for the use of aliases in cross reference links
def aliasCheck(post, relDir):
    """Log a warning for every link in the document body that targets an alias.

    The body (``post.content``) is parsed with markdown-it using the
    ``gfm-like`` preset. For every ``link`` node the anchor (everything from
    the first ``#``) is dropped, and what remains is looked up in
    ``aliasCompare``. Matches are written to ``aliasLogger``.

    Args:
        post: Parsed document; only ``post.content`` is read.
        relDir: Path of the document, used in the log message only.
    """
    parser = MarkdownIt("gfm-like")
    tree = SyntaxTreeNode(parser.parse(post.content))

    for node in tree.walk():
        if node.type != "link":
            continue
        href = node.attrs.get("href")
        if not href:
            continue
        # Removes any anchor from the link; anchor-only links leave nothing to check
        target = href.split('#', maxsplit=1)[0]
        if target and target in aliasCompare:
            aliasLogger.warning('Link %s in file %s is an alias, please replace', target, relDir)


# Go through .md file types
# Can toggle parsing of front matter
# Can toggle finding aliases in .md body content
def parseMdFile(filePath, frontMatterGrab=True, checkAlias=False):
    """Load one Markdown file and run the requested checks on it.

    Args:
        filePath: ``pathlib`` path of the file; it must be located under the
            module-level ``start`` directory, because the path relative to
            ``start`` is what appears in the logs.
        frontMatterGrab: When true, collect the front matter with ``addItem``.
        checkAlias: When true, scan the body for alias links with
            ``aliasCheck``.
    """
    # Relative path from the starting directory, used in logs
    relDir = filePath.relative_to(start)
    with open(filePath, mode='r', encoding="utf-8") as file:
        # Loads .md file into post via frontmatter module
        post = frontmatter.load(file)

    if frontMatterGrab:
        addItem(post)
    if checkAlias:
        aliasCheck(post, relDir)
def _normalizeAliases(aliases):
    """Return aliases as a list so that a missing value and an empty list compare equal."""
    if aliases is None:
        return []
    if isinstance(aliases, str):
        return [aliases]
    return list(aliases)


def _firstByUrl(entries):
    """Index entries by their "URL" value; the first entry wins when a URL repeats."""
    index = {}
    for entry in entries:
        index.setdefault(entry["URL"], entry)
    return index


def _mismatchMessage(item, other, missingMsg, suffix):
    """Build the warning text for ``item`` compared with ``other`` ("" when they agree)."""
    if other is None:
        return missingMsg
    message = ""
    if item["Title"] != other["Title"]:
        message += "| Title mismatch" + suffix
    if item["Front matter"] != other["Front matter"]:
        message += "| Front matter mismatch" + suffix
    if _normalizeAliases(item["aliases"]) != _normalizeAliases(other["aliases"]):
        message += "| Aliases mismatch" + suffix
    return message


def compareLists(list1: list, list2: list):
    """Compare the docs list with the Excel list and log every difference.

    Entries are dicts with the keys "Title", "URL", "Front matter" and
    "aliases". They are paired by "URL"; if a URL occurs more than once in a
    list, only its first entry is used for pairing. ``list1`` is checked
    against ``list2`` first, then ``list2`` against ``list1``. A URL that was
    already reported is not reported a second time.

    An ``aliases`` value of ``None`` is treated like an empty list, because a
    mapped doc without aliases carries ``None`` on the docs side while the
    Excel side always carries a list.

    The input dicts are left untouched: each reported difference is a copy of
    the entry with an extra "Warning" key. Results go to ``compareLogger``;
    nothing is logged when the lists agree.

    Args:
        list1: Entries collected from the docs content.
        list2: Entries read from the Excel table.
    """
    diff = []
    reportedUrls = set()
    passes = (
        (list1, _firstByUrl(list2), "| Entry missing from Excel |", ""),
        (list2, _firstByUrl(list1), "| Doc missing in repo |", " "),
    )
    for entries, lookup, missingMsg, suffix in passes:
        for item in entries:
            url = item["URL"]
            # A URL that is already in the diff does not need to be checked again
            if url in reportedUrls:
                continue
            message = _mismatchMessage(item, lookup.get(url), missingMsg, suffix)
            if message:
                diff.append({**item, "Warning": message})
                reportedUrls.add(url)

    if diff:
        compareLogger.warning('The lists do not match! There are %d differences:',len(diff))
        for number, line in enumerate(diff, start=1):
            compareLogger.warning('%d. %s', number, line)


# Driver: only runs when the file is executed as a script, so importing the
# module does not start prompting for input
if __name__ == "__main__":
    # Grab working directory
    # TO DO - the hardcoded link will need changing
    startDir = input('Specify FULL PATH to local content directory:\n(For example, C:\\Users\\user.name\\Documents\\docs\\content\\en\\docs\\) ')
    start = pathlib.Path(startDir)

    # Empty lists to help with parsing data
    aliasList = []
    aliasCompare = []

    # Initialize loggers for aliases and comparison
    aliasLogger = generics.initLogger('aliasLog', 'aliasLinkWarnings.log')
    compareLogger = generics.initLogger('compareLog', 'compareDocsToExcel.log')

    # Walk through all directories and files to find .md files
    dirList = generics.dirWalk(start, "**/*.md")

    # For all .md files in dirList parse their front matter
    for path in dirList:
        parseMdFile(path, frontMatterGrab=True, checkAlias=False)

    # Only needed when there is no mapping table yet, so anything other than
    # an explicit yes skips the table generation
    populateTablePrompt = input("Do you want to populate the mapping table? (y/N) ")
    if populateTablePrompt.strip().lower() in ("y", "yes"):
        tableName = input("What is the Excel table name?") + ".xlsx"
        tableFunctions.populateExcelFromList(aliasList, tableName)

    # Parse Excel file into manageable list
    tableToParse = input("Specify FULL PATH and name of Excel table to compare against content\n(For example, C:\\Users\\user.name\\Documents\\mapping-table.xlsx)")
    myNewList = tableFunctions.createListFromExcel(tableToParse)

    # Store all docs grabbed and all excel entries into sorted lists
    # This is just for testing
    docsList = sorted(aliasList, key=lambda x: x['URL'], reverse=False)
    excelList = sorted(myNewList, key=lambda x: x['URL'], reverse=False)

    # Writes a list of all aliases as a JSON file (list of dicts)
    # Useful for troubleshooting or if one wants to compare the list results in JSON
    # with open('listFromExcel.json', 'w') as logfile:
    #     json.dump(excelList, logfile)
    # with open('docsList.json', 'w') as logfile:
    #     json.dump(docsList, logfile)
# Final steps of the checkAliases.py driver; they only run when that file is
# executed as a script
if __name__ == "__main__":
    # Compares the lists, logs any differences
    compareLists(docsList, excelList)

    # For all .md files in dirList check their text body for aliases instead of proper relative URL in cross references
    # Comment out to skip
    for path in dirList:
        parseMdFile(path, frontMatterGrab=False, checkAlias=True)


# Walk through all directories and files of given directory
# globPattern can be used to specify file type, defaults to all directories and files
# Returns a list of path objects
def dirWalk(start, globPattern="**/*"):
    """Return every path under ``start`` that matches ``globPattern``.

    Args:
        start: Directory to search; any object with a ``glob`` method, such
            as a ``pathlib.Path``.
        globPattern: Pattern handed to ``start.glob``. The default matches
            all directories and files below ``start``.

    Returns:
        A list holding whatever ``start.glob`` yields, in the order yielded.
    """
    return list(start.glob(globPattern))


# To setup multiple loggers
def initLogger(name, logFile, level=logging.WARNING):
    """Return the logger called ``name``, writing bare messages to ``logFile``.

    The file handler uses the format ``%(message)s`` and UTF-8, so non-ASCII
    titles and links are written the same way on every platform. Calling the
    function again with the same name and file reuses the existing handler
    instead of attaching a second one, which would write every message twice.

    Args:
        name: Logger name passed to ``logging.getLogger``.
        logFile: Path of the log file.
        level: Minimum level the logger lets through.

    Returns:
        The configured ``logging.Logger``.
    """
    import os

    logger = logging.getLogger(name)
    logger.setLevel(level)

    # FileHandler stores its target as an absolute path in baseFilename
    target = os.path.abspath(os.fspath(logFile))
    alreadyAttached = any(
        isinstance(existing, logging.FileHandler) and existing.baseFilename == target
        for existing in logger.handlers
    )
    if not alreadyAttached:
        handler = logging.FileHandler(logFile, encoding="utf-8")
        handler.setFormatter(logging.Formatter('%(message)s'))
        logger.addHandler(handler)
    return logger
The script takes an Excel file (with columns `Title`, `URL`, `Front matter` and `Aliases`) and saves it as a list. +4. It orders both lists based on the URL of the entries. +5. It compares the ordered lists and logs any differences in compareDocsToExcel.log. +6. As a last step it loops through all content docs again, checking whether any alias entry is used in a cross-reference link. If so, it is logged in aliasLinkWarnings.log. + +## Assumptions + +The script and its intended use assume the following: + +* The script will initially be run on a local checkout of the repository. +* On the first run an Excel table will be generated. This Excel table will replace the current Mapping doc. This will be done by porting over any useful information from the Mapping doc into the Excel table. +* A new front matter parameter `mapped: true` will be added to docs that are mapped from other Mendix products. The parameter does not need to be present for the initial table creation, but should be present in content files for any subsequent script run. This enables better tracking of mapped files through the repo and the Excel table. The idea is to add `mapped: true` to any doc that is mapped, as well as "mapped" to the corresponding document entry in the Excel table, under column "Front matter". +* After the initial table has been created, the function that generates it can be commented out of the script so that this step is skipped. +* It is possible to comment out (skip) the check for aliases within cross references. + +## Script Dependencies + +The script has been tested on Python version 3.10.5. + +The package dependencies for the script can be found in file checkAliases-requirements.txt. 
def _isBlank(value):
    """Return True for an empty cell value: None or a string holding only whitespace."""
    return value is None or (isinstance(value, str) and not value.strip())


# Populates a blank excel table with aliases and mapped URLs from .md files
# Should only be run once, after table exists is not needed
def populateExcelFromList(list, table):
    """Write the collected docs to a new workbook and save it as ``table``.

    Row 1 holds the column headers and is written even when ``list`` is
    empty. Each item gets one row with its title, URL and front matter flag,
    followed by one row per alias with the alias in column 4. Cells that are
    intentionally left empty get a "lightGrid" pattern fill.

    Args:
        list: Dicts with the keys "Title", "URL", "Front matter" and
            "aliases" ("aliases" may be ``None``).
        table: File name the workbook is saved to.
    """
    workbook = openpyxl.Workbook()
    sheet = workbook.active

    headers = (
        "Title",
        "URL",
        "Front matter",
        "Aliases",
        "Product",
        "First version",
        "Last version",
        "Location in product",
        "Reported to team",
        "Follow up",
        "Notes",
    )
    for column, header in enumerate(headers, start=1):
        sheet.cell(row=1, column=column, value=header)

    scratchedCell = openpyxl.styles.PatternFill("lightGrid",fill_type=None,fgColor="00C0C0C0")

    # A running counter keeps rows distinct even when two items are equal
    rowNumber = 1
    for item in list:
        rowNumber += 1
        sheet.cell(row=rowNumber, column=1, value=item["Title"])
        sheet.cell(row=rowNumber, column=2, value=item["URL"])
        sheet.cell(row=rowNumber, column=3, value=item["Front matter"])
        sheet.cell(row=rowNumber, column=4, value="").fill = scratchedCell

        aliases = item["aliases"]
        if aliases is None:
            aliases = []
        elif isinstance(aliases, str):
            # A single alias must not be written one character per row
            aliases = [aliases]
        # Aliases jump to a new blank row
        # There can be more than one
        for alias in aliases:
            rowNumber += 1
            sheet.cell(row=rowNumber, column=1, value="").fill = scratchedCell
            sheet.cell(row=rowNumber, column=2, value="").fill = scratchedCell
            sheet.cell(row=rowNumber, column=3, value="").fill = scratchedCell
            sheet.cell(row=rowNumber, column=4, value=alias)

    # Save the excel file
    workbook.save(filename=table)


# Creates a list for comparison from excel table
# The list matches the format of initial .md file parsing
# which enables comparison of this and the initial list
def createListFromExcel(table):
    """Read the mapping table into a list of dicts.

    Rows are read from row 2 down to the last used row of the active sheet.
    A row starts an entry with its title (column 1), URL (column 2) and front
    matter flag (column 3, an empty cell becomes ""). The rows that follow
    are taken as aliases of that entry for as long as column 1 is blank and
    column 4 is filled. Rows with neither a title nor a URL that do not
    belong to an entry are skipped.

    The workbook is only read. It is not saved back, so the user's file is
    never rewritten by a comparison run.

    Args:
        table: Path of the Excel file.

    Returns:
        A list of dicts with the keys "Title", "URL", "Front matter" and
        "aliases", in sheet order.
    """
    workbook = openpyxl.load_workbook(filename=table)
    sheet = workbook.active

    # Read the bound once: accessing a cell below the used range creates it,
    # which would move sheet.max_row while the loop is running
    lastRow = sheet.max_row
    listFromTable = []
    startRow = 2

    # The last row is included so a final entry without alias rows is kept
    while startRow <= lastRow:
        title = sheet.cell(row=startRow, column=1).value
        url = sheet.cell(row=startRow, column=2).value
        mapped = sheet.cell(row=startRow, column=3).value
        if mapped is None:
            mapped = ""

        # Aliases jump to a new blank row
        # There can be more than one
        aliases = []
        nextRow = startRow + 1
        while nextRow <= lastRow:
            alias = sheet.cell(row=nextRow, column=4).value
            isAliasRow = _isBlank(sheet.cell(row=nextRow, column=1).value) and not _isBlank(alias)
            if not isAliasRow:
                break
            aliases.append(alias)
            nextRow += 1

        if not (_isBlank(title) and _isBlank(url)):
            listFromTable.append({"Title": title, "URL": url, "Front matter": mapped, "aliases": aliases})
        startRow = nextRow

    return listFromTable