['Objects']['Basic Usage']
The full JSON output is too large to post here; it is available at:
https://github.com/samfisherirl/Useful-AHK-v2-Libraries-and-Classes/blob/main/AHKDocumentation.json
Code: Select all
import os
import json
from bs4 import BeautifulSoup
from pathlib import Path
def extract_text(html_content):
    """Split an HTML page into a ``{heading: text}`` dictionary.

    The first ``<h1>`` element, if present, is stored under the key
    ``'Title'``. Each ``<h2>``-``<h6>`` heading then becomes a key whose
    value is the stripped text of the sibling nodes that follow it, up to
    (but not including) the next sibling heading of any level, joined with
    single spaces.

    Only *siblings* of a heading are walked, so text that lives under a
    different parent element is not collected for that heading.

    When a key is already taken (the same heading text appears twice, or a
    heading is literally called ``Title``), the later section is stored
    under ``"<heading> (2)"``, ``"<heading> (3)"``, ... instead of
    overwriting the earlier one.

    Args:
        html_content: HTML markup to parse with BeautifulSoup's
            ``'html.parser'`` backend.

    Returns:
        A dict mapping heading text (str) to section text (str).
    """
    section_tags = ['h2', 'h3', 'h4', 'h5', 'h6']
    stop_tags = ['h1'] + section_tags

    soup = BeautifulSoup(html_content, 'html.parser')
    extracted = {}

    h1 = soup.find('h1')
    if h1 is not None:
        # Stripped for consistency with the section titles below.
        extracted['Title'] = h1.get_text().strip()

    for section in soup.find_all(section_tags):
        section_title = section.get_text().strip()
        section_content = []

        # Compare against None explicitly: an empty text node is falsy and
        # would otherwise end the section before the next heading.
        node = section.next_sibling
        while node is not None and node.name not in stop_tags:
            text = node.get_text().strip()
            if text:
                section_content.append(text)
            node = node.next_sibling

        # Never overwrite an existing entry; number repeated headings.
        key = section_title
        occurrence = 2
        while key in extracted:
            key = '{} ({})'.format(section_title, occurrence)
            occurrence += 1
        extracted[key] = ' '.join(section_content)

    return extracted
def process_folder(folder_path):
    """Extract the text of every ``*.htm`` file below ``folder_path``.

    The folder is searched recursively. Each regular file matching
    ``*.htm`` is read as UTF-8 and passed to ``extract_text``.

    Results are keyed by the bare file name. Because the search is
    recursive, two files in different sub-folders may share a name; in that
    case the file closest to ``folder_path`` keeps the bare name and every
    later clash is keyed by its POSIX-style path relative to
    ``folder_path`` (for example ``"lib/index.htm"``), so no file's content
    is silently dropped.

    Args:
        folder_path: Root directory to scan, as a ``pathlib.Path`` or a
            plain string path.

    Returns:
        A dict mapping file key (str) to the dict produced by
        ``extract_text`` for that file.
    """
    root = Path(folder_path)
    content_dict = {}

    # Shallowest files first (then alphabetical) so the output order is
    # deterministic and a top-level page always wins the bare-name key.
    candidates = sorted(
        root.rglob("*.htm"),
        key=lambda p: (len(p.parts), p.as_posix()),
    )

    for p in candidates:
        # rglob also yields directories whose name ends in ".htm".
        if not p.is_file():
            continue

        key = p.name
        if key in content_dict:
            # A relative path of a nested file always contains "/", so it
            # cannot collide with any bare file name.
            key = p.relative_to(root).as_posix()

        html_content = p.read_text(encoding='utf-8')
        content_dict[key] = extract_text(html_content)

    return content_dict
def write_to_json_file(data, filepath):
    """Serialize ``data`` as pretty-printed JSON into ``filepath``.

    The target is opened for writing as UTF-8 (replacing any existing
    content). Non-ASCII characters are written as-is rather than escaped,
    and nested structures are indented by four spaces.

    Args:
        data: JSON-serializable object to write.
        filepath: Destination accepted by the built-in ``open``.
    """
    dump_options = {'ensure_ascii': False, 'indent': 4}
    with open(filepath, mode='w', encoding='utf-8') as sink:
        json.dump(data, sink, **dump_options)
# Script body: these statements run at module level (there is no
# ``__main__`` guard, so they also run if this file is imported).
# Directory containing this script; the script is expected to sit in the docs
# folder alongside the local .htm documentation files.
folder_path = Path(__file__).parent.resolve()
# Relative path, so the output is created in the current working directory.
output_json_path = 'output.json'
# Parse every .htm file found under folder_path into a dict keyed by file name.
data = process_folder(folder_path)
# Serialize the collected data to output_json_path as JSON.
write_to_json_file(data, output_json_path)