Add MegaLinter config. Fix my rubbish code to appease the linter

Signed-off-by: Tim Collins <tim@thecollins.team>
pull/3135/head
Tim Collins 2025-01-24 09:37:23 +00:00
parent 5835ac4e5c
commit c304c37c51
GPG Key ID: 09F625E00301ED18
8 changed files with 182 additions and 90 deletions

--- /dev/null
+++ b/.mega-linter.yml

@@ -0,0 +1,21 @@
+# Configuration file for MegaLinter
+# Run MegaLinter locally with: `docker run --rm -v /var/run/docker.sock:/var/run/docker.sock:rw -v $(pwd):/tmp/lint:rw oxsecurity/megalinter:v8`
+ENABLE:
+  - DOCKERFILE
+  - PYTHON
+DISABLE_LINTERS:
+  - PYTHON_FLAKE8
+  - PYTHON_PYRIGHT
+  - PYTHON_RUFF
+  - PYTHON_BANDIT
+CLEAR_REPORT_FOLDER: true
+PYTHON_PYLINT_PRE_COMMANDS:
+  - command: pip install -r /tmp/lint/requirements.txt
+    venv: pylint
+REPORT_OUTPUT_FOLDER: none
+# You might want to enable this locally so fixes are applied for you instead of guessing what needs to change. Review the result before committing to git.
+#APPLY_FIXES: all
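As an aside, MegaLinter also accepts its configuration settings as environment variables, so a one-off local run with fixes applied — without uncommenting `APPLY_FIXES` above — might look like the following sketch. The `-e APPLY_FIXES=all` override is the only addition to the documented command; the config filename `.mega-linter.yml` is assumed to be MegaLinter's default.

```bash
# One-off MegaLinter run with auto-fixes enabled via an env-var override.
# Fixed files are written back into the mounted repo, so inspect them
# with `git diff` before committing anything.
docker run --rm \
  -e APPLY_FIXES=all \
  -v /var/run/docker.sock:/var/run/docker.sock:rw \
  -v "$(pwd)":/tmp/lint:rw \
  oxsecurity/megalinter:v8
```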

--- a/Dockerfile
+++ b/Dockerfile

@@ -5,4 +5,4 @@ RUN pip install --no-cache-dir -r requirements.txt
 COPY main.py fetch_releases.py fetch_helmet_releases.py merge_csvs.py plot_graph.py ./
 RUN chmod +x main.py fetch_releases.py fetch_helmet_releases.py merge_csvs.py plot_graph.py
-CMD [ "python", "./main.py" ]
+CMD [ "python", "-u", "./main.py" ]

--- a/README.md
+++ b/README.md

@@ -19,3 +19,9 @@ docker run --rm -e GITHUB_TOKEN=$GITHUB_TOKEN -v ${PWD}:/app team-helm-analysis
 ```
 You should get 3 CSVs and 4 graphs once this completes. It takes around 5 minutes to run.
+## Linting
+I ran MegaLinter against this just to give some confidence that it's not completely broken. It's not perfect, but it's something.
+Run MegaLinter locally against this directory with: `docker run --rm -v /var/run/docker.sock:/var/run/docker.sock:rw -v $(pwd):/tmp/lint:rw oxsecurity/megalinter:v8`
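While iterating on lint failures it can be quicker to re-run a single linter than the whole suite. Since MegaLinter's configuration variables can also be passed as environment variables on the command line, a sketch along these lines should work — the `ENABLE_LINTERS` override is the only change to the command above:

```bash
# Re-run only pylint for a faster feedback loop while fixing issues.
docker run --rm \
  -e ENABLE_LINTERS=PYTHON_PYLINT \
  -v /var/run/docker.sock:/var/run/docker.sock:rw \
  -v "$(pwd)":/tmp/lint:rw \
  oxsecurity/megalinter:v8
```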

--- a/fetch_helmet_releases.py
+++ b/fetch_helmet_releases.py

@@ -1,36 +1,40 @@
-import requests
 import csv
-from datetime import datetime
 import os
+from datetime import datetime
+import requests
 # GitHub repository URL
 repo_url = "https://api.github.com/repos/argoproj/argo-helm/releases"
 # Get the GitHub token from environment variables
-github_token = os.getenv('GITHUB_TOKEN')
+github_token = os.getenv("GITHUB_TOKEN")
 if not github_token:
     raise ValueError("GITHUB_TOKEN environment variable is not set")
 # Function to fetch all releases with pagination
 def fetch_all_releases(url):
     releases = []
-    headers = {'Authorization': f'token {github_token}'}
+    headers = {"Authorization": f"token {github_token}"}
     while url:
         response = requests.get(url, headers=headers)
         response.raise_for_status()
         releases.extend(response.json())
-        url = response.links.get('next', {}).get('url')
+        url = response.links.get("next", {}).get("url")
     return releases
 # Function to get the content of Chart.yaml in a release
 def get_chart_yaml(repo, tag, chart_path):
     url = f"https://raw.githubusercontent.com/{repo}/refs/tags/{tag}/charts/{chart_path}/Chart.yaml"
-    headers = {'Authorization': f'token {github_token}'}
+    headers = {"Authorization": f"token {github_token}"}
     response = requests.get(url, headers=headers)
     if response.status_code == 200:
         return response.text
     return None
 # Function to extract appVersion from Chart.yaml content
 def extract_app_version(chart_yaml):
     for line in chart_yaml.splitlines():
@@ -38,41 +42,49 @@ def extract_app_version(chart_yaml):
             return line.split(":")[1].strip()
     return None
 # Function to fetch releases and write to a CSV file
 def fetch_and_write_helmet_releases(csv_file):
     # Fetch all releases
     releases = fetch_all_releases(repo_url)
     # Write the release data to the CSV file
-    with open(csv_file, mode='w', newline='') as file:
-        writer = csv.writer(file, quoting=csv.QUOTE_NONE, escapechar='\\')
+    with open(csv_file, mode="w", newline="") as file:
+        writer = csv.writer(file, quoting=csv.QUOTE_NONE, escapechar="\\")
         writer.writerow(["Release Name", "Release Date", "Release Time", "App Version"])
         for release in releases:
-            tag_name = release['tag_name']
-            published_at = release['published_at']
+            tag_name = release["tag_name"]
+            published_at = release["published_at"]
             release_date = datetime.strptime(published_at, "%Y-%m-%dT%H:%M:%SZ").date()
             release_time = datetime.strptime(published_at, "%Y-%m-%dT%H:%M:%SZ").time()
             # Extract chart path from the release name
-            chart_path = '-'.join(tag_name.split('-')[:-1])
-            current_chart_yaml = get_chart_yaml("argoproj/argo-helm", tag_name, chart_path)
+            chart_path = "-".join(tag_name.split("-")[:-1])
+            current_chart_yaml = get_chart_yaml(
+                "argoproj/argo-helm", tag_name, chart_path
+            )
             if current_chart_yaml:
                 current_app_version = extract_app_version(current_chart_yaml)
-                writer.writerow([tag_name, release_date, release_time, current_app_version])
+                writer.writerow(
+                    [tag_name, release_date, release_time, current_app_version]
+                )
     # Read the CSV file, remove any instances of `\"`, and write back the cleaned content
-    with open(csv_file, mode='r') as file:
+    with open(csv_file, mode="r") as file:
         content = file.read()
-    cleaned_content = content.replace('\\"', '')
+    cleaned_content = content.replace('\\"', "")
-    with open(csv_file, mode='w', newline='') as file:
+    with open(csv_file, mode="w", newline="") as file:
         file.write(cleaned_content)
-    print(f"Release data has been written to {csv_file} and cleaned of any instances of \\\"")
+    print(
+        f'Release data has been written to {csv_file} and cleaned of any instances of \\"'
+    )
 # Example usage
 if __name__ == "__main__":
-    fetch_and_write_helmet_releases('argo_helm_releases.csv')
+    fetch_and_write_helmet_releases("argo_helm_releases.csv")

--- a/fetch_releases.py
+++ b/fetch_releases.py

@@ -1,7 +1,8 @@
-import requests
 import csv
-from datetime import datetime
 import os
+from datetime import datetime
+import requests
 # List of GitHub repository URLs we care about
 repos = [
@@ -12,38 +13,45 @@ repos = [
 ]
 # Get the GitHub token from environment variables
-github_token = os.getenv('GITHUB_TOKEN')
+github_token = os.getenv("GITHUB_TOKEN")
 if not github_token:
     raise ValueError("GITHUB_TOKEN environment variable is not set")
 # Fetch all releases with pagination
 def fetch_all_releases(url):
     releases = []
-    headers = {'Authorization': f'token {github_token}'}
+    headers = {"Authorization": f"token {github_token}"}
     while url:
         response = requests.get(url, headers=headers)
         response.raise_for_status()
         releases.extend(response.json())
-        url = response.links.get('next', {}).get('url')
+        url = response.links.get("next", {}).get("url")
     return releases
 # Fetch releases and write to a CSV file
 def fetch_and_write_releases(csv_file):
-    with open(csv_file, mode='w', newline='') as file:
+    with open(csv_file, mode="w", newline="") as file:
         writer = csv.writer(file)
         writer.writerow(["Repository", "Release Tag", "Release Date", "Release Time"])
         for repo_name, repo_url in repos:
             releases = fetch_all_releases(repo_url)
             for release in releases:
-                tag_name = release['tag_name']
-                published_at = release['published_at']
-                release_date = datetime.strptime(published_at, "%Y-%m-%dT%H:%M:%SZ").date()
-                release_time = datetime.strptime(published_at, "%Y-%m-%dT%H:%M:%SZ").time()
+                tag_name = release["tag_name"]
+                published_at = release["published_at"]
+                release_date = datetime.strptime(
+                    published_at, "%Y-%m-%dT%H:%M:%SZ"
+                ).date()
+                release_time = datetime.strptime(
+                    published_at, "%Y-%m-%dT%H:%M:%SZ"
+                ).time()
                 writer.writerow([repo_name, tag_name, release_date, release_time])
     print(f"Release data has been written to {csv_file}")
 # Example usage
 if __name__ == "__main__":
-    fetch_and_write_releases('argo_releases.csv')
+    fetch_and_write_releases("argo_releases.csv")

--- a/main.py
+++ b/main.py

@@ -1,30 +1,31 @@
-from fetch_releases import fetch_and_write_releases
-from fetch_helmet_releases import fetch_and_write_helmet_releases
-from merge_csvs import merge_csv_files
-from plot_graph import plot_time_difference
 import os
+from fetch_helmet_releases import fetch_and_write_helmet_releases
+from fetch_releases import fetch_and_write_releases
+from merge_csvs import merge_csv_files
+from plot_graph import plot_time_difference
 # Check there is a github token
-github_token = os.getenv('GITHUB_TOKEN')
+github_token = os.getenv("GITHUB_TOKEN")
 if not github_token:
     raise ValueError("GITHUB_TOKEN environment variable is not set")
 # Do the thing
 print("Fetching releases...")
-fetch_and_write_releases('argo_releases.csv')
+fetch_and_write_releases("argo_releases.csv")
 print("Done")
 print("Fetching Team Helmet releases...")
-fetch_and_write_helmet_releases('argo_helm_releases.csv')
+fetch_and_write_helmet_releases("argo_helm_releases.csv")
 print("Done")
 print("Merging release info...")
-merge_csv_files('argo_releases.csv', 'argo_helm_releases.csv', 'merged_releases.csv')
+merge_csv_files("argo_releases.csv", "argo_helm_releases.csv", "merged_releases.csv")
 print("Done")
 print("Plotting time difference graphs...")
-plot_time_difference('merged_releases.csv')
+plot_time_difference("merged_releases.csv")
 print("Done")
 # Delete __pycache__ directories
-os.system('rm -rf __pycache__')
+os.system("rm -rf __pycache__")

--- a/merge_csvs.py
+++ b/merge_csvs.py

@@ -1,70 +1,98 @@
 import csv
 from datetime import datetime
 def merge_csv_files(csv_file_1, csv_file_2, output_csv_file):
     # Read the first CSV file into a dictionary
     releases_1 = {}
-    with open(csv_file_1, mode='r') as file:
+    with open(csv_file_1, mode="r") as file:
         reader = csv.DictReader(file)
         for row in reader:
-            key = (row['Repository'], row['Release Tag'])
+            key = (row["Repository"], row["Release Tag"])
             releases_1[key] = row
     # Read the second CSV file and find the oldest release for each appVersion
     oldest_releases = {}
-    valid_repos = {'argo-cd', 'argo-events', 'argo-workflows', 'argo-rollouts'}
-    with open(csv_file_2, mode='r') as file:
+    valid_repos = {"argo-cd", "argo-events", "argo-workflows", "argo-rollouts"}
+    with open(csv_file_2, mode="r") as file:
         reader = csv.DictReader(file)
         for row in reader:
-            release_name = row['Release Name']
-            repo_name = '-'.join(release_name.split('-')[:-1])
+            release_name = row["Release Name"]
+            repo_name = "-".join(release_name.split("-")[:-1])
             if repo_name in valid_repos:
-                app_version = row['App Version']
-                release_datetime = datetime.strptime(f"{row['Release Date']} {row['Release Time']}", "%Y-%m-%d %H:%M:%S")
-                if (repo_name, app_version) not in oldest_releases or release_datetime < oldest_releases[(repo_name, app_version)]['datetime']:
+                app_version = row["App Version"]
+                release_datetime = datetime.strptime(
+                    f"{row['Release Date']} {row['Release Time']}", "%Y-%m-%d %H:%M:%S"
+                )
+                if (
+                    repo_name,
+                    app_version,
+                ) not in oldest_releases or release_datetime < oldest_releases[
+                    (repo_name, app_version)
+                ][
+                    "datetime"
+                ]:
                     oldest_releases[(repo_name, app_version)] = {
-                        'row': row,
-                        'datetime': release_datetime
+                        "row": row,
+                        "datetime": release_datetime,
                     }
     # Merge the oldest releases with the first CSV file
     merged_releases = []
     for (repo_name, app_version), data in oldest_releases.items():
-        row = data['row']
+        row = data["row"]
         for key, release in releases_1.items():
-            if repo_name == release['Repository'] and app_version == release['Release Tag']:
-                time_difference = data['datetime'] - datetime.strptime(f"{release['Release Date']} {release['Release Time']}", "%Y-%m-%d %H:%M:%S")
-                time_difference_hours = time_difference.total_seconds() / 3600 # Convert to hours
+            if (
+                repo_name == release["Repository"]
+                and app_version == release["Release Tag"]
+            ):
+                time_difference = data["datetime"] - datetime.strptime(
+                    f"{release['Release Date']} {release['Release Time']}",
+                    "%Y-%m-%d %H:%M:%S",
+                )
+                time_difference_hours = (
+                    time_difference.total_seconds() / 3600
+                )  # Convert to hours
                 merged_row = {
-                    'Repository': release['Repository'],
-                    'Release Tag': release['Release Tag'],
-                    'Release Date': release['Release Date'],
-                    'Release Time': release['Release Time'],
-                    'App Version': app_version,
-                    'Release Name': row['Release Name'],
-                    'Release Date 2': row['Release Date'],
-                    'Release Time 2': row['Release Time'],
-                    'Time Difference': time_difference_hours
+                    "Repository": release["Repository"],
+                    "Release Tag": release["Release Tag"],
+                    "Release Date": release["Release Date"],
+                    "Release Time": release["Release Time"],
+                    "App Version": app_version,
+                    "Release Name": row["Release Name"],
+                    "Release Date 2": row["Release Date"],
+                    "Release Time 2": row["Release Time"],
+                    "Time Difference": time_difference_hours,
                 }
                 merged_releases.append(merged_row)
                 break
         else:
             merged_row = {
-                'Repository': repo_name,
-                'Release Tag': '',
-                'Release Date': '',
-                'Release Time': '',
-                'App Version': app_version,
-                'Release Name': row['Release Name'],
-                'Release Date 2': row['Release Date'],
-                'Release Time 2': row['Release Time'],
-                'Time Difference': ''
+                "Repository": repo_name,
+                "Release Tag": "",
+                "Release Date": "",
+                "Release Time": "",
+                "App Version": app_version,
+                "Release Name": row["Release Name"],
+                "Release Date 2": row["Release Date"],
+                "Release Time 2": row["Release Time"],
+                "Time Difference": "",
            }
             merged_releases.append(merged_row)
     # Write the merged data to a new CSV file
-    with open(output_csv_file, mode='w', newline='') as file:
-        fieldnames = ['Repository', 'Release Tag', 'Release Date', 'Release Time', 'App Version', 'Release Name', 'Release Date 2', 'Release Time 2', 'Time Difference']
+    with open(output_csv_file, mode="w", newline="") as file:
+        fieldnames = [
+            "Repository",
+            "Release Tag",
+            "Release Date",
+            "Release Time",
+            "App Version",
+            "Release Name",
+            "Release Date 2",
+            "Release Time 2",
+            "Time Difference",
+        ]
         writer = csv.DictWriter(file, fieldnames=fieldnames)
         writer.writeheader()
         for row in merged_releases:
@@ -72,6 +100,9 @@ def merge_csv_files(csv_file_1, csv_file_2, output_csv_file):
     print(f"Merged data has been written to {output_csv_file}")
 # Example usage
 if __name__ == "__main__":
-    merge_csv_files('argo_releases.csv', 'argo_helm_releases.csv', 'merged_releases.csv')
+    merge_csv_files(
+        "argo_releases.csv", "argo_helm_releases.csv", "merged_releases.csv"
+    )

--- a/plot_graph.py
+++ b/plot_graph.py

@@ -1,17 +1,24 @@
 import csv
 import matplotlib.pyplot as plt
 from packaging import version
 def plot_time_difference(csv_file):
     # Read the CSV file and process the data
-    data = {'argo-cd': [], 'argo-events': [], 'argo-workflows': [], 'argo-rollouts': []}
-    release_tags = {'argo-cd': [], 'argo-events': [], 'argo-workflows': [], 'argo-rollouts': []}
-    with open(csv_file, mode='r') as file:
+    data = {"argo-cd": [], "argo-events": [], "argo-workflows": [], "argo-rollouts": []}
+    release_tags = {
+        "argo-cd": [],
+        "argo-events": [],
+        "argo-workflows": [],
+        "argo-rollouts": [],
+    }
+    with open(csv_file, mode="r") as file:
         reader = csv.DictReader(file)
         for row in reader:
-            repo = row['Repository']
-            time_diff_str = row['Time Difference']
-            release_tag = row['Release Tag']
+            repo = row["Repository"]
+            time_diff_str = row["Time Difference"]
+            release_tag = row["Release Tag"]
             if repo in data and time_diff_str:
                 time_diff = float(time_diff_str)
                 data[repo].append(time_diff)
@@ -19,27 +26,33 @@ def plot_time_difference(csv_file):
     # Sort the release tags based on semantic versioning
     for repo in release_tags:
-        sorted_indices = sorted(range(len(release_tags[repo])), key=lambda i: version.parse(release_tags[repo][i]))
+        sorted_indices = sorted(
+            range(len(release_tags[repo])),
+            key=lambda i: version.parse(release_tags[repo][i]),
+        )
         release_tags[repo] = [release_tags[repo][i] for i in sorted_indices]
         data[repo] = [data[repo][i] for i in sorted_indices]
     # Plot the data
     for repo, time_diffs in data.items():
         plt.figure(figsize=(10, 6))
-        plt.plot(release_tags[repo], time_diffs, marker='o', label=repo)
-        plt.axhline(y=72, color='r', linestyle='--', label='SLA (72 hours)')
-        plt.xlabel('Upstream Release Tag')
-        plt.ylabel('Time difference between upstream release and Helm Chart release (hours)')
-        plt.title(f'Time to Release Helm Chart for {repo}')
+        plt.plot(release_tags[repo], time_diffs, marker="o", label=repo)
+        plt.axhline(y=72, color="r", linestyle="--", label="SLA (72 hours)")
+        plt.xlabel("Upstream Release Tag")
+        plt.ylabel(
+            "Time difference between upstream release and Helm Chart release (hours)"
+        )
+        plt.title(f"Time to Release Helm Chart for {repo}")
         plt.legend()
         plt.grid(True)
         plt.xticks(rotation=45)
         plt.tight_layout()
-        plt.savefig(f'time_difference_plot_{repo}.png')
+        plt.savefig(f"time_difference_plot_{repo}.png")
         plt.close()
     print("The plots have been saved as 'time_difference_plot_<repo>.png'")
 # Example usage
 if __name__ == "__main__":
-    plot_time_difference('merged_releases.csv')
+    plot_time_difference("merged_releases.csv")