import requests
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.dates import date2num
from plotnine import *
from datetime import datetime, timedelta
This notebook
* fetches the number of Stack Overflow questions per month with the `python`
tag, using the Stack Exchange API
* plots them
In [7]:
In [3]:
# Stack Exchange API endpoint that lists questions.
base_url = "https://api.stackexchange.com/2.3/questions"

# Query parameters shared by every request. "fromdate" and "todate" are
# placeholders here; they are filled in with epoch seconds for each month
# before a request is sent.
params = {
    "site": "stackoverflow",
    "tagged": "python",
    "pagesize": 1,  # only the count is read, so keep the page as small as possible
    "fromdate": None,
    "todate": None,
    "filter": "total",  # the response's "total" field is what gets read later
}
In [4]:
# Get Stack Overflow data for the last 96 complete calendar months.
n_months = 96

current_date = datetime.now()
# Midnight on the first day of the current month: the upper bound of the
# window. Zeroing the time of day keeps every boundary exactly on a month
# start instead of carrying over the moment the cell happened to run.
first_of_current_month = current_date.replace(
    day=1, hour=0, minute=0, second=0, microsecond=0
)
end_date = first_of_current_month - timedelta(days=1)  # last day of the previous month

# n_months + 1 month-start boundaries delimit exactly n_months intervals.
# Counting periods (rather than subtracting 96 * 30 days) gives a true
# 96-month window regardless of month lengths.
months = pd.date_range(end=first_of_current_month, periods=n_months + 1, freq="MS")
start_date = months[0]

data = []

# Fetch the data from the API: one request per [month_start, month_end) pair.
for month_start, month_end in zip(months[:-1], months[1:]):
    params["fromdate"] = int(month_start.timestamp())
    params["todate"] = int(month_end.timestamp())

    # The timeout keeps a stalled connection from hanging the notebook, and
    # raise_for_status() surfaces HTTP errors directly instead of letting
    # them show up as a confusing KeyError on "total" below.
    response = requests.get(base_url, params=params, timeout=30)
    response.raise_for_status()
    total_questions = response.json()["total"]

    data.append(
        {"month": month_start.strftime("%Y-%m"), "questions": total_questions}
    )

# One row per month: "month" as "YYYY-MM", "questions" as the API's total.
df = pd.DataFrame(data)
In [8]:
# Row for the month that gets highlighted and annotated on the chart.
nov_2022_data = df[df["month"] == "2022-11"].iloc[0]

sns.set(style="white")
plt.rcParams["font.family"] = "sans-serif"

fig, ax = plt.subplots(figsize=(7, 3.5))

# Line plus markers for the monthly question counts.
sns.lineplot(
    x="month", y="questions", data=df, ax=ax, linewidth=2, color="#1f77b4"
)
sns.scatterplot(
    x="month", y="questions", data=df, ax=ax, color="#1f77b4", s=20
)

# Dashed vertical marker and arrow label at the highlighted month.
ax.axvline(x=nov_2022_data["month"], ymin=0, ymax=1, linestyle="--", color="grey")
ax.annotate(
    "ChatGPT\nrelease",
    xy=(nov_2022_data["month"], nov_2022_data["questions"] + 1000),
    xytext=(5, 30),
    textcoords="offset points",
    arrowprops=dict(arrowstyle="->", color="#3B4252"),
    color="#3B4252",
)

# X ticks: roughly evenly spaced month labels plus the highlighted month.
num_labels = 5
# max(1, ...) guards against a zero slice step when df has very few rows.
step = max(1, len(df["month"]) // (num_labels - 1))
month_labels = df["month"].tolist()
highlight_pos = month_labels.index(nov_2022_data["month"])
# Drop regular ticks that sit too close to the highlighted month so their
# labels do not collide with it. This is decided by position, not by slicing
# a set, whose iteration order is arbitrary.
min_gap = max(1, step // 4)
regular_ticks = {
    month_labels[pos]
    for pos in range(0, len(month_labels), step)
    if abs(pos - highlight_pos) >= min_gap
}
# "YYYY-MM" strings sort chronologically.
xticks = sorted(regular_ticks | {nov_2022_data["month"]})

ax.set_xticks(xticks)
plt.xticks(rotation=60, fontsize=12)

ax.set_ylim(5000, 30000)
ax.set_yticks(range(10000, 31000, 10000))
plt.yticks(fontsize=12)

ax.set_xlabel("Month", fontsize=14, fontweight="bold", labelpad=15)
ax.set_ylabel(
    "# of questions with python tag", fontsize=14, fontweight="bold", labelpad=15
)

# Mute the axes: grey ticks and spines, no grid, no top/right spines.
ax.tick_params(axis="both", colors="grey")
for spine in ["bottom", "left"]:
    ax.spines[spine].set_color("grey")
ax.grid(False)
sns.despine()

plt.show()
In [6]:
# Optional: uncomment to save the fetched data to a CSV file.
# df.to_csv("stackoverflow_python_questions.csv", index=False)