208 lines
7.4 KiB
Python
208 lines
7.4 KiB
Python
# To add a new cell, type '# %%'
|
|
# To add a new markdown cell, type '# %% [markdown]'
|
|
# %%
|
|
import requests
|
|
from requests_oauthlib import OAuth2Session
|
|
import os
|
|
import json
|
|
import pandas as pd
|
|
import time
|
|
from datetime import datetime
|
|
|
|
|
|
# %%
|
|
# Maximum age, in days, of commits and issues that are still collected.
# The collectors skip the age check entirely when this is None.
MAXTIME = 365
# Folder name used by the (currently commented-out) result-path setup in main().
RESULT_FOLDER = "results"
# Client id handed to OAuth2Session.
CLIENTID = "hoernschen"
# SECURITY: an access token committed to source control must be considered
# leaked. Prefer supplying it through the GITHUB_TOKEN environment variable;
# the literal is kept only as a fallback so existing setups keep working and
# should be revoked and removed.
TOKEN = os.environ.get("GITHUB_TOKEN") or "8f8f5c00e15f7ae7dc6dcbc38b01015c1c1a51d9"
# CSV file read by main(); it must provide a "Github Tag" column.
FILE = "decentral_communication_protocols.csv"
|
|
|
|
|
|
# %%
|
|
def getGithubOAuthSession():
    """Create the OAuth2 session used for all GitHub requests.

    Returns:
        An ``OAuth2Session`` built from the module-level ``CLIENTID`` and a
        token dict whose ``access_token`` entry is the module-level ``TOKEN``.
    """
    return OAuth2Session(CLIENTID, token={'access_token': TOKEN})
|
|
|
|
|
|
# %%
|
|
def createCSV(pathToFile, list):
    """Dump a collection of records into a CSV file.

    Args:
        pathToFile: Destination handed to ``DataFrame.to_csv``.
        list: Data handed to the ``pandas.DataFrame`` constructor. The name
            shadows the builtin but is kept so keyword callers keep working.
    """
    pd.DataFrame(list).to_csv(pathToFile)
|
|
|
|
|
|
# %%
|
|
def getCommits(repoName, oAuthSession):
    """Collect the recent commits of a repository and their committers.

    Pages through ``/repos/{repoName}/commits`` until an empty page is
    returned, the API answers 409 (empty repository), a commit older than
    ``MAXTIME`` days is reached, or a non-retryable error occurs.

    Args:
        repoName: Repository in ``owner/name`` form, inserted into the URL.
        oAuthSession: Object whose ``get(url, headers=...)`` performs the
            request and returns a response with ``status_code``, ``json()``
            and ``text``.

    Returns:
        Tuple ``(commitList, devList)``: the collected commit objects and the
        distinct committer logins in first-seen order.
    """
    from datetime import timezone

    commitList = []
    devList = []
    seenLogins = set()  # O(1) duplicate check; devList keeps the order
    headers = {'Accept': 'application/vnd.github.mercy-preview+json'}
    baseUrl = 'https://api.github.com/repos/' + repoName + '/commits?page='

    # GitHub pagination is 1-based. Starting at 0 requested the first page
    # twice (page 0 and page 1), which duplicated its commits in the result.
    page = 1
    while True:
        response = oAuthSession.get(baseUrl + str(page), headers=headers)
        status = response.status_code

        if status == 200:
            commits = response.json()
            if not commits:
                break  # ran past the last page
            page += 1

            reachedOldCommit = False
            for commit in commits:
                if MAXTIME is not None:
                    # Timestamps end in 'Z' (UTC), so compare against UTC "now"
                    # rather than the machine's local time.
                    authoredAt = datetime.strptime(
                        commit['commit']['author']['date'], '%Y-%m-%dT%H:%M:%SZ'
                    ).replace(tzinfo=timezone.utc)
                    if (datetime.now(timezone.utc) - authoredAt).days > MAXTIME:
                        reachedOldCommit = True
                        break

                committer = commit['committer']
                if committer is not None and 'login' in committer:
                    login = committer['login']
                    if login not in seenLogins:
                        seenLogins.add(login)
                        devList.append(login)
                commitList.append(commit)

            if reachedOldCommit:
                break
        elif status == 409:  # empty repo
            break
        else:
            # Report the problem before (possibly) waiting, not an hour later.
            print("Error in Http Request (commitsRequest):", status, response.text)
            if status not in (403, 429) and status < 500:
                # Other client errors (e.g. 404) will not go away by waiting;
                # retrying them would keep this loop sleeping forever.
                break
            # Presumably a rate limit or a temporary server problem: wait and
            # retry the same page.
            time.sleep(3600)

    return commitList, devList
|
|
|
|
|
|
# %%
|
|
def getIssues(repoName, oAuthSession):
    """Collect the recent issues of a repository and their assignees.

    Pages through ``/repos/{repoName}/issues?state=all`` until an empty page
    is returned, the API answers 409, an issue created more than ``MAXTIME``
    days ago is reached, or a non-retryable error occurs.

    Args:
        repoName: Repository in ``owner/name`` form, inserted into the URL.
        oAuthSession: Object whose ``get(url, headers=...)`` performs the
            request and returns a response with ``status_code``, ``json()``
            and ``text``.

    Returns:
        Tuple ``(issueList, devList)``: the collected issue objects and the
        distinct assignee logins in first-seen order.
    """
    from datetime import timezone

    issueList = []
    devList = []
    seenLogins = set()  # O(1) duplicate check; devList keeps the order
    headers = {'Accept': 'application/vnd.github.mercy-preview+json'}
    baseUrl = 'https://api.github.com/repos/' + repoName + '/issues?state=all&page='

    # GitHub pagination is 1-based. Starting at 0 requested the first page
    # twice (page 0 and page 1), which duplicated its issues in the result.
    page = 1
    while True:
        response = oAuthSession.get(baseUrl + str(page), headers=headers)
        status = response.status_code

        if status == 200:
            issues = response.json()
            if not issues:
                break  # ran past the last page
            page += 1

            reachedOldIssue = False
            for issue in issues:
                if MAXTIME is not None:
                    # Timestamps end in 'Z' (UTC), so compare against UTC "now"
                    # rather than the machine's local time.
                    createdAt = datetime.strptime(
                        issue['created_at'], '%Y-%m-%dT%H:%M:%SZ'
                    ).replace(tzinfo=timezone.utc)
                    if (datetime.now(timezone.utc) - createdAt).days > MAXTIME:
                        reachedOldIssue = True
                        break

                # The single 'assignee' is checked first, then every entry of
                # 'assignees'; duplicates between the two are filtered below.
                singleAssignee = issue['assignee']
                people = [] if singleAssignee is None else [singleAssignee]
                people.extend(issue['assignees'])
                for person in people:
                    if 'login' in person and person['login'] not in seenLogins:
                        seenLogins.add(person['login'])
                        devList.append(person['login'])
                issueList.append(issue)

            if reachedOldIssue:
                break
        elif status == 409:  # empty repo
            break
        else:
            print("Error in Http Request (issuesRequest):", status, response.text)
            if status not in (403, 429) and status < 500:
                # Other client errors (e.g. 404 or 410) will not go away by
                # waiting; retrying them would keep this loop sleeping forever.
                break
            # Presumably a rate limit or a temporary server problem: wait and
            # retry the same page.
            time.sleep(3600)

    return issueList, devList
|
|
|
|
|
|
# %%
|
|
def getRepoFacts(repoName, oAuthSession):
    """Gather issues, commits and the involved developers of one repository.

    Args:
        repoName: Repository name forwarded to ``getIssues`` and ``getCommits``.
        oAuthSession: Session forwarded to ``getIssues`` and ``getCommits``.

    Returns:
        Tuple ``(issueList, commitList, devList)`` where ``devList`` holds each
        developer login once: first those reported by ``getIssues``, then any
        additional ones reported by ``getCommits``.
    """
    issueList, devListIssues = getIssues(repoName, oAuthSession)
    commitList, devListCommits = getCommits(repoName, oAuthSession)

    # Merge both login lists into one flat, duplicate-free list. The previous
    # ``devList.append(devListIssues)`` stored the whole issue list as a single
    # nested element, so logins were neither flattened nor de-duplicated.
    # dict.fromkeys keeps first-seen order.
    devList = list(dict.fromkeys(list(devListIssues) + list(devListCommits)))

    return issueList, commitList, devList
|
|
|
|
|
|
# %%
|
|
def getTopicFacts(topicName):
    """Collect statistics for every repository found by a topic search.

    Searches ``/search/repositories?q=topic:{topicName}`` page by page, gathers
    issues, commits and developers of each repository via ``getRepoFacts``,
    writes ``{topicName}_repos.csv``, ``{topicName}_issues.csv`` and
    ``{topicName}_commits.csv`` through ``createCSV`` and prints the totals.

    Args:
        topicName: Topic string appended to the search query.

    Returns:
        Tuple ``(amountRepos, amountStars, amountIssues, amountCommits,
        amountDevs)``. All zeros, with no CSV files written, when the very
        first search request fails.
    """
    amountStars = 0
    seenRepoNames = set()  # O(1) duplicate check for repositories
    repoList = []
    issueList = []
    commitList = []
    devList = []

    oAuthSession = getGithubOAuthSession()
    headers = {'Accept': 'application/vnd.github.mercy-preview+json'}
    searchUrl = 'https://api.github.com/search/repositories?q=topic:' + topicName

    # Pages are requested explicitly starting at 1. Previously the first page
    # was fetched twice: once without a page parameter and again as page=1.
    page = 1
    while True:
        topicResponse = oAuthSession.get(searchUrl + '&page=' + str(page), headers=headers)
        if topicResponse.status_code != 200:
            # Every page is status-checked now; follow-up pages used to be
            # parsed blindly, which hid errors behind a silent stop.
            print("Error in Http Request (topicRequest):", topicResponse.status_code, topicResponse.text)
            if page == 1:
                # Nothing was collected: report zeros and write no CSV files.
                return 0, 0, 0, 0, 0
            break  # keep what earlier pages delivered

        items = topicResponse.json().get('items')
        if not items:
            break  # no (more) results

        for repo in items:
            fullName = repo['full_name']
            print(fullName)
            if fullName in seenRepoNames:
                continue

            issueListRepo, commitListRepo, devListRepo = getRepoFacts(fullName, oAuthSession)
            issueList.extend(issueListRepo)
            commitList.extend(commitListRepo)
            # Plain list membership on purpose: entries are compared by
            # equality and are not required to be hashable.
            for dev in devListRepo:
                if dev not in devList:
                    devList.append(dev)

            amountStars = amountStars + int(repo['stargazers_count'])
            seenRepoNames.add(fullName)
            repoList.append(repo)

        page += 1

    createCSV(topicName + "_repos.csv", repoList)
    createCSV(topicName + "_issues.csv", issueList)
    createCSV(topicName + "_commits.csv", commitList)

    amountRepos = len(repoList)
    amountDevs = len(devList)
    amountIssues = len(issueList)
    amountCommits = len(commitList)
    print("Amount Repos:", str(amountRepos))
    print("Amount Stars:", str(amountStars))
    print("Amount Issues:", str(amountIssues))
    print("Amount Commits:", str(amountCommits))
    print("Amount Devs:", str(amountDevs))

    return amountRepos, amountStars, amountIssues, amountCommits, amountDevs
|
|
|
|
|
|
# %%
|
|
def main():
    """Compute the GitHub statistics for every protocol listed in ``FILE``.

    Reads the CSV named by ``FILE``, calls ``getTopicFacts`` for each value of
    its "Github Tag" column, adds the five resulting totals as new columns and
    writes the extended table to ``new.csv``.
    """
    amountReposList = []
    amountStarsList = []
    amountIssuesList = []
    amountCommitsList = []
    amountDevsList = []
    #result_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), RESULT_FOLDER)
    #if not os.path.exists(result_path):
    #    os.makedirs(result_path)
    protocols = pd.read_csv(FILE)
    for githubTag in protocols["Github Tag"]:
        print("Protocol:", githubTag)
        amountRepos, amountStars, amountIssues, amountCommits, amountDevs = getTopicFacts(githubTag)

        amountReposList.append(amountRepos)
        amountStarsList.append(amountStars)
        amountIssuesList.append(amountIssues)
        amountCommitsList.append(amountCommits)
        amountDevsList.append(amountDevs)

    # DataFrame.assign returns a NEW frame and leaves the original untouched.
    # The result used to be discarded, so new.csv never contained the
    # computed columns.
    protocols = protocols.assign(
        amountRepos=amountReposList,
        amountStars=amountStarsList,
        amountIssues=amountIssuesList,
        amountCommits=amountCommitsList,
        amountDevs=amountDevsList,
    )
    protocols.to_csv("new.csv")
|
|
|
|
# %%
|
|
# Script entry point: run the collection only when this file is executed
# directly, framed by start/end markers on stdout.
if "__main__" == __name__:
    print("Start")
    main()
    print("End")
|
|
|
|
|