#!/usr/bin/env python3
# -*- coding: UTF-8 -*-

import os
import hashlib
import tempfile
import shutil
import traceback
import requests


# Silence urllib3 warnings for the whole process. searchcode() in this file
# calls requests.get(..., verify=False); presumably this is here to hide the
# insecure-request warning that each such call would otherwise trigger.
requests.packages.urllib3.disable_warnings()

# Compute the MD5 digest of a file


def file2md5(_file):
    """Return the hexadecimal MD5 digest of a file's contents.

    The file is hashed in fixed-size chunks so that large files are never
    loaded into memory in one piece.

    Args:
        _file: Path of the file to hash.

    Returns:
        The MD5 digest as a hexadecimal string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    md5obj = hashlib.md5()
    with open(_file, 'rb') as f:
        # iter() with a sentinel keeps calling f.read() until it returns b''.
        for chunk in iter(lambda: f.read(1024 * 1024), b''):
            md5obj.update(chunk)
    return md5obj.hexdigest()

# Search code on GitHub


def searchcode(keyword, page=1, per_page=100):
    """Query the GitHub code-search endpoint and return the decoded JSON.

    The request is authenticated with the token read from the ``GH_TOKEN``
    environment variable and asks for results sorted by index time,
    newest first.

    Args:
        keyword: Search expression sent as the ``q`` query parameter.
        page: Result page number to request.
        per_page: Number of results requested per page.

    Returns:
        The parsed JSON body of the response, or an empty dict when the
        request fails or the body cannot be decoded as JSON.
    """
    headers = {
        'Authorization': 'token {}'.format(os.getenv('GH_TOKEN')),
        'Connection': 'close',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36'
    }
    params = {
        'q': keyword,
        'sort': 'indexed',
        'order': 'desc',
        'page': page,
        'per_page': per_page,
    }
    try:
        # NOTE(review): verify=False turns off TLS certificate verification
        # on a request that carries the API token - confirm this is intended.
        response = requests.get('https://api.github.com/search/code',
                                params=params, headers=headers, verify=False,
                                allow_redirects=False, timeout=10)
        return response.json()
    except (requests.RequestException, ValueError):
        # Best effort: network failures and undecodable bodies yield an empty
        # result, while KeyboardInterrupt/SystemExit are no longer swallowed.
        return {}


def _known_poc_hashes(root_path):
    """Return the MD5 digests of the PoC files already stored in go/ and json/."""
    known = set()
    for sub_dir in ('go', 'json'):
        folder = os.path.join(root_path, sub_dir)
        for name in os.listdir(folder):
            if name.endswith(('.json', '.go')):
                known.add(file2md5(os.path.join(folder, name)))
    return known


def _search_repo_urls(keywords, pages=10):
    """Return the set of repository home-page URLs found for the given searches."""
    urls = set()
    for keyword in keywords:
        for page in range(1, pages + 1):
            try:
                result = searchcode(keyword, page=page, per_page=100)
                for item in result.get('items', []):
                    html_url = item.get('repository', {}).get('html_url')
                    if html_url:
                        urls.add(html_url)
            except Exception:
                # A malformed page must not abort the remaining searches.
                traceback.print_exc()
    return urls


def _copy_if_new_poc(file_path, name, root_path, known_hashes):
    """Copy one candidate file into go/ or json/ if it is an unseen Goby PoC."""
    with open(file_path, 'r', encoding='utf8') as f:
        content = f.read()
    if 'GobyQuery' not in content or 'ScanSteps' not in content:
        return
    print(file_path)
    digest = file2md5(file_path)
    if digest in known_hashes:
        return
    # Callers only pass names ending in '.json' or '.go'.
    sub_dir = 'json' if name.endswith('.json') else 'go'
    shutil.copyfile(file_path, os.path.join(root_path, sub_dir, name))
    # Remember the digest so identical files found in other repositories
    # during this run are not copied again.
    known_hashes.add(digest)


def _harvest_repo(url, root_path, known_hashes):
    """Clone *url* into a throw-away directory and collect its new PoC files."""
    import subprocess

    # Last path segment of the home-page URL is the repository name.
    repo = url.rstrip('/').rsplit('/', 1)[-1]
    # The context manager removes the clone again, even when an error occurs.
    with tempfile.TemporaryDirectory() as temp_dir:
        repo_path = os.path.join(temp_dir, repo)
        # Argument list instead of a shell string: the URL comes from an API
        # response and must never be interpreted by a shell. '--' stops git
        # from treating it as an option. Only the working tree is scanned,
        # so a shallow clone is enough.
        subprocess.run(
            ['git', 'clone', '--depth', '1', '--', url, repo_path],
            check=False)
        print(repo_path)
        if not os.path.isdir(repo_path):
            # Clone failed; nothing to scan.
            return
        for root, _, files in os.walk(repo_path):
            for name in files:
                if not name.endswith(('.json', '.go')):
                    continue
                try:
                    _copy_if_new_poc(os.path.join(root, name), name,
                                     root_path, known_hashes)
                except (OSError, UnicodeError):
                    # Unreadable or non-UTF-8 files are reported and skipped.
                    traceback.print_exc()


def _write_readme(root_path):
    """Rewrite README.md with the current number of .go and .json PoC files."""
    go_count = sum(
        1 for name in os.listdir(os.path.join(root_path, 'go'))
        if name.endswith('.go'))
    json_count = sum(
        1 for name in os.listdir(os.path.join(root_path, 'json'))
        if name.endswith('.json'))
    with open(os.path.join(root_path, 'README.md'), 'w', encoding='utf8') as f:
        f.write('# Goby POC统计\n| 文件类型 | 数量 |\n| :----:| :----: |\n| .go | {} |\n| .json | {} |'.format(
            go_count, json_count))


def main():
    """Collect Goby PoC files from GitHub into go/ and json/ next to this script.

    Steps: hash the PoCs already present, search GitHub code for candidate
    repositories, clone each one and copy PoC files whose content has not
    been seen yet, then regenerate the README statistics table.
    """
    # Every path is anchored to the script directory, so the result does not
    # depend on the directory the script is started from.
    root_path = os.path.dirname(os.path.abspath(__file__))
    print(root_path)
    for sub_dir in ('go', 'json'):
        os.makedirs(os.path.join(root_path, sub_dir), exist_ok=True)

    # Digests of the PoCs that are already stored locally.
    known_hashes = _known_poc_hashes(root_path)

    # Search code to obtain the repository home pages.
    # NOTE(review): these keywords are sent through requests' `params=`, which
    # percent-encodes the '+'; confirm GitHub still applies the language
    # qualifier for queries written this way.
    repo_urls = _search_repo_urls(
        ['GobyQuery+language:Go', 'GobyQuery+language:Json'])

    for url in sorted(repo_urls):
        print(url)
        try:
            _harvest_repo(url, root_path, known_hashes)
        except Exception:
            # One broken repository must not stop the whole crawl.
            traceback.print_exc()

    _write_readme(root_path)


if __name__ == '__main__':
    main()