搜尋此網誌

2012-12-19

簡易資料庫 API - 以 XBRL 為例

自製 XBRL 資料庫完成了將 XBRL 轉檔為 sqlite 的動作,接著需要一個工具來抽取資料庫的資料。

程式碼
from sqlite3 import *

db = connect("xbrldata.sqlite")
cur = db.cursor()

def query_db(t, s, d):
    if t == 0:
        cur.execute("select * from tw where s=? and d=?", (s, d))
    else:
        cur.execute("select * from tw where t=? and s=? and d=?", (t, s, d))
    return cur.fetchall()

def ratio(s1, s2, d1, d2=''):
    if d2 == '':
        d2 = d1
    t1 = cur.execute("select t, v from tw where s=? and d=?", (s1, d1)).fetchall()
    t2 = cur.execute("select t, v from tw where s=? and d=?", (s2, d2)).fetchall()
    t1, t2 = dict(t1), dict(t2)
    Intersection = set(t1.keys()) & set(t2.keys())
    result = {}
    for i in Intersection:
        result[i] = t1[i] / t2[i]
    return result

def sorting(dic, direction=1):
    result = []
    for r in dic:
        result.append([dic[r], r])
    if direction == 1:
        result = sorted(result)
    elif direction == -1:
        result = sorted(result, key=None, reverse=True)
    return result

def ROE(d1, d2=''):
    if d2 == '':
        d2 = d1
    result1 = ratio('NetIncomeLoss', 'StockholdersEquities', d1, d2)
    # basi 類別
    result2 = ratio('NetIncomeLoss', 'StockholdersEquity', d1, d2)
    # bd 類別
    result3 = ratio('NetIncomeLoss-IncomeStatement', 'StockholdersEquities', d1, d2)
    # fh 類別
    result4 = ratio('ConsolidatedNetIncomeLoss-ConsolidatedNetIncomeLoss-cr', 'StockholdersEquity-cr', d1, d2)
    result1.update(result2)
    result1.update(result3)
    result1.update(result4)
    return result1

# 結果需要乘以 10,因為 CommonStock 的面額是 10 元
def BVPS(d):
    return ratio('StockholdersEquities', 'CommonStock', d)

def GrossMargin(d):
    # 排除金融業
    t1 = cur.execute("select t, v from tw where g!='basi' and s=? and d=?", ('GrossProfitLossOperations', d)).fetchall()
    t2 = cur.execute("select t, v from tw where g!='basi' and s=? and d=?", ('OperatingRevenue', d)).fetchall()
    t1, t2 = dict(t1), dict(t2)
    Intersection = set(t1.keys()) & set(t2.keys())
    result = {}
    for i in Intersection:
        if t2[i] != 0:
            result[i] = t1[i] / t2[i]
    return result

def ProfitMargin(d):
    # 排除金融業
    t1 = cur.execute("select t, v from tw where g!='basi' and s=? and d=?", ('OperatingIncomeLoss', d)).fetchall()
    t2 = cur.execute("select t, v from tw where g!='basi' and s=? and d=?", ('OperatingRevenue', d)).fetchall()
    t1, t2 = dict(t1), dict(t2)
    Intersection = set(t1.keys()) & set(t2.keys())
    result = {}
    for i in Intersection:
        if t2[i] != 0:
            result[i] = t1[i] / t2[i]
    return result

def Quarterly(tickers, field):
    result = {}
    for r in tickers:
        data = cur.execute("select d, v, q from tw where t=? and s=? order by d", (r, field)).fetchall()
        # 處理因合併而導致不同期的財報數字不同
        for i in reversed(range(1, len(data))):
            if data[i][0] == data[i-1][0]:
                if data[i][2] > data[i-1][2]:
                    del data[i-1]
                else:
                    del data[i]
        temp = []
        if data[0][:4] == 'From':
            if data[0][0][-4:] == '0331':
                temp.append([data[0][0][4:8] + 'Q1', data[0][1]])
            for i in range(1, len(data)):
                year = data[i][0][4:8]
                if data[i][0][-4:] == '0331' and data[i-1][0][-4:] == '1231' and (int(data[i][0][4:8]) - int(data[i-1][0][4:8]) == 1):
                    temp.append([year + 'Q1', data[i][1]])
                elif data[i][0][-4:] == '0630' and data[i-1][0][-4:] == '0331' and data[i][0][4:8] == data[i-1][0][4:8]:
                    temp.append([year + 'Q2', data[i][1] - data[i-1][1]])
                elif data[i][0][-4:] == '0930' and data[i-1][0][-4:] == '0630' and data[i][0][4:8] == data[i-1][0][4:8]:
                    temp.append([year + 'Q3', data[i][1] - data[i-1][1]])
                elif data[i][0][-4:] == '1231' and data[i-1][0][-4:] == '0930' and data[i][0][4:8] == data[i-1][0][4:8]:
                    temp.append([year + 'Q4', data[i][1] - data[i-1][1]])
        else:
            for i in data:
                temp.append([i[0][4:8] + i[2][-2:], i[1]])
        result[r] = dict(temp)
    return result

def QuarterlyRatio(tickers, field1, field2):
    result = {}
    for r in tickers:
        data1 = cur.execute("select d, v, q from tw where t=? and s=? order by d", (r, field1)).fetchall()
        data2 = cur.execute("select d, v, q from tw where t=? and s=? order by d", (r, field2)).fetchall()
        # 處理因合併而導致不同期的財報數字不同
        for i in reversed(range(1, len(data1))):
            if data1[i][0] == data1[i-1][0]:
                if data1[i][2] > data1[i-1][2]:
                    del data1[i-1]
                    del data2[i-1]
                else:
                    del data1[i]
                    del data2[i]
        temp = []
        if data1[0][:4] == 'From' and data2[0][:4] == 'From':
            if data1[0][0][-4:] == '0331':
                if data2[0][1] != 0:
                    temp.append([data1[0][0][4:8] + data1[0][2][-2:], data1[0][1] / data2[0][1]])
            for i in range(1, len(data1)):
                if data2[i][1] != 0:
                    interval = data1[i][0][4:8] + data1[i][2][-2:]
                    if data1[i][0][-4:] == '0331' and data1[i-1][0][-4:] == '1231' and (int(data1[i][0][4:8]) - int(data1[i-1][0][4:8]) == 1):
                        temp.append([interval, data1[i][1] / data2[i][1]])
                    elif data1[i][0][-4:] == '0630' and data1[i-1][0][-4:] == '0331' and data1[i][0][4:8] == data1[i-1][0][4:8]:
                        temp.append([interval, (data1[i][1] - data1[i-1][1]) / (data2[i][1] - data2[i-1][1])])
                    elif data1[i][0][-4:] == '0930' and data1[i-1][0][-4:] == '0630' and data1[i][0][4:8] == data1[i-1][0][4:8]:
                        temp.append([interval, (data1[i][1] - data1[i-1][1]) / (data2[i][1] - data2[i-1][1])])
                    elif data1[i][0][-4:] == '1231' and data1[i-1][0][-4:] == '0930' and data1[i][0][4:8] == data1[i-1][0][4:8]:
                        temp.append([interval, (data1[i][1] - data1[i-1][1]) / (data2[i][1] - data2[i-1][1])])
        elif data1[0][:4] == 'From' and data2[0][:4] == 'AsOf':
            if data1[0][0][-4:] == '0331':
                if data2[0][1] != 0:
                    temp.append([data1[0][0][4:8] + data1[0][2][-2:], data1[0][1] / data2[0][1]])
            for i in range(1, len(data1)):
                if data2[i][1] != 0:
                    interval = data1[i][0][4:8] + data1[i][2][-2:]
                    if data1[i][0][-4:] == '0331' and data1[i-1][0][-4:] == '1231' and (int(data1[i][0][4:8]) - int(data1[i-1][0][4:8]) == 1):
                        temp.append([interval, data1[i][1] / data2[i][1]])
                    elif data1[i][0][-4:] == '0630' and data1[i-1][0][-4:] == '0331' and data1[i][0][4:8] == data1[i-1][0][4:8]:
                        temp.append([interval, (data1[i][1] - data1[i-1][1]) / data2[i][1]])
                    elif data1[i][0][-4:] == '0930' and data1[i-1][0][-4:] == '0630' and data1[i][0][4:8] == data1[i-1][0][4:8]:
                        temp.append([interval, (data1[i][1] - data1[i-1][1]) / data2[i][1]])
                    elif data1[i][0][-4:] == '1231' and data1[i-1][0][-4:] == '0930' and data1[i][0][4:8] == data1[i-1][0][4:8]:
                        temp.append([interval, (data1[i][1] - data1[i-1][1]) / data2[i][1]])
        else:
            for i in range(len(data1)):
                if data2[i][1] != 0:
                    temp.append([data1[i][0][4:8] + data1[i][2][-2:], data1[i][1] / data2[i][1]])
        result[r] = dict(temp)
    return result

def Yearly(tickers, field):
    result = {}
    if type(tickers) != list:
        tickers = cur.execute("select distinct t from tw").fetchall()
    for r in tickers:
        data = cur.execute("select d, v, q from tw where t=? and s=? and d like '%1231' order by d", (r, field)).fetchall()
        # 處理因合併而導致不同期的財報數字不同
        for i in reversed(range(1, len(data))):
            if data[i][0] == data[i-1][0]:
                if data[i][2] > data[i-1][2]:
                    del data[i-1]
                else:
                    del data[i]
        temp = []
        for i in range(len(data)):
            temp.append([data[i][0][4:8], data[i][1]])
        result[r] = dict(temp)
    return result

def YearlyRatio(tickers, field1, field2):
    result = {}
    for r in tickers:
        data1 = cur.execute("select d, v, q from tw where t=? and s=? and d like '%1231' order by d", (r, field1)).fetchall()
        data2 = cur.execute("select d, v, q from tw where t=? and s=? and d like '%1231' order by d", (r, field2)).fetchall()
        # 處理因合併而導致不同期的財報數字不同
        for i in reversed(range(1, len(data1))):
            if data1[i][0] == data1[i-1][0]:
                if data1[i][2] > data1[i-1][2]:
                    del data1[i-1]
                    del data2[i-1]
                else:
                    del data1[i]
                    del data2[i]
        temp = []
        for i in range(len(data1)):
            temp.append([data1[i][0][4:8], data1[i][1] / data2[i][1]])
        result[r] = dict(temp)
    return result
回傳 2011Q4 所有獨特的 tickers
>>> tickers = [r[0] for r in cur.execute("select distinct t from tw where d='From20110101To20111231'").fetchall()]
2330 的 2011Q2 的 Sales
>>> query_db(2330, 'Sales', 'From20110101To20110630')
[(2330, 'Sales', 'From20110101To20110630', 212301752000, 'ci', '2011Q2'), (2330, 'Sales', 'From20110101To20110630', 212301752000, 'ci', '2012Q2')]
全部公司 2012Q2 的負債比率
>>> ratios2012Q2 = ratio('Liabilities', 'Assets', 'AsOf20120630')
從上面的結果 ratios2012Q2 中查詢 2330 的負債比率
>>> ratios2012Q2[2330]
0.28155126779953576
求出全部 2011Q4 的 ROE,這個 function 頗麻煩,因為不同類別的公司科目名稱不同
>>> ROE2011Q4 = ROE('From20110101To20111231', 'AsOf20111231')
查詢 2330、2882、A00037(2882 的子公司) 的 ROE
>>> ROE2011Q4[2330], ROE2011Q4[2882], ROE2011Q4['A00037']
(0.21315543444432625, 0.05183247653995867, 0.16480948755724995)
全部公司 2011Q4 的毛利率,排除金融業
>>> Gross2011Q4 = GrossMargin('From20110101To20111231')
2330 的毛利率
>>> Gross2011Q4[2330]
0.4427123019828931
全部公司 2011Q4 的淨利率,排除金融業
>>> Profit2011Q4 = ProfitMargin('From20110101To20111231')
2330 的淨利率
>>> Profit2011Q4[2330]
0.33211538516208233
2330 與 2303 的每季 OperatingRevenue-cr(合併營業收入)
>>> QSales = Quarterly([2330, 2303], 'OperatingRevenue-cr')
查詢 2330 與 2303 的 2012Q2 的 OperatingRevenue-cr(合併營業收入)
>>> QSales[2330]['2012Q2'], QSales[2303]['2012Q2']
(128186331000, 30377330000)
2330 與 2303 的每季毛利率
>>> QRatio = QuarterlyRatio([2330, 2303], 'GrossProfitLossOperations', 'OperatingRevenue')
2330 的 2012Q1 毛利率
>>> QRatio[2330]['2012Q1']
0.46519396647648564
2330 與 2303 的每年淨利
>>> YProfit = Yearly([2330, 2303], 'NetIncomeLoss')
2303 在 2012Q1 的淨利
>>> YProfit[2303]['2011']
10609695000
2330 與 2303 的每年研發佔營業額比例
>>> YRatio = YearlyRatio([2330, 2303], 'ResearchDevelopmentExpenses', 'Sales')
2330 與 2303 在 2011 年,研發佔營業額的比例
>>> YRatio[2330]['2011'], YRatio[2303]['2011']
(0.07496115395181556, 0.08651388300561276)
結束。

沒有留言:

張貼留言