Commit 85be62fe authored by Alexandre Dulaunoy's avatar Alexandre Dulaunoy
Browse files

First working version of the NIST ref importer into Redis (db 12)

parent b97757cf
Loading
Loading
Loading
Loading
+44 −16
Original line number Diff line number Diff line
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Import NIST CVE Reference Key/Maps
# Import NIST CVE Reference Key/Maps into Redis
#
# Software is free software released under the "Modified BSD license"
#
@@ -10,14 +10,15 @@
# Imports
import os
import sys
import re
from lxml.html import fromstring
# Append the parent of this script's directory to sys.path before the
# `lib.*` imports below (presumably that is where the `lib` package lives).
runPath = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(runPath, ".."))

from urllib.request import urlopen
import zipfile
import shutil

from lib.ProgressBar import progressbar
# When True, each element/reference pair is printed as it is stored.
verbose = False
from lib.Config import Configuration

# URL of the reference-map archive, as returned by the project configuration.
RefUrl = Configuration.getRefURL()
@@ -29,35 +30,62 @@ info = db.info
# Open the reference feed. On failure, stop with a readable message that names
# the URL actually requested (RefUrl).
try:
    u = urlopen(RefUrl)
except Exception:
    # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
    sys.exit("Cannot open url %s. Bad URL or not connected to the internet?"%(RefUrl))

# Compare the feed's Last-Modified header with the value recorded by the
# previous successful run; when they match there is nothing new to import.
i = info.find_one({'db': 'ref'})
if i is not None and u.headers['last-modified'] == i['last-modified']:
    print("Not modified")
    sys.exit(0)
# Create the temp directory (no error if it already exists) and download the
# reference-map archive into it.
os.makedirs('./tmp', exist_ok=True)

with open('./tmp/allrefmaps.zip', 'wb') as fp:
    # Stream the HTTP response to disk without loading it all into memory.
    shutil.copyfileobj(u, fp)

# Obtain the Redis connection that the import loop writes to. Abort when it
# cannot be created; passing a string to sys.exit prints it to stderr and
# still exits with status 1.
try:
    r = Configuration.getRedisRefConnection()
except Exception as err:
    sys.exit("Cannot get the Redis reference connection: %s" % (err))


# Import every per-source HTML page of the downloaded archive into Redis.
#
# For each archive entry the vendor prefix is the part of the name between the
# first "-" and the first "."; entries without a "-" there (e.g. index.html)
# are skipped. Within a page, a table cell whose text starts with the vendor
# prefix becomes the "current" reference, and every following non-blank cell
# is used as a Redis key to whose SET the current reference is added
# (presumably those cells are CVE IDs -- confirm against the NIST pages).
#
# NOTE: earlier design notes mention further keys (s:CVEID for the "alt link",
# r:<prefix> for the url, d:<prefix> for the prefix description); this loop
# does not write them.
with zipfile.ZipFile('./tmp/allrefmaps.zip') as archive:
    for filename in archive.namelist():
        print(filename)

        # Work out the vendor prefix before opening the entry.
        stem = filename.split(".")[0]
        _, dash, vendor = stem.partition("-")
        if not dash:
            continue

        with archive.open(filename) as infile:
            page = fromstring(infile.read())

        current = None
        for cell in page.xpath("//table//tr//*"):
            text = cell.text
            if not text:
                continue
            # Plain prefix test: the vendor string is data, not a regex.
            if text.startswith(vendor):
                current = text
                continue
            if text.isspace():
                continue
            if current is None:
                # No reference seen yet (e.g. header cells); storing the
                # string "None" as a reference would pollute the data set.
                continue
            if verbose:
                print(str(text) + "-->" + str(current))
            r.sadd(str(text), str(current))

# Data format in Redis

#with open('./tmp/vfeed.db.tgz', 'wb') as fp:
#    shutil.copyfileobj(u, fp)
#t = tarfile.open(name='./tmp/vfeed.db.tgz', mode='r')
#t.extract('vfeed.db', path='./tmp/')
#t.close
# Each key is a CVE ID; its value is a Redis SET of references (REF):
# CVEID -> SET of REF
# REF is VENDOR:THEIRID

# After a successful run, record the feed's Last-Modified header under the
# 'ref' entry so the next run can detect an unchanged feed.
last_modified = u.headers['last-modified']
info.update(
    {'db': 'ref'},
    {"$set": {'last-modified': last_modified}},
    upsert=True,
)