xapian #1

Merged
cayop merged 26 commits from xapian into master 2024-09-17 10:11:28 +00:00
4 changed files with 109 additions and 129 deletions
Showing only changes of commit 247ba5bd15 - Show all commits

View file

@ -1,11 +1,6 @@
import json
import uuid
import hashlib
import datetime
from django import forms
from evidence.models import Annotation
from evidence.xapian import index
from utils.device import create_annotation, create_doc, create_index
DEVICE_TYPES = [
("Desktop", "Desktop"),
@ -28,7 +23,7 @@ DEVICE_TYPES = [
class DeviceForm(forms.Form):
type = forms.ChoiceField(choices = DEVICE_TYPES, required=False)
amount = forms.IntegerField(required=False, initial=1)
tag = forms.CharField(required=False)
customer_id = forms.CharField(required=False)
name = forms.CharField(required=False)
value = forms.CharField(required=False)
@ -42,63 +37,29 @@ class BaseDeviceFormSet(forms.BaseFormSet):
def save(self, user, commit=True):
self.user = user
doc = {}
device = {}
kv = {}
self.uuid = str(uuid.uuid4())
tag = hashlib.sha3_256(self.uuid.encode()).hexdigest()
row = {}
for f in self.forms:
d = f.cleaned_data
if not d:
continue
if d.get("type"):
device["type"] = d["type"]
row["type"] = d["type"]
if d.get("amount"):
device["amount"] = d["amount"]
row["amount"] = d["amount"]
if d.get("name"):
kv[d["name"]] = d.get("value", '')
if d.get("tag"):
tag = d["tag"]
if not device:
return
doc["device"] = device
if kv:
doc["kv"] = kv
date = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
if doc:
doc["uuid"] = self.uuid
doc["endTime"] = date
doc["software"] = "DeviceHub"
doc["CUSTOMER_ID"] = tag
doc["type"] = "WebSnapshot"
row[d["name"]] = d.get("value", '')
if d.get("customer_id"):
row['CUSTOMER_ID']= d["customer_id"]
doc = create_doc(row)
if not commit:
return doc
self.index(doc)
self.create_annotations(tag)
create_index(doc)
create_annotation(doc, user, commit=commit)
return doc
def index(self, doc):
snap = json.dumps(doc)
index(self.uuid, snap)
def create_annotations(self, tag):
Annotation.objects.create(
uuid=self.uuid,
owner=self.user,
type=Annotation.Type.SYSTEM,
key='CUSTOM_ID',
value=tag
)
DeviceFormSet = forms.formset_factory(form=DeviceForm, formset=BaseDeviceFormSet, extra=1)

View file

@ -61,7 +61,7 @@
</div>
<div class="row mb-2">
<div class="col">
{% bootstrap_field form.0.tag %}
{% bootstrap_field form.0.customer_id %}
</div>
</div>
{% for f in form %}

View file

@ -1,16 +1,13 @@
import json
import uuid
import hashlib
import datetime
import pandas as pd
from django import forms
from django.core.exceptions import ValidationError
from django.utils.translation import gettext_lazy as _
from utils.device import create_annotation, create_doc, create_index
from utils.forms import MultipleFileField
from device.models import Device
from evidence.parse import Build
from evidence.xapian import index
from evidence.models import Annotation
@ -104,11 +101,9 @@ class ImportForm(forms.Form):
self.exception(_("The file you try to import is empty!"))
for n in data_pd.keys():
# import pdb; pdb.set_trace()
if 'type' not in [x.lower() for x in data_pd[n]]:
raise ValidationError("You need a column with name 'type'")
for k, v in data_pd[n].items():
if k.lower() == "type":
if v not in Device.Types.values:
@ -118,83 +113,18 @@ class ImportForm(forms.Form):
return data
def save(self, commit=True):
table = []
for row in self.rows:
table.append(self.create_annotation(row))
doc = create_doc(row)
annotation = create_annotation(doc, self.user)
table.append((doc, annotation))
if commit:
for doc, cred in table:
cred.save()
self.index(doc)
create_index(doc)
return table
return
def create_annotation(self, row):
doc = self.create_doc(row)
if not doc:
return []
data = {
'uuid': doc['uuid'],
'owner': self.user,
'type': Annotation.Type.SYSTEM,
'key': 'CUSTOM_ID',
'value': doc['CUSTOMER_ID'],
}
return [doc, Annotation(**data)]
def index(self, doc):
_uuid = doc['uuid']
ev = json.dumps(doc)
index(_uuid, ev)
def create_doc(self, row):
doc = {}
device = {"manufacturer": "", "model": ""}
kv = {}
_uuid = str(uuid.uuid4())
tag = hashlib.sha3_256(_uuid.encode()).hexdigest()
for k, v in row.items():
if k.upper() == "CUSTOM_ID":
tag = v
if not v:
continue
if k.lower() == "type":
device["type"] = v
elif k.lower() == "amount":
try:
device["amount"] = int(v)
except Exception:
device["amount"] = 1
else:
kv[k] = v
if 'amount' not in row.keys():
device["amount"] = 1
if not device:
return
doc["device"] = device
if kv:
doc["kv"] = kv
date = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
if doc:
doc["uuid"] = _uuid
doc["endTime"] = date
doc["software"] = "DeviceHub"
doc["CUSTOMER_ID"] = tag
doc["type"] = "WebSnapshot"
return doc

89
utils/device.py Normal file
View file

@ -0,0 +1,89 @@
import json
import uuid
import hashlib
import datetime
from django.core.exceptions import ValidationError
from evidence.xapian import index
from evidence.models import Annotation
from device.models import Device
def create_doc(data):
    """Build a "WebSnapshot" document dict from a flat mapping of fields.

    Keys are matched case-insensitively:

    * ``CUSTOMER_ID`` overrides the generated customer id.
    * ``type`` is stored as the device type after validation.
    * ``amount`` is stored as an int; values that cannot be converted
      leave the default amount of 1 in place.
    * every other key is copied verbatim into the ``kv`` section.

    Entries whose value is falsy are ignored.

    Args:
        data: mapping of field name to value (e.g. a form row or an
            imported spreadsheet row).

    Returns:
        The document dict, or ``None`` when ``data`` is empty/falsy.

    Raises:
        ValidationError: if a ``type`` value is not one of
            ``Device.Types.values``.
    """
    if not data:
        return

    device = {"manufacturer": "", "model": "", "amount": 1}
    kv = {}
    _uuid = str(uuid.uuid4())
    # Default customer id is derived from the freshly generated uuid; a
    # CUSTOMER_ID entry in ``data`` replaces it below.
    customer_id = hashlib.sha3_256(_uuid.encode()).hexdigest()

    for k, v in data.items():
        if not v:
            continue

        if k.upper() == "CUSTOMER_ID":
            customer_id = v
            continue

        key = k.lower()
        if key == "type":
            if v not in Device.Types.values:
                raise ValidationError("{} is not a valid device".format(v))
            device["type"] = v
        elif key == "amount":
            try:
                device["amount"] = int(v)
            except (TypeError, ValueError, OverflowError):
                # Best effort: an unparsable amount keeps the default of 1.
                pass
        else:
            kv[k] = v

    doc = {"device": device}
    if kv:
        doc["kv"] = kv

    # Naive local timestamp, second resolution.
    doc["uuid"] = _uuid
    doc["endTime"] = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
    doc["software"] = "DeviceHub"
    doc["CUSTOMER_ID"] = customer_id
    doc["type"] = "WebSnapshot"
    return doc
def create_annotation(doc, user, commit=False):
    """Create the SYSTEM ``CUSTOMER_ID`` annotation for a document.

    Args:
        doc: document dict; must carry truthy ``uuid`` and
            ``CUSTOMER_ID`` entries.
        user: stored as the annotation's ``owner``.
        commit: when true the annotation is created through
            ``Annotation.objects.create``; otherwise an unsaved
            ``Annotation`` instance is returned.

    Returns:
        The ``Annotation`` instance, or an empty list when ``doc`` is
        falsy or lacks a ``uuid`` / ``CUSTOMER_ID`` value.
    """
    usable = bool(doc) and bool(doc.get('uuid')) and bool(doc.get("CUSTOMER_ID"))
    if not usable:
        return []

    fields = dict(
        uuid=doc['uuid'],
        owner=user,
        type=Annotation.Type.SYSTEM,
        key='CUSTOMER_ID',
        value=doc['CUSTOMER_ID'],
    )
    return Annotation.objects.create(**fields) if commit else Annotation(**fields)
def create_index(doc):
    """Serialize ``doc`` to JSON and pass it to ``index`` under its uuid.

    Returns an empty list, without indexing, when ``doc`` is falsy or has
    no truthy ``uuid``; otherwise returns ``None`` after indexing.
    """
    doc_uuid = doc.get('uuid') if doc else None
    if not doc_uuid:
        return []

    index(doc_uuid, json.dumps(doc))