import pandas as pd
import numpy as np
import re
import fiona
#get empty lines
agsData = open('../inputData/Queen_Mary_Reservoir.ags').readlines()
#empty dict to store the line groups
groupDict = {}
for index, line in enumerate(agsData):
if line.startswith('"GROUP"'):
#terms
terms = line.split(",")
#finding the last line
for index2, line2 in enumerate(agsData[index:]):
if line2 == "\n":
lineList = [index+1, index+index2-1]
print(lineList)
#adding the secuence to the dict omiting the " and \n
groupDict[terms[1][1:-2]]=lineList
#break the inner for
break
[1, 4]
[7, 19]
[22, 27]
[30, 33]
[36, 47]
[50, 55]
[58, 1164]
[1167, 1436]
[1439, 1689]
#show line intervals
groupDict
{'PROJ': [1, 4],
'ABBR': [7, 19],
'DICT': [22, 27],
'TRAN': [30, 33],
'TYPE': [36, 47],
'UNIT': [50, 55],
'GEOL': [58, 1164],
'LOCA': [1167, 1436],
'WSTG': [1439, 1689]}
#create empty dataframe for geological information
geolDf = pd.DataFrame(columns=['LOCA_ID', 'GEOL_TOP', 'GEOL_BASE', 'GEOL_DESC'])
#counter for the dataframe index
i = 0
#interpret the numerical data as numbers
for line in agsData[groupDict['GEOL'][0]:groupDict['GEOL'][1]]:
#get rid of the "
line = re.sub('"','',line)
#split the line in columns
lineItems = line.split(",")
#working only with the "DATA" rows
if lineItems[0] == 'DATA':
#interpret numbers as float or int
for index,item in enumerate(lineItems):
try:
if "." in item:
lineItems[index]=float(item)
else:
lineItems[index]=int(item)
except ValueError:
pass
#add first four columns to dataframe
geolDf.loc[i] = lineItems[1:5]
#add counter
i += 1
#review the resulting dataframe
geolDf.head()
|
LOCA_ID |
GEOL_TOP |
GEOL_BASE |
GEOL_DESC |
0 |
572547 |
0.00 |
1.22 |
Soil |
1 |
572547 |
1.22 |
2.44 |
Dirty ballast |
2 |
572547 |
2.44 |
8.84 |
Clean yellow sandy ballast |
3 |
572547 |
8.84 |
11.89 |
London Clay (slightly sandy) |
4 |
572548 |
0.00 |
0.30 |
Soil |
#change id to integer
geolDf['LOCA_ID'] = pd.to_numeric(geolDf['LOCA_ID'], downcast="integer")
#check datatypes
geolDf.dtypes
LOCA_ID int32
GEOL_TOP float64
GEOL_BASE float64
GEOL_DESC object
dtype: object
#get unique values of lithology
litoArray = geolDf["GEOL_DESC"].unique()
litoArray
array(['Soil', 'Dirty ballast', 'Clean yellow sandy ballast',
'London Clay (slightly sandy)', 'London Clay (good)',
'Yellow loam', 'Dirty loamy ballast', 'Loam (yellow)',
'Running sand (blue)', 'London Clay (sandy)', 'Blue running sand',
'London Clay (very sandy)', 'Blue running sand and silt',
'London Clay (very sandy). Stood to bore without driving tubing',
'Loam', 'London Clay (rather sandy)', 'Clean grey sandy ballast',
'Clean yellow sand', 'Clean yellow sandy', 'Black loam', 'Ballast',
'Clay', 'Running sand', 'Clean sandy ballast',
'Grey Thames ballast', 'Grey sandy silt', 'Hard blue sand',
'Clean Thames ballast', 'Blue clay', 'Clean red sandy ballast',
'Loamy ballast', 'Clean grey ballast', 'Brown loam'], dtype=object)
for lito in litoArray:
print('%s has %d records'%
(lito,geolDf[geolDf["GEOL_DESC"]==lito].shape[0]))
Soil has 224 records
Dirty ballast has 219 records
Clean yellow sandy ballast has 220 records
London Clay (slightly sandy) has 52 records
London Clay (good) has 142 records
Yellow loam has 48 records
Dirty loamy ballast has 2 records
Loam (yellow) has 11 records
Running sand (blue) has 1 records
London Clay (sandy) has 9 records
Blue running sand has 36 records
London Clay (very sandy) has 33 records
Blue running sand and silt has 1 records
London Clay (very sandy). Stood to bore without driving tubing has 1 records
Loam has 3 records
London Clay (rather sandy) has 13 records
Clean grey sandy ballast has 19 records
Clean yellow sand has 1 records
Clean yellow sandy has 1 records
Black loam has 2 records
Ballast has 19 records
Clay has 18 records
Running sand has 2 records
Clean sandy ballast has 5 records
Grey Thames ballast has 5 records
Grey sandy silt has 1 records
Hard blue sand has 1 records
Clean Thames ballast has 1 records
Blue clay has 1 records
Clean red sandy ballast has 3 records
Loamy ballast has 1 records
Clean grey ballast has 1 records
Brown loam has 7 records
#create column with code other for storing lito codes
geolDf['litoCode'] = int(7)
#define lito dict for reclass
litoDict = {
'soil':1,
'dirty ballast':2,
'sandy ballast':3,
'london clay':4,
'loam':5,
'sand':6,
'other':7
}
for index, row in geolDf.iterrows():
for lito in litoDict:
if lito in row["GEOL_DESC"].lower():
geolDf.loc[index,'litoCode'] = litoDict[lito]
break
geolDf.tail()
|
LOCA_ID |
GEOL_TOP |
GEOL_BASE |
GEOL_DESC |
litoCode |
1098 |
575212 |
1.83 |
7.92 |
Clean yellow sandy ballast |
3 |
1099 |
575212 |
7.92 |
9.14 |
Blue running sand |
6 |
1100 |
575212 |
9.14 |
12.19 |
London Clay (very sandy) |
4 |
1101 |
575213 |
0.00 |
0.30 |
Soil |
1 |
1102 |
575213 |
0.30 |
4.88 |
Ballast |
7 |
#check the amount of reclassified records
#get unique values of lithology code
litoCodes = geolDf["litoCode"].unique()
print(litoCodes)
for lito in litoCodes:
print('%s has %d records'%
(lito,geolDf[geolDf["litoCode"]==lito].shape[0]))
[1 2 3 4 5 6 7]
1 has 224 records
2 has 219 records
3 has 247 records
4 has 250 records
5 has 74 records
6 has 44 records
7 has 45 records
#set LOCA_ID as Index
geolDf = geolDf.set_index('LOCA_ID')
geolDf.head()
|
GEOL_TOP |
GEOL_BASE |
GEOL_DESC |
litoCode |
LOCA_ID |
|
|
|
|
572547 |
0.00 |
1.22 |
Soil |
1 |
572547 |
1.22 |
2.44 |
Dirty ballast |
2 |
572547 |
2.44 |
8.84 |
Clean yellow sandy ballast |
3 |
572547 |
8.84 |
11.89 |
London Clay (slightly sandy) |
4 |
572548 |
0.00 |
0.30 |
Soil |
1 |
#export lito to csv
geolDf.to_csv('../Csv/litoData.csv')
Working with borehole location
groupDict['LOCA']
[1167, 1436]
#create empty dataframe for geological information
#boreholeid, easting, northing, groundelevation
locaDf = pd.DataFrame(columns=['LOCA_ID', 'LOCA_NATE', 'LOCA_NATN', 'LOCA_GL'])
#counter for the dataframe index
i = 0
#interpret the numerical data as numbers
for line in agsData[groupDict['LOCA'][0]:groupDict['LOCA'][1]]:
#get rid of the "
line = re.sub('"','',line)
#split the line in columns
lineItems = line.split(",")
#working only with the "DATA" rows
if lineItems[0] == 'DATA':
#interpret numbers as float or int
for index,item in enumerate(lineItems):
try:
if "." in item:
lineItems[index]=float(item)
else:
lineItems[index]=int(item)
except ValueError:
pass
#add first four columns to dataframe
#print([lineItems[1], lineItems[4], lineItems[5], lineItems[7]])
locaDf.loc[i] = [int(lineItems[1]), lineItems[4], lineItems[5], lineItems[7]]
#add counter
i += 1
#change id to integer
locaDf['LOCA_ID'] = pd.to_numeric(locaDf['LOCA_ID'], downcast="integer")
#check datatypes
locaDf.dtypes
LOCA_ID int32
LOCA_NATE float64
LOCA_NATN float64
LOCA_GL float64
dtype: object
#set ID as index
locaDf = locaDf.set_index('LOCA_ID')
locaDf.head()
|
LOCA_NATE |
LOCA_NATN |
LOCA_GL |
LOCA_ID |
|
|
|
572547 |
506930.0 |
169160.0 |
13.21 |
572548 |
506930.0 |
169250.0 |
12.91 |
572549 |
506950.0 |
169340.0 |
12.58 |
572550 |
506960.0 |
169430.0 |
12.25 |
572551 |
506970.0 |
169520.0 |
12.38 |
#create point shapefile
# define schema
schema = {
'geometry':'Point',
'properties':[('Id','int'),('Easting','float'),('Northing','float'),('Elevation','float')]
}
#open a fiona object
pointShp = fiona.open('../Shp/boreholePoints.shp', mode='w', driver='ESRI Shapefile',
schema = schema, crs = "EPSG:27700")
#iterate over each row in the dataframe and save record
for index, row in locaDf.iterrows():
rowDict = {
'geometry' : {'type':'Point',
'coordinates': (row.LOCA_NATE,row.LOCA_NATN)},
'properties': {'Id' : index,
'Easting':row.LOCA_NATE,
'Northing':row.LOCA_NATN,
'Elevation':row.LOCA_GL},
}
pointShp.write(rowDict)
#close fiona object
pointShp.close()
#export dataframe as csv
locaDf.to_csv('../Csv/locationData.csv')