# Source code for procs

'''
Python module for data processing and lithofacies modelling

'''
from __future__ import annotations

from warnings import filterwarnings
filterwarnings('ignore')
import pandas as pd
import numpy as np
from typing import Optional


def set_alias(df:pd.DataFrame, DEPTH:str, GR:str, RT:str, NPHI:str, RHOB:str, DT:Optional[str]=None) -> pd.DataFrame:
    r'''
    Function to rename the log curves in order to maintain petrophysics conventions

    The input dataframe is not modified; a renamed dataframe is returned.
    Columns that are not named in the arguments are left unchanged.

    Parameters
    ----------
    df : pd.DataFrame
        dataframe object

    DEPTH : str
        Depth column

    GR : str
        Gamma ray column

    RT : str
        Resistivity column

    NPHI : str
        Neutron porosity column

    RHOB : str
        Bulk density column

    DT : str, default None
        Sonic column (optional). When None, no column is renamed to 'DT'.

    Returns
    -------
    Returns data of renamed log curves

    Example
    -------
    >>> df = set_alias(df, 'DEPT', 'GR', 'RES', 'NPHI', 'RHOB')
    >>> print(list(df.columns))
    ['DEPTH', 'GR', 'RT', 'NPHI', 'RHOB']
    '''
    # Build the mapping once; the sonic curve is only added when supplied.
    aliases = {DEPTH: 'DEPTH', GR: 'GR', RT: 'RT', NPHI: 'NPHI', RHOB: 'RHOB'}
    if DT is not None:
        aliases[DT] = 'DT'

    return df.rename(columns=aliases)
def process_data(df:pd.DataFrame, gr:str, rt:str, nphi:str, rhob:str, dt:Optional[str]=None, trim:str='both') -> pd.DataFrame:
    r'''
    Function to preprocess data before beginning petrophysics workflow.

    Every -999.00 entry in the dataframe is replaced with NaN, then the log
    curves are trimmed to the conventional limits hard-coded below.
    To use user-defined limits instead, refer to the `trim()` function.

    Arguments
    ---------
    df : pd.DataFrame
        dataframe object. It is left unmodified.

    gr : str
        Gamma ray column

    rt : str
        Resistivity column

    nphi : str
        Neutron porosity column

    rhob : str
        Bulk density column

    dt : str, default None
        Sonic column (optional). Skipped when None.

    trim : str default 'both'
        Conditions for trim arbitrary values

        * 'max' : to trim values higher than conventional maximum values
        * 'min' : to trim values lower than conventional lower values
        * default 'both' : to trim both lower and higher values to conventional high and lower values

        Any other value performs no trimming; only the -999.00 replacement
        is applied.

    Returns
    -------
    A new copy of dataframe containing processed data

    Notes
    -----
    Limits applied (lower, upper): GR (0, 150), RT (0.2, 2000),
    NPHI (-0.15, 0.45), RHOB (1.95, 2.95), DT (40, 200).
    With ``trim='max'`` the DT upper limit is 150 instead of 200.

    Example
    -------
    >>> df = process_data(df, 'GR', 'RT', 'NPHI', 'RHOB')
    >>> bool(df['GR'].max() <= 150)
    True
    '''
    # Non-inplace replace returns a new frame, so the caller's dataframe is
    # never touched and the "new copy" promise in the docstring holds.
    data = df.replace(-999.00, np.nan)

    # (column, conventional minimum, conventional maximum)
    limits = [
        (gr, 0, 150),
        (rt, 0.2, 2000),
        (nphi, -.15, .45),
        (rhob, 1.95, 2.95),
    ]
    if dt is not None:
        # NOTE(review): the sonic upper limit is 150 for 'max' but 200 for
        # 'both'. Kept as found; confirm which value is intended.
        dt_upper = 150 if trim == 'max' else 200
        limits.append((dt, 40, dt_upper))

    trim_upper = trim in ('max', 'both')
    trim_lower = trim in ('min', 'both')

    for col, lower, upper in limits:
        # NaN compares False against either bound, so missing samples stay NaN.
        if trim_upper:
            data[col] = data[col].mask(data[col] > upper, upper)
        if trim_lower:
            data[col] = data[col].mask(data[col] < lower, lower)

    return data
def trim(df:pd.DataFrame, col:str, lower:int|float, upper:int|float) -> pd.DataFrame:
    '''
    Function to preprocess data by trimming arbitrary values

    Values of `col` below `lower` are set to `lower` and values above
    `upper` are set to `upper`. The column is overwritten on the dataframe
    that was passed in, and that same dataframe is returned.

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe (modified in place)

    col : str
        Log curve to trim its values

    lower : int or float
        Lower limit or minimum value

    upper : int or float
        Upper limit or maximum value

    Returns
    -------
    Dataframe with user defined log limits

    Raises
    ------
    AssertionError
        If `col` is not a column of `df`.

    Example
    -------
    >>> trim(df, 'GR', lower=0, upper=200)
    '''
    # Raised explicitly rather than via `assert` so the check still runs under
    # `python -O`; exception type and message are unchanged for callers.
    if col not in df.columns:
        raise AssertionError(f'{col} not in dataframe.')

    df[col] = df[col].mask(df[col] < lower, lower)
    df[col] = df[col].mask(df[col] > upper, upper)

    return df
def _carbonate_facies(value):
    '''Return the carbonate-environment facies label for one gamma ray value, or None when it is missing or below 8.'''
    # `not value >= 8.` is also True for NaN, so missing samples get no label.
    if not value >= 8.:
        return None
    if value < 11.:
        return 'Anhydrite'
    if value < 15.:
        return 'Limestone'
    if value <= 40.:
        return 'Dolomite'
    if value <= 75.:
        return 'Sandstone'
    return 'Shale'


def model_facies(df:pd.DataFrame, gr:str, env:str='SS') -> pd.DataFrame:
    '''
    Models lithofacies from Gamma ray log specific to a particular environment

    A 'litho' column is written onto the dataframe that was passed in, and
    that same dataframe is returned.

    Parameters
    ----------
    df : pd.DataFrame
        dataframe (modified in place)

    gr : str
        Gamma ray log column

    env : str
        Environment type. Either siliciclastic or carbonate

        * 'SS' for Siliclastic (Shale and Sandstone) environment.
          GR < 75 is 'Sandstone', everything else (including missing
          values) is 'Shale'.
        * 'CO' for carbonate (Anhydrite, Limestone, Dolomite, Sandstone, Shale) environment.
          8 <= GR < 11 'Anhydrite', 11 <= GR < 15 'Limestone',
          15 <= GR <= 40 'Dolomite', 40 < GR <= 75 'Sandstone',
          GR > 75 'Shale'. Missing values and GR < 8 get no label.

    Returns
    -------
    The dataframe with the added 'litho' column

    Raises
    ------
    ValueError
        If `env` is neither 'SS' nor 'CO'.

    Example
    -------
    >>> df = model_facies(df, gr='GR', env='SS')
    '''
    if env == 'SS':
        litho = ['Sandstone' if value < 75.0 else 'Shale' for value in df[gr]]
    elif env == 'CO':
        # One label per row, so the assignment below can never fail on a
        # length mismatch when a value falls outside every range.
        litho = [_carbonate_facies(value) for value in df[gr]]
    else:
        raise ValueError(f"env must be 'SS' or 'CO', got {env!r}")

    df['litho'] = litho

    return df