前言
Python作为当前最流行最火的编程语言,主要有三大作用:
- web开发:比如web框架Django
- 数据科学:包括机器学习、数据分析和数据可视化
- 脚本(自动化处理,比如数据处理)
本文主要针对Python中有序字典使用方法和excel操作方法进行总结,并给出实例。
1、参考
【Python系列】Python处理csv文件(读写操作)
【Python系列】创建excel文档.csv
【Python系列】excel读写操作以及对应插件xlwd/xlrd安装方法
2、Python中有序字典用法
from collections import OrderedDict
dict =OrderedDict()
dict['foo']=3
dcit['aol']=1
3、Python中excel操作方法总结
【Python系列】excel读写操作以及对应插件xlwd/xlrd安装方法
【Python系列】Python处理csv文件
【Python系列】创建excel文档.csv
4、Python实战
==提取特定格式的txt文本数据到excel中==
4.1、示例1
#-*- coding:utf-8 -*-
# Function: get data from multiple txt with certern format to excel
import os
import re
import sys
import glob
import shutil
import commands
import subprocess
import subprocess as sub
import csv
import codecs
from collections import OrderedDict
space = ' '
delimiter = '/'
MAX_FILE_NAME = 100
count = [0,0]
#new excel file
def create_excel(excel_name):
file = open(excel_name,'wb')
writer=csv.writer(file)
file.close()
#extract file name
def get_file_name(fullfilename):
tmp = fullfilename.strip()
name = os.path.split(tmp)[-1] #提取出文件名,不包含路径
return os.path.splitext(name)[0]
#获取目录以及子目录下的所有文件
allfiles = []
def get_all_files(rawdir):
allfilelist = os.listdir(rawdir)
for f in allfilelist:
filepath = os.path.join(rawdir, f)
if os.path.isdir(filepath):
get_all_files(filepath)
allfiles.append(filepath)
return allfiles
#获取特定类型的文件
def get_raw_log(rawdir):
isfile = 0
if os.path.isdir(rawdir):
get_all_files(rawdir)
files = [f for f in allfiles if re.research('txt$',f)]
elif os.path.isfile(rawdir)
iffile = 1;
files = [rawdir]
else:
files = []
print("Error: " + sys.argv[1] + " is not a dir or file!")
files.sort(key=str.lower)
return [files, isfile]
# collect data
def collect_data_to_excel(excelname, inputfile):
pFile = open(inputfile, 'a+')
lines = pFile.readlines()
data = OrderedDict()
splitValue = []
filename = get_file_name(inputfile)
for i in range(len(lines)):
if lines[i].find('Anchor') != -1:
splitValue = ((lines[i].split()).split(':')[3] ##此处根据具体文本数据格式进行进行分割提取
data[filename] = [filename, splitValue[0], splitValue[1], splitValue[2], splitValue[3], 0]
if lines[i].find('Anchor') != -1:
splitValue2 = ((lines[i].split()).split(':')[3] ##此处根据具体文本数据格式进行进行分割提取
data[filename][5] = splitValue2
pFile = open(excelname, 'a+')
pFile.write(codecs.BOM_UTF8)
csv.writer(pFile, dialect='excel')
if count[0]==0:
title=['name', 'key1', 'key2', 'key3', 'key4', 'key5']
csv_writer.writerow(title)
count[0]=count[0]+1
for key, value in data.items():
csv_writer.writerow(value)
pFile.close()
########main Function Entry##########
if __name__=='__main__':
if(len(sys.argv) < 3):
print('Usage: '+ '<CollectDataDir> \n')
print('For example: python autoCollectData.py E:\test\')
os._exit(0)
collectDataDir = sys.argv[1]
outExcelData = collectDataDir + '__result.csv'
create_excel(outExcelData)
[files,isfile] = get_raw_log(collectDataDir) #获取当前目录中指定格式的文件
#遍历每个指定格式的文件进行数据提取
for collectData in files:
ret = collect_data_to_excel(outExcelData,collectData)
if(ret!=0):
print("--------Process finished!--------")
os._exit(0)
4.2、示例2
#-*- coding:utf-8 -*-
# Function: get data from multiple txt with certern format to excel
import os
import re
import sys
import glob
import shutil
import commands
import subprocess
import subprocess as sub
import csv
import codecs
from collections import OrderedDict
space = ' '
delimiter = '/'
MAX_FILE_NAME = 100
#new excel file
def create_excel(excel_name):
file = open(excel_name,'wb')
writer=csv.writer(file)
file.close()
#extract file name
def get_file_name(fullfilename):
tmp = fullfilename.strip()
name = os.path.split(tmp)[-1] #提取出文件名,不包含路径
return os.path.splitext(name)[0]
#获取目录以及子目录下的所有文件
allfiles = []
def get_all_files(rawdir):
allfilelist = os.listdir(rawdir)
for f in allfilelist:
filepath = os.path.join(rawdir, f)
if os.path.isdir(filepath):
get_all_files(filepath)
allfiles.append(filepath)
return allfiles
def make_all_dir(path):
path = path.strip()
isExist = os.path.exists(path)
if (not isExist):
os.makedirs(path)
print(path+' Successful Create!')
return True
#获取特定类型的文件
def get_raw_log(rawdir):
isfile = 0
if os.path.isdir(rawdir):
get_all_files(rawdir)
files = [f for f in allfiles if re.search('txt$',f)]
elif os.path.isfile(rawdir):
iffile = 1
files = [rawdir]
else:
files = []
print("Error: " + sys.argv[1] + " is not a dir or file!")
files.sort(key=str.lower)
return [files, isfile]
count = [0,0]
# collect data
def collect_data_to_excel(excelname, inputfile):
pFile = open(inputfile, 'a+')
lines = pFile.readlines()
data = OrderedDict()
splitValue = []
filename = get_file_name(inputfile)
for i in range(len(lines)):
#print lines
if lines[i].find('"mac":') != -1 and i==8:
splitValue = (lines[i].split())[0].split(",")[0].split("\"")[-2]
print splitValue
data[filename] = [filename, splitValue, 0, 0, 0, 0, 0, 0]
if lines[i].find('"imei":') != -1 and i==43:
splitValue3 = (lines[i].split())[0].split(",")[0].split("\"")[-2]
print splitValue3
data[filename][3] = str(splitValue3)+'\t'
if lines[i].find('"imsi":') != -1 and i==44:
splitValue4 = (lines[i].split())[0].split(",")[0].split("\"")[-2]
print splitValue4
data[filename][4] = str(splitValue4)+'\t'
if lines[i].find('"iccid":') != -1 and i==45:
splitValue2 = (lines[i].split())[0].split(",")[0].split("\"")[-2]
print splitValue2
data[filename][2] = str(splitValue2)+'\t'
if lines[i].find('"imei":') != -1 and i==95:
splitValue5 = (lines[i].split())[0].split(",")[0].split("\"")[-2]
print splitValue5
data[filename][5] = str(splitValue5)+'\t'
if lines[i].find('"imsi":') != -1 and i==96:
splitValue6 = (lines[i].split())[0].split(",")[0].split("\"")[-2]
print splitValue6
data[filename][6] = str(splitValue6)+'\t'
if lines[i].find('"iccid":') != -1 and i==97:
splitValue7 = (lines[i].split())[0].split(",")[0].split("\"")[-2]
print splitValue7
data[filename][7] = str(splitValue7)+'\t'
pFile = open(excelname, 'ab+') # newline=''
pFile.write(codecs.BOM_UTF8)
csv_writer = csv.writer(pFile, dialect='excel')
if count[0]==0:
title=['filename', 'MAC', 'ICCID', 'IMEI', 'IMSI', 'ICCID', 'IMEI', 'IMSI']
csv_writer.writerow(title)
count[0]=count[0]+1
for key, value in data.items():
csv_writer.writerow(value)
pFile.close()
########main Function Entry##########
if __name__ == '__main__':
if(len(sys.argv) < 3):
print("Usage: autoExtractToExcel.py targetDir outResultDir\n")
sys.exit(1)
collectDataDir = sys.argv[1]
outResultDir = sys.argv[2]
if(not os.path.exists(outResultDir)):
make_all_dir(outResultDir)
outExcelData = outResultDir + delimiter + '__result.xls'
create_excel(outExcelData)
[files,isfile] = get_raw_log(collectDataDir) #获取当前目录中指定格式的文件
#遍历每个指定格式的文件进行数据提取
for collectData in files:
print collectData
ret = collect_data_to_excel(outExcelData,collectData)
if(ret!=0):
print("--------Process finished!--------")
os._exit(0)
THE END!
本博文只能阅读,谢绝转载,欢迎指出任何有错误或不够清晰的表达。可以在下面评论区评论,也可以邮件至 2963033731@qq.com