前端之家收集整理的这篇文章主要介绍了
《description 中 fits 解析——正则表达式》,
前端之家小编觉得挺不错的,现在分享给大家,也给大家做个参考。
from functools import reduce
import numpy as np
import pandas as pd
import re
from bs4 import BeautifulSoup
# Ordered (pattern, replacement) clean-up rules, compiled once at import time.
# Every pattern is case-insensitive and multi-line. Order matters:
#   1. cut everything from a trailing-section keyword to the end of the text,
#   2. turn block-closing tags and <br> into newlines, then drop other tags,
#   3. decode the handful of HTML entities that appear in the data,
#   4. remove leading whitespace and everything from a line starting with
#      "make" to the end of the text.
# NOTE(review): the keyword rules are unanchored, so e.g. "position" also
# matches inside longer words or inside tag attributes - confirm that this is
# acceptable for the input data.
_DESCRIPTION_RULES = tuple(
    (re.compile(pattern, flags=re.I | re.M), replacement)
    for pattern, replacement in (
        (r'Item[- ]?offered[\s\S]*$', ""),
        (r'Item[- ]?Included[\s\S]*$', ""),
        (r'position[\s\S]*$', ""),
        (r'Material[\s\S]*$', ""),
        (r'Item[- ]?Condition[\s\S]*$', ""),
        (r'</(div|h|p)>', '\n'),
        (r'<br.*?>', '\n'),
        (r'<[^>]+?>', ''),
        (r'&nbsp;', ' '),
        (r'&amp;', '&'),
        (r'&lt;', '<'),
        (r'&gt;', '>'),
        (r'&quot;', '"'),
        (r'^[\n\s]*', ""),
        (r'^\s+', ""),
        (r'^make[\s\S]*$', ""),
    )
)


def get_description_string(description):
    """Return the plain-text fitment part of an HTML product description.

    The rules in ``_DESCRIPTION_RULES`` are applied one after another, each on
    the output of the previous one, and the final text is stripped of
    surrounding whitespace.

    Args:
        description: Raw description text (may contain HTML). ``None`` is
            treated as an empty description.

    Returns:
        The cleaned text, or ``""`` when nothing is left.
    """
    if description is None:
        return ""
    text = description
    for pattern, replacement in _DESCRIPTION_RULES:
        text = pattern.sub(replacement, text)
    return text.strip()
def description_parse(infile, outfile):
    """Clean the description of every row of a workbook and save the result.

    Reads ``infile`` with pandas, replaces missing ``description`` values by
    an empty string, runs ``get_description_string`` on each description and
    stores the result in a ``fit`` column, then writes the whole frame to
    ``outfile`` without the index column.

    Args:
        infile: Path of the Excel workbook to read. It must contain the
            columns ``product_sku`` and ``description``.
        outfile: Path of the Excel workbook to write.
    """
    # read_excel takes no ``encoding`` keyword; passing one raises TypeError
    # on current pandas versions.
    df = pd.read_excel(infile)
    df["description"] = df["description"].fillna("")
    # DataFrame.info() prints its report itself and returns None, so it must
    # not be wrapped in print().
    df.info()

    fits = []
    for sku, raw_description in zip(df["product_sku"], df["description"]):
        fit = get_description_string(raw_description)
        # Progress output: the SKU followed by its cleaned description.
        print(sku)
        print(fit)
        fits.append(fit)

    # Assign the column in one step instead of growing it cell by cell.
    df["fit"] = fits
    df.to_excel(outfile, index=False)
# Script entry: parse the ACES fitment workbook in the working directory and
# write the cleaned copy next to it.
SOURCE_WORKBOOK = "ACES_fitment_description.xlsx"
PARSED_WORKBOOK = "ACES_fitment_description_parse.xlsx"
description_parse(SOURCE_WORKBOOK, PARSED_WORKBOOK)