description中fits解析——正则表达式

前端之家收集整理的这篇文章主要介绍了description中fits解析——正则表达式,前端之家小编觉得挺不错的,现在分享给大家,也给大家做个参考。
from functools import reduce
import numpy as np
import pandas as pd
import re
from bs4 import BeautifulSoup
# Ordered (pattern, replacement) clean-up rules. Order matters: trailing
# sections are cut first, then HTML tags are turned into newlines or removed,
# then entities are decoded, and finally leading whitespace is trimmed.
# Every pattern is compiled with re.I | re.M, matching the original call.
#
# NOTE(review): the rule list was garbled in the copy of this file that was
# reviewed -- several rules had lost their replacement element. They are
# reconstructed here as "" (and "\n" for <br>); confirm against the original.
# A disabled rule, r'^[\s\S](fitment)' -> "", was commented out in the source
# and is intentionally not applied.
_DESCRIPTION_RULES = tuple(
    (re.compile(pattern, flags=re.I | re.M), replacement)
    for pattern, replacement in (
        # Drop everything from these section headings to the end of the text.
        (r'Item[- ]?offered[\s\S]*$', ""),
        (r'Item[- ]?Included[\s\S]*$', ""),
        (r'position[\s\S]*$', ""),
        (r'Material[\s\S]*$', ""),
        (r'Item[- ]?Condition[\s\S]*$', ""),
        # Closing block tags and <br> become line breaks; other tags vanish.
        (r'</(div|h|p)>', '\n'),
        (r'<br.*?>', '\n'),
        (r'<[^>]+?>', ''),
        # Decode the handful of HTML entities the data uses.
        (r'&nbsp;', ' '),
        (r'&amp;', '&'),
        (r'&lt;', '<'),
        (r'&gt;', '>'),
        (r'&quot;', '"'),
        # Strip leading whitespace / blank lines at the start of every line.
        (r'^[\n\s]*', ""),
        (r'^\s+', ""),
        # Drop everything from the first line that starts with "make".
        (r'^make[\s\S]*$', ""),
    )
)


def get_description_string(description):
    """Return the cleaned text of an HTML product description.

    The rules in ``_DESCRIPTION_RULES`` are applied in order, each to the
    output of the previous one, and the final text is stripped of leading
    and trailing whitespace.

    Args:
        description: Raw description string (may contain HTML markup).

    Returns:
        The cleaned string; ``""`` when nothing is left.
    """
    result = description
    for pattern, replacement in _DESCRIPTION_RULES:
        result = pattern.sub(replacement, result)
    return result.strip()
def description_parse(infile,outfile):
    """Clean the ``description`` column of a workbook and save it as ``fit``.

    Reads ``infile`` with pandas, replaces missing descriptions with ``""``,
    runs every description through ``get_description_string`` and writes the
    results to a new ``fit`` column. Each row's ``product_sku`` and cleaned
    text are printed as progress output. The frame is then written to
    ``outfile`` without the index.

    Args:
        infile: Path of the Excel workbook to read; it must contain
            ``product_sku`` and ``description`` columns.
        outfile: Path of the Excel workbook to write.
    """
    # The original passed encoding="utf-8"; current pandas read_excel does
    # not accept that keyword and raises TypeError, so it is dropped.
    df = pd.read_excel(infile)
    df["description"] = df["description"].fillna("")
    # DataFrame.info() prints its report itself and returns None, so it is
    # called directly instead of being wrapped in print().
    df.info()

    fits = []
    for sku, raw in zip(df["product_sku"], df["description"]):
        print(sku)
        cleaned = get_description_string(raw)
        print(cleaned)
        fits.append(cleaned)

    # Assign the whole column at once; this also creates "fit" for an empty
    # frame and does not depend on the index labels being unique.
    df["fit"] = fits
    df.to_excel(outfile, index=False)


if __name__ == "__main__":
    # Run the conversion only when executed as a script, so importing this
    # module does not trigger Excel reads and writes.
    description_parse("ACES_fitment_description.xlsx","ACES_fitment_description_parse.xlsx")



猜你在找的正则表达式相关文章