Explorar el Código

implement input validation

Clovis at parent hace 1 año
padre
commit
c45ce12f2a
Se han modificado 3 ficheros con 84 adiciones y 18 borrados
  1. 9 5
      app/api/endpoints/project.py
  2. 38 13
      app/importData/gsheet.py
  3. 37 0
      app/importData/validation.py

+ 9 - 5
app/api/endpoints/project.py

@@ -12,7 +12,6 @@ from app.models import (
     Slot,
     SlotTemplate,
     SlotTag,
-    SlotTemplate,
     Sms,
     User,
     Volunteer,
@@ -23,7 +22,7 @@ from app.schemas.requests import (
     ProjectSMSBatchRequest,
 )
 from app.schemas.responses import ProjectListResponse, ProjectResponse, SMSResponse
-from app.gsheet import parseGsheet, extract_doc_uid
+from app.importData.gsheet import parseGsheet, extract_doc_uid, ParsingError
 
 router = APIRouter()
 
@@ -124,7 +123,12 @@ async def update_project_from_gsheet(
         p.slots = []
         p.sms = []
         p.volunteers = []
-    df_contact, df_creneau, df_planning = parseGsheet(doc_id, gsheet.satursday_date)
+    # Parse the gsheets
+    try:
+        df_contact, df_creneau, df_planning = parseGsheet(doc_id, gsheet.satursday_date)
+    except ParsingError as exc:
+        raise HTTPException(status_code=422, detail=str(exc))
+
     # Create the volunteer list
     volunteer_map: dict[str, Volunteer] = {}
     for _, row in df_contact.iterrows():
@@ -134,12 +138,12 @@ async def update_project_from_gsheet(
             surname=row["Nom"],
             email=row["Mail"],
             phone_number=row["Tél"],
-            automatic_sms=row["SMS"] == "Oui",
+            automatic_sms=row["SMS"],
         )
         volunteer_map[row.key] = volunteer
         session.add(volunteer)
 
-    # Create creaneau templates
+    # Create creneau templates
     template_map = {}
     tags_map = {}
 

+ 38 - 13
app/gsheet.py → app/importData/gsheet.py

@@ -7,6 +7,10 @@ from uuid import uuid4
 
 import pandas as pd
 import requests
+from pandera.typing import DataFrame
+from pandera.errors import SchemaError
+
+from .validation import ContactSchema, CreneauSchema, CreneauDataSchema
 
 planning_gid = "1001381542"
 creneau_gid = "1884137958"
@@ -61,6 +65,10 @@ class InvalidUrlError(Exception):
     pass
 
 
+class ParsingError(Exception):
+    pass
+
+
 def extract_doc_uid(url: str) -> str:
     res = urlparse(url)
     if res.netloc != "docs.google.com":
@@ -86,7 +94,9 @@ def downloadAndSave(doc_ui, sheet_gid, fname):
         f.write(rep.content)
 
 
-def getContactDataFrame(csv_filename: str, skiprows: int = 2) -> pd.DataFrame:
+def getContactDataFrame(
+    csv_filename: str, skiprows: int = 2
+) -> DataFrame[ContactSchema]:
     df_contact = pd.read_csv(csv_filename, skiprows=skiprows)
     column_to_drop = [name for name in df_contact.columns if "Unnamed" in name]
     df_contact.drop(column_to_drop, axis=1, inplace=True)
@@ -94,14 +104,19 @@ def getContactDataFrame(csv_filename: str, skiprows: int = 2) -> pd.DataFrame:
     # Filter out empty name
     df_contact = df_contact[~df_contact.Nom.isnull()]
     df_contact.reset_index()
-    return df_contact
+    # coerce SMS to boolean
+    df_contact["SMS"] = df_contact["SMS"] == "Oui"
+    # create unique contact key
+    df_contact["key"] = df_contact["Prénom"] + " " + df_contact.Nom.str.slice(0, 1)
+    return ContactSchema.validate(df_contact)
 
 
-def getCreneauDataFrame(csv_filename: str) -> pd.DataFrame:
+def getCreneauDataFrame(csv_filename: str) -> DataFrame[CreneauDataSchema]:
     df_creneau = pd.read_csv(csv_filename)
     df_creneau.columns = ["title", "lieu", "description", "responsable", "tags"]
-    df_creneau[df_creneau.tags.isnull()].tags = ""
-    return df_creneau
+    df_creneau.loc[df_creneau.tags.isnull(), "tags"] = ""
+    df_creneau.loc[df_creneau.lieu.isnull(), "lieu"] = ""
+    return CreneauDataSchema.validate(df_creneau)
 
 
 def getPlanningDataFrame(csv_filename, starting_date, skip_column=3):
@@ -164,7 +179,7 @@ def getPlanningDataFrame(csv_filename, starting_date, skip_column=3):
                 row[j] == "" or row[j] != current_benevole_name
             ):
                 new_creneau = {
-                    "id": uuid4(),
+                    "id": str(uuid4()),
                     "template_id": row[0],
                     "nom": row[1],
                     "benevole_nom": current_benevole_name,
@@ -182,7 +197,7 @@ def getPlanningDataFrame(csv_filename, starting_date, skip_column=3):
                     current_time["end"] = column_to_dates[j]["end"]
         if current_benevole_name != "":
             new_creneau = {
-                "id": uuid4(),
+                "id": str(uuid4()),
                 "template_id": row[0],
                 "nom": row[1],
                 "benevole_nom": current_benevole_name,
@@ -192,7 +207,8 @@ def getPlanningDataFrame(csv_filename, starting_date, skip_column=3):
             list_creneau.append(new_creneau)
 
     print(f"{len(list_creneau)} créneaux trouvés")
-    return pd.DataFrame.from_dict(list_creneau)
+    df = pd.DataFrame.from_dict(list_creneau)
+    return CreneauSchema.validate(df)
 
 
 def parseGsheet(doc_uuid: str, saturday_date: datetime.datetime):
@@ -204,11 +220,20 @@ def parseGsheet(doc_uuid: str, saturday_date: datetime.datetime):
     downloadAndSave(doc_uuid, creneau_gid, fname_creneau)
     downloadAndSave(doc_uuid, benevole_gid, fname_contact)
 
-    df_contact = getContactDataFrame(fname_contact)
-    df_contact["key"] = df_contact["Prénom"] + " " + df_contact.Nom.str.slice(0, 1)
-
-    df_creneau = getCreneauDataFrame(fname_creneau)
-    df_planning = getPlanningDataFrame(fname_planning, saturday_date)
+    try:
+        df_contact = getContactDataFrame(fname_contact)
+    except SchemaError as exc:
+        msg = "Donnée erronée sur les bénévoles\n" + str(exc)
+        raise ParsingError(msg)
+
+    try:
+        df_creneau = getCreneauDataFrame(fname_creneau)
+    except SchemaError as exc:
+        raise ParsingError("Donnée erronée des description des créneaux\n" + str(exc))
+    try:
+        df_planning = getPlanningDataFrame(fname_planning, saturday_date)
+    except SchemaError as exc:
+        raise ParsingError("Donnée erronée des créneaux\n" + str(exc))
 
     os.remove(fname_planning)
     os.remove(fname_creneau)

+ 37 - 0
app/importData/validation.py

@@ -0,0 +1,37 @@
+from typing import Optional
+
+import pandas as pd
+import pandera as pa
+
+from pandera.typing import Series
+
+
+class ContactSchema(pa.DataFrameModel):
+    key: Series[str] = pa.Field(unique=True)
+    Prénom: Series[str]
+    Nom: Series[str]
+    Mail: Series[str]
+    Tél: Series[str]
+    SMS: Series[bool]
+
+
+class CreneauDataSchema(pa.DataFrameModel):
+    title: Series[str]
+    lieu: Optional[Series[str]]
+    description: Series[str]
+    responsable: Series[str]
+    tags: Optional[Series[str]]
+
+
+class CreneauSchema(pa.DataFrameModel):
+    id: Series[str]
+    template_id: Series[str]
+    nom: Series[str]
+    benevole_nom: Series[str]
+    ligne: Series[int]
+    start: Series[pd.DatetimeTZDtype] = pa.Field(
+        dtype_kwargs={"unit": "ns", "tz": "UTC"}, coerce=True
+    )
+    end: Series[pd.DatetimeTZDtype] = pa.Field(
+        dtype_kwargs={"unit": "ns", "tz": "UTC"}, coerce=True
+    )