exctractGsheet.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  1. import requests
  2. import pandas as pd
  3. import datetime
  4. import io
  5. from uuid import uuid4
  6. import ics
  7. from typing import Tuple
  8. from utils import split_csv_row
  9. doc_uuid = "1iSA5LsNdarYqrZRxE76lE2VkEsoQksIu6rFG7VAfCNQ"
  10. planning_gid = "1001381542"
  11. creneau_gid = "1884137958"
  12. benevole_gid = "2065754071"
  13. def build_sheet_url(doc_id, sheet_id):
  14. return f"https://docs.google.com/spreadsheets/d/{doc_id}/export?format=csv&gid={sheet_id}"
  15. def downloadAndSave(doc_ui, sheet_gid, fname):
  16. url = build_sheet_url(doc_ui, sheet_gid)
  17. print("Downloading " + fname)
  18. rep = requests.get(url)
  19. with open(fname, "wb") as f:
  20. f.write(rep.content)
  21. def getPlanningDataFrame(csv_filename, starting_date, skip_column=3):
  22. list_creneau = []
  23. with io.open(csv_filename, "r", encoding="utf-8") as f:
  24. datas = [split_csv_row(s.replace("\n", "")) for s in f.readlines()]
  25. def getDate(day: str):
  26. s = day.lower()
  27. if s.startswith("jeudi"):
  28. return starting_date
  29. elif s.startswith("vendredi"):
  30. return starting_date + datetime.timedelta(days=1)
  31. elif s.startswith("samedi"):
  32. return starting_date + datetime.timedelta(days=2)
  33. elif s.startswith("dimanche"):
  34. return starting_date + datetime.timedelta(days=3)
  35. elif s.startswith("lundi"):
  36. return starting_date + datetime.timedelta(days=4)
  37. def getTime(time_str: str):
  38. l = time_str.split("h")
  39. hours = int(l[0])
  40. if hours < 5:
  41. hours += 24
  42. if len(l) > 1 and l[1] != "":
  43. return datetime.timedelta(hours=hours, minutes=int(l[1]))
  44. else:
  45. return datetime.timedelta(hours=hours)
  46. def getStartEnd(time_str: str):
  47. l = time_str.split("-")
  48. if len(l) == 2:
  49. return getTime(l[0]), getTime(l[1])
  50. else:
  51. start = getTime(time_str.split("+")[0])
  52. return start, start + datetime.timedelta(hours=1)
  53. # Parse headers
  54. headers = datas[skip_column : skip_column + 2]
  55. days = headers[0]
  56. hours = headers[1]
  57. column_to_dates = {}
  58. assert len(hours) == len(hours)
  59. for i in range(skip_column, len(days)):
  60. days[i] = getDate(days[i])
  61. start, end = getStartEnd(hours[i])
  62. column_to_dates[i] = {"start": days[i] + start, "end": days[i] + end}
  63. list_creneau = []
  64. for i in range(5, len(datas)):
  65. row = datas[i]
  66. current_benevole_name = ""
  67. current_time = None
  68. for j in range(skip_column, len(row)):
  69. if (current_benevole_name != "") and (
  70. row[j] == "" or row[j] != current_benevole_name
  71. ):
  72. new_creneau = {
  73. "id": uuid4(),
  74. "description_id": row[0],
  75. "nom": row[1],
  76. "benevole_nom": current_benevole_name,
  77. "ligne": i + 1,
  78. **current_time,
  79. }
  80. list_creneau.append(new_creneau)
  81. current_benevole_name = ""
  82. current_time = None
  83. if row[j] != "":
  84. current_benevole_name = row[j]
  85. if current_time is None:
  86. current_time = column_to_dates[j].copy()
  87. else:
  88. current_time["end"] = column_to_dates[j]["end"]
  89. if current_benevole_name != "":
  90. new_creneau = {
  91. "id": uuid4(),
  92. "nom": row[1],
  93. "benevole_nom": current_benevole_name,
  94. **current_time,
  95. "ligne": i + 1,
  96. }
  97. list_creneau.append(new_creneau)
  98. print(f"{len(list_creneau)} créneaux trouvés")
  99. return pd.DataFrame.from_dict(list_creneau)
  100. def getContactDataFrame(csv_filename: str, skiprows: int = 2) -> pd.DataFrame:
  101. df_contact = pd.read_csv(csv_filename, skiprows=skiprows)
  102. column_to_drop = [name for name in df_contact.columns if "Unnamed" in name]
  103. df_contact.drop(column_to_drop, axis=1, inplace=True)
  104. # Filter out empty name
  105. df_contact = df_contact[~df_contact.Nom.isnull()]
  106. df_contact.reset_index()
  107. return df_contact
  108. def getCreneauDataFrame(csv_filename: str) -> pd.DataFrame:
  109. df_creneau = pd.read_csv(csv_filename)
  110. df_creneau.columns = ["nom", "lieu", "description", "responsable"]
  111. return df_creneau
  112. def getDataFrameFromGsheet(
  113. doc_uuid: str,
  114. planning_id="1001381542",
  115. creneau_id="1884137958",
  116. benevole_id="2065754071",
  117. suffix="",
  118. starting_date=datetime.datetime(2021, 9, 8),
  119. ) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
  120. fname_planning = f"./data/planning{suffix}.csv"
  121. fname_creneau = f"./data/creneau{suffix}.csv"
  122. fname_contact = f"./data/benevole{suffix}.csv"
  123. downloadAndSave(doc_uuid, planning_id, fname_planning)
  124. downloadAndSave(doc_uuid, creneau_id, fname_creneau)
  125. downloadAndSave(doc_uuid, benevole_id, fname_contact)
  126. return (
  127. getPlanningDataFrame(fname_planning, starting_date),
  128. getCreneauDataFrame(fname_creneau),
  129. getContactDataFrame(fname_contact),
  130. )