gsheet.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216
  1. import datetime
  2. import io
  3. import os
  4. from enum import Enum
  5. from urllib.parse import urlparse
  6. from uuid import uuid4
  7. import pandas as pd
  8. import requests
  9. planning_gid = "1001381542"
  10. creneau_gid = "1884137958"
  11. benevole_gid = "82247394"
  12. class ParserState(Enum):
  13. STARTING_VALUE = 0
  14. READING_VALUE = 1
  15. ESCAPING = 2
  16. CLOSING_ESCAPE = 3
  17. def split_csv_row(raw_data: str, separator: str = ",", escape: str = '"') -> list[str]:
  18. state: ParserState = ParserState.STARTING_VALUE
  19. arr = []
  20. current_item = ""
  21. for c in raw_data:
  22. if state == ParserState.STARTING_VALUE:
  23. if c == escape:
  24. state = ParserState.ESCAPING
  25. elif c == separator:
  26. arr.append("")
  27. else:
  28. state = ParserState.READING_VALUE
  29. current_item = c
  30. elif state == ParserState.READING_VALUE:
  31. if c == separator:
  32. state = ParserState.STARTING_VALUE
  33. arr.append(current_item)
  34. current_item = ""
  35. else:
  36. current_item += c
  37. elif state == ParserState.ESCAPING:
  38. if c == escape:
  39. state = ParserState.CLOSING_ESCAPE
  40. else:
  41. current_item += c
  42. elif state == ParserState.CLOSING_ESCAPE:
  43. if c == escape:
  44. state = ParserState.ESCAPING
  45. current_item += c
  46. else:
  47. state = ParserState.READING_VALUE
  48. arr.append(current_item)
  49. current_item = ""
  50. arr.append(current_item)
  51. return arr
  52. class InvalidUrlError(Exception):
  53. pass
  54. def extract_doc_uid(url: str) -> str:
  55. res = urlparse(url)
  56. if res.netloc != "docs.google.com":
  57. raise InvalidUrlError("Invalid netloc")
  58. if not res.path.startswith("/spreadsheets/d/"):
  59. raise InvalidUrlError("Invalid path")
  60. doc_id = res.path.split("/")[3]
  61. l_doc_id = len(doc_id)
  62. if l_doc_id < 32 and 50 > l_doc_id:
  63. raise InvalidUrlError("Invalid path")
  64. return doc_id
  65. def build_sheet_url(doc_id, sheet_id):
  66. return f"https://docs.google.com/spreadsheets/d/{doc_id}/export?format=csv&gid={sheet_id}"
  67. def downloadAndSave(doc_ui, sheet_gid, fname):
  68. url = build_sheet_url(doc_ui, sheet_gid)
  69. print("Downloading " + fname)
  70. rep = requests.get(url)
  71. with open(fname, "wb") as f:
  72. f.write(rep.content)
  73. def getContactDataFrame(csv_filename: str, skiprows: int = 2) -> pd.DataFrame:
  74. df_contact = pd.read_csv(csv_filename, skiprows=skiprows)
  75. column_to_drop = [name for name in df_contact.columns if "Unnamed" in name]
  76. df_contact.drop(column_to_drop, axis=1, inplace=True)
  77. # Filter out empty name
  78. df_contact = df_contact[~df_contact.Nom.isnull()]
  79. df_contact.reset_index()
  80. return df_contact
  81. def getCreneauDataFrame(csv_filename: str) -> pd.DataFrame:
  82. df_creneau = pd.read_csv(csv_filename)
  83. df_creneau.columns = ["title", "lieu", "description", "responsable", "tags"]
  84. df_creneau[df_creneau.tags.isnull()].tags = ""
  85. return df_creneau
  86. def getPlanningDataFrame(csv_filename, starting_date, skip_column=3):
  87. list_creneau = []
  88. with io.open(csv_filename, "r", encoding="utf-8") as f:
  89. datas = [split_csv_row(s.replace("\n", "")) for s in f.readlines()]
  90. def getDate(day: str) -> datetime.datetime:
  91. s = day.lower()
  92. if s.startswith("mercredi"):
  93. return starting_date + datetime.timedelta(days=-3)
  94. elif s.startswith("jeudi"):
  95. return starting_date + datetime.timedelta(days=-2)
  96. elif s.startswith("vendredi"):
  97. return starting_date + datetime.timedelta(days=-1)
  98. elif s.startswith("samedi"):
  99. return starting_date
  100. elif s.startswith("dimanche"):
  101. return starting_date + datetime.timedelta(days=1)
  102. elif s.startswith("lundi"):
  103. return starting_date + datetime.timedelta(days=2)
  104. raise KeyError("This day is not valid : " + s)
  105. def getTime(time_str: str) -> datetime.timedelta:
  106. splitted_time = time_str.split("h")
  107. hours = int(splitted_time[0])
  108. if hours < 5:
  109. hours += 24
  110. if len(splitted_time) > 1 and splitted_time[1] != "":
  111. return datetime.timedelta(hours=hours, minutes=int(splitted_time[1]))
  112. else:
  113. return datetime.timedelta(hours=hours)
  114. def getStartEnd(time_str: str) -> tuple:
  115. pair = time_str.split("-")
  116. if len(pair) == 2:
  117. return getTime(pair[0]), getTime(pair[1])
  118. else:
  119. start = getTime(time_str.split("+")[0])
  120. return start, start + datetime.timedelta(hours=1)
  121. # Parse headers
  122. headers = datas[skip_column : skip_column + 2]
  123. days_str = headers[0]
  124. hours = headers[1]
  125. column_to_dates: dict[int, dict[str, datetime.datetime]] = {}
  126. for i in range(skip_column, len(days_str)):
  127. day = getDate(days_str[i])
  128. start, end = getStartEnd(hours[i])
  129. column_to_dates[i] = {"start": day + start, "end": day + end}
  130. list_creneau: list[dict] = []
  131. for i in range(5, len(datas)):
  132. row = datas[i]
  133. current_benevole_name = ""
  134. current_time: dict[str, datetime.datetime] = {}
  135. for j in range(skip_column, len(row)):
  136. if (current_benevole_name != "") and (
  137. row[j] == "" or row[j] != current_benevole_name
  138. ):
  139. new_creneau = {
  140. "id": uuid4(),
  141. "template_id": row[0],
  142. "nom": row[1],
  143. "benevole_nom": current_benevole_name,
  144. "ligne": i + 1,
  145. **current_time,
  146. }
  147. list_creneau.append(new_creneau)
  148. current_benevole_name = ""
  149. current_time = {}
  150. if row[j] != "":
  151. current_benevole_name = row[j]
  152. if len(current_time.keys()) == 0:
  153. current_time = column_to_dates[j].copy()
  154. else:
  155. current_time["end"] = column_to_dates[j]["end"]
  156. if current_benevole_name != "":
  157. new_creneau = {
  158. "id": uuid4(),
  159. "template_id": row[0],
  160. "nom": row[1],
  161. "benevole_nom": current_benevole_name,
  162. "ligne": i + 1,
  163. **current_time,
  164. }
  165. list_creneau.append(new_creneau)
  166. print(f"{len(list_creneau)} créneaux trouvés")
  167. return pd.DataFrame.from_dict(list_creneau)
  168. def parseGsheet(doc_uuid: str, saturday_date: datetime.datetime):
  169. suffix = "_2023"
  170. fname_planning = f"./planning{suffix}.csv"
  171. fname_creneau = f"./creneau{suffix}.csv"
  172. fname_contact = f"./benevole{suffix}.csv"
  173. downloadAndSave(doc_uuid, planning_gid, fname_planning)
  174. downloadAndSave(doc_uuid, creneau_gid, fname_creneau)
  175. downloadAndSave(doc_uuid, benevole_gid, fname_contact)
  176. df_contact = getContactDataFrame(fname_contact)
  177. df_contact["key"] = df_contact["Prénom"] + " " + df_contact.Nom.str.slice(0, 1)
  178. df_creneau = getCreneauDataFrame(fname_creneau)
  179. df_planning = getPlanningDataFrame(fname_planning, saturday_date)
  180. os.remove(fname_planning)
  181. os.remove(fname_creneau)
  182. os.remove(fname_contact)
  183. return df_contact, df_creneau, df_planning